# Packages ----
# Loaded up front so that %>% and filter() are available to the whole script.
library(tidyverse)

# Data ----
pigs <- read.csv("pigs_pheno.csv")

# Imagine we explored our variables, cleaned the data and harmonized it.
# We did histograms and boxplots and now come to the part where we have to
# check the normality of the data. As you remember, it is one of the
# assumptions of the parametric tests, in our case the t-test.

# Let's look at the sample size.
str(pigs)

# Normality of bodyweight, all pigs ----
# Which ways do we have to check the normality?
# Histogram, Q-Q plot and a one-sample Kolmogorov-Smirnov test.
# NOTE(review): the mean and sd passed to ks.test() are estimated from the
# same sample; the ks.test() help page says the parameters must be
# pre-specified, so read these p-values as approximate.
hist(pigs$Bodyweight, breaks = 20) # not normally distributed
qqnorm(pigs$Bodyweight)
qqline(pigs$Bodyweight, col = "red", lwd = 3)
ks.test(pigs$Bodyweight, "pnorm", mean(pigs$Bodyweight), sd(pigs$Bodyweight))

# Normality of bodyweight, per diet ----
hf <- pigs %>%
  filter(Diet == "hf")
chow <- pigs %>%
  filter(Diet == "chow")

hist(hf$Bodyweight, breaks = 20) # is normally distributed
qqnorm(hf$Bodyweight)
qqline(hf$Bodyweight, col = "red", lwd = 3)
ks.test(hf$Bodyweight, "pnorm", mean(hf$Bodyweight), sd(hf$Bodyweight))

hist(chow$Bodyweight, breaks = 20) # is normally distributed
qqnorm(chow$Bodyweight)
qqline(chow$Bodyweight, col = "red", lwd = 3)
ks.test(chow$Bodyweight, "pnorm", mean(chow$Bodyweight), sd(chow$Bodyweight))

# We fail in one assumption - normality of the data.
# Let's check whether the variance is similar between the groups.
boxplot(pigs$Bodyweight ~ pigs$Diet) # it's fine

# But because we failed with normality we have two options:
# 1. log transform the data, see if we fit the t-test assumptions afterwards
#    and use the t-test, which is parametric.
# 2. use the non-parametric Mann-Whitney test.
# There is no right answer and both ways could be correct. Let's try both.

# Mann-Whitney test between diets ----
# Let's use the nonparametric Mann-Whitney test.
# First, check one of its assumptions: similar distribution shapes in our
# groups. The histograms are drawn side by side; par() returns the previous
# settings, which are restored afterwards.
old_par <- par(mfcol = c(1, 2))
hist(hf$Bodyweight, breaks = 20)
hist(chow$Bodyweight, breaks = 20)
par(old_par)

# The test itself.
wilcox.test(pigs$Bodyweight ~ pigs$Diet)
# There is a statistically significant difference in pigs bodyweight between
# hf and chow diet (p-value<0.001).

# Now let's try the log transformation.
# Log transformation and t-test between diets ----
pigs_transf <- log10(pigs$Bodyweight)

# Check the assumptions now, after transformation.
hist(pigs_transf, breaks = 20) # normally distributed
qqnorm(pigs_transf)
qqline(pigs_transf, col = "red", lwd = 3)
ks.test(pigs_transf, "pnorm", mean(pigs_transf), sd(pigs_transf))
boxplot(log10(pigs$Bodyweight) ~ pigs$Diet)

# Let's do the t-test.
t.test(log10(pigs$Bodyweight) ~ pigs$Diet) # significant as well

# Thus, we can say that there is a significant difference in the pigs weight
# between chow and hf diets, where the average weight is higher in the hf
# group.
# The interpretation of log transformed data isn't easy. The mean of the log
# values is not the same as the log of the mean, that is, we can't just unlog
# these values; in this case we deal with geometric means, not simple means.
# So, I would go to a mathematician or a more experienced statistician to
# help me with that.
# How I would report the results:
# There is a statistically significant difference in pigs bodyweight between
# high fat and chow diet (p-value<0.001).
# If we want to see the relative difference we unlog the group means from the
# output (26.91 and 29.51) and take their ratio: 29.51 / 26.91 = 1.097.
# The relative difference in pigs bodyweight between hf and chow diet is
# 1.097.

# Normality of bodyweight, per gender ----
# Between genders
female <- pigs %>%
  filter(Gender == "F")
male <- pigs %>%
  filter(Gender == "M")

hist(female$Bodyweight, breaks = 20) # is not normally distributed
qqnorm(female$Bodyweight)
qqline(female$Bodyweight, col = "red", lwd = 3)
ks.test(
  female$Bodyweight, "pnorm",
  mean(female$Bodyweight), sd(female$Bodyweight)
)

hist(male$Bodyweight, breaks = 20) # is not normally distributed
qqnorm(male$Bodyweight)
qqline(male$Bodyweight, col = "red", lwd = 3)
# The reference normal distribution must use the male group's own mean and
# sd, exactly as is done for every other group in this script.
ks.test(
  male$Bodyweight, "pnorm",
  mean(male$Bodyweight), sd(male$Bodyweight)
)

boxplot(pigs$Bodyweight ~ pigs$Gender) # it's fine

# Total fail of the normality assumptions.
# Let's perform the nonparametric Mann-Whitney test. For this test we have to
# check the similarity of the data spread between the groups.
# Mann-Whitney test between genders ----
# Draw the male and female bodyweight histograms side by side so the shapes
# of the two distributions can be compared by eye. par() returns the previous
# settings, which are restored once both plots are drawn.
old_par <- par(mfcol = c(1, 2))
hist(male$Bodyweight, breaks = 20)
hist(female$Bodyweight, breaks = 20)
par(old_par)

# The test itself: bodyweight compared between the two genders.
wilcox.test(pigs$Bodyweight ~ pigs$Gender)