# Worked solutions to six statistics exercises.
# Each task reads its data set from the system clipboard: copy the data table
# (with its header row) to the clipboard before running the corresponding
# read.delim2("clipboard") call. The tasks are meant to be run one at a time.
# Console output recorded from earlier runs is kept as comments below each call.

# 1 ----
hair <- read.delim2("clipboard")
summary(hair)
library(ggplot2)
ggplot(hair, aes(x = faculty, y = hair.length)) +
  geom_boxplot()
# It is evident from the plot that 1. the variance is strongly heterogeneous
# and 2. the residual distribution is strongly skewed.
ggplot(hair, aes(x = faculty, y = hair.length)) +
  geom_boxplot() +
  scale_y_continuous(trans = "log2")
# This looks much better. Thus, we will use the log-transformation in the
# analysis.
t.test(log(hair.length) ~ faculty, data = hair)
# Welch Two Sample t-test
#
# data:  log(hair.length) by faculty
# t = -5.2944, df = 21.075, p-value = 2.967e-05
# alternative hypothesis: true difference in means between group Law and group Sci is not equal to 0
# 95 percent confidence interval:
#  -1.8933169 -0.8255749
# sample estimates:
# mean in group Law mean in group Sci
#          1.623870          2.983316

# 2 ----
## This task covers the topic of species-area relationship (SAR) - one of the
## principal ecological laws.
# See the theory here:
# https://en.wikipedia.org/wiki/Species%E2%80%93area_relationship
# The relationship is described by the non-linear equation S = c * A^z.
# Our task is to find the c and z parameters by fitting a statistical model.
# This can be done by fitting a linear regression on log-transformed data
# because, after log-transformation, the SAR becomes a linear function:
# S = c * A^z  ->  log(S) = log(c) + z * log(A),
# with log(c) being the intercept and z the slope.
sar <- read.delim2("clipboard")
summary(sar)
plot(sp ~ area, data = sar)
plot(sp ~ area, data = sar, log = "xy")
lm.1 <- lm(log(sp) ~ log(area), data = sar)
summary(lm.1)
# Call:
# lm(formula = log(sp) ~ log(area), data = sar)
#
# Residuals:
#       Min        1Q    Median        3Q       Max
# -0.271520 -0.033880  0.005651  0.072867  0.250488
#
# Coefficients:
#             Estimate Std. Error t value Pr(>|t|)
# (Intercept)  5.35276    0.09983   53.62 1.17e-14 ***
# log(area)    0.23335    0.01076   21.68 2.24e-10 ***
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Residual standard error: 0.1425 on 11 degrees of freedom
# Multiple R-squared:  0.9771, Adjusted R-squared:  0.9751
# F-statistic: 470.1 on 1 and 11 DF,  p-value: 2.244e-10

# Back-transform the fitted line to the original scale: S = exp(log(c)) * A^z.
# new.area starts at 0, where log(0) is -Inf and the predicted S is therefore 0.
new.area <- 0:200000
log_c <- coef(lm.1)[1]
z <- coef(lm.1)[2]
s.fit <- exp(log_c + z * log(new.area))
# Add the fitted curve to the log-log plot drawn above ...
lines(new.area, s.fit)
# ... and to a fresh plot on the original (untransformed) axes.
plot(sp ~ area, data = sar)
lines(new.area, s.fit)

# 3 ----
football <- read.delim2("clipboard")
summary(football)
# In this case we should perform a correlation analysis, but the data are
# clearly not suitable for Pearson correlation because, like many
# semiquantitative measures, the variables cannot have a normal distribution.
# Therefore, we can use the Spearman correlation here.
cor.test(football$league, football$aggressiveness, method = "spearman")
# Spearman's rank correlation rho
#
# data:  football$league and football$aggressiveness
# S = 1789.1, p-value = 0.136
# alternative hypothesis: true rho is not equal to 0
# sample estimates:
#        rho
# -0.3452061
#
# Warning message:
# In cor.test.default(football$league, football$aggressiveness, method = "spearman") :
#   Cannot compute exact p-value with ties

# Note the warning message here - it says that there are ties (same values
# repeated several times, which prevents perfect ordering of the values); as a
# result, the p-values are approximated.
# Conclusion: there is no significant association between league level and fan
# aggressiveness.

# 4 ----
pizza <- read.delim2("clipboard")
summary(pizza)
# Paired Wilcoxon signed rank test. The formula interface (score ~ cook) no
# longer accepts the `paired` argument in recent R versions, so the scores are
# split by cook and passed as two vectors. As with the formula interface, the
# pairing relies on the rows being in the same order within each cook.
score_by_cook <- split(pizza$score, pizza$cook)
wilcox.test(score_by_cook[[1]], score_by_cook[[2]], paired = TRUE)
# Output recorded from the original formula call (the "data:" line differs
# when the two-vector form is used):
# Wilcoxon signed rank test with continuity correction
#
# data:  score by cook
# V = 9.5, p-value = 0.4821
# alternative hypothesis: true location shift is not equal to 0
#
# Warning messages:
# 1: In wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...) :
#   cannot compute exact p-value with ties
# 2: In wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...) :
#   cannot compute exact p-value with zeroes
tapply(pizza$score, pizza$cook, mean)
# Fra Gia
# 1.5 1.8

# 5 ----
lettuce <- read.delim2("clipboard")
summary(lettuce)
# Unpaired (rank sum) test: unpaired is the default, so `paired` is not passed.
wilcox.test(taste ~ leaf.col, data = lettuce)
# Wilcoxon rank sum test with continuity correction
#
# data:  taste by leaf.col
# W = 2378, p-value = 0.001308
# alternative hypothesis: true location shift is not equal to 0

# Install coin only when it is not available yet.
if (!requireNamespace("coin", quietly = TRUE)) {
  install.packages("coin")
}
library(coin)
oneway_test(taste ~ as.factor(leaf.col), data = lettuce)
# Asymptotic Two-Sample Fisher-Pitman Permutation Test
#
# data:  taste by as.factor(leaf.col) (green, red)
# Z = 3.2398, p-value = 0.001196
# alternative hypothesis: true mu is not equal to 0

# 6 ----
books <- read.delim2("clipboard")
summary(books)
library(ggplot2)
ggplot(data = books, aes(x = Author, y = grade)) +
  geom_boxplot()
kt.1 <- kruskal.test(grade ~ Author, data = books)
kt.1
# Kruskal-Wallis rank sum test
#
# data:  grade by Author
# Kruskal-Wallis chi-squared = 9.1957, df = 3, p-value = 0.0268

### Permutation test as a suitable alternative
# oneway_test() comes from coin; load it here so this task also runs on its
# own.
library(coin)
oneway_test(grade ~ as.factor(Author), data = books)
# Asymptotic K-Sample Fisher-Pitman Permutation Test
#
# data:  grade by
#   as.factor(Author) (Dickens, Hemingway, Pushkin, Tolstoy)
# chi-squared = 9.4135, df = 3, p-value = 0.02427

# Install FSA only when it is not available yet.
if (!requireNamespace("FSA", quietly = TRUE)) {
  install.packages("FSA")
}
library(FSA)
# Post-hoc multiple comparisons
dunnTest(books$grade, books$Author)
# Dunn (1964) Kruskal-Wallis multiple comparison
# p-values adjusted with the Holm method.
#
#            Comparison           Z     P.unadj      P.adj
# 1 Dickens - Hemingway -0.50658151 0.612448485 1.00000000
# 2   Dickens - Pushkin  0.08443025 0.932714356 0.93271436
# 3 Hemingway - Pushkin  0.59101176 0.554512541 1.00000000
# 4   Dickens - Tolstoy  2.27961679 0.022630426 0.11315213
# 5 Hemingway - Tolstoy  2.78619830 0.005333024 0.03199814
# 6   Pushkin - Tolstoy  2.19518654 0.028150219 0.11260088