#1
# Compare hair length between faculties. The data table is pasted in from the
# system clipboard.
hair <- read.delim2("clipboard")
summary(hair)

library(ggplot2)

# Build the boxplot once; it is shown first on the raw scale and then again
# with a log-scaled y axis.
hair_boxplot <- ggplot(hair, aes(x = faculty, y = hair.length)) +
  geom_boxplot()
hair_boxplot
# It is evident from the plot that (1) the variance is strongly
# heterogeneous and (2) the residual distribution is strongly skewed.

hair_boxplot +
  scale_y_continuous(trans = "log2")
# This looks much better. Thus, we will use the log-transformation in the
# analysis. (The axis uses log2 while the test below uses the natural log; the
# base of the logarithm only rescales the values, so t and the p-value are the
# same either way.)

t.test(log(hair.length) ~ faculty, data = hair)
# 	Welch Two Sample t-test
#
# data:  log(hair.length) by faculty
# t = -5.2944, df = 21.075, p-value = 2.967e-05
# alternative hypothesis: true difference in means between group Law and group Sci is not equal to 0
# 95 percent confidence interval:
#  -1.8933169 -0.8255749
# sample estimates:
# mean in group Law mean in group Sci
#          1.623870          2.983316

#2

## This task covers the species-area relationship (SAR), one of the principal
## ecological laws.
# See the theory here: https://en.wikipedia.org/wiki/Species%E2%80%93area_relationship

# The relationship is described by the non-linear equation S = c * A^z.
# Our task is to estimate the parameters c and z by fitting a statistical model.
# This can be done with a linear regression on log-transformed data, because
# taking logs of both sides turns the SAR into a linear function:
#   S = c * A^z  ->  log(S) = log(c) + z * log(A),
# where log(c) is the intercept and z is the slope.

sar <- read.delim2("clipboard")
summary(sar)

# Raw-scale scatterplot first, then the same data on log-log axes.
plot(sp ~ area, data = sar)
plot(sp ~ area, data = sar, log = "xy")

lm.1 <- lm(log(sp) ~ log(area), data = sar)
summary(lm.1)
# Call:
# lm(formula = log(sp) ~ log(area), data = sar)
#
# Residuals:
#       Min        1Q    Median        3Q       Max
# -0.271520 -0.033880  0.005651  0.072867  0.250488
#
# Coefficients:
#             Estimate Std. Error t value Pr(>|t|)
# (Intercept)  5.35276    0.09983   53.62 1.17e-14 ***
# log(area)    0.23335    0.01076   21.68 2.24e-10 ***
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Residual standard error: 0.1425 on 11 degrees of freedom
# Multiple R-squared:  0.9771,	Adjusted R-squared:  0.9751
# F-statistic: 470.1 on 1 and 11 DF,  p-value: 2.244e-10
#
# Back on the original scale this gives z = 0.233 and
# c = exp(5.35276), i.e. roughly 211.

# Back-transform the fitted line to the original scale over a grid of areas:
# element 1 of the coefficient vector is log(c), element 2 is z.
sar_coef <- coef(lm.1)
new.area <- 0:200000
s.fit <- exp(sar_coef[1] + sar_coef[2] * log(new.area))

# Add the fitted curve to the log-log plot that is currently open ...
lines(new.area, s.fit)

# ... and then redraw the raw-scale plot with the same curve on top.
plot(sp ~ area, data = sar)
lines(new.area, s.fit)

#3
football <- read.delim2("clipboard")
summary(football)

# A correlation analysis is appropriate here, but the data are clearly not
# suitable for Pearson correlation because, like many semiquantitative
# measures, the variables cannot follow a normal distribution.
# Therefore, we use the Spearman rank correlation instead.

cor.test(football$league, football$aggressiveness, method = "spearman")
# 	Spearman's rank correlation rho
#
# data:  football$league and football$aggressiveness
# S = 1789.1, p-value = 0.136
# alternative hypothesis: true rho is not equal to 0
# sample estimates:
#        rho
# -0.3452061
#
# Warning message:
# In cor.test.default(football$league, football$aggressiveness, method = "spearman") :
#   Cannot compute exact p-value with ties

# Note the warning message: it says that there are ties (the same value
# repeated several times, which prevents a perfect ordering of the values);
# as a result, the p-value is approximated rather than exact.

# Conclusion: there is no significant association between league level and
# fan aggressiveness.


#4
# Paired comparison of pizza scores between the two cooks.
pizza <- read.delim2("clipboard")
summary(pizza)

# The formula interface of wilcox.test() no longer accepts `paired`
# (R >= 4.4.0 stops with "cannot use 'paired' in formula method"), so the
# scores are split by cook and the two vectors are passed to the default
# method. This is the same comparison the formula method used to perform
# (first group vs. second group, matched by position), so the rows must be in
# the same pairing order within each cook.
score_by_cook <- split(pizza$score, pizza$cook)
stopifnot(length(score_by_cook) == 2L)  # a paired test needs exactly two groups

wilcox.test(score_by_cook[[1]], score_by_cook[[2]], paired = TRUE)
# Output recorded from the original formula-interface run; the `data:` line
# and the call shown in the warnings now name the two split vectors, while the
# statistic and p-value are expected to be the same:
#
# 	Wilcoxon signed rank test with continuity correction
#
# data:  score by cook
# V = 9.5, p-value = 0.4821
# alternative hypothesis: true location shift is not equal to 0
#
# Warning messages:
# 1: In wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...) :
#   cannot compute exact p-value with ties
# 2: In wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...) :
#   cannot compute exact p-value with zeroes

# Mean score per cook, for reporting alongside the test.
tapply(pizza$score, pizza$cook, mean)
# Fra Gia
# 1.5 1.8

#5
# Unpaired comparison of lettuce taste between leaf colours.
lettuce <- read.delim2("clipboard")
summary(lettuce)

# Two independent samples: the rank-sum (unpaired) test is the default, so
# `paired` is not passed. The formula interface rejects a `paired` argument
# in R >= 4.4.0.
wilcox.test(taste ~ leaf.col, data = lettuce)

# 	Wilcoxon rank sum test with continuity correction
#
# data:  taste by leaf.col
# W = 2378, p-value = 0.001308
# alternative hypothesis: true location shift is not equal to 0

# Install coin only when it is missing, so re-running the script does not
# reinstall the package every time.
if (!requireNamespace("coin", quietly = TRUE)) {
  install.packages("coin")
}
library(coin)

# Fisher-Pitman permutation test as an alternative; the grouping variable
# must be a factor.
oneway_test(taste ~ as.factor(leaf.col), data = lettuce)
# Asymptotic Two-Sample Fisher-Pitman Permutation Test
#
# data:  taste by as.factor(leaf.col) (green, red)
# Z = 3.2398, p-value = 0.001196
# alternative hypothesis: true mu is not equal to 0

#6
# Compare book grades among four authors.
books <- read.delim2("clipboard")
summary(books)
library(ggplot2)
ggplot(data = books, aes(x = Author, y = grade)) + geom_boxplot()

kt.1 <- kruskal.test(grade ~ Author, data = books)
kt.1
# 	Kruskal-Wallis rank sum test
#
# data:  grade by Author
# Kruskal-Wallis chi-squared = 9.1957, df = 3, p-value =
# 0.0268

### Permutation test as a suitable alternative
# oneway_test() comes from coin; load it here so this exercise also runs on
# its own, without relying on an earlier exercise having attached the package.
library(coin)
oneway_test(grade ~ as.factor(Author), data = books)
# 	Asymptotic K-Sample Fisher-Pitman Permutation Test
#
# data:  grade by
# 	 as.factor(Author) (Dickens, Hemingway, Pushkin, Tolstoy)
# chi-squared = 9.4135, df = 3, p-value = 0.02427

# Install FSA only when it is missing, so re-running the script does not
# reinstall the package every time.
if (!requireNamespace("FSA", quietly = TRUE)) {
  install.packages("FSA")
}
library(FSA)

# Post-hoc multiple comparisons
dunnTest(books$grade, books$Author)
# Dunn (1964) Kruskal-Wallis multiple comparison
#   p-values adjusted with the Holm method.
#
#            Comparison           Z     P.unadj      P.adj
# 1 Dickens - Hemingway -0.50658151 0.612448485 1.00000000
# 2   Dickens - Pushkin  0.08443025 0.932714356 0.93271436
# 3 Hemingway - Pushkin  0.59101176 0.554512541 1.00000000
# 4   Dickens - Tolstoy  2.27961679 0.022630426 0.11315213
# 5 Hemingway - Tolstoy  2.78619830 0.005333024 0.03199814
# 6   Pushkin - Tolstoy  2.19518654 0.028150219 0.11260088
#
# After the Holm adjustment only the Hemingway - Tolstoy comparison has an
# adjusted p-value below 0.05.