library(readxl)
library(vegan)
library(ggplot2)
library(ggpubr)
library(psych)
library(tidyverse)
library(FD)
library(geepack)

### 1 
# Data import
chiro.spe<-read_xlsx("data/svratka_chironomids/chironomids.xlsx")
chiro.spe<-data.frame(chiro.spe, row.names = 1)[,-1] #Conversion to a data frame to use row names
chiro.spe[is.na(chiro.spe)]<-0 #Replacement of empty values by zeros

chiro.env<-read_xlsx("data/svratka_chironomids/chironomids.xlsx", sheet=2)
chiro.env<-data.frame(chiro.env, row.names = 1)[,-1] #Conversion to a data frame to use row names

table(rownames(chiro.spe)==rownames(chiro.env))# Check of rownames between sp and env tables

#1a
div.df<-tibble(
rich=specnumber(chiro.spe), # Calculation of  species richness
sha=diversity(chiro.spe, index="shannon"), # Shannon index
eve=diversity(chiro.spe, index="shannon")/log(rich), # Pielou evennes
simp=diversity(chiro.spe, index="simp"), # Simpson index (1-D)
simp.i=diversity(chiro.spe, index="invsimp") # Simpson index (1/D)
)
cor(div.df)# Simple correlation matrix among the indices
pairs.panels(div.df)# Pair plot showing the correlations in detail 

chiro.env<-data.frame(chiro.env, div.df)


#1b
# Comparison of biodiversity indices between pools and riffles using boxplots
p.rich<-ggplot(chiro.env, aes(x=hydr, y=rich))+geom_boxplot()+theme_classic()
p.rich
p.sha<-ggplot(chiro.env, aes(x=hydr, y=sha))+geom_boxplot()+theme_classic()
p.sha
p.eve<-ggplot(chiro.env, aes(x=hydr, y=eve))+geom_boxplot()+theme_classic()
p.eve

#Combining the three plots in one
ggarrange(p.rich, p.sha, p.eve, ncol=3)


###2
#2a
sac.coll<-specaccum(chiro.spe, method="collector") # Empirical accumulation curve 
sac.coll.r<-specaccum(chiro.spe[dim(chiro.spe)[1]:1,], method="collector")# ... with sample order reversed  
sac.rand<-specaccum(chiro.spe, method="random") # Randomized accumulation curve
rar<-specaccum(chiro.spe, method="exact") # Rarefaction (sample-based)
sac.fmm.r<-fitspecaccum(chiro.spe, model="mic", method="exact")#Michaelis-Menten fit of SAC based on rarefaction 

# Creating a data frame with values for curve plotting
sac.df<-data.frame(sites=sac.coll$sites,
sac.coll=sac.coll$richness,
sac.coll.r=sac.coll.r$richness,
sac.rand=sac.rand$richness,
sac.rand.sd=sac.rand$sd,
rar=rar$richness,
rar.sd=rar$sd,
sac.fmm.r=sac.fmm.r$richness,
sac.fmm.r.sd=sac.fmm.r$sd)


# Plotting SACs with ggplot
# Comparison of the empirical curves to randomized curve
# Note the 2x multiplication of SD for confidence intervals
ggplot(sac.df, aes(x = sites))+
  geom_line(aes(y = sac.coll), linewidth = 1)+ # empirical accumulation curve
  geom_line(aes(y = sac.coll.r), color = 6, linewidth = 1)+ # empirical accumulation curve, reversed order
  geom_line(aes(y = sac.rand), color = 2, linewidth = 1)+ # randomized accumulation curve
  geom_ribbon(aes(ymin = sac.rand-2*sac.rand.sd, ymax = sac.rand+2*sac.rand.sd), fill = 2, alpha = 0.3)+ # CI for randomized accumulation curve
  labs(x = "Number of sites", y = "Number of species")+theme_classic()

# Comparison of the randomized SAC and rarefaction
ggplot(sac.df, aes(x = sites))+
  geom_line(aes(y = sac.rand), color = 2, linewidth = 1)+ # randomized accumulation curve
  geom_ribbon(aes(ymin = sac.rand-2*sac.rand.sd, ymax = sac.rand+2*sac.rand.sd), fill = 2, alpha = 0.3)+ # CI for randomized accumulation curve
  geom_line(aes(y = rar), color = 3, linewidth = 1)+ # rarefaction curve
  geom_ribbon(aes(ymin = rar-2*rar.sd, ymax = rar+2*rar.sd), fill = 3, alpha = 0.2)+ # sd for randomized accumulation curve
    labs(x = "Number of sites", y = "Number of species")+theme_classic()

# Comparison of the randomized curve with fitted rarefaction curves
ggplot(sac.df, aes(x = sites))+
  geom_line(aes(y = sac.rand), color = 2, linewidth = 1)+ # randomized accumulation curve
  geom_ribbon(aes(ymin = sac.rand-2*sac.rand.sd, ymax = sac.rand+2*sac.rand.sd), fill = 2, alpha = 0.2)+ # sd for randomized accumulation curve
  geom_line(aes(y = sac.fmm.r), color = 4, linewidth = 1)+ # Fitted accumulation curve
  labs(x = "Number of sites", y = "Number of species")+theme_classic()


#2b.
# Selecting the pools and riffles datasets
chiro.p<-chiro.spe[chiro.env$hydr=="pool",]
chiro.r<-chiro.spe[chiro.env$hydr=="riffle",]

# Computation of rarefaction curves
r.p<-specaccum(chiro.p, method="exact")
r.r<-specaccum(chiro.r, method="exact")

p.df<-data.frame(r.p=r.p$richness, p.sd=r.p$sd)
r.df<-data.frame(r.r=r.r$richness, r.sd=r.r$sd)

ggplot(p.df, aes(x = 1:length(r.p)))+
  geom_line(aes(y = r.p), color = 2, linewidth = 1)+ # randomized accumulation curve
  geom_ribbon(aes(ymin = r.p-2*p.sd, ymax = r.p+2*p.sd), fill = 2, alpha = 0.2)+ # sd for randomized accumulation curve
  geom_line(data=r.df, aes(y = r.r, x=1:length(r.r)), color = 3, linewidth = 1)+ # randomized accumulation curve
  geom_ribbon(data=r.df, aes(x=1:length(r.r), ymin = r.r-2*r.sd, ymax = r.r+2*r.sd), fill = 3, alpha = 0.2)+ # sd for randomized accumulation curve
  labs(x = "Number of sites", y = "Number of species")+theme_classic()
  

### Task 3 
# Data import 
dg.spe<-read.csv("data/CNPD/dry.grass_spe.csv", row.names = 1)
dg.area<-read.csv("data/CNPD/dry.grass_plot.areas.csv", row.names = 1)

dg.area$sp.rich<-specnumber(dg.spe) #Computation of species richness 
ggplot(dg.area, aes(x=Releve.area..m2., y=sp.rich))+geom_point()+
  labs(x="Plot area", y="Number of species")+
  theme_classic()

ggplot(dg.area, aes(x=Releve.area..m2., y=sp.rich))+geom_point()+
  labs(x="Plot area", y="Number of species")+
  theme_classic()+scale_x_log10()+scale_y_log10()


lm.1<-lm(log(sp.rich)~log(Releve.area..m2.), data=dg.area)# Linear model with log-log transformation
summary(lm.1) # Summary of the model

cfs<-coef(lm.1)#  Model coefficients, i.e., log(c) and z
cfs
 # (Intercept) log(dg.area$Releve.area..m2.) 
 #                   3.22422712                    0.08688423 

sar<-data.frame(area=1:500, sp=exp(cfs[1])*(1:500)^cfs[2])
ggplot(dg.area, aes(x=Releve.area..m2., y=sp.rich))+geom_point()+
  labs(x="Plot area", y="Number of species")+
  theme_classic()+scale_x_log10()+scale_y_log10()+
  geom_line(data=sar, aes(x=area, y=sp))

ggplot(dg.area, aes(x=Releve.area..m2., y=sp.rich))+geom_point()+
  labs(x="Plot area", y="Number of species")+
  theme_classic()+
    geom_line(data=sar, aes(x=area, y=sp))


# Correction of richness to standard plot area (16 m2) and comparison between corrected and raw richness
dg.area$rich.corr<-dg.area$sp.rich*(16/dg.area$Releve.area..m2.)^cfs[2]
ggplot(dg.area, aes(x=Releve.area..m2., y=sp.rich))+geom_point()+
  geom_point(aes(x=Releve.area..m2., y=rich.corr), col=2)+
  geom_segment(data=dg.area%>%filter(Releve.area..m2.!=16), aes(x=Releve.area..m2., y=sp.rich, yend=rich.corr), arrow=arrow(length=unit(0.2, "cm")))+
  labs(x="Plot area", y="Number of species")+ 
  theme_classic()

ggplot(dg.area, aes(x=Releve.area..m2., y=sp.rich))+geom_point()+
  geom_point(aes(x=Releve.area..m2., y=rich.corr), col=2)+
  geom_segment(data=dg.area%>%filter(Releve.area..m2.!=16), aes(x=Releve.area..m2., y=sp.rich, yend=rich.corr), arrow=arrow(length=unit(0.2, "cm")))+
  labs(x="Plot area", y="Number of species")+ 
  theme_classic()+scale_x_log10()+scale_y_log10()


### Task 4
# Importing the data
herbivory.spe<-read.csv("data/Herbivory/Herbivory_Plots.csv")
herbivory.traits<-read.csv("data/Herbivory/Herbivory_Traits.csv")
rownames(herbivory.traits)<-herbivory.traits$Species
# herbivory.traits<-herbivory.traits[,-1]

# Wide table with total leaf biomass
herbivory.spe.w<-pivot_wider(herbivory.spe %>% select(!c(LeafBiomassEaten)), 
                             values_from=LeafBiomassTotal,names_from = Species, 
                             values_fill = 0, names_sort=T)
herbivory.spe.w<-data.frame(herbivory.spe.w, row.names=1, check.names = F)

# Checking the agreement of species names in community and trait matrix  
rownames(herbivory.traits)==colnames(as.matrix(herbivory.spe.w))

## a. Herbivory and trait CWMs
# Computation of CWMs for quantitative traits; note that:
# x = trait data frame
# a = community matrix - this really must be a matrix, which can be converted from a data frame by as.matrix()
# Number of rows in x must be equal to number of columns in a
# Row names of x must be equal to column names of a
herbivory.cwms<-functcomp(x=herbivory.traits[,-c(1,2)], 
                          a=as.matrix(herbivory.spe.w))

# Computation of leaf biomass eaten and leaf biomass total per plot
lb.eaten<-aggregate(LeafBiomassEaten~Plot.ID, data=herbivory.spe, sum) %>% select(2)
lb.total<-aggregate(LeafBiomassTotal~Plot.ID, data=herbivory.spe, sum)%>% select(2)

# Computing community proportion of herbivory (Relative herbivory) and 
# saving it to the herb com dataframe together with CWMs
herb.com<-data.frame(lb.eaten/lb.total, herbivory.cwms)
names(herb.com)[1]<-"RelHerbivory"
  
# Exploration of the data
pairs.panels(herb.com, gap=0, cex.cor = 1.5, scale = T)
pairs.panels(herb.com %>% mutate(RelHerbivory=log(RelHerbivory)), gap=0, scale = T)

# Fitting a model based on stepwise selection
lm.0<-lm(log(RelHerbivory)~+1, data=herb.com)
form.full<-as.formula(herb.com)
lm.step<-step(lm.0, scope = form.full)
lm.step<-update(lm.step, .~.-LDMC)
summary(lm.step)
plot(lm.step)

## b. Herbivory and the proportions of functional groups
herbivory.spe.2<-left_join(herbivory.spe, herbivory.traits[,1:2])

# Computing Total leaf biomass sum per functional group and plot
biomass.FG<-herbivory.spe.2 %>% group_by(Plot.ID,FG) %>% summarize(sumBiomass=sum(LeafBiomassTotal)) 
# Conversion to wide format
biomass.FG<-biomass.FG %>% pivot_wider(names_from = FG, values_from = sumBiomass,
                           values_fill = 0)
# And back to long - to include zero values
biomass.FG<-biomass.FG %>% pivot_longer(cols=2:5, values_to = "sumBiomass")

# Extracting herbivory values from community-level data (cwms), 
# copying row names to a column for joining
herb.com.2<- data.frame(Plot.ID=as.numeric(row.names(herb.com)), RelHerbivory = herb.com$RelHerbivory)
# Join biomass FG and plot-level herbivory
herbivory.FG<-left_join(biomass.FG, herb.com.2)
names(herbivory.FG)[2]<-"FG"
herbivory.FG$Plot.ID<-as.factor(herbivory.FG$Plot.ID)


# Plotting herbivory vs. leaf biomass of functional groups 
library(ggpmisc) # this is needed for the stat_poly_eq function

ggplot(herbivory.FG, aes(x=sumBiomass, y=RelHerbivory, col=FG, fill=FG))+
  geom_point()+
  geom_smooth(, method="lm")+
  scale_x_sqrt()+
  scale_y_log10()+
  stat_poly_eq(
    aes(label = paste(..p.value.label.., sep = "~~~")),
    formula = y ~ x,
    parse = T, 
    geom = "text_npc",        # Use normalized coordinates
    npcx = 0.9, npcy = c(1,0.95, 0.9,0.85) ,  # Position above the plot (centered)
    hjust = 0.5)+
  xlab("Functional Group Biomass")+
  ylab("Relative herbivory")+
  theme_bw()


### Comparative analysis
# Fitting the generalizad estimating equations model (GEE) assuming
# binomial response and independent correlation structure within plots
gee.0<-geeglm(cbind(LeafBiomassEaten, LeafBiomassTotal-LeafBiomassEaten)~Species, id=Plot.ID, 
          data=herbivory.spe, 
           family="binomial", 
           corstr="independence")
summary(gee.0)
anova(gee.0)

# Fitted values from the GEE model
library(emmeans)
fit.v<-emmeans(gee.0, specs=~Species, type="response")

# Plotting of the fitted values with 95% CIs
plot(fit.v)+
  xlab("Proportion of leaf area consumed")+
  theme_classic()

herbivory.fit<-summary(fit.v) # Saving the fitted values in a data frame
herbivory.fit$prob # This is the fitted value of proportion of leaf area eaten

# Joining the fitted values of herbivory with traits - joining is done by the Species variable, 
# which is the only common to both data frames
herbivory.comparative<-left_join(herbivory.fit, herbivory.traits)
names(herbivory.comparative)[2]<-"Herbivory"

herbivory.comparative.2<-herbivory.comparative[, c(2, 8:17)]# Selecting of the columns for analysis

# Pair plot with untransformed values
pairs.panels(herbivory.comparative.2, scale=T, gap=0, cex.cor=1.5)

# Pair plot with log-transformed herbivory values
pairs.panels(herbivory.comparative.2 %>% mutate(Herbivory=log(Herbivory))
             , scale=T, gap=0, cex.cor=1.5)

# Pair plot with sqrt-transformed herbivory values
pairs.panels(herbivory.comparative.2 %>% mutate(Herbivory=sqrt(Herbivory))
             , scale=T, gap=0, cex.cor=1.5)

# Multiple regression with log-transformed herbivory values
lm.0<-lm(log(Herbivory)~+1, data=herbivory.comparative.2)
form.full<-as.formula(herbivory.comparative.2)
lm.step<-step(lm.0, scope = form.full)
summary(lm.step)
lm.step<-update(lm.step, .~.-N-LDMC)
summary(lm.step)
plot(lm.step)


# Multiple regression with sqrt-transformed herbivory values
lm.0<-lm(sqrt(Herbivory)~+1, data=herbivory.comparative.2)
form.full<-as.formula(herbivory.comparative.2)
lm.step<-step(lm.0, scope = form.full)
summary(lm.step)
lm.step<-update(lm.step, .~.-N-C)
summary(lm.step)
plot(lm.step)

### Correlation of herbivory with Functional groups
names(herbivory.comparative)
herbivory.comparative.3<-herbivory.comparative[,c(2, 7)]# Selection of relevant columns 

ggplot(herbivory.comparative.3, aes(x=FG, y=Herbivory))+geom_boxplot()+
# scale_y_log10()+
scale_y_sqrt()+
    theme_classic()

# One-way ANOVA of functional group effect
aov.1<-aov(sqrt(Herbivory)~FG, data=herbivory.comparative.3)
summary(aov.1)

TukeyHSD(aov.1) #Graminoids are significantly lower than the rest. 
#No significant differences between the other groups