# GLM # priklad Potemnik skladistni rm(list=ls()) load("beetle.RData") beetle attach(beetle) proportions <- killed/population # Logisticka regrese # Vykresleni dat plot(dose,proportions,pch=20,xlab="CS2",ylab="umrtnost") # definice GLM modelu glmmod <- glm(formula = cbind(killed,population - killed) ~ dose, family = binomial(logit),data = beetle) glmmod <- glm(formula = proportions ~ dose, family = binomial(logit),weights=population,data = beetle) # alternativa summary(glmmod) # vykresleni regresni krivky t <- seq(from=min(dose),to=max(dose),length.out=100) lines(t,predict.glm(glmmod,data.frame(dose=t),type="response"),col="red") # overeni normality residui plot(glmmod,which=2) shapiro.test(residuals(glmmod)) # Probitovy model # Vykresleni dat plot(dose,proportions,pch=20,xlab="CS2",ylab="umrtnost") # definice GLM modelu glmmod <- glm(formula = cbind(killed, population - killed) ~ dose, family = binomial(probit),data = beetle) summary(glmmod) # vykresleni regresni krivky t <- seq(from=min(dose),to=max(dose),length.out=100) lines(t,predict.glm(glmmod,data.frame(dose=t),type="response"),col="red") # overeni normality residui plot(glmmod,which=2) shapiro.test(residuals(glmmod)) # komplementarni log-log # Vykresleni dat plot(dose,proportions,pch=20,xlab="CS2",ylab="umrtnost") # definice GLM modelu glmmod <- glm(formula = cbind(killed, population - killed) ~ dose, family = binomial(link="cloglog"),data = beetle) summary(glmmod) # vykresleni regresni krivky t <- seq(from=min(dose),to=max(dose),length.out=100) lines(t,predict.glm(glmmod,data.frame(dose=t),type="response"),col="red") # overeni normality residui plot(glmmod,which=2) shapiro.test(residuals(glmmod)) detach(beetle) #-------------------------------------------------------------------------------- # priklad AIDS ve VB rm(list=ls()) load("aids.RData") head(aids) attach(aids) # Poissonovska regrese time <- 1:36 # vykresleni dat plot(time,number,type="p",pch=20,ylab="number of AIDS", xlab="time",main="Poisson regression") # definice modelu ind <- which(number!=0) num <- number[ind] tim <- time[ind] glm1 <- glm(num ~ tim, family=poisson("identity"),start = c(1,1)) #glm1 <- lm(number ~ time) summary(glm1) glm2 <- glm(number ~ time, family=poisson("log")) summary(glm2) glm3 <- glm(number ~ time, family=poisson("sqrt")) summary(glm3) # vykresleni regresnich krivek #points(time,fitted(glm1),col=2) xx<-seq(0,37,length.out=100) yA.logit<-predict(glm1,list(tim=xx),type="response") lines(xx,yA.logit,col=2,lwd=2) yB.logit<-predict(glm2,list(time=xx),type="response") lines(xx,yB.logit,col=3,lwd=2) yC.logit<-predict(glm3,list(time=xx),type="response") lines(xx,yC.logit,col=4,lwd=2) legend("topleft",col=c(2,3,4),lty=c(1,1,1),lwd=c(2,2,2),legend=c("linear","log-linear","square-root")) # overeni normality residui plot(glm1,which=2) shapiro.test(residuals(glm1)) plot(glm2,which=2) shapiro.test(residuals(glm2)) plot(glm3,which=2) shapiro.test(residuals(glm3)) # x<-residuals(glm3) # library(nortest) # shapiro.test(x) # ad.test(x) # cvm.test(x) # pearson.test(x) # sf.test(x) # lillie.test(x) detach(aids) # --------------------------------------------------------------------------- # priklad vcely rm(list=ls()) load("bees.RData") head(bees) attach(bees) # vykresleni dat plot(time,number, cex = 0.75, main="Bees activity") # Poissonovska regrese m1<-glm(number~ time + I(time^2) ,data=bees,family=poisson) summary(m1) # overeni normality residui plot(m1,which=2) shapiro.test(residuals(m1)) # jaky bude stupen polynomu? # mod.opt<-step(m1,scope = list(upper = ~ time+I(time^2)+I(time^3)+I(time^4)+I(time^5)+I(time^6), lower = ~ 1)) # summary(mod.opt) # m1 <- mod.opt # anova(mod.opt,m1,test="Chisq") # vykresleni modelu plot(time,number, cex = 0.75, main="Bees activity") ab<-range(time) xx<-seq(ab[1],ab[2],length.out=100) # jeste vykreslime i interval spolehlivosti predicted.log <- predict(m1,list(time=xx),type = "link", se.fit = T) CI.L.log <- exp(predicted.log$fit - 1.96 * predicted.log$se.fit) CI.H.log <- exp(predicted.log$fit + 1.96 * predicted.log$se.fit) x<-c(xx,rev(xx)) y<-c(CI.L.log,rev(CI.H.log)) polygon(x,y,col="gray75",border="gray75") yy<-predict(m1,list(time=xx),type="response") lines(xx,yy,col="red") m1a <- update(m1,family=quasipoisson) # jeste vykreslime i interval spolehlivosti predicted.log <- predict(m1a,list(time=xx),type = "link", se.fit = T) CI.L.log <- exp(predicted.log$fit - 1.96 * predicted.log$se.fit) CI.H.log <- exp(predicted.log$fit + 1.96 * predicted.log$se.fit) x<-c(xx,rev(xx)) y<-c(CI.L.log,rev(CI.H.log)) polygon(x,y,col="gray75",border="gray75") yy<-predict(m1a,list(time=xx),type="response") lines(xx,yy,col="red") detach(bees) #----------------------------------------------------------------------- # priklad melanoma rm(list=ls()) load("melanoma.RData") melanoma attach(melanoma) # Vykresleni dat - tzv. mosaic plot plot(xtabs(Count ~ Type + Site),main="Mosaic plot") # "xtabs" vytvori kontingencni tabulku # da se testovat i klasickym Pearsonem # chisq.test(xtabs(Count ~ Type + Site)) # model pro nezavisla data m1 <- glm(Count ~ Type + Site, family=poisson, data=melanoma) # model pro zavisla m2 <- glm(Count ~ Type * Site, family=poisson, data=melanoma) # stejne jako Pearson #sum(residuals(m1, type = "pearson")^2) # uzitecna tabulka pro male kontingencni tabulky round(xtabs(residuals(m1) ~ Type + Site), 1) anova(m1, m2, test = "Chisq") # graficke porovnani plot(xtabs(fitted(m1) ~ Type + Site, data=melanoma),main="Independent data") plot(xtabs(fitted(m2) ~ Type + Site, data=melanoma),main="Full model") detach(melanoma) # ---------------------------------------------------------------------------