#############
## Exercise 1: load the data and look at missing values
#############

titanic <- read.table("titanic.txt", header = TRUE)
dim(titanic)
summary(titanic)

# Survival counts by passenger class.
table(titanic$Survived, titanic$PClass)

# Share of missing ages within each class and within each survival group
# (margin = 2 rescales every column of the table to sum to 1).
prop.table(table(is.na(titanic$Age), titanic$PClass), margin = 2)
prop.table(table(is.na(titanic$Age), titanic$Survived), margin = 2)

# Rows without any missing value; every model below is fitted on this
# data frame so that residuals, fitted values and titanic.full columns
# always refer to the same rows.
titanic.full <- na.omit(titanic)
dim(titanic.full)

#############
## Exercise 2: descriptive plots
#############

# Column-wise proportions of survivors by gender and by class.
barplot(prop.table(table(titanic$Survived, titanic$Sex), margin = 2),
        main = "Survival by gender")
barplot(prop.table(table(titanic$Survived, titanic$PClass), margin = 2),
        main = "Survival by class")

boxplot(titanic$Age ~ titanic$Survived)

# Survival against age, coloured by gender. Both axes are jittered so that
# overlapping observations of the 0/1 outcome remain visible.
plot(jitter(titanic$Survived[titanic$Sex == "female"]) ~
       jitter(titanic$Age[titanic$Sex == "female"]),
     col = "firebrick")
points(jitter(titanic$Survived[titanic$Sex == "male"]) ~
         jitter(titanic$Age[titanic$Sex == "male"]),
       col = "navyblue")

# Survival against age by class (fill colour) and gender (plotting symbol).
class.bg <- c("1st" = "violet", "2nd" = "orange", "3rd" = "springgreen")
sex.pch <- c(female = 21, male = 22)

# Empty frame first, then one layer of points per class/gender combination,
# drawn in the order 1st/female, 1st/male, 2nd/female, ...
plot(0, 0, type = "n",
     xlab = "Age", ylab = "Survived",
     main = "Survival by age, gender and class",
     ylim = c(-0.3, 1.3), xlim = c(-0.5, 71.5))
for (cl in names(class.bg)) {
  for (sx in names(sex.pch)) {
    sel <- titanic$PClass == cl & titanic$Sex == sx
    points(jitter(titanic$Age[sel]), jitter(titanic$Survived[sel]),
           bg = class.bg[[cl]], pch = sex.pch[[sx]])
  }
}

# Distinct age values present in the data.
levels(factor(titanic$Age))

# Discretised age: everything up to 1.5 is coded as 1 and everything above
# 65 as 66; all other ages keep their value. Rows with a missing age stay NA.
titanic$Age.desc <- titanic$Age
titanic$Age.desc[titanic$Age <= 1.5] <- 1
titanic$Age.desc[titanic$Age > 65] <- 66
titanic$Age.desc <- factor(titanic$Age.desc)
levels(titanic$Age.desc)

# Number of survivors and number of passengers per age group.
survived.byage <- tapply(titanic$Survived, titanic$Age.desc, sum)
all.byage <- tapply(rep(1, nrow(titanic)), titanic$Age.desc, sum)

# Cross-check of the group counts against the number of complete rows.
summary(titanic$Age.desc)
sum(all.byage)
dim(titanic.full)

# Observed survival proportion per age group. The x positions are taken from
# the factor levels rather than a hard-coded 1:66, so the plot stays correct
# when some ages do not occur in the data.
age.levels <- as.numeric(levels(titanic$Age.desc))
plot(age.levels, survived.byage / all.byage,
     xlab = "Age", ylab = "Proportion survived")

#############
## Exercise 3: logistic regression models
#############

# Main effects only.
model.0 <- glm(Survived ~ Sex + PClass + Age,
               family = "binomial", data = titanic.full)
summary(model.0)

# Adds the Sex x PClass interaction.
model.1 <- glm(Survived ~ Sex * PClass + Age,
               family = "binomial", data = titanic.full)
summary(model.1)

# Replaces the linear age effect by a quadratic polynomial.
model.2 <- glm(Survived ~ Sex * PClass + poly(Age, 2),
               family = "binomial", data = titanic.full)
summary(model.2)

# Tests of the nested models.
anova(model.0, model.1, test = "Chisq")
anova(model.1, model.2, test = "Chisq")

#############
## Exercise 4: fitted curves and odds ratios
#############

# Fitted survival probabilities from model.1 over a grid of ages, one curve
# per class (colour) and gender (solid = female, dashed = male). lines() adds
# to whichever plot is currently active, which at this point of the script is
# the per-age survival proportion plot from Exercise 2.
# NOTE(review): the colours match the class/gender scatter plot -- confirm
# which plot these curves are meant to overlay.
xx <- seq(0, 72, length = 101)
class.col <- c("1st" = "violet", "2nd" = "orange", "3rd" = "springgreen")
sex.lty <- c(female = 1, male = 2)
for (cl in names(class.col)) {
  for (sx in names(sex.lty)) {
    lines(xx,
          predict(model.1, type = "response",
                  newdata = data.frame(Sex = sx, PClass = cl, Age = xx)),
          col = class.col[[cl]], lty = sex.lty[[sx]])
  }
}

# Exponentiated coefficients (odds ratios) and their reciprocals.
1 / exp(coef(model.0))
exp(coef(model.1))
1 / exp(coef(model.1))

# Confidence intervals on the coefficient scale and on the odds-ratio scale.
confint(model.1)
exp(confint(model.1))
1 / exp(confint(model.1))

#############
## Exercise 5: residual diagnostics
#############

# Check that response residuals equal observed minus fitted probability;
# the sum of absolute differences should be (numerically) zero.
sum(abs(residuals(model.1, type = "response") -
          (titanic.full$Survived - predict(model.1, type = "response"))))

# Default residuals of model.1 against fitted probabilities and against age,
# with a lowess smoother to reveal any remaining trend in age.
plot(residuals(model.1) ~ predict(model.1, type = "response"))
plot(residuals(model.1) ~ titanic.full$Age)
lines(lowess(residuals(model.1) ~ titanic.full$Age))

# model.1 plus an indicator for passengers younger than 18.
model.3 <- glm(Survived ~ Sex * PClass + Age + I(as.numeric(Age < 18)),
               family = "binomial", data = titanic.full)
summary(model.3)
summary(model.1)

# Same residual-versus-age diagnostics for model.3, for the default and for
# the response residuals.
plot(residuals(model.3) ~ titanic.full$Age)
lines(lowess(residuals(model.3) ~ titanic.full$Age))

plot(residuals(model.3, type = "response") ~ titanic.full$Age)
lines(lowess(residuals(model.3, type = "response") ~ titanic.full$Age))

anova(model.1, model.3, test = "Chisq")

# Standard glm diagnostic plots for model.1.
plot(model.1)