Final Assignment Ananya Chatterjee, UCO: 459203 Solution 1 Probability Density Function: fX(x) = 1 σ √ 2π exp −(ln(x) − µ)2 2σ2 The likelihood function can be written as: L(µ|x) = n i=1 1 σ √ 2π exp −(ln(xi) − µ)2 2σ2 = 1 (σ22π) n 2 exp n i=1 −(ln(xi) − µ)2 2σ2 Log likelihood function is: l(µ|x) = − n i=1 −(ln(xi) − µ)2 2σ2 − nln2π 2 − nlnσ Score function: S(µ) = dl(µ|x) dt = n i=1 −(ln(xi) − nµ) σ2 Now we know that S(µ) = 0 for maximal likelihood estimate. Therefore, n i=1 −(ln(xi) − nµ) = 0 µ = n i=1 ln(xi) n Fisher Information: I(ˆµ) = − d2 l(µ|x) du2 = n σ2 x = c(4.856, 0.487, 0.580, 0.839, 0.721, 2.416, 0.715, 0.361, 0.703, 5.829) sigma = 1 n = length(x) loglikelihood = function(mu) { l = -((sum((log(x)-mu)^2))/(2*(sigma^2)))-((n*log(2*pi))/2)-(n*log(sigma)) return (l) } mu1 = sum(x)/n mu2 = seq(mu1-15,mu1+15,by=0.01) l_mu_x = matrix(0,length(mu2),1) #Log-likelihood over the range mu2 for(i in 1:length(mu2)) { l_mu_x[i] = loglikelihood(mu2[i]) } max_l = max(l_mu_x) #Scaled log-likelihood 1 scaled_l = l_mu_x - max_l plot(mu2,scaled_l,col="black",xlab="mu",ylab="scaled log-likelihood,quad approximation") #Quadratic Approximation quad_approx = function(fisher,mu_est,mu) { q = -(0.5*fisher*((mu-mu_est)^2)) return (q) } fisher = n/((sigma^2)) mu_est = (sum(log(x)))/n quad_approx_scaled_l = matrix(0,length(mu2),1) #Quadratic approximation over mu2 for(i in 1:length(mu2)) { quad_approx_scaled_l[i] = quad_approx(fisher,mu_est,mu2[i]) } lines(mu2,quad_approx_scaled_l,col="red") −10 −5 0 5 10 15 −1400−1000−600−200 mu scaledlog−likelihood,quadapproximation Solution 2 H0 : mean of skull length is equal to 177.568 mm H1 : mean of skull length is not equal to 177.568 mm 2 Wald Test Statistic Tw: Tw = (µh − µ0) I(µh) follows N(0, 1) µh = n i=1 xi n = mean(x) for N(µ, σ2 ) µ0 = 177.568 I(µh) = n σ2 mydata = read.table("one-sample-mean-skull-mf.txt",header = TRUE) skull_male=subset(mydata,sex=="m",select=skull.L) mu0 = 177.568 #Remove NA entrie(s) skull_male = skull_male[!is.na(skull_male)] n=length(skull_male) mu_hat = mean(skull_male) variance = var(skull_male) Tw = (mu_hat - mu0)*(sqrt(n/variance)) print(Tw) ## [1] 10.33382 #For N(0,1) print(qnorm(0.025)) ## [1] -1.959964 print(qnorm(0.975)) ## [1] 1.959964 #Critical region is (-infinity,qnorm(0.025))U(qnorm(0.975),infinity) = (-infinity,-1.96)U(1.96,infinity) #Tw=10.33382 belongs to it, we reject NULL Hypothesis Likelihood Ratio Test Statistics ULR ULR = −2 ∗ [l(µ0|x) − l(ˆµ|x)] sigma = sd(skull_male) loglikelihood = function(mu) { l = -((n/2)*(log(2*pi)))-((n/2)*(log(sigma^2)))-( (sum((skull_male-mu)^2))/(2*sigma^2) ) return (l) } #Log-likelihood at mu = mu0 loglikelihood_mu0 = loglikelihood(mu0) print(loglikelihood_mu0) ## [1] -762.6142 3 #Log-likelihood at mu = mu_hat loglikelihood_mu_hat = loglikelihood(mu_hat) print(loglikelihood_mu_hat) ## [1] -709.2203 ULR = -2*(loglikelihood_mu0 - loglikelihood_mu_hat) print(ULR) ## [1] 106.7877 #Chi square lower limit print(qchisq(0.95,1)) ## [1] 3.841459 #ULR=106.7877 belongs to (qchisq(0.95,1),infinity) = (3.841459,infinity) #We reject the NULL Hypothesis Wald Empirical Confidence Interval (dw, hw) = (ˆµ − µα/2 ∗ 1 I(ˆµ , ˆµ + µα/2 ∗ 1 I(ˆµ ) mu_alpha = qnorm(0.975) #Lower limit of confidence interval dw = mu_hat - (mu_alpha*(sqrt(variance/n))) #Upper limit of confidence interval hw = mu_hat + (mu_alpha*(sqrt(variance/n))) print(dw) ## [1] 181.1893 print(hw) ## [1] 182.8845 #Wald Empirical Confidence Interval is (dw,hw) = (181.1893,182.8845). Likelihood Ratio Confidence Interval CS1−α = µ : −2 ∗ [l(µ0|x) − l(ˆµ|x) − ψ1 2 (α)] < 0 mu = seq(180, 183, by = 0.01) mat = matrix(0,length(mu),1) for(i in 1:length(mu)) { mat[i] = -2 * (loglikelihood(mu[i]) - loglikelihood(mu_hat)) - qchisq(0.95,1) } 4 f1 = function(mu){ l = -2 * (loglikelihood(mu) - loglikelihood(mu_hat)) - qchisq(0.95,1) } #Compute lower limit of confidence interval LL = uniroot(f1,c(180, mu_hat))$root print(LL) ## [1] 181.1893 #Compute upper limit of confidence interval UL = uniroot(f1,c(mu_hat, 185))$root print(UL) ## [1] 182.8845 #Likelihood ratio confidence interval = (181.1893,182.8845) Solution 3 setwd("/home/ananya/ICT-workbench/Statistics/MV013_Final") library(ggplot2) library(png) #Read and Display PNG Image DisplayPNGImage = function(path, aspect){ require(’png’) img = readPNG(path, native=T) res = dim(img)[1:2] plot(1, 1, xlim=c(1,res[1]), ylim=c(1,res[2]), type=’n’, xaxs=’i’, yaxs=’i’, xaxt=’n’,yaxt=’n’,xlab=’’,y rasterImage(img, 1, 1, res[1], res[2]) } #Example of First Good Graph 5 1. Large amount of information represented in understandable form. 2. Eye catching colors with good contrast 3. Easy to understand #Example of Second Good Graph 1. Information given can be understood by all. 2. Eye catching colors with good contrast 3. Covers large amount of information in one picture. #Example of Third Good Graph 1. Use of soothing colors 2. Easy to understand and self explanatory. 3. Legend given is easy to understand. 6 #Example of First Bad Graph 1. Area Chart is not clear about population growth 2. Yearwise population growth could have been shown by drawing a seperate graph. #Example of Second Bad Graph 1. Legend provided is not clear on what it is measuring. 2. The colours used are not contrasting and creates confusion 7 #Example of Third Bad Graph 1. Seperate pie charts could have been drawn for income calculation 2. The colours used are not contrasting enough for clear differentiation Improvement of a Bad Example Let us the numbers from the Bad Graph 3 and draw the graph with improvements. library(grid) library(ggplot2) library(plotrix) library(gridExtra) require(’png’) image <- readPNG("South_Asia.png") mydata <- data.frame( Year = factor(c("1987","1999", "2011"), levels=c("1987","1999", "2011")), Population = c(1100, 1300, 1600) ) ggplot(data = mydata, aes(x = Year, y = Population)) + annotation_custom(rasterGrob(image, width = unit(1,"npc"), height = unit(1,"npc")), -Inf, Inf, -Inf, Inf) + geom_bar(stat = "identity", position = position_dodge(),width=0.3, fill="cyan", color="darkgreen") + xlab("Year") + ylab("Population in millions") + ggtitle("Population in South Asia") 8 0 500 1000 1500 1987 1999 2011 Year Populationinmillions Population in South Asia Pie Charts have been made for repesenting comparisons of Daily Population Income of three years slices <- c(1100, 1300, 1600) lbls <- c("1987","1999","2011") pct <- round(slices/sum(slices)*100) lbls <- paste(lbls, pct) lbls <- paste(lbls,"%",sep="") pie(slices,labels = lbls, col=rainbow(length(lbls)), main="Year Wise Population") 9 1987 28% 1999 32% 2011 40% Year Wise Population slices <- c(54, 84) lbls <- c("Under $1.25/Day", "Under $2/Day") lbls_1 <- paste(lbls, slices) lbls_1 <- paste(lbls_1,"%",sep="") p <- pie(slices, labels = lbls_1, col=rainbow(length(lbls)), main="Income in 1987") 10 Under $1.25/Day 54% Under $2/Day 84% Income in 1987 slices <- c(44, 77) lbls_2 <- paste(lbls, slices) lbls_2 <- paste(lbls_2,"%",sep="") p <- pie(slices, labels = lbls_2, col=rainbow(length(lbls)), main="Income in 1999") 11 Under $1.25/Day 44% Under $2/Day 77% Income in 1999 slices <- c(20, 65) lbls_3 <- paste(lbls, slices) lbls_3 <- paste(lbls_3,"%",sep="") p <- pie(slices, labels = lbls_3, col=rainbow(length(lbls)), main="Income in 2011") 12 Under $1.25/Day 20% Under $2/Day 65% Income in 2011 13