### BOOTSTRAP Imaginemos que mi muestra original es: x<-c(10, 15, 25, 37, 48, 23, 44, 19, 32, 20) set.seed(30) #para poder reproducir el mismo resultado indices<-sample(1:10, replace=T) indices [1] 1 5 4 5 4 2 9 3 10 2 x.asterisco<-x[indices] x.asterisco [1] 10 48 37 48 37 15 32 25 20 15 # EXAMPLE # GENERATE 10 values de una N(0,1), population mean=0, variance =1, we use the sample mean as an estimator of \mu, the variance of the sample mean is sigma^2/10= 0.1 > sqrt(0.1) # true standard error of the sample mean [1] 0.3162278 Si genero 100 valores: 1/100=0.01 sqrt(0.01) 0.1 # valor verdadero set.seed(10) n<-10 muestra.original<-rnorm(n) muestra.original > muestra.original [1] 0.01874617 -0.18425254 -1.37133055 -0.59916772 0.29454513 0.38979430 [7] -1.20807618 -0.36367602 -1.62667268 -0.25647839 #Con resultados teoricos: sqrt(var(muestra.original)/n) #standard error of the mean when using a particular sample [1] 0.2213258 # con una muestra de tamańo 10 # NOW BOOTSTRAP APPROXIMATION set.seed(10) n<-10 muestra.original<-rnorm(n) B<-200 muestras.bootstrap<-matrix(0,B,n) estadistico.boot<-array(0,B) i<-1 while (i < (B+1) ){ muestras.bootstrap[i,]<-sample(muestra.original,replace=T) estadistico.boot[i]<-mean(muestras.bootstrap[i,]) i<-i+1} error.estandar<-sd(estadistico.boot) error.estandar > error.estandar [1] 0.2059542 > error.estandar [1] 0.1019820 #con una muestra de tamańo 100 y B=100 replicaciones bootstrap > error.estandar [1] 0.09722201 #con una muestra de tamańo 100 y B=200 replicaciones bootstrap # Bootstrap estimate of BIAS #Graphically hist(estadistico.boot) abline(v=mean(muestra.original), col=2, lwd=2) #Numerically sesgo=mean(estadistico.boot)-mean(muestra.original) > sesgo=mean(estadistico.boot)-mean(muestra.original) > sesgo [1] -0.02666764 #### EXAMPLE CONFIDENCE INTERVALS FOR THE MEAN Times=c(12,2,6,2,19,5,34,4,1,4,8,7,1,21,6,11,8,28,6, 4,5,1,18,9,5,1,21,1,1,5,3,14,5,3,4,5,1,3,16,2) hist(Times,prob=T) #let check the "shape" of the interarrival times mean(Times) sd(Times) lamb<-1/mean(Times) # let us superimpose an exponential distribution x<-seq(0,35,length=800) f<-lamb*exp(-lamb*x) lines(x,f,lwd=2, col=2) library(boot) times.fun <- function(data, i) { media <- mean(data[i]) # compute the mean of each bootstrap sample n <- length(i) v <- (n-1)*var(data[i])/n^2 # compute the variance of the sample mean # it is needed only for the t-bootstrap CI c(media, v) } B<-9999 set.seed(10) b.obj<-boot(Times, times.fun, R=B) plot(b.obj) boot.ci(b.obj) # Exact confidence interval lower<-2*length(Times)*mean(Times)/qchisq( 0.975, 2*length(Times)) upper<-2*length(Times)*mean(Times)/qchisq( 0.025, 2*length(Times)) intervalo<-round(c(lower,upper),3) intervalo # To calculate confidence intervals for the standard deviation: Times.sd <- function(data,i) { d <- data[i] SD <- sqrt(var(d)) SD } set.seed(13) B <- 9999 b.obj <- boot(Times,Times.sd,R=B) > b.obj ORDINARY NONPARAMETRIC BOOTSTRAP Call: boot(data = Times, statistic = Times.sd, R = B) Bootstrap Statistics : original bias std. error t1* 7.871402 -0.2200949 1.281300 plot(b.obj) boot.ci(b.obj, type=c("norm", "basic", "perc","bca")) BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS Based on 9999 bootstrap replicates CALL : boot.ci(boot.out = b.obj, type = c("norm", "basic", "perc", "bca")) Intervals : Level Normal Basic 95% ( 5.580, 10.603 ) ( 5.702, 10.659 ) Level Percentile BCa 95% ( 5.084, 10.041 ) ( 5.805, 10.891 ) Calculations and Intervals on Original Scale