#(1)
library(dagitty)

# Causal graph for exercise (1). The edge list is handed to dagitty() as a
# single specification string.
dag <- dagitty("dag {
  X1 -> Y
  X2 -> X1
  X2 -> Y
  U -> X2
  U -> Y
  V -> X1
}")

# Plot layout: one (x, y) position per node, keyed by node name.
node_x <- c(X1 = 1, Y = 2, X2 = 1, V = 0, U = 2)
node_y <- c(X1 = 1, Y = 1, X2 = 0, V = 1, U = 0)
coordinates(dag) <- list(x = node_x, y = node_y)

plot(dag)

# List every path connecting X1 and Y. The result is stored under the name
# `paths`, which shadows the dagitty function of the same name, so the call
# is namespace-qualified to keep the right-hand side unambiguous.
paths <- dagitty::paths(dag, from = "X1", to = "Y")
paths


#(2)
# Data for exercise (2), one entry per unit (n = 20):
#   Y1, Y0 - potential outcomes with and without treatment
#   D1, D0 - potential treatment status when Z == 1 and when Z == 0
#   Z      - binary instrument
#   X      - binary covariate
#
# The columns are defined as ordinary vectors and then collected into `df`,
# instead of calling attach(df). attach() modifies the search path: running
# the script twice stacks a second copy, triggers masking messages, and the
# attached copies silently go stale if `df` is modified later. Plain vectors
# keep every name used further down (Y1, Y0, D1, D0, Z, X) available
# without those side effects.
Y1 <- c(20, 30, 19, 17, 28, 22, 29, 22, 21, 21,
        25, 24, 21, 25, 10, 35, 22, 31, 21, 33)
Y0 <- c(19, 28, 20, 14, 22, 22, 27, 23, 22, 19,
        19, 24, 20, 22, 11, 23, 20, 30, 20, 25)
D1 <- c(1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
        0, 1, 1, 0, 1, 1, 1, 1, 0, 1)
D0 <- c(1, 1, 1, 0, 0, 0, 1, 0, 1, 1,
        0, 1, 1, 0, 1, 1, 0, 0, 1, 1)
Z <- c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
X <- c(1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       0, 1, 0, 0, 1, 1, 0, 1, 0, 1)

# Column names are taken from the argument names: Y1, Y0, D1, D0, Z, X.
df <- data.frame(Y1, Y0, D1, D0, Z, X)

# (a) Average effect over all units: mean of the unit-level differences.
mean(Y1 - Y0)

# (b) Average effect among the treated. The realised treatment D equals D1
#     for units with Z == 1 and D0 for units with Z == 0.
D <- D1 * Z + D0 * (1 - Z)
mean((Y1 - Y0)[D == 1])

# (c) Average effect among the untreated.
mean((Y1 - Y0)[D == 0])

# (d) Shares of the four principal strata defined by (D1, D0).
mean(D1 == 1 & D0 == 1)  # treated under both values of Z (always-takers)
mean(D1 == 1 & D0 == 0)  # treated only when Z == 1 (compliers)
mean(D1 == 0 & D0 == 1)  # treated only when Z == 0 (defiers)
mean(D1 == 0 & D0 == 0)  # never treated (never-takers)

# (e) Average effect among units with D1 == 1 and D0 == 0.
mean((Y1 - Y0)[D1 == 1 & D0 == 0])

# (f) Average effect among the treated with X == 1.
mean((Y1 - Y0)[D == 1 & X == 1])

# (g) Average effect among the treated with X == 0.
mean((Y1 - Y0)[D == 1 & X == 0])

# (h) Graphical check of whether the potential outcomes are independent of
#     the realised treatment D, marginally and conditional on X.
#     Every histogram gets an explicit title so the panels can be told apart
#     when paging through the plot history.

# Does (Y1, Y0) independent of D hold? Compare the distribution of each
# potential outcome between treated (D == 1) and untreated (D == 0) units.
hist(Y1[D == 1], main = "Y1 | D = 1", xlab = "Y1")
hist(Y1[D == 0], main = "Y1 | D = 0", xlab = "Y1")

hist(Y0[D == 1], main = "Y0 | D = 1", xlab = "Y0")
hist(Y0[D == 0], main = "Y0 | D = 0", xlab = "Y0")

# Does (Y1, Y0) independent of D given X hold? Conditional independence has
# to hold within every level of X, so the comparison is repeated for both
# X == 1 and X == 0 (not only for X == 1).
for (x_level in c(1, 0)) {
  in_stratum <- X == x_level

  hist(Y1[D == 1 & in_stratum],
       main = paste0("Y1 | D = 1, X = ", x_level), xlab = "Y1")
  hist(Y1[D == 0 & in_stratum],
       main = paste0("Y1 | D = 0, X = ", x_level), xlab = "Y1")

  hist(Y0[D == 1 & in_stratum],
       main = paste0("Y0 | D = 1, X = ", x_level), xlab = "Y0")
  hist(Y0[D == 0 & in_stratum],
       main = paste0("Y0 | D = 0, X = ", x_level), xlab = "Y0")
}
# Here you could eyeball the distributions or conduct more rigorous
# statistical tests.
# The problem is that for n = 20 the power of these tests is very small
# anyway.


# (i) Relevance of the instrument: correlation between the realised
#     treatment D and the instrument Z.
cor(D, Z)
# The instrument appears weak.
cor.test(D, Z)
# Large uncertainty due to the small sample size.

# (j) No: the proportion of defiers (treated only when Z == 0) is > 0.
mean(D0 == 1 & D1 == 0)

# (k) Distribution of the unit-level effects.
hist(Y1 - Y0)

# (l) Observed outcome (Y1 for treated units, Y0 for untreated units) and the
#     naive difference in mean outcomes between the two groups.
Y <- D * Y1 + (1 - D) * Y0
mean(Y[D == 1]) - mean(Y[D == 0])

# (m) Two-stage least squares by hand: regress D on the instrument, then
#     regress Y on the first-stage prediction of D.
# NOTE(review): the standard errors printed by the second-stage summary()
# treat hatD as observed data and are therefore not valid for inference;
# only the coefficient on hatD should be read off here.
hatD <- predict(lm(D ~ Z))
summary(lm(Y ~ hatD))

# Same procedure with X included in both stages.
hatD <- predict(lm(D ~ Z + X))
summary(lm(Y ~ hatD + X))

# (n) Stratify on X: treated-minus-untreated difference in mean outcomes
#     within each level of X, averaged with weights equal to the share of
#     treated units falling into each level.
diff_high <- mean(Y[X == 1 & D == 1]) - mean(Y[X == 1 & D == 0])
diff_low <- mean(Y[X == 0 & D == 1]) - mean(Y[X == 0 & D == 0])
n_treated <- sum(D == 1)
w_high <- sum(D == 1 & X == 1) / n_treated
w_low <- sum(D == 1 & X == 0) / n_treated
w_high * diff_high + w_low * diff_low