################################# # Bayesian Networks (Bayes Nets) # Dr. Justin Esarey # March 10, 2014 ################################# # a standard analysis rm(list=ls()) set.seed(832980) N=1000 A<-runif(N, min=-2, max=2) B<-runif(N, min=-2, max=2) C<-0.5*A+0.5*B+0.4*runif(N) D<-(-1)*C+0.4*runif(N) dat<-data.frame(cbind(A,B,C,D)) summary(lm(A~B+C+D)) summary(lm(B~A+C+D)) summary(lm(C~A+B+D)) summary(lm(D~A+B+C)) library(bnlearn) bn.gs<-gs(x=dat, cluster = NULL, whitelist = NULL, blacklist = NULL, test = NULL, alpha = 0.05, B = NULL, debug = FALSE, optimized = TRUE, strict = FALSE, undirected = FALSE) graphviz.plot(bn.gs) my.bn.par<-bn.fit(x=bn.gs, data=dat) my.bn.par #alternative fitting procedures bn.mmhc<-mmhc(x=dat, whitelist = NULL, blacklist = NULL, alpha = 0.05, B = NULL, debug = FALSE, optimized = TRUE, strict = FALSE) graphviz.plot(bn.mmhc) bn.hc<-hc(x=dat, whitelist = NULL, blacklist = NULL, debug = FALSE, optimized = TRUE) graphviz.plot(bn.hc) # using whitelists and blacklists wl<-data.frame(from=c("A", "B"), to=c("C", "C")) bn.mmhc<-mmhc(x=dat, whitelist = wl, blacklist = NULL, alpha = 0.05, B = NULL, debug = FALSE, optimized = TRUE, strict = FALSE) graphviz.plot(bn.mmhc) wl.2<-data.frame(from=c("A"), to=c("B")) bn.mmhc<-mmhc(x=dat, whitelist = wl.2, blacklist = NULL, alpha = 0.05, B = NULL, debug = FALSE, optimized = TRUE, strict = FALSE) graphviz.plot(bn.mmhc) bl<-data.frame(from=c("B"), to=c("D")) bn.mmhc<-mmhc(x=dat, whitelist = NULL, blacklist = bl, alpha = 0.05, B = NULL, debug = FALSE, optimized = TRUE, strict = FALSE) graphviz.plot(bn.mmhc) str<-paste("D>1") str2<-paste("A>1") cpquery(my.bn.par, eval(parse(text = str2)), eval(parse(text = str))) str2<-paste("A>-1") cpquery(my.bn.par, eval(parse(text = str2)), eval(parse(text = str))) str<-paste("D") str2<-paste("A>1") D.dist<-cpdist(my.bn.par, str, eval(parse(text=str2))) plot(density(D.dist$D)) str<-paste("D") str2<-paste("A<=1") D.dist<-cpdist(my.bn.par, str, eval(parse(text=str2))) lines(density(D.dist$D), lty=2) # observational network equivalence rm(list=ls()) set.seed(832980) N=1000 # A <- B <- C C<-runif(N, min=-2, max=2) B<-0.5*C + runif(N, min=-2, max=2) A<-0.5*B + runif(N, min=-2, max=2) dat <- data.frame(A,B,C) bn.gs<-gs(x=dat, cluster = NULL, whitelist = NULL, blacklist = NULL, test = NULL, alpha = 0.05, B = NULL, debug = FALSE, optimized = TRUE, strict = FALSE, undirected = FALSE) graphviz.plot(bn.gs) # A -> B -> C rm(list=ls()) set.seed(832980) N=1000 A<-runif(N, min=-2, max=2) B<-0.5*A + runif(N, min=-2, max=2) C<-0.5*B + runif(N, min=-2, max=2) dat <- data.frame(A,B,C) bn.gs<-gs(x=dat, cluster = NULL, whitelist = NULL, blacklist = NULL, test = NULL, alpha = 0.05, B = NULL, debug = FALSE, optimized = TRUE, strict = FALSE, undirected = FALSE) graphviz.plot(bn.gs) # A <- B -> C rm(list=ls()) set.seed(832980) N=1000 B<-runif(N, min=-2, max=2) A<-0.5*B + runif(N, min=-2, max=2) C<-0.5*B + runif(N, min=-2, max=2) dat <- data.frame(A,B,C) bn.gs<-gs(x=dat, cluster = NULL, whitelist = NULL, blacklist = NULL, test = NULL, alpha = 0.05, B = NULL, debug = FALSE, optimized = TRUE, strict = FALSE, undirected = FALSE) graphviz.plot(bn.gs) # a data set with endogeneity # adapted from code by Ahra Wu rm(list=ls()) set.seed(832980) N=1000 # assigning the coefficients: you can let them vary by changing inside the bracket to "j" b.dv<-1 # default is 1, no endogeneity if =0 b.iv<-(-2) # default is -1 b.i<-1 IN<-runif(N, min=-5, max=5) IV<-c() DV<-c() # Data generating process of variable C for(i in 1:N){ mat<-matrix(data=c(1, -b.dv, -b.iv, 1), nrow=2, ncol=2, byrow=T) YY<-qr.solve(mat, b=c(b.i*IN[i]+rnorm(1, mean=0, sd=2),rnorm(1, mean=0, sd=2))) IV[i]<-YY[1] DV[i]<-YY[2] } dat<-as.data.frame(cbind(DV,IV,IN)) library(sem) summary(tsls(DV~IV, ~IN, data=dat)) library(bnlearn) bn.gs<-gs(x=dat, cluster = NULL, whitelist = NULL, blacklist = NULL, test = NULL, alpha = 0.05, B = NULL, debug = FALSE, optimized = TRUE, strict = FALSE, undirected = FALSE) graphviz.plot(bn.gs)