Getting started

The following examples provide simple illustrations of how to perform data analyses using additive Bayesian networks with abn (for installation instructions see here). The data sets used here are provided with abn. Many more examples are given at the foot of the relevant manual pages in R, e.g. see ?fitabn, ?buildscorecache, ?mostprobable, ?search.hillclimber. More realistic examples are given in case studies


  1. Fit an additive Bayesian network to data

  2. Examine the parameter estimates in additive Bayesian network

  3. Find the best fitting graphical structure for an additive Bayesian network using an exact search

  4. Find the best fitting graphical structure for an additive Bayesian network using a heuristic search


1. Fit an additive Bayesian network to data


mydat<-ex0.dag.data[,c("b1","b2","b3","g1","b4","p2","p4")];## take a subset of cols from dataset ex0.dat.data ## setup distribution list for each node mydists<-list(b1="binomial", b2="binomial", b3="binomial", g1="gaussian", b4="binomial", p2="poisson", p4="poisson" ); ## define model mydag<-matrix(data=c( 0,0,1,0,0,0,0, # b1<-b3 1,0,0,0,0,0,0, # b2<-b1 0,0,0,0,0,0,0, # 0,0,0,0,1,0,0, # g1<-b4 0,0,0,0,0,0,0, # 0,0,0,0,0,0,0, # 0,0,0,0,0,0,0 # ), byrow=TRUE,ncol=7); colnames(mydag)<-rownames(mydag)<-names(mydat); ## now fit the model to calculate its goodness of fit myres.c<-fitabn(dag.m=mydag,data.df=mydat,data.dists=mydists); print(myres.c$mlik);## log marginal likelihood goodness of fit


Examine the parameter estimates in additive Bayesian network


mydat<-ex0.dag.data[,c("b1","b2","b3","g1","b4","p2","p4")];## take a subset of cols from dataset ex0.dat.data ## setup distribution list for each node mydists<-list(b1="binomial", b2="binomial", b3="binomial", g1="gaussian", b4="binomial", p2="poisson", p4="poisson" ); ## define model mydag<-matrix(data=c( 0,0,1,0,0,0,0, # b1<-b3 1,0,0,0,0,0,0, # b2<-b1 0,0,0,0,0,0,0, # 0,0,0,0,1,0,0, # g1<-b4 0,0,0,0,0,0,0, # 0,0,0,0,0,0,0, # 0,0,0,0,0,0,0 # ), byrow=TRUE,ncol=7); colnames(mydag)<-rownames(mydag)<-names(mydat); ## now fit the model to calculate its goodness of fit myres.c<-fitabn(dag.m=mydag,data.df=mydat,data.dists=mydists,compute.fixed=TRUE); print(names(myres.c$marginals));# a list of all the posterior densities ##plot some of the marginal posterior densities - note by default all variables are standarized. par(mfrow=c(1,2)); plot(myres.c$marginals$b1[["b1|(Intercept)"]],type="b",xlab="b1|(Intercept)", main="Node b1, Intercept",pch="+",col="green"); plot(myres.c$marginals$g1[["g1|b4"]],type="b",xlab="g1|b4",main="Node g1, parameter b4",pch="+",col="orange");

Some marginal posterior densities

Marginal posterior densities


3. Find the best fitting graphical structure for an additive Bayesian network using an exact search


mydat<-ex1.dag.data;## this data comes with abn see ?ex1.dag.data ## setup distribution list for each node mydists<-list(b1="binomial", p1="poisson", g1="gaussian", b2="binomial", p2="poisson", b3="binomial", g2="gaussian", b4="binomial", b5="binomial", g3="gaussian" ); #use simple banlist with no constraints ban<-matrix(c( # 1 2 3 4 5 6 0,0,0,0,0,0,0,0,0,0, # 1 0,0,0,0,0,0,0,0,0,0, # 2 0,0,0,0,0,0,0,0,0,0, # 3 0,0,0,0,0,0,0,0,0,0, # 4 0,0,0,0,0,0,0,0,0,0, # 5 0,0,0,0,0,0,0,0,0,0, # 6 0,0,0,0,0,0,0,0,0,0, # 7 0,0,0,0,0,0,0,0,0,0, # 8 0,0,0,0,0,0,0,0,0,0, # 9 0,0,0,0,0,0,0,0,0,0 # 10 ),byrow=TRUE,ncol=10); colnames(ban)<-rownames(ban)<-names(mydat); #names must be set retain<-matrix(c( # 1 2 3 4 5 6 0,0,0,0,0,0,0,0,0,0, # 1 0,0,0,0,0,0,0,0,0,0, # 2 0,0,0,0,0,0,0,0,0,0, # 3 0,0,0,0,0,0,0,0,0,0, # 4 0,0,0,0,0,0,0,0,0,0, # 5 0,0,0,0,0,0,0,0,0,0, # 6 0,0,0,0,0,0,0,0,0,0, # 7 0,0,0,0,0,0,0,0,0,0, # 8 0,0,0,0,0,0,0,0,0,0, # 9 0,0,0,0,0,0,0,0,0,0 # 10 ),byrow=TRUE,ncol=10); colnames(retain)<-rownames(retain)<-names(mydat); #names must be set ## parent limits max.par<-list("b1"=4,"p1"=4,"g1"=4,"b2"=4,"p2"=4,"b3"=4,"g2"=4,"b4"=4,"b5"=4,"g3"=4); ## now build cache mycache<-buildscorecache(data.df=mydat,data.dists=mydists, dag.banned=ban, dag.retained=retain,max.parents=max.par); #now find the globally best DAG mp.dag<-mostprobable(score.cache=mycache); fitabn(dag.m=mp.dag,data.df=mydat,data.dists=mydists)$mlik; ## plot the best model - requires Rgraphviz myres<-fitabn(dag.m=mp.dag,data.df=mydat,data.dists=mydists,create.graph=TRUE); plot(myres$graph);

Best DAG using exact search

Best DAG using exact search


4. Find the best fitting graphical structure for an additive Bayesian network using a heuristic search


mydat<-ex1.dag.data;## this data comes with abn see ?ex1.dag.data ## setup distribution list for each node mydists<-list(b1="binomial", p1="poisson", g1="gaussian", b2="binomial", p2="poisson", b3="binomial", g2="gaussian", b4="binomial", b5="binomial", g3="gaussian" ); #use simple banlist with no constraints ban<-matrix(c( # 1 2 3 4 5 6 0,0,0,0,0,0,0,0,0,0, # 1 0,0,0,0,0,0,0,0,0,0, # 2 0,0,0,0,0,0,0,0,0,0, # 3 0,0,0,0,0,0,0,0,0,0, # 4 0,0,0,0,0,0,0,0,0,0, # 5 0,0,0,0,0,0,0,0,0,0, # 6 0,0,0,0,0,0,0,0,0,0, # 7 0,0,0,0,0,0,0,0,0,0, # 8 0,0,0,0,0,0,0,0,0,0, # 9 0,0,0,0,0,0,0,0,0,0 # 10 ),byrow=TRUE,ncol=10); colnames(ban)<-rownames(ban)<-names(mydat); #names must be set retain<-matrix(c( # 1 2 3 4 5 6 0,0,0,0,0,0,0,0,0,0, # 1 0,0,0,0,0,0,0,0,0,0, # 2 0,0,0,0,0,0,0,0,0,0, # 3 0,0,0,0,0,0,0,0,0,0, # 4 0,0,0,0,0,0,0,0,0,0, # 5 0,0,0,0,0,0,0,0,0,0, # 6 0,0,0,0,0,0,0,0,0,0, # 7 0,0,0,0,0,0,0,0,0,0, # 8 0,0,0,0,0,0,0,0,0,0, # 9 0,0,0,0,0,0,0,0,0,0 # 10 ),byrow=TRUE,ncol=10); colnames(retain)<-rownames(retain)<-names(mydat); #names must be set ## not run because may take some minutes for buildscorecache() ## parent limits max.par<-list("b1"=4,"p1"=4,"g1"=4,"b2"=4,"p2"=4,"b3"=4,"g2"=4,"b4"=4,"b5"=4,"g3"=4); ## now build cache mycache<-buildscorecache(data.df=mydat,data.dists=mydists, dag.banned=ban, dag.retained=retain,max.parents=max.par); # repeat but this time have the majority consensus network plotted as the searches progress heur.res2<-search.hillclimber(score.cache=mycache,num.searches=1000,seed=0,verbose=FALSE, trace=TRUE,timing.on=FALSE); ## for publication quality output for the consensus network use graphviz direct tographviz(dag.m=heur.res$consensus,data.df=mydat,data.dists=mydists,outfile="graphcon.dot"); ## and then process using graphviz tools e.g. on linux #system("dot -Tpdf -o graphcon.pdf graphcon.dot"); #system("evince graphcon.pdf"); ## note the .dot file created can be easily edited manually to provide custom shapes, colours etc.

Majority consensus DAG from 1000 heuristic searches

Majority consensus DAG from 1000 heuristic searches

Recent Posts