Hello BUGS list:
I wrote to the list about a month ago after observing that models with the inprod function run slowly. I received a number of responses and suggestions, and I would like to summarize in this email. At the end of the email, I am including some R code to demonstrate the responses I received.
A lot of people have noticed this in their own code and it has been reported various places. The most common response was to simply write out code for the matrix multiplication instead of using the inprod function. This is illustrated in "model 2" below. Using this method on my small test data set was approximately 18X faster than using inprod, but means that you have to write out the matrix multiplication which could be tedious and error-prone for large models.
A very easy suggestion (thanks to Michael Sweeting) was to use the inprod2 function (see model 3 below), which is 3.5X faster than inprod for this test dataset on my computer. From a coding standpoint, this is definitely the most efficient solution, but it does run 5X slower than the "brute force method" (model 2). If you are running a lot of iterations or a big model that takes a long time, this might not be the best.
Kris Jamsen, along with a number of other people suggested using the paste function in R to simplify writing out the "brute force" code, as demonstrated with model 5 below. This was a very common response to my email, and it appears most people are either doing this or simply writing out the code themselves. Using the paste function has the benefit of creating code that runs at equivalent speed to model 2. It takes a bit more thought than writing out the code yourself, but is probably less prone to errors from typing mistakes and is probably faster to code if you have a large model.
Finally, Wei Chen suggested a rather ingenious alteration to the Winbugs code to get around using the inprod function by creating a temporary variable to do the multiplication, then using the sum function. This is illustrated in model 4 (below). The code runs nearly as fast as the "brute force" code (2X slower than model 2), and requires very little extra coding. This is an extremely efficient solution that I think represents a good compromise between efficient coding and efficient computation.
Thanks to everyone who responded to my email; I hope this summary will be useful for people.
Sincerely,
Kenneth Elgersma
###################################################################
## R code illustrating different solutions to the slow computation of models with the inprod function ##
## all 5 models below are equivalent (give the same results) ##
###################################################################
library(R2WinBUGS)
#X1 is a matrix with samples as rows, species presence/absence as columns
X1 <- matrix(c(
0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,
0,0,0,1,1,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,1,1,1,
1,0,1,1,1,1,1,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,
0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,
0,0,1,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1,0,1,1,1,1,1,1,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,1,
1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,
0,1,0,0,1,1,0,0,1,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,1,0,0,0,
0,0,0,0,0,0,0,0,0), nrow=99, ncol=6)
#Y is the response variable
Y <- c(9.8930187,-1.490307,-0.4936855,0.5393929,-1.57666,-0.728428,-1.5674914,
0.7069833,-2.8345468,-0.6530553,-0.747831,-4.0941493,-3.2709876,1.4481509,
-1.6980442,0.7288624,-4.4118578,-1.1038497,-4.238852,2.1894815,3.3573135,
-0.6171794,-2.7569383,-0.2444025,-3.5896847,-3.0592437,-2.240281,-0.9253375,
-3.0509816,0.7698062,-3.0803311,-5.8194451,-3.1037302,2.489518,-1.0438584,
-3.6437553,-2.2660199,-4.6090409,-4.4674625,-2.1653731,2.6709991,-2.9697435,
-1.3261689,-3.3756437,-3.9558114,1.2298576,8.3432455,0.8881382,-3.8493501,
1.8933769,-1.5663625,-3.0006472,-0.1109214,5.8355305,15.1267489,-1.2012711,
7.2557701,1.0296247,0.8402131,1.1968549,3.9400009,1.4664672,1.7834119,
-2.7578203,-0.7995056,0.6273709,-1.3193063,-0.6902929,-2.0584048,-0.2920117,
5.9116684,-3.3724693,0.2567529,-2.0187592,-0.3093973,1.093452,6.2728027,
16.8341751,0.3946647,-2.4653587,1.7957032,0.2632702,0.3318346,-2.1720797,
3.7329973,1.9678246,1.0105771,1.158189,0.9274523,0.3195322,-2.7850753,4.0820113,
-0.7489046,-1.5531917,-2.7992032,0.7910987,-0.7379023,1.2350259,-0.8007561)
N <- length(Y) #N is the number of samples
#model 1 uses inprod function
model1 <- function() {
#data model
for (i in 1:N){
Y[i] ~ dnorm(mu[i], tau)
mu[i] <- beta0 + inprod(beta1[], X1[i,])
}
#priors
beta0 ~ dnorm(0,1.0E-6)
beta1[1] <- 0
for (i in 2:6){beta1[i] ~dnorm(0,tau1)}
tau ~ dgamma(0.001,0.001)
tau1 ~ dgamma(0.001,0.001)
sigma <- 1/sqrt(tau)
sigma1 <- 1/sqrt(tau1)
}
filename <- file.path(tempdir(), "model.bug")
write.model(model1, filename)
data <- list("X1", "N", "Y")
parameters <- c("beta0", "beta1", "sigma", "sigma1")
inits <- list(
list(beta1=c(NA,rep(0,times=5)), tau1=1, beta0=0, tau=1),
list(beta1=c(NA,rep(3,times=5)), tau1=3, beta0=3, tau=3)
)
sims <- bugs(data,inits,parameters,filename,n.chains=2, n.iter=50000, n.burnin=1000,n.thin=5, debug=T)
#model 1 updates took 106 seconds
#model 2 uses "brute force" coding
model2 <- function() {
#data model
for (i in 1:N){
Y[i] ~ dnorm(mu[i], tau)
mu[i] <- beta0 +
beta1[1]*X1[i,1] +
beta1[2]*X1[i,2] +
beta1[3]*X1[i,3] +
beta1[4]*X1[i,4] +
beta1[5]*X1[i,5] +
beta1[6]*X1[i,6]
}
#priors
beta0 ~ dnorm(0,1.0E-6)
beta1[1] <- 0
for (i in 2:6){beta1[i] ~dnorm(0,tau1)}
tau ~ dgamma(0.001,0.001)
tau1 ~ dgamma(0.001,0.001)
sigma <- 1/sqrt(tau)
sigma1 <- 1/sqrt(tau1)
}
filename <- file.path(tempdir(), "model.bug")
write.model(model2, filename)
data <- list("X1", "N", "Y")
parameters <- c("beta0", "beta1", "sigma", "sigma1")
inits <- list(
list(beta1=c(NA,rep(0,times=5)), tau1=1, beta0=0, tau=1),
list(beta1=c(NA,rep(3,times=5)), tau1=3, beta0=3, tau=3)
)
sims <- bugs(data,inits,parameters,filename,n.chains=2, n.iter=50000, n.burnin=1000,n.thin=5, debug=T)
#model 2 updates took 6 seconds
#model 3 uses inprod2 function
model3 <- function() {
#data model
for (i in 1:N){
Y[i] ~ dnorm(mu[i], tau)
mu[i] <- beta0 + inprod2(beta1[], X1[i,])
}
#priors
beta0 ~ dnorm(0,1.0E-6)
beta1[1] <- 0
for (i in 2:6){beta1[i] ~dnorm(0,tau1)}
tau ~ dgamma(0.001,0.001)
tau1 ~ dgamma(0.001,0.001)
sigma <- 1/sqrt(tau)
sigma1 <- 1/sqrt(tau1)
}
filename <- file.path(tempdir(), "model.bug")
write.model(model3, filename)
data <- list("X1", "N", "Y")
parameters <- c("beta0", "beta1", "sigma", "sigma1")
inits <- list(
list(beta1=c(NA,rep(0,times=5)), tau1=1, beta0=0, tau=1),
list(beta1=c(NA,rep(3,times=5)), tau1=3, beta0=3, tau=3)
)
sims <- bugs(data,inits,parameters,filename,n.chains=2, n.iter=50000, n.burnin=1000,n.thin=5, debug=T)
#model 3 updates took 30 seconds
#model4 uses Wei Chen's solution of multiplying, then summing in Winbugs
p <- ncol(X1) #p is the number of predictor variables
model4 <- function() {
#data model
for (i in 1:N) {for (j in 1:p) {temp[i,j] <- X1[i,j]*beta1[j] } }
for (i in 1:N){
Y[i] ~ dnorm(mu[i], tau)
mu[i] <- beta0 + sum(temp[i,])
}
#priors
beta0 ~ dnorm(0,1.0E-6)
beta1[1] <- 0
for (i in 2:6){beta1[i] ~dnorm(0,tau1)}
tau ~ dgamma(0.001,0.001)
tau1 ~ dgamma(0.001,0.001)
sigma <- 1/sqrt(tau)
sigma1 <- 1/sqrt(tau1)
}
filename <- file.path(tempdir(), "model.bug")
write.model(model4, filename)
data <- list("X1", "N", "Y", "p")
parameters <- c("beta0", "beta1", "sigma", "sigma1")
inits <- list(
list(beta1=c(NA,rep(0,times=5)), tau1=1, beta0=0, tau=1),
list(beta1=c(NA,rep(3,times=5)), tau1=3, beta0=3, tau=3)
)
sims <- bugs(data,inits,parameters,filename,n.chains=2, n.iter=50000, n.burnin=1000,n.thin=5, debug=T)
#model 4 updates took 12 seconds
#model 5 uses "paste" function in R to write the "brute force" code
#first break the X1 matrix into column vectors
#(there are more efficient ways of doing this)
X6 <- X1[,6]
X5 <- X1[,5]
X4 <- X1[,4]
X3 <- X1[,3]
X2 <- X1[,2]
X1 <- X1[,1]
#use paste function to write the formula
X<-paste("X", 1:6, "[i]", sep="")
beta1<-paste("beta1", "[", 1:6, "]", sep="")
form<-paste(X,beta1, sep="*", collapse="+")
#use cat function to write a .bug file
cat(
"model
{
for(i in 1:N){
Y[i]~dnorm(mu[i], tau)
mu[i]<-beta0+", form, "}","\n",
"#Priors
beta0 ~ dnorm(0,1.0E-6)
beta1[1] <- 0
for (i in 2:6){beta1[i] ~dnorm(0,tau1)}
tau ~ dgamma(0.001,0.001)
tau1 ~ dgamma(0.001,0.001)
sigma <- 1/sqrt(tau)
sigma1 <- 1/sqrt(tau1)
}" , file="model5.bug")
#run Winbugs
data <- list("X1", "X2","X3","X4","X5","X6", "N", "Y")
parameters <- c("beta0", "beta1", "sigma", "sigma1")
inits <- list(
list(beta1=c(NA,rep(0,times=5)), tau1=1, beta0=0, tau=1),
list(beta1=c(NA,rep(3,times=5)), tau1=3, beta0=3, tau=3)
)
sims <- bugs(data,inits,parameters,"model5.bug",n.chains=2, n.iter=50000, n.burnin=1000,n.thin=5, debug=T)
#model 5 updates took 6 seconds
-------------------------------------------------------------------
This list is for discussion of modelling issues and the BUGS software.
For help with crashes and error messages, first mail [log in to unmask]
To mail the BUGS list, mail to [log in to unmask]
Before mailing, please check the archive at www.jiscmail.ac.uk/lists/bugs.html
Please do not mail attachments to the list.
To leave the BUGS list, send LEAVE BUGS to [log in to unmask]
If this fails, mail [log in to unmask], NOT the whole list
|