library(nimble)
library(coda)
library(convoSPAT)
library(StatMatch)
library(FNN)
library(fields)
library(ggplot2)
library(gridExtra)
library(RColorBrewer)
nimbleOptions(verbose = TRUE)
source('../bayes_nsgp/source_nimble.R')
source('../code/nngp5.R')
## Registering the following user-provided distributions: dmnorm_nn2 .
source('../code/sgv_code.R')
# Setup
set.seed(1)
n <- 1000
locs <- matrix(runif(2*n), ncol = 2)
dst <- as.matrix(dist(locs, upper = TRUE, diag = TRUE))
# Simulate some data
sigsq <- 1
tausq <- 0.1 # SNR = 1/0.1 = 10
phi <- 0.1
mu <- 0
C <- sigsq*matern_corr(dst, phi, 0.5)
D <- tausq*diag(n)
set.seed(2)
y <- rep(mu, n) + t(chol(C)) %*% rnorm(n)
z <- y + rnorm(n, sd = sqrt(tausq))
plotDF <- data.frame(x1 = locs[,1], x2 = locs[,2], y = y, z = z)
grid.arrange(
ggplot(plotDF, aes(x = x1, y = x2, color = y)) + coord_fixed() +
ggtitle("y") + geom_point() + ylab(NULL) + xlab(NULL) +
scale_color_gradientn(colors = brewer.pal(9,"RdBu"),
limits = c(-max(abs(c(y,z))), max(abs(c(y,z))))),
ggplot(plotDF, aes(x = x1, y = x2, color = z)) + coord_fixed() +
ggtitle("z") + geom_point() + ylab(NULL) + xlab(NULL) +
scale_color_gradientn(colors = brewer.pal(9,"RdBu"),
limits = c(-max(abs(c(y,z))), max(abs(c(y,z))))),
ncol = 2
)
# Likelihood p(z|theta) = \int_y p(z|y,theta) p(y|theta) dy : exact (w/out normalizing constant)
# The time here includes calculation of the covariance matrix
prt <- proc.time()
C <- sigsq*matern_corr(dst, phi, 0.5)
D <- tausq*diag(n)
cov.chol <- chol(C + D)
tmp1 <- backsolve(cov.chol, z - rep(mu, length(z)), transpose = TRUE)
loglik_exact <- -as.numeric( sum(log(diag(cov.chol))) + 0.5*sum(tmp1^2) )
exct.tm <- (proc.time() - prt)[3]
#---------------------------------
# Set up the SGV approx
#---------------------------------
# (1) order the coordinates;
# (2) identify neighbors;
# (3) determine conditioning sets
prt <- proc.time()
sgv_obj <- sgv_setup( locs = locs, m = 20 ) # m = number of neighbors
sgv.setup.tm <- (proc.time() - prt)[3]
# Demonstrate ordering and nearest neighbors
par(mfrow=c(1,3))
for(i in c(40, 97, 341)){
plot(sgv_obj$locs_ord[1:n,], asp = 1, pch = 16, col = "gray90",
xlab = "", ylab = "", main = paste("i = ", i, ": black = 1:i, green = 30 NN", sep = ""))
points(sgv_obj$locs_ord[1:i,1], sgv_obj$locs_ord[1:i,2], pch = 16)
points(sgv_obj$locs_ord[i,1], sgv_obj$locs_ord[i,2], pch = 16, col = 2)
points(sgv_obj$locs_ord[(sgv_obj$nID[i,])[sgv_obj$nID[i,] != -1],1],
sgv_obj$locs_ord[(sgv_obj$nID[i,])[sgv_obj$nID[i,] != -1],2], pch = 16, col = 3)
}
#---------------------------------
# Calculate U
#---------------------------------
# U = upper triangular Cholesky factor of covariance
# matrix of the ordered z and y values (hence is 2n x 2n)
# Note: U is a function of theta = covariance parameters
# For now, this function uses a generic cov_iso function
prt <- proc.time()
U <- calculateU( sgv_obj, theta = c(mu, tausq, sigsq, phi) )
u.tm <- (proc.time() - prt)[3]
#---------------------------------
# Calculate the approx likelihood
#---------------------------------
prt <- proc.time()
loglik_approx_sgv <- sgv_loglikelihood(z, sgv_obj, U, mn = rep(mu, length(z)))
aprx.sgv.tm <- (proc.time() - prt)[3]
#---------------------------------
# Set up the NNGP-response approx
#---------------------------------
prt <- proc.time()
# (1) order the coordinates;
locs_mmd <- orderCoordinatesMMD(locs)
ord <- locs_mmd$orderedIndicesNoNA
ord_all <- ord
locs_ord <- locs[ord_all,]
# (2) identify neighbors
nID <- determineNeighbors(locs_ord, 20)
nngp.setup.tm <- (proc.time() - prt)[3]
#---------------------------------
# Calculate AD
#---------------------------------
dst_ord <- as.matrix(dist(locs_ord, upper = T, diag = T))
# Note: the preceding distance calculation should be folded into calculateAD
# without requiring the full N x N distance matrix.
# Hence the calcAD.tm is a slight underestimate
prt <- proc.time()
ADmat <- calculateAD(dst_ord, nID, n, 20, sqrt(tausq), sqrt(sigsq), phi)
calcAD.tm <- (proc.time() - prt)[3]
#---------------------------------
# Calculate the approximate likelihood
#---------------------------------
prt <- proc.time()
loglik_approx_nngp <- dmnorm_nn2(z[ord_all], rep(mu,n), ADmat, nID, n, 20, TRUE) + 0.5*log(2*pi)*n # Add back on the normalizing constant (not included in other calculations)
aprx.nngp.tm <- (proc.time() - prt)[3]
data.frame(
Method = c("Exact", "SGV approx", "NNGP approx"),
N = rep(n, 3),
loglik = c(loglik_exact, loglik_approx_sgv, loglik_approx_nngp),
Time = c(exct.tm, u.tm + aprx.sgv.tm, calcAD.tm + aprx.nngp.tm),
setupTime = c(NA, sgv.setup.tm, nngp.setup.tm)
)
## Method N loglik Time setupTime
## 1 Exact 1000 -86.16267 0.208 NA
## 2 SGV approx 1000 -86.80645 0.530 0.656
## 3 NNGP approx 1000 -86.88952 0.097 0.081
# One-time setup time
data.frame(
N = n_vec,
SGV = sgv_setup_vec, NNGP = nngp_setup_vec
)
## N SGV NNGP
## 1 100 0.020 0.003
## 2 500 0.124 0.024
## 3 1000 0.520 0.060
## 4 2000 0.811 0.165
## 5 5000 2.341 0.883
## 6 10000 5.849 3.317
# Likelihood calculation time
data.frame(
N = n_vec, Exact = exact_time_vec,
SGV = sgv_time_vec, NNGP = nngp_time_vec
)
## N Exact SGV NNGP
## 1 100 0.001 0.043 0.035
## 2 500 0.024 0.110 0.040
## 3 1000 0.249 0.221 0.307
## 4 2000 1.301 0.442 0.162
## 5 5000 20.302 1.315 0.460
## 6 10000 161.394 3.414 0.980
Update calculateAD
to take the coordinates and not an \(N\times N\) matrix.
Generalize calculateU
to involve a general nonstationary covariance function.
Faster way to do matrix operations within calculateU
?