cbpalette <- multi.utils::cbpalette()

Basic usage

Data:

set.seed(84343124)
y <- rpois(10, 5)
plot(y)

The log-posterior for the model is:

log_post <- function(eta, y) {
  sum(y) * eta - (length(y) + 1) * exp(eta) - sum(lgamma(y + 1)) + eta
}

Get the first two derivatives too, and put them into a list:

fn <- function(x) log_post(x, y)
gr <- function(x) numDeriv::grad(fn, x)
he <- function(x) numDeriv::hessian(fn, x)

ff <- list(fn = fn, gr = gr, he = he)

Use the aghq package with k = 3. Note that all aghq::aghq:: is doing is normalising the posterior. We have already provided the exact posterior to it via fn$ff.

quad <- aghq::aghq(ff = ff, k = 3, startingvalue = 0)

The result is of class "aghq":

class(quad)
## [1] "aghq"
summary(quad)
## AGHQ on a 1 dimensional posterior with  3 quadrature points
## 
## The posterior mode is: 1.494225 
## 
## The log of the normalizing constant/marginal likelihood is: -23.32123 
## 
## The covariance matrix used for the quadrature is...
##            [,1]
## [1,] 0.02040204
## 
## Here are some moments and quantiles for the transformed parameter:
## 
##            mean        sd     2.5%   median    97.5%
## theta1 1.483746 0.1424731 1.204479 1.482833 1.763169

We can plot the log marginal posterior for theta1 in its normalised and unnormalised versions. The unnormalised version doesn’t require anything from aghq.

data.frame(
  theta1 = seq(from = 1, to = 2, by = 0.01)
) %>%
  mutate(
    f_unnormalised = ff$fn(theta1),
    f_normalised = f_unnormalised - quad$normalized_posterior$lognormconst
  ) %>%
  pivot_longer(
    cols = starts_with("f"),
    names_sep = "_",
    names_to = c("function", "normalised")
  ) %>%
  ggplot(aes(x = theta1, y = value)) +
    geom_line() +
    facet_grid(~normalised) +
    labs(x = "theta1", y = "log(posterior)") +
    theme_minimal()

The contents of the quad object can be viewed using str(quad). One part of the contents are the nodes and weights used for quadrature, which are:

quad$normalized_posterior$nodesandweights

The logarithm of the normalising constant (should correspond to logpost - logpost_normalised, the difference between the two curves above):

quad$normalized_posterior$lognormconst
## [1] -23.32123

The mode is as follows, which corresponds to the second node. For k = 3, and other odd numbers, the mode is included as a node.

quad$optresults$mode
## [1] 1.494225

More detailed background

One dimensional example

Suppose we have \[ f(\theta) = \exp(-\frac{\theta^2}{2}), \] which we would like to integrate over \(\mathbb{R}\), i.e. \(\int_{-\infty}^\infty f(\theta) \text{d}\theta\).

f <- function(theta) {
  exp(-(theta^2) / 2)  
}
plot <- data.frame(x = seq(from = -10, to = 10, by = 0.1)) %>%
  mutate(
    y = f(x)
  ) %>%
  ggplot(aes(x = x, y = y)) +
    geom_line(col = cbpalette[1]) +
    labs(x = "theta", y = "f(theta)") +
    theme_minimal()

plot

Going to do Gauss-Hermite quadrature with k = 3 points:

gg <- mvQuad::createNIGrid(1, "GHe", 3)

The positions of the nodes and their weights are:

gg$nodes
##               [,1]
## [1,] -1.732051e+00
## [2,] -1.394817e-16
## [3,]  1.732051e+00
gg$weights
##          [,1]
## [1,] 1.872321
## [2,] 1.671086
## [3,] 1.872321
plot <- plot +
  geom_point(
    data = data.frame(nodes = gg$nodes, weights = gg$weights, y = 0), 
    aes(x = nodes, y = y, size = weights), alpha = 0.5
  ) +
  labs(size = "Weight") +
  scale_size_continuous(limits = c(1, 2))

plot

ggsave(filename = "aghq-1d.pdf", plot = plot, height = 4, width = 6.25)

We can perform the quadrature using mvQuad via:

mvQuad::quadrature(f, gg)
## [1] 2.506628

This isn’t doing anything fancy, it’s just multiplying the values at the nodes by the weights:

sum(f(gg$nodes) * gg$weights)
## [1] 2.506628

Note that you can also use these functions to get the nodes and weights: mvQuad::getNodes(gg), mvQuad::getWeights(gg). The answer we get with aghq is exact in this case, as compared with the true answer of \(\sqrt {2 \pi}\) because the target function is a log-quadratic:

sqrt(2 * pi)
## [1] 2.506628

Two dimensional example

What about a function in two dimensions? Take this one, where each marginal is the same as above:

f <- function(theta) {
  prod(exp(-(theta^2) / 2))
}

df <- expand.grid(
  theta1 = seq(-3, 3, by = 0.1),
  theta2 = seq(-3, 3, by = 0.1)
)

plot2 <- df %>%
  rowwise() %>%
  mutate(
    y = f(c(theta1, theta2))
  ) %>%
  ggplot() +
    geom_contour(aes(x = theta1, y = theta2, z = y), col = multi.utils::cbpalette()[1]) +
    theme_minimal()

plot2

Let’s create a quadrature grid again:

gg2 <- mvQuad::createNIGrid(2, "GHe", 3)
gg2
## Grid for Numerical Integration  
##  # dimensions: 2  # gridpoints: 9     used memory:704 bytes
## 
## nD-Construction principle:  'product'
##  same quadrature rule for each dimension 
##       type:  GHe 
##       level:  3 
##       initial domain:  -Inf Inf

The nodes and weights are:

mvQuad::getNodes(gg2)
##                [,1]          [,2]
##  [1,] -1.732051e+00 -1.732051e+00
##  [2,] -1.394817e-16 -1.732051e+00
##  [3,]  1.732051e+00 -1.732051e+00
##  [4,] -1.732051e+00 -1.394817e-16
##  [5,] -1.394817e-16 -1.394817e-16
##  [6,]  1.732051e+00 -1.394817e-16
##  [7,] -1.732051e+00  1.732051e+00
##  [8,] -1.394817e-16  1.732051e+00
##  [9,]  1.732051e+00  1.732051e+00
mvQuad::getWeights(gg2)
##           [,1]
##  [1,] 3.505588
##  [2,] 3.128809
##  [3,] 3.505588
##  [4,] 3.128809
##  [5,] 2.792527
##  [6,] 3.128809
##  [7,] 3.505588
##  [8,] 3.128809
##  [9,] 3.505588

This is equivalent to what can be achieved using the gg object from the one-dimensional example:

expand.grid(mvQuad::getNodes(gg), mvQuad::getNodes(gg))
cbind(apply(expand.grid(mvQuad::getWeights(gg), mvQuad::getWeights(gg)), 1, prod))
##           [,1]
##  [1,] 3.505588
##  [2,] 3.128809
##  [3,] 3.505588
##  [4,] 3.128809
##  [5,] 2.792527
##  [6,] 3.128809
##  [7,] 3.505588
##  [8,] 3.128809
##  [9,] 3.505588

What do these nodes and weights look like on the plot?

plot2 <- plot2 +
  geom_point(
    data = mvQuad::getNodes(gg2) %>%
            as.data.frame() %>%
            mutate(weights = mvQuad::getWeights(gg2)), 
    aes(x = V1, y = V2, size = weights),
    alpha = 0.5
  ) +
  labs(col = "f(theta1, theta2)", size = "Weight") +
  scale_size_continuous(limits = c(2, 4))

plot2

ggsave(filename = "aghq-2d.pdf", plot = plot2, height = 4, width = 6.25)

Ok! So now let’s use mvQuad to do the quadrature:

#' Need f to be defined like this for some reason, can probably be fixed
f <- function(x) {
  apply(x, 1, function(y) prod(exp(-(y^2) / 2)))  
}

mvQuad::quadrature(f, gg2)
## [1] 6.283185

Again, in this case the quadrature is exact

2 * pi
## [1] 6.283185

See Definition 1 of the theory paper for more on when the integration will be exact.

Gauss-Hermite Quadrature

Define the Hermite polynomial with \(k = 3\) as:

Hk <- as.function(mpoly::hermite(3, kind = "he"))
## f(.) with . = x
plot <- data.frame(x = seq(-3, 3, by = 0.1)) %>%
  mutate(Hkx = Hk(x)) %>%
  ggplot(aes(x = x, y = Hkx)) +
    geom_line() + 
    labs(x = "x", y = "H3(x)") +
    theme_minimal()

plot

In Gauss-Hermite quadrature, we pick nodes which are zeroes of the Hermite polynomial:

nn <- mvQuad::getNodes(gg)

plot +
  geom_point(
    data = data.frame(x = nn, y = Hk(nn)), aes(x = x, y = y),
    col = cbpalette[2], size = 3
  )

Note that these nodes are chosen only based on the Hermite polynomial. The function to be integrated has not played any role in their choice! This is probably not a good idea. So in adaptive quadrature, the function plays some role in how the points are chosen. Specifically in adaptive GHQ it’s done by scaling the nodes and weights by the mode and curvature at the mode of the function to be integrated. Imagine we have some fake values for the mode theta_hat and curvature at the mode theta_hess as follows:

theta_hat <- c(2, 3)
theta_hess <- matrix(c(3, 1, 1, 5), nrow = 2, ncol = 2)
theta_hess_inv <- solve(theta_hess)

Then the rescaling can be done with mvQuad easily via:

k <- 3
int_grid_software <- mvQuad::createNIGrid(2, "GHe", k)
mvQuad::rescale(int_grid_software, m = theta_hat, C = theta_hess_inv, dec.type = 2)

All that is doing manually to the nodes is:

int_grid_manual <- mvQuad::createNIGrid(2, "GHe", k)
nn_rescaled <- mvQuad::getNodes(int_grid_manual) %*% chol(theta_hess_inv)
nn_rescaled[, 1] <- nn_rescaled[, 1] + theta_hat[1]
nn_rescaled[, 2] <- nn_rescaled[, 2] + theta_hat[2]

Or to the weights is:

mvQuad::getWeights(int_grid_software)
##            [,1]
##  [1,] 0.9369077
##  [2,] 0.8362094
##  [3,] 0.9369077
##  [4,] 0.8362094
##  [5,] 0.7463342
##  [6,] 0.8362094
##  [7,] 0.9369077
##  [8,] 0.8362094
##  [9,] 0.9369077
ww_rescaled <- mvQuad::getWeights(int_grid_manual) * det(chol(theta_hess_inv))

Tomato disease example

Ok, let’s now move on to an example with real data.

data("tswv", package = "EpiILMCT")
dat <- tswv$tswvsir
dat$epidat <- dat$epidat[order(dat$epidat[, 4]), ]
I <- dat$epidat[, 4]
R <- dat$epidat[, 2]
infected <- !is.infinite(I)

datlist <- list(
  D = as.matrix(dist(dat$location[dat$epidat[, 1], ])),
  I = I,
  R = R,
  infected = as.numeric(infected[infected])
)
precompile()
## Removing: libTMB.cpp libTMB.o libTMBdbg.cpp libTMBomp.cpp
## Precompilation sources generated
compile("disease.cpp")
## [1] 0
dyn.load(dynlib("disease"))

Create the unnormalised negative log posterior using TMB::MakeADFun as follows:

ff <- TMB::MakeADFun(
  data = datlist,
  parameters = list(theta1 = 0, theta2 = 0),
  DLL = "disease",
  ADreport = FALSE,
  silent = TRUE
)

Perform AGHQ with \(k = 7\), starting optimisation from \((0, 0)\):

tm <- Sys.time()

tmp <- capture.output(
  ilm_quadrature <- aghq::aghq(ff, 7, c(0, 0))
)

runtime <- difftime(Sys.time(), tm, units = "secs")

The time this took was:

runtime
## Time difference of 0.1223681 secs

Note that as there are \(k = 7\) quadrature points per dimension, in total there are \(7^2 = 49\):

summary(ilm_quadrature)
## AGHQ on a 2 dimensional posterior with  7 7 quadrature points
## 
## The posterior mode is: -4.38672 0.2909931 
## 
## The log of the normalizing constant/marginal likelihood is: -1087.572 
## 
## The covariance matrix used for the quadrature is...
##            [,1]       [,2]
## [1,] 0.03494285 0.01960776
## [2,] 0.01960776 0.01205820
## 
## Here are some moments and quantiles for the transformed parameter:
## 
##              mean        sd        2.5%     median      97.5%
## theta1 -4.4392970 0.1997867 -4.88217078 -4.4274227 -4.0933793
## theta2  0.2583451 0.1207060 -0.01654348  0.2670827  0.4616635

The parameters \(\alpha = \exp(\theta_1)\) and \(\beta = \exp(\theta_2)\), which we can compute summaries for as follows:

ilm_means <- aghq::compute_moment(ilm_quadrature$normalized_posterior, function(x) exp(x))
names(ilm_means) <- c("alpha", "beta")
ilm_means
##    alpha     beta 
## 0.012035 1.304030

You can calculate other moments using the ff, gg or nn arguments to aghq::compute_moment. There is also some nuance to the argument method, either reuse or correct. reuse uses the previously computed AGHQ nodes and weights. correct recomputes the mode and curvature for the product of \(g\) and the posterior; it has convergence proofs, but is a bit slower and can be unstable.

We can also compute quantiles using the function aghq::compute_quantiles, where q is a vector of numeric values in \((0, 1)\).

my_q <- c(0.025, 0.1, 0.5, 0.9, 0.975)

For \(\alpha\):

exp(aghq::compute_quantiles(ilm_quadrature$marginals[[1]], q = my_q))
##       2.5%        10%        50%        90%      97.5% 
## 0.00758054 0.00904698 0.01194524 0.01503700 0.01668276

For \(\beta\):

exp(aghq::compute_quantiles(ilm_quadrature$marginals[[2]], q = my_q)) # beta
##      2.5%       10%       50%       90%     97.5% 
## 0.9835926 1.1021213 1.3061485 1.4952597 1.5867113