Originally Alex used the ipoptr package. However, it is now not possible, or at least very difficult, to install it (as of 01/04/2023). We will here use the nloptr package instead.

data(gcdatalist, package = "aghq")
precompile()

## Removing: libTMB.cpp libTMB.o libTMBdbg.cpp libTMBomp.cpp

## Precompilation sources generated

set.seed(4365789)

globalpath <- normalizePath(tempdir(), winslash = "/")
plotpath <- normalizePath(file.path(globalpath, "astro"), winslash = "/", mustWork = FALSE)
if (!dir.exists(plotpath)) dir.create(plotpath)

Get the TMB template from the aghq package, the compile and load it:

file.copy(normalizePath(system.file("extsrc/01_astro.cpp", package = "aghq"), winslash = "/"), globalpath)

## [1] FALSE

compile(normalizePath(file.path(globalpath, "01_astro.cpp"), winslash = "/"))

## [1] 0

dyn.load(normalizePath(dynlib(file.path(globalpath, "01_astro")), winslash = "/"))

Parameter transformations:

parambounds <- list(
  Psi0 = c(1, 200),
  gamma = c(0.3, 0.7),
  alpha = c(3.0, 3.7),
  beta = c(-0.5, 1)
)

get_Psi0 <- function(theta) {
  # theta = -log( (Psi0 - 1) / (200 - 1) )
  (parambounds$Psi0[2] - parambounds$Psi0[1]) * exp(-exp(theta)) + parambounds$Psi0[1]
}

get_theta1 <- function(Psi0) {
  log(-log((Psi0 - parambounds$Psi0[1]) / (parambounds$Psi0[2] - parambounds$Psi0[1])))
}

get_gamma <- function(theta) {
  # theta = -log( (gamma - 0.3) / (0.7 - 0.3) )
  (parambounds$gamma[2] - parambounds$gamma[1]) * exp(-exp(theta)) + parambounds$gamma[1]
}

get_theta2 <- function(gamma) {
  log(-log((gamma - parambounds$gamma[1]) / (parambounds$gamma[2] - parambounds$gamma[1])))
}

get_alpha <- function(theta) {
  # theta = log(alpha - 3)
  exp(theta) + parambounds$alpha[1]
}

get_theta3 <- function(alpha) {
  log(alpha - parambounds$alpha[1])
}

get_beta <- function(theta) {
  # theta = -log( (beta - (-0.5)) / (1 - (-0.5)) )
  (parambounds$beta[2] - parambounds$beta[1]) * exp(-exp(theta)) + parambounds$beta[1]
}

get_theta4 <- function(beta) {
  log(-log((beta - parambounds$beta[1]) / (parambounds$beta[2] - parambounds$beta[1])))
}

1 Optimisation

Optimisation using IPOPT replaced by optimisation using nloptr:

nloptr_objective <- function(theta) ff$fn(theta)
nloptr_objective_gradient <- function(theta) ff$gr(theta)

We use box constraints, to improve stability of optimization:

lowerbounds <- c(
  get_theta1(parambounds$Psi0[2] - 0.001),
  get_theta2(parambounds$gamma[2] - 0.001),
  get_theta3(parambounds$alpha[1] + 0.001),
  get_theta4(parambounds$beta[2] - 0.001)
)

upperbounds <- c(
  get_theta1(parambounds$Psi0[1] + 1),
  get_theta2(parambounds$gamma[1] + 0.01),
  get_theta3(parambounds$alpha[2] - 0.01),
  get_theta4(parambounds$beta[1] + 0.01)
)

Start optimisation at the midpoint:

thetastart <- (upperbounds + lowerbounds) / 2
thetastart_list <- list(theta1 = thetastart[1], theta2 = thetastart[2], theta3 = thetastart[3], theta4 = thetastart[4])

data.frame(
  "param" = paste0("theta", 1:4),
  lowerbound = lowerbounds,
  upperbound = upperbounds,
  thetastart = thetastart
) %>%
  ggplot(aes(ymin = lowerbound, y = thetastart, ymax = upperbound, x = param)) +
    geom_pointrange() +
    coord_flip() +
    labs(x = "", y = "") +
    theme_minimal()

Nonlinear constraints, specified as a function:

nloptr_nonlinear_constraints <- function(theta) -1 * Es$fn(theta)

nloptr_nonlinear_constraints_jacobian <- function(theta) {
  J <- -1 * Es$gr(theta)
  as.matrix(J)
}

nonlinear_lowerbound <- rep(0, nrow(gcdata) + 2)
nonlinear_upperbound <- rep(Inf, nrow(gcdata) + 2)

Objective function and its derivatives:

ff <- MakeADFun(
  data = gcdatalist,
  parameters = thetastart_list,
  DLL = "01_astro",
  ADreport = FALSE,
  silent = TRUE
)

Nonlinear constraints and their Jacobian:

Es <- MakeADFun(
  data = gcdatalist,
  parameters = thetastart_list,
  DLL = "01_astro",
  ADreport = TRUE,
  silent = TRUE
)

Check starting value satisfies constraints. Note that constraints in nloptr are of the form \(g(x) \leq 0\).

stopifnot(all(nloptr_nonlinear_constraints(thetastart) <= 0))

tm <- Sys.time()

tmp <- capture.output(
  nloptr_result <- nloptr::nloptr(
    x0 = thetastart,
    eval_f = nloptr_objective,
    eval_grad_f = nloptr_objective_gradient,
    lb = lowerbounds,
    ub = upperbounds,
    eval_g_ineq = nloptr_nonlinear_constraints,
    eval_jac_g_ineq = nloptr_nonlinear_constraints_jacobian,
    opts = list("algorithm" = "NLOPT_LD_MMA", obj_scaling_factor = 1, "xtol_rel" = 1.0e-3)
  )
)

optruntime <- difftime(Sys.time(), tm, units = "secs")
cat("Run time for mass model optimisation:", optruntime, "seconds.\n")

## Run time for mass model optimisation: 0.09713817 seconds.

Check that the optimisation results look right (based on Figure 2 of Bilodeau, Stringer, and Tang (2021)).

get_Psi0(nloptr_result$solution[1])

## [1] 32.43971

get_gamma(nloptr_result$solution[2])

## [1] 0.3164915

get_alpha(nloptr_result$solution[3])

## [1] 3.00865

get_beta(nloptr_result$solution[4])

## [1] 0.03624215

2 AGHQ

Create the template for AGHQ:

ffa <- list(
  fn = function(x) -1 * ff$fn(x),
  gr = function(x) -1 * ff$gr(x),
  he = function(x) -1 * ff$he(x)
)

useropt <- list(
  ff = ffa,
  mode = nloptr_result$solution,
  hessian = ff$he(nloptr_result$solution)
)

#' Fit it with "reuse" first, then correct marginals later
cntrl <- aghq::default_control(negate = TRUE)

#' Do the quadrature
tm <- Sys.time()

k_dense <- 5
astroquad <- aghq::aghq(ff, k = k_dense, thetastart, optresults = useropt, control = cntrl)

#' Correct the marginals except for beta, which gave NA due to the constraints
for (j in 1:3) astroquad$marginals[[j]] <- marginal_posterior(astroquad, j, method = "correct")
quadruntime <- difftime(Sys.time(), tm, units = "secs")

cat("Run time for mass model quadrature:", quadruntime ,"seconds.\n")

## Run time for mass model quadrature: 0.2340021 seconds.

#' Total runtime for AGHQ
optruntime + quadruntime

## Time difference of 0.3311403 secs

3 EB

tm <- Sys.time()

k_eb <- 1
astroebquad <- aghq::aghq(ff, k = k_eb, thetastart, optresults = useropt, control = cntrl)

#' Correct the marginals except for beta, which gave NA due to the constraints
for (j in 1:3) astroebquad$marginals[[j]] <- marginal_posterior(astroebquad, j, method = "correct")
quadebruntime <- difftime(Sys.time(), tm, units = "secs")

cat("Run time for mass model quadrature:", quadebruntime ,"seconds.\n")

## Run time for mass model quadrature: 0.05084991 seconds.

#' Total runtime for EB
optruntime + quadebruntime

## Time difference of 0.1479881 secs

4 PCA-AGHQ

C <- Matrix::forceSymmetric(solve(useropt$hessian))
eigenC <- eigen(C)
lambda <- eigenC$values

data.frame(
  n = 1:length(lambda),
  tv = cumsum(lambda / sum(lambda))
) %>% ggplot(aes(x = n, y = tv)) +
  geom_point() +
  geom_hline(yintercept = 0.9, col = "grey", linetype = "dashed") +
  annotate("text", x = 3, y = 0.89, label = "90% of total variation explained", col = "grey") +
  scale_y_continuous(labels = scales::percent) +
  labs(x = "PCA dimensions included", y = "Total variation explained") +
  theme_minimal()

k_pca <- 5
s <- 1 #' Chosen based on the Scree plot
m <- length(useropt$mode)
levels <- c(rep(k_pca, s), rep(1, m - s))
pca_grid <- mvQuad::createNIGrid(dim = m, type = "GHe", level = levels)
mvQuad::rescale(pca_grid, m = useropt$mode, C = C, dec.type = 1)

#' Check that the PCA grid has the correct number of nodes
stopifnot(nrow(mvQuad::getNodes(pca_grid)) == k_pca^s)

tm <- Sys.time()

astropcaquad <- local_aghq(ff, k_pca, thetastart, optresults = useropt, basegrid = pca_grid, adapt = FALSE, control = cntrl)

#' Correct the marginals except for beta, which gave NA due to the constraints
for (j in 1:3) astropcaquad$marginals[[j]] <- marginal_posterior(astropcaquad, j, method = "correct")
pcaquadruntime <- difftime(Sys.time(), tm, units = "secs")

cat("Run time for mass model quadrature:", pcaquadruntime ,"seconds.\n")

## Run time for mass model quadrature: 0.0270288 seconds.

#' Total runtime for PCA-AGHQ
optruntime + pcaquadruntime

## Time difference of 0.124167 secs

5 NUTS

stanmod <- tmbstan(
  ff,
  chains = 4,
  cores = 4,
  iter = 1e04,
  warmup = 1e03,
  init = nloptr_result$solution,
  seed = 48645,
  algorithm = "NUTS"
)

# Time
get_elapsed_time(stanmod)

##           warmup  sample
## chain:1 0.705935 4.01523
## chain:2 0.619971 3.93486
## chain:3 0.620559 4.11472
## chain:4 0.614505 3.79424

standata <- as.data.frame(stanmod)
standata$Psi0 <- get_Psi0(standata[ ,1])
standata$gamma <- get_gamma(standata[ ,2])
standata$alpha <- get_alpha(standata[ ,3])
standata$beta <- get_beta(standata[ ,4])

6 Comparison

6.1 Plots

#' Manual plots
parambounds <- list(
  Psi0 = c(1, 200),
  gamma = c(0.3, 0.7),
  alpha = c(3.0, 3.7),
  beta = c(-0.5, 1)
)

#' Add small buffers for stability
get_theta2_robust <- function(gamma) {
  log(-log((gamma - parambounds$gamma[1] + 1e-03) / (parambounds$gamma[2] - parambounds$gamma[1] + 1e-03)))
}

get_theta3_robust <- function(alpha) log(alpha - parambounds$alpha[1] + 1e-03)

#' Transformed PDFs
translist1 <- make_transformation(totheta = get_theta1, fromtheta = get_Psi0)
translist2 <- make_transformation(totheta = get_theta2_robust, fromtheta = get_gamma)
translist3 <- make_transformation(totheta = get_theta3_robust, fromtheta = get_alpha)
translist4 <- make_transformation(totheta = get_theta4, fromtheta = get_beta)

Psi0pdf <- compute_pdf_and_cdf(astroquad$marginals[[1]], translist1)
gammapdf <- compute_pdf_and_cdf(astroquad$marginals[[2]], translist2)
alphapdf <- compute_pdf_and_cdf(astroquad$marginals[[3]], translist3, interpolation = "polynomial")
betapdf <- compute_pdf_and_cdf(astroquad$marginals[[4]], translist4)

Psi0ebpdf <- compute_pdf_and_cdf(astroebquad$marginals[[1]], translist1)
gammaebpdf <- compute_pdf_and_cdf(astroebquad$marginals[[2]], translist2)
alphaebpdf <- compute_pdf_and_cdf(astroebquad$marginals[[3]], translist3, interpolation = "polynomial")
betaebpdf <- compute_pdf_and_cdf(astroebquad$marginals[[4]], translist4)

Psi0pcapdf <- compute_pdf_and_cdf(astropcaquad$marginals[[1]], translist1)
gammapcapdf <- compute_pdf_and_cdf(astropcaquad$marginals[[2]], translist2)
alphapcapdf <- compute_pdf_and_cdf(astropcaquad$marginals[[3]], translist3, interpolation = "polynomial")
betapcapdf <- compute_pdf_and_cdf(astropcaquad$marginals[[4]], translist4)

Psi0prior <- function(Psi0) dunif(Psi0, parambounds$Psi0[1], parambounds$Psi0[2], log = FALSE)
gammaprior <- function(gamma) dunif(gamma, parambounds$gamma[1], parambounds$gamma[2], log = FALSE)
alphaprior <- function(alpha) dgamma(alpha - parambounds$alpha[1], shape = 1, rate = 4.6, log = FALSE)
betaprior <- function(beta) dunif(beta, parambounds$beta[1], parambounds$beta[2], log = FALSE)

(Psi0_postplot <- Psi0pdf %>%
  ggplot(aes(x = transparam, y = pdf_transparam)) +
  geom_histogram(
    data = standata,
    mapping = aes(x = Psi0, y = after_stat(density)),
    bins = 50,
    colour = "white",
    fill = "grey"
  ) +
  geom_line() +
  geom_line(
    data = Psi0pcapdf,
    inherit.aes = FALSE,
    mapping = aes(x = transparam, y = pdf_transparam),
    colour = "blue",
  ) +
  stat_function(fun = Psi0prior, linetype = "dashed") +
  labs(title = "", x = "", y = "Density") +
  scale_x_continuous(breaks = seq(10, 60, by = 5)) +
  coord_cartesian(xlim = c(24, 43)) +
  theme_minimal())

(gamma_postplot <- gammapdf %>%
  ggplot(aes(x = transparam, y = pdf_transparam)) +
  geom_histogram(
    data = standata,
    mapping = aes(x = gamma, y = after_stat(density)),
    bins = 100,
    colour = "white",
    fill = "grey"
  ) +
  geom_line() +
  geom_line(
    data = gammapcapdf,
    mapping = aes(x = transparam, y = pdf_transparam),
    colour = "blue",
  ) +
  stat_function(fun = gammaprior, linetype = "dashed") +
  labs(title = "", x = "", y = "Density") +
  scale_x_continuous(breaks = seq(0, 0.44, by = 0.02)) +
  coord_cartesian(xlim = c(0.3, 0.4)) +
  theme_minimal())

(alpha_postplot <- alphapdf %>%
  ggplot(aes(x = transparam, y = pdf_transparam)) +
  geom_histogram(
    data = standata,
    mapping = aes(x = alpha, y = after_stat(density)),
    bins = 2000,
    colour = "white",
    fill = "grey"
  ) +
  geom_line() +
  geom_line(
    data = alphapcapdf,
    mapping = aes(x = transparam, y = pdf_transparam),
    colour = "blue",
  ) +
  stat_function(fun = alphaprior, linetype = "dashed") +
  labs(title = "",x = "",y = "Density") +
  scale_x_continuous(breaks = seq(3, 4, by = 0.02)) +
  coord_cartesian(xlim = c(3, 3.05)) +
  theme_minimal())

(beta_postplot <- betapdf %>%
  ggplot(aes(x = transparam, y = pdf_transparam)) +
  geom_histogram(
    data = standata,
    mapping = aes(x = beta, y = after_stat(density)),
    bins = 50,
    colour = "white",
    fill = "grey"
  ) +
  geom_line() +
  geom_line(
    data = betapcapdf,
    mapping = aes(x = transparam, y = pdf_transparam),
    colour = "blue",
  ) +
  stat_function(fun = betaprior, linetype = "dashed") +
  labs(title = "",x = "",y = "Density") +
  scale_x_continuous(breaks = seq(-0.5, 1, by = 0.1)) +
  coord_cartesian(xlim = c(-0.3, 0.4)) +
  theme_minimal())

6.2 Normalising constant

data.frame(
  method = c("AGHQ", "PCA-AGHQ", "EB"),
  lognormconst = c(astroquad$normalized_posterior$lognormconst, astropcaquad$normalized_posterior$lognormconst, astroebquad$normalized_posterior$lognormconst)
) %>%
  gt::gt()

method	lognormconst
AGHQ	-706.2578
PCA-AGHQ	-706.2804
EB	-706.3330

6.3 KS tests

kstable <- function(quad) {
  aghqpostsamps_spline <- sample_marginal(quad, nrow(standata), interpolation = "spline")
  aghqpostsamps_poly <- sample_marginal(quad, nrow(standata), interpolation = "polynomial")

  suppressWarnings({
    kstable <- data.frame(
      Psi0 = ks.test(standata[[1]], aghqpostsamps_poly[[1]])$statistic,
      gamma = ks.test(standata[[2]], aghqpostsamps_poly[[2]])$statistic,
      alpha = ks.test(standata[[3]], aghqpostsamps_poly[[3]])$statistic,
      beta = ks.test(standata[[4]], aghqpostsamps_poly[[4]])$statistic
    )
  })

  return(kstable)
}

kstable_astroquad <- kstable(astroquad) %>%
  pivot_longer(cols = everything()) %>%
  mutate(method = "AGHQ", k = k_dense, s = NA, total_points = k^4)

#' Not working (yet?)
# kstable_astropcaquad <- kstable(astropcaquad) %>%
#   pivot_longer(cols = everything()) %>%
#   mutate(method = "AGHQ-PCA", k = k_pca, s = s, total_points = k^s)

bind_rows(kstable_astroquad) %>%
  gt::gt()

name	value	method	k	s	total_points
Psi0	0.008277778	AGHQ	5	NA	625
gamma	0.010861111	AGHQ	5	NA	625
alpha	0.037861111	AGHQ	5	NA	625
beta	0.007166667	AGHQ	5	NA	625

bind_rows(kstable_astroquad) %>%
  ggplot(aes(x = name, y = value, col = method)) +
    geom_jitter(height = 0, width = 0.05) +
    scale_y_continuous(limits = c(0, 0.05)) +
    scale_colour_manual(values = multi.utils::cbpalette()) +
    labs(x = "Parameter", y = "KS(method, NUTS)", col = "Method") +
    theme_minimal()

Original computing environment

sessionInfo()

## R version 4.2.0 (2022-04-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS 13.3.1
## 
## Matrix products: default
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] aghq_0.4.1           nloptr_2.0.3         naomi_2.8.5          sf_1.0-9             tmbstan_1.0.4        rstan_2.21.5         StanHeaders_2.21.0-7 TMB_1.9.2            Matrix_1.5-4.1      
## [10] rmarkdown_2.18       multi.utils_0.1.0    patchwork_1.1.2      forcats_0.5.2        stringr_1.5.0        dplyr_1.0.10         purrr_1.0.1          readr_2.1.3          tidyr_1.3.0         
## [19] tibble_3.2.1         ggplot2_3.4.0        tidyverse_1.3.1     
## 
## loaded via a namespace (and not attached):
##   [1] colorspace_2.0-3    ellipsis_0.3.2      class_7.3-20        rprojroot_2.0.3     fs_1.6.1            rstudioapi_0.14     proxy_0.4-27        farver_2.1.1        bit64_4.0.5         fansi_1.0.4        
##  [11] lubridate_1.8.0     xml2_1.3.3          splines_4.2.0       codetools_0.2-18    cachem_1.0.6        knitr_1.41          polynom_1.4-1       jsonlite_1.8.4      gt_0.8.0            broom_0.8.0        
##  [21] dbplyr_2.1.1        compiler_4.2.0      httr_1.4.4          backports_1.4.1     assertthat_0.2.1    fastmap_1.1.0       cli_3.6.1           htmltools_0.5.3     prettyunits_1.1.1   tools_4.2.0        
##  [31] gtable_0.3.1        glue_1.6.2          tinytex_0.39.1      V8_4.2.2            Rcpp_1.0.10         jquerylib_0.1.4     cellranger_1.1.0    vctrs_0.6.1         mvQuad_1.0-6        xfun_0.37          
##  [41] ps_1.7.3            rvest_1.0.2         lifecycle_1.0.3     statmod_1.4.36      orderly_1.4.3       ids_1.0.1           scales_1.2.1        vroom_1.6.0         hms_1.1.2           parallel_4.2.0     
##  [51] inline_0.3.19       rticles_0.23.6      yaml_2.3.7          curl_5.0.0          memoise_2.0.1       gridExtra_2.3       sass_0.4.4          loo_2.5.1           stringi_1.7.8       RSQLite_2.2.14     
##  [61] highr_0.9           e1071_1.7-12        pkgbuild_1.3.1      rlang_1.1.0         pkgconfig_2.0.3     matrixStats_0.62.0  evaluate_0.20       lattice_0.20-45     labeling_0.4.2      bit_4.0.5          
##  [71] tidyselect_1.2.0    processx_3.8.0      traduire_0.0.6      bookdown_0.26       magrittr_2.0.3      R6_2.5.1            generics_0.1.3      DBI_1.1.3           pillar_1.9.0        haven_2.5.0        
##  [81] withr_2.5.0         units_0.8-0         modelr_0.1.8        crayon_1.5.2        uuid_1.1-0          KernSmooth_2.23-20  utf8_1.2.3          tzdb_0.3.0          grid_4.2.0          readxl_1.4.0       
##  [91] isoband_0.2.6       data.table_1.14.6   blob_1.2.3          callr_3.7.3         reprex_2.0.1        digest_0.6.31       classInt_0.4-8      numDeriv_2016.8-1.1 openssl_2.0.5       RcppParallel_5.1.5 
## [101] stats4_4.2.0        munsell_0.5.0       bslib_0.4.1         askpass_1.1

Bibliography

Bilodeau, Blair, Alex Stringer, and Yanbo Tang. 2021. “Stochastic Convergence Rates and Applications of Adaptive Quadrature in Bayesian Inference.” https://arxiv.org/abs/2102.06801.

Milky way mass estimation using PCA-AGHQ

Adam Howes