1 What might be possible?

k_seq <- 1:10
s_seq <- 1:10

M <- matrix(0, length(k_seq), length(s_seq))

for (i in 1:length(k_seq)) {
    for (j in 1:length(s_seq)) {
        M[i, j] <- k_seq[i]^(s_seq[j])
  }
}

# Assume anything more than a million isn't possible
M[M > 1e6] <- NA

M
##       [,1] [,2] [,3]  [,4]   [,5]    [,6]   [,7]   [,8]   [,9] [,10]
##  [1,]    1    1    1     1      1       1      1      1      1     1
##  [2,]    2    4    8    16     32      64    128    256    512  1024
##  [3,]    3    9   27    81    243     729   2187   6561  19683 59049
##  [4,]    4   16   64   256   1024    4096  16384  65536 262144    NA
##  [5,]    5   25  125   625   3125   15625  78125 390625     NA    NA
##  [6,]    6   36  216  1296   7776   46656 279936     NA     NA    NA
##  [7,]    7   49  343  2401  16807  117649 823543     NA     NA    NA
##  [8,]    8   64  512  4096  32768  262144     NA     NA     NA    NA
##  [9,]    9   81  729  6561  59049  531441     NA     NA     NA    NA
## [10,]   10  100 1000 10000 100000 1000000     NA     NA     NA    NA

2 Data import

We start by reading in a selection of fitted PCA-AGHQ models. We only need certain parts of the fits, and as they are large we are careful here with memory.

read_sk <- function(s, k) {
  assign(paste0("s", s, "k", k), readRDS(paste0("depends/aghq-s", s, "-k", k, ".rds"))[c("quad", "time")], envir = parent.frame())
}

read_sk(s = 1, k = 2)
read_sk(s = 2, k = 2)
read_sk(s = 3, k = 2)
read_sk(s = 4, k = 2)
read_sk(s = 5, k = 2)

stopifnot(nrow(s5k2$quad$modesandhessians) == 2^5)

k2 <- list(s1k2, s2k2, s3k2, s4k2, s5k2)
k2_lognormconsts <- sapply(k2, function(x) x$quad$normalized_posterior$lognormconst)
k2_times <- sapply(k2, function(x) as.numeric(x$time, units = "mins"))

rm(list = c("k2", "s1k2", "s2k2", "s3k2", "s4k2", "s5k2"))
gc()
##           used  (Mb) gc trigger   (Mb) limit (Mb)   max used    (Mb)
## Ncells 4144260 221.4    7965630  425.5         NA    7965630   425.5
## Vcells 9720272  74.2  866337632 6609.7     102400 1997794518 15242.0
read_sk(s = 1, k = 3)
read_sk(s = 2, k = 3)
read_sk(s = 3, k = 3)
read_sk(s = 4, k = 3)
read_sk(s = 5, k = 3)
read_sk(s = 7, k = 3)
read_sk(s = 8, k = 3)

stopifnot(nrow(s3k3$quad$modesandhessians) == 3^3)

k3 <- list(s1k3, s2k3, s3k3, s4k3, s5k3, s7k3, s8k3)
k3_lognormconsts <- sapply(k3, function(x) x$quad$normalized_posterior$lognormconst)
k3_times <- sapply(k3, function(x) as.numeric(x$time, units = "mins"))

rm(list = c("k3", "s1k3", "s2k3", "s3k3", "s4k3", "s5k3", "s7k3", "s8k3"))
gc()
##           used  (Mb) gc trigger    (Mb) limit (Mb)   max used    (Mb)
## Ncells 4142439 221.3    7965630   425.5         NA    7965630   425.5
## Vcells 9667897  73.8 1736983234 13252.2     102400 1997794518 15242.0
read_sk(s = 1, k = 5)
read_sk(s = 2, k = 5)
read_sk(s = 3, k = 5)
read_sk(s = 4, k = 5)
read_sk(s = 5, k = 5)

k5 <- list(s1k5, s2k5, s3k5, s4k5, s5k5)
k5_lognormconsts <- sapply(k5, function(x) x$quad$normalized_posterior$lognormconst)
k5_times <- sapply(k5, function(x) as.numeric(x$time, units = "mins"))

rm(list = c("k5", "s1k5", "s2k5", "s3k5", "s4k5", "s5k5"))
gc()
##           used  (Mb) gc trigger    (Mb) limit (Mb)   max used    (Mb)
## Ncells 4142547 221.3    7965630   425.5         NA    7965630   425.5
## Vcells 9668237  73.8 1389586588 10601.8     102400 1997794518 15242.0

3 Table processing

df <- data.frame(
  s = c(1:5, 1:5, 7, 8, 1:5),
  k = rep(c(2, 3, 5), times = c(5, 7, 5)),
  lognormconst = c(k2_lognormconsts, k3_lognormconsts, k5_lognormconsts),
  time = c(k2_times, k3_times, k5_times)
)

write_csv(df, "increase-s-k.csv")

4 Visualisation

ggplot(df, aes(x = s, y = lognormconst)) +
  geom_point() +
  facet_grid(. ~ k) +
  labs(x = "Dimensions kept of the PCA", y = "Log normalising constant") +
  theme_minimal()

ggplot(df, aes(x = s, y = time)) +
  geom_point() +
  facet_grid(. ~ k) +
  scale_y_continuous(limits = c(0, NA)) +
  labs(x = "Dimensions kept of the PCA", y = "Time (mins)") +
  theme_minimal()