Abstract
Background We have fit the Naomi model with a range of grids.
Task How does changing the number of points per dimension \(k\), or the number of dimensions of variation kept \(s\) change the log normalising constant estimated?
k_seq <- 1:10
s_seq <- 1:10
M <- matrix(0, length(k_seq), length(s_seq))
for (i in 1:length(k_seq)) {
for (j in 1:length(s_seq)) {
M[i, j] <- k_seq[i]^(s_seq[j])
}
}
# Assume anything more than a million isn't possible
M[M > 1e6] <- NA
M
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,] 1 1 1 1 1 1 1 1 1 1
## [2,] 2 4 8 16 32 64 128 256 512 1024
## [3,] 3 9 27 81 243 729 2187 6561 19683 59049
## [4,] 4 16 64 256 1024 4096 16384 65536 262144 NA
## [5,] 5 25 125 625 3125 15625 78125 390625 NA NA
## [6,] 6 36 216 1296 7776 46656 279936 NA NA NA
## [7,] 7 49 343 2401 16807 117649 823543 NA NA NA
## [8,] 8 64 512 4096 32768 262144 NA NA NA NA
## [9,] 9 81 729 6561 59049 531441 NA NA NA NA
## [10,] 10 100 1000 10000 100000 1000000 NA NA NA NA
We start by reading in a selection of fitted PCA-AGHQ models. We only need certain parts of the fits, and as they are large we are careful here with memory.
read_sk <- function(s, k) {
assign(paste0("s", s, "k", k), readRDS(paste0("depends/aghq-s", s, "-k", k, ".rds"))[c("quad", "time")], envir = parent.frame())
}
read_sk(s = 1, k = 2)
read_sk(s = 2, k = 2)
read_sk(s = 3, k = 2)
read_sk(s = 4, k = 2)
read_sk(s = 5, k = 2)
stopifnot(nrow(s5k2$quad$modesandhessians) == 2^5)
k2 <- list(s1k2, s2k2, s3k2, s4k2, s5k2)
k2_lognormconsts <- sapply(k2, function(x) x$quad$normalized_posterior$lognormconst)
k2_times <- sapply(k2, function(x) as.numeric(x$time, units = "mins"))
rm(list = c("k2", "s1k2", "s2k2", "s3k2", "s4k2", "s5k2"))
gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 4144260 221.4 7965630 425.5 NA 7965630 425.5
## Vcells 9720272 74.2 866337632 6609.7 102400 1997794518 15242.0
read_sk(s = 1, k = 3)
read_sk(s = 2, k = 3)
read_sk(s = 3, k = 3)
read_sk(s = 4, k = 3)
read_sk(s = 5, k = 3)
read_sk(s = 7, k = 3)
read_sk(s = 8, k = 3)
stopifnot(nrow(s3k3$quad$modesandhessians) == 3^3)
k3 <- list(s1k3, s2k3, s3k3, s4k3, s5k3, s7k3, s8k3)
k3_lognormconsts <- sapply(k3, function(x) x$quad$normalized_posterior$lognormconst)
k3_times <- sapply(k3, function(x) as.numeric(x$time, units = "mins"))
rm(list = c("k3", "s1k3", "s2k3", "s3k3", "s4k3", "s5k3", "s7k3", "s8k3"))
gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 4142439 221.3 7965630 425.5 NA 7965630 425.5
## Vcells 9667897 73.8 1736983234 13252.2 102400 1997794518 15242.0
read_sk(s = 1, k = 5)
read_sk(s = 2, k = 5)
read_sk(s = 3, k = 5)
read_sk(s = 4, k = 5)
read_sk(s = 5, k = 5)
k5 <- list(s1k5, s2k5, s3k5, s4k5, s5k5)
k5_lognormconsts <- sapply(k5, function(x) x$quad$normalized_posterior$lognormconst)
k5_times <- sapply(k5, function(x) as.numeric(x$time, units = "mins"))
rm(list = c("k5", "s1k5", "s2k5", "s3k5", "s4k5", "s5k5"))
gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 4142547 221.3 7965630 425.5 NA 7965630 425.5
## Vcells 9668237 73.8 1389586588 10601.8 102400 1997794518 15242.0
df <- data.frame(
s = c(1:5, 1:5, 7, 8, 1:5),
k = rep(c(2, 3, 5), times = c(5, 7, 5)),
lognormconst = c(k2_lognormconsts, k3_lognormconsts, k5_lognormconsts),
time = c(k2_times, k3_times, k5_times)
)
write_csv(df, "increase-s-k.csv")
ggplot(df, aes(x = s, y = lognormconst)) +
geom_point() +
facet_grid(. ~ k) +
labs(x = "Dimensions kept of the PCA", y = "Log normalising constant") +
theme_minimal()
ggplot(df, aes(x = s, y = time)) +
geom_point() +
facet_grid(. ~ k) +
scale_y_continuous(limits = c(0, NA)) +
labs(x = "Dimensions kept of the PCA", y = "Time (mins)") +
theme_minimal()