1 Background

We compare the inference results from TMB, aghq, adam, and tmbstan. Import these inference results as follows:

tmb <- readRDS("depends/tmb.rds")
aghq <- readRDS("depends/aghq.rds")
tmbstan <- readRDS("depends/tmbstan.rds")

2 Second 90

The 90-90-90 treatment targets are that

90% of PLHIV know their status
90% of PLHIV who kno wtheir status are on antiretroviral therapy (ART)
90% of PLHIV on ART have suppressed viral load

To meet the second 90, we require that \(90\% \times 90\% = 81\%\) of PLHIV are on ART.

mf_out <- tmb$naomi_data$mf_out

mf_out_fine <- mf_out %>%
  tibble::rownames_to_column("id") %>%
  mutate(id = as.numeric(id)) %>%
  filter(
    area_id %in% paste0("MWI_4_", 1:32, "_demo"),
    sex %in% c("male", "female"),
    age_group %in%
      c(
        "Y000_004", "Y005_009", "Y010_014", "Y015_019", "Y020_024", "Y025_029",
        "Y025_034", "Y030_034", "Y035_039", "Y040_044", "Y045_049", "Y050_054",
        "Y055_059", "Y060_064", "Y065_069", "Y070_074", "Y075_079", "Y080_999"
      )
  )

tmbstan_second90 <- apply(tmbstan$mcmc$sample$alpha_t1_out[mf_out_fine$id, ], 1, function(x) sum(x > 0.81) / length(x))
tmb_second90 <- apply(tmb$fit$sample$alpha_t1_out[mf_out_fine$id, ], 1, function(x) sum(x > 0.81) / length(x))
aghq_second90 <- apply(aghq$quad$sample$alpha_t1_out[mf_out_fine$id, ], 1, function(x) sum(x > 0.81) / length(x))

Plot the results from the different inference methods:

n <- length(tmbstan_second90)
stopifnot(length(tmb_second90) == n)
stopifnot(length(aghq_second90) == n)

df_second90 <- data.frame(
  prob = c(tmbstan_second90, tmb_second90, aghq_second90),
  method = c(rep("tmbstan", times = n), rep("TMB", times = n), rep("aghq", times = n)),
  index = c(1:n, 1:n, 1:n)
)

df_wide_second90 <- pivot_wider(df_second90, names_from = method, values_from = prob)

plot1 <- ggplot(df_wide_second90, aes(x = tmbstan, y = TMB)) +
  geom_point(shape = 1, alpha = 0.6, col = "#999999") +
  coord_fixed(ratio = 1) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
  labs(x = "NUTS", y = "TMB") +
  theme_minimal()

plot2 <- ggplot(df_wide_second90, aes(x = tmbstan, y = aghq)) +
  geom_point(shape = 1, alpha = 0.6, col = "#999999") +
  coord_fixed(ratio = 1) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
  labs(x = "NUTS", y = "PCA-AGHQ") +
  theme_minimal()

plot1 + plot2

Appears that: 1) systematic overestimation of probability by both approximate inference methods 2) aghq does better at not underestimating near zero.

What is the correlation?

cor(tmbstan_second90, tmb_second90)

## [1] 0.9896333

cor(tmbstan_second90, aghq_second90)

## [1] 0.9918411

And what about the mean square error and root mean square error?

(mse_tmb_second90 <- mean((tmbstan_second90 - tmb_second90)^2))

## [1] 0.001634993

(mse_aghq_second90 <- mean((tmbstan_second90 - aghq_second90)^2))

## [1] 0.001355959

(rmse_tmb_second90 <- sqrt(mse_tmb_second90))

## [1] 0.04043505

(rmse_aghq_second90 <- sqrt(mse_aghq_second90))

## [1] 0.03682334

mse_change <- round(100 * (mse_aghq_second90 - mse_tmb_second90) / mse_tmb_second90)
rmse_change <- round(100 * (rmse_aghq_second90 - rmse_tmb_second90) / rmse_tmb_second90)

There is a change of -17% in the MSE moving from TMB to aghq, and a change of -9% in the RMSE.

Which strata have exceedance probabilities which are particularly poorly estimated? First for TMB:

tmb_fails_second90 <- df_wide_second90 %>%
  mutate(diff = tmbstan - TMB) %>%
  filter(diff > 0.1) %>%
  pull(index)

And for aghq:

aghq_fails_second90 <- df_wide_second90 %>%
  mutate(diff = tmbstan - aghq) %>%
  filter(diff > 0.1) %>%
  pull(index)

There are 39 uniquely badly estimated by TMB, 16 uniquely badly estimated by aghq, and 8 badly estimated by both. Which strata do these correspond to?

unique(mf_out_fine[tmb_fails_second90, ]$area_id)

##  [1] "MWI_4_1_demo"  "MWI_4_10_demo" "MWI_4_11_demo" "MWI_4_12_demo" "MWI_4_13_demo" "MWI_4_14_demo"
##  [7] "MWI_4_17_demo" "MWI_4_18_demo" "MWI_4_19_demo" "MWI_4_21_demo" "MWI_4_23_demo" "MWI_4_25_demo"
## [13] "MWI_4_29_demo" "MWI_4_3_demo"  "MWI_4_30_demo" "MWI_4_31_demo" "MWI_4_32_demo" "MWI_4_4_demo" 
## [19] "MWI_4_7_demo"  "MWI_4_8_demo"

unique(mf_out_fine[tmb_fails_second90, ]$sex)

## [1] "female"

unique(mf_out_fine[tmb_fails_second90, ]$age_group)

## [1] "Y045_049" "Y080_999" "Y060_064" "Y065_069" "Y040_044" "Y070_074" "Y075_079" "Y025_034" "Y055_059"

unique(mf_out_fine[aghq_fails_second90, ]$area_id)

##  [1] "MWI_4_11_demo" "MWI_4_13_demo" "MWI_4_17_demo" "MWI_4_19_demo" "MWI_4_21_demo" "MWI_4_23_demo"
##  [7] "MWI_4_25_demo" "MWI_4_31_demo" "MWI_4_4_demo"  "MWI_4_7_demo"

unique(mf_out_fine[aghq_fails_second90, ]$sex)

## [1] "female"

unique(mf_out_fine[aghq_fails_second90, ]$age_group)

## [1] "Y045_049" "Y040_044" "Y050_054" "Y055_059" "Y060_064" "Y065_069" "Y030_034" "Y025_034" "Y080_999"

Add colour by sex to the above plots:

df_wide_second90 <- df_wide_second90 %>%
  left_join(
    mf_out_fine %>%
      tibble::rowid_to_column("index"),
    by = "index"
  )

plot1 <- ggplot(df_wide_second90, aes(x = tmbstan, y = TMB, col = sex)) +
  geom_point(shape = 1, alpha = 0.6) +
  coord_fixed(ratio = 1) +
  scale_color_manual(values = c("#E69F00", "#999999")) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
  labs(col = "Sex") +
  guides(col = "none") +
  labs(x = "NUTS", y = "TMB") +
  theme_minimal()

plot2 <- ggplot(df_wide_second90, aes(x = tmbstan, y = aghq, col = sex)) +
  geom_point(shape = 1, alpha = 0.6) +
  coord_fixed(ratio = 1) +
  scale_colour_manual(values = c("#E69F00", "#999999"), labels = c("Female", "Male")) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
  guides(colour = guide_legend(override.aes = list(alpha = 1))) +
  labs(x = "NUTS", y = "PCA-AGHQ", col = "Sex") +
  theme_minimal()

ggsave("second90.png", plot1 + plot2, h = 4, w = 6.25)

plot1 + plot2

Check the RMSE stratified by sex:

df_wide_second90 %>%
  group_by(sex) %>%
  summarise(
    "RMSE TMB (percent)" = 100 * sqrt(mean((tmbstan - TMB)^2)),
    "RMSE aghq (percent)" = 100 * sqrt(mean((tmbstan - aghq)^2))
  )

3 High incidence strata

Above 1% incidence is considered high, and could be met with intensified interventions.

Very high (>3%)
High (1-3%)
Moderate (0.3-1%)
Low (<0.3%)

tmbstan_1inc <- apply(tmbstan$mcmc$sample$lambda_t1_out[mf_out_fine$id, ], 1, function(x) sum(x > 0.01) / length(x))
tmb_1inc <- apply(tmb$fit$sample$lambda_t1_out[mf_out_fine$id, ], 1, function(x) sum(x > 0.01) / length(x))
aghq_1inc <- apply(aghq$quad$sample$lambda_t1_out[mf_out_fine$id, ], 1, function(x) sum(x > 0.01) / length(x))

n <- length(tmbstan_1inc)
stopifnot(length(tmb_1inc) == n)
stopifnot(length(aghq_1inc) == n)

df_1inc <- data.frame(
  prob = c(tmbstan_1inc, tmb_1inc, aghq_1inc),
  method = c(rep("tmbstan", times = n), rep("TMB", times = n), rep("aghq", times = n)),
  index = c(1:n, 1:n, 1:n)
)

df_wide_1inc <- pivot_wider(df_1inc, names_from = method, values_from = prob)

df_wide_1inc <- df_wide_1inc %>%
  left_join(
    mf_out_fine %>%
      tibble::rowid_to_column("index"),
    by = "index"
  )

plot1 <- ggplot(df_wide_1inc, aes(x = tmbstan, y = TMB)) +
  geom_point(shape = 1, alpha = 0.6, col = "#999999") +
  coord_fixed(ratio = 1) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
  labs(x = "NUTS", y = "TMB") +
  theme_minimal()

plot2 <- ggplot(df_wide_1inc, aes(x = tmbstan, y = aghq)) +
  geom_point(shape = 1, alpha = 0.6, col = "#999999") +
  coord_fixed(ratio = 1) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
  labs(x = "NUTS", y = "PCA-AGHQ") +
  theme_minimal()

ggsave("1inc.png", plot1 + plot2, h = 4, w = 6.25)

plot1 + plot2

cor(tmbstan_1inc, tmb_1inc)

## [1] 0.9884578

cor(tmbstan_1inc, aghq_1inc)

## [1] 0.9865951

mse_tmb_1inc <- mean((tmbstan_1inc - tmb_1inc)^2)
mse_aghq_1inc <- mean((tmbstan_1inc - aghq_1inc)^2)
round(100 * (mse_aghq_1inc - mse_tmb_1inc) / mse_tmb_1inc)

## [1] -5

rmse_tmb_1inc <- sqrt(mse_tmb_1inc)
rmse_aghq_1inc <- sqrt(mse_aghq_1inc)
round(100 * (rmse_aghq_1inc - rmse_tmb_1inc) / rmse_tmb_1inc)

## [1] -2

4 Unmet treatment need

Number of people needing treatment is a function of ART coverage, HIV prevalence and population size. What summaries of unmet treatment need might be important to calculate?

#' To-do!

5 Plot for manuscript

Combine the above plots into one nice one. ABF (always be facetting):

df_wide <- bind_rows(
  df_wide_second90 %>%
    mutate(type = "Second 90"),
  df_wide_1inc %>%
    mutate(type = "High incidence")
)

write_csv(df_wide, "exceedance.csv")

fct_reorg <- function(fac, ...) {
  fct_recode(fct_relevel(fac, ...), ...)
}

exceedance_manuscript_plot <- df_wide %>%
  pivot_longer(cols = c("TMB", "aghq"), names_to = "method", values_to = "estimate") %>%
  mutate(
    method = fct_reorg(method, "TMB" = "TMB", "PCA-AGHQ" = "aghq"),
    type = fct_relevel(type, "Second 90", "High incidence")
  ) %>%
  ggplot(aes(x = tmbstan, y = estimate, col = sex)) +
    geom_point(shape = 1, alpha = 0.6) +
    coord_fixed(ratio = 1) +
    scale_colour_manual(values = c("#56B4E9", "#999999"), labels = c("Female", "Male")) +
    geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
    facet_grid(type ~ method) +
    labs(x = "NUTS", y = "", col = "Sex") +
    theme_minimal() +
    theme(
      legend.position = "bottom"
    )

exceedance_manuscript_plot

ggsave("exceedance.png", exceedance_manuscript_plot, h = 6.5, w = 6.25, bg = "white")

6 Excursion and contour uncertainty regions

Based on Bolin and Lindgren (2015).

\(D_m = \{s: P(x(s) > u) \geq 1 - \alpha\}\) runs into the problem of multiple hypothesis testing
Care about \(P(x(s) > u, s \in D_m)\) which is smaller than \(1 - \alpha\)
You can use the marginals and adjust the threshold to control type 1 error, false discovery rate, or posteior probabilites
Bolin and Lindgren use sequential importance sampling to estimate the joint probabilities
\(x(s) > u\) is called a positive excursion set, and \(x(s) < u\) is called a negative excursion set
The excursions package can calculate excursion sets, contour credible regions and level avoiding sets, excursion functions, contour maps and their quality measures, and simultaneous confidence bands

Bolin, David, and Finn Lindgren. 2015. “Excursion and Contour Uncertainty Regions for Latent Gaussian Models.” Journal of the Royal Statistical Society: Series B: Statistical Methodology, 85–106.

Exceedance probability comparison for the simplified Naomi model