Abstract
Background
Task
Findings
Next steps
source("functions.R")
set.seed(2)
# Time-step duration
dt <- 0.05
# Final time point
end <- 50
mod <- stoch_seir_dust$new(
beta = 2,
gamma = 1,
sigma = 1,
population_size = 10^6,
start_infections = 10,
capacity_per_flight = 200, # Capacity of a Boeing 737 is around 200
num_flights = 50,
num_flightsAB = 25,
dt = dt,
virus_shed = 10,
non_virus_shed = 100000,
volume_wastewater = 500 * 10^6, # 500 litres = 500 * 1000 ml
shedding_freq = 1,
bias = 1,
m_tot = 10000000
)
output <- mod$run(1:(end / dt))
transformed_output <- mod$transform_variables(output)
extract_vars <- function(output, vars) {
state_df <- do.call(cbind, output[vars]) %>%
as.data.frame() %>%
pivot_longer(
-time,
names_to = "var",
values_to = "value"
)
}
state_df <- extract_vars(
output = transformed_output,
vars = c("time", "S", "E", "I", "R")
)
ggplot(state_df, aes(x = time, y = value, colour = var)) +
geom_line() +
labs(x = "Time (days)", y = "Number of individuals in each state", col = "State") +
theme_minimal()
num_flightsAB <- mod$contents()[["num_flightsAB"]]
indiv_flight_infections <- data.frame(time = transformed_output$time, transformed_output$n_inf_specific_flightABOut)
colnames(indiv_flight_infections) <- c("time", paste0("flight", 1:num_flightsAB))
airplane_infections_df <- indiv_flight_infections %>%
pivot_longer(
-time,
names_to = "flight",
values_to = "infections",
names_prefix = "flight",
names_transform = as.numeric
) %>%
mutate(
day_interval = cut(time, breaks = max(time)),
day = cut_limit(day_interval, type = "upper")
) %>%
group_by(flight, day) %>%
summarise(infections = sum(infections))
ggplot(airplane_infections_df, aes(x = day, y = infections, group = day)) +
geom_boxplot(fill = NA, outlier.colour = NA, alpha = 0.1) +
geom_jitter(size = 0.5, width = 0.15, alpha = 0.5) +
theme(legend.position = "none") +
labs(x = "Time (days)", y = "Number of infections on each flight") +
theme_minimal()
ggplot(airplane_infections_df, aes(x = day, y = infections, group = flight)) +
geom_line(alpha = 0.5) +
theme(legend.position = "none") +
labs(x = "Time (days)", y = "Number of infections on each flight") +
theme_minimal()
indiv_flight_reads <- data.frame(time = transformed_output$time, transformed_output$m_i_Out)
colnames(indiv_flight_reads) <- c("time", paste0("flight", 1:num_flightsAB))
airplane_reads_df <- indiv_flight_reads %>%
pivot_longer(
-time,
names_to = "flight",
values_to = "reads",
names_prefix = "flight",
names_transform = as.numeric
) %>%
mutate(
day_interval = cut(time, breaks = max(time)),
day = cut_limit(day_interval, type = "upper")
) %>%
group_by(flight, day) %>%
summarise(reads = sum(reads))
ggplot(airplane_reads_df, aes(x = day, y = reads, group = day)) +
geom_boxplot(fill = NA, outlier.colour = NA, alpha = 0.1) +
geom_jitter(size = 0.5, width = 0.15, alpha = 0.5) +
theme(legend.position = "none") +
labs(x = "Time (days)", y = "Number of reads from each flight's wastewater") +
theme_minimal()
ggplot(airplane_reads_df, aes(x = day, y = reads, group = flight)) +
geom_line(alpha = 0.5) +
theme(legend.position = "none") +
labs(x = "Time (days)", y = "Number of reads from each flight's wastewater") +
theme_minimal()
indiv_flight_reads <- data.frame(time = transformed_output$time, transformed_output$stoch_m_i_pois_Out)
colnames(indiv_flight_reads) <- c("time", paste0("flight", 1:num_flightsAB))
airplane_reads_df <- indiv_flight_reads %>%
pivot_longer(
-time,
names_to = "flight",
values_to = "reads",
names_prefix = "flight",
names_transform = as.numeric
) %>%
mutate(
day_interval = cut(time, breaks = max(time)),
day = cut_limit(day_interval, type = "upper")
) %>%
group_by(flight, day) %>%
summarise(reads = sum(reads))
ggplot(airplane_reads_df, aes(x = day, y = reads, group = day)) +
geom_boxplot(fill = NA, outlier.colour = NA, alpha = 0.1) +
geom_jitter(size = 0.5, width = 0.15, alpha = 0.5) +
theme(legend.position = "none") +
labs(x = "Time (days)", y = "Number of reads from each flight's wastewater") +
theme_minimal()
ggplot(airplane_reads_df, aes(x = day, y = reads, group = flight)) +
geom_line(alpha = 0.5) +
theme(legend.position = "none") +
labs(x = "Time (days)", y = "Number of reads from each flight's wastewater") +
theme_minimal()
We start by creating an experimental design. The particular
experimental conditions can be varied in order to test different
hypotheses. Here we vary the m_tot
parameter. We use a
replicate
parameter taking values 1 to
n_replicates
to repeat each experimental condition a number
of times as the results are stochastic.
n_replicates <- 10
pars <- expand.grid(
"beta" = 2,
"gamma" = 1,
"sigma" = 1,
"population_size" = 10^6,
"start_infections" = 10,
"capacity_per_flight" = 200,
"num_flights" = 50,
"num_flightsAB" = 25,
"dt" = 0.05,
"virus_shed" = 10,
"non_virus_shed" = 10^5,
"volume_wastewater" = 500 * 10^6,
"shedding_freq" = 1,
"bias" = 1,
"m_tot" = c(10^4, 10^5, 10^6, 10^7, 10^8, 10^9),
"replicate" = 1:n_replicates
)
pars
We create a function run_simulation
which takes as input
all of the parameters in pars
(with the exception of
replicate
) then:
odin
model with those parameter valuesrun_simulation <- function(beta, gamma, sigma, population_size, start_infections,
capacity_per_flight, num_flights, num_flightsAB, dt,
virus_shed, non_virus_shed, volume_wastewater,
shedding_freq, bias, m_tot) {
# Create odin model
mod <- stoch_seir_dust$new(
beta = beta,
gamma = gamma,
sigma = sigma,
population_size = population_size,
start_infections = start_infections,
capacity_per_flight = capacity_per_flight,
num_flights = num_flights,
num_flightsAB = num_flightsAB,
dt = dt,
virus_shed = virus_shed,
non_virus_shed = non_virus_shed,
volume_wastewater = volume_wastewater,
shedding_freq = shedding_freq,
bias = bias,
m_tot = m_tot
)
end <- 50
output <- mod$run(1:(end / dt))
transformed_output <- mod$transform_variables(output)
# Output number of reads from each flight (stochastic)
indiv_flight_reads <- data.frame(time = transformed_output$time, transformed_output$stoch_m_i_pois_Out)
colnames(indiv_flight_reads) <- c("time", paste0("flight", 1:num_flightsAB))
airplane_reads_df <- indiv_flight_reads %>%
pivot_longer(
-time,
names_to = "flight",
values_to = "reads",
names_prefix = "flight",
names_transform = as.numeric
) %>%
mutate(
day_interval = cut(time, breaks = max(time)),
day = cut_limit(day_interval, type = "upper")
) %>%
group_by(flight, day) %>%
summarise(reads = sum(reads))
return(airplane_reads_df)
}
The output of run_simulation
is a dataframe with columns
for flight
, day
and reads
that
looks like:
head(airplane_reads_df)
As a first version of a “detection” algorithm, we could use the first day that there are greater than \(n\) reads aggregated over all flights.
first_day_n_reads <- function(df, n = 10) {
df %>%
group_by(day) %>%
summarise(reads = sum(reads)) %>%
filter(reads >= 10) %>%
pull(day) %>%
min()
}
Using the dataframe pars
we can now use
purrr::pmap
to mutate
a new list column
airplane_reads
containing the results of the function
run_simulate
. We then take airplane_reads
and
mutate
a numeric column reads_heuristic
using
purrr::map_dbl
. The result out
can then be
saved with saveRDS
.
out <- pars %>%
dplyr::mutate(
airplane_reads = purrr::pmap(select(., -replicate), run_simulation),
reads_heuristic = purrr::map_dbl(airplane_reads, first_day_n_reads, n = 10)
)
saveRDS(out, "airplane-simulations.rds")
By looking at the m_tot
and reads_heuristic
column, we can start to answer relevant questions like “what is the
effect of more sequencing effort on time to detection”?
out %>%
select(m_tot, reads_heuristic) %>%
ggplot(aes(x = forcats::as_factor(m_tot), y = reads_heuristic, col = log10(m_tot))) +
geom_boxplot(fill = NA, outlier.colour = NA, alpha = 0.1) +
geom_jitter(size = 0.5, width = 0.15, alpha = 0.5) +
labs(x = "Total sequencing amount", y = "First day with above 10 reads of pathogen of interest", col = "log10(Total sequencing amount)") +
theme_minimal()