Skip to content

add more flexibility with delays #305

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 42 commits into from
Nov 18, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
a94f753
add more flexibility with delays
sbfnk Jul 21, 2022
404d04c
update man pages
sbfnk Jul 21, 2022
7bcf7a6
check if truncation mean is uncertain
sbfnk Jul 25, 2022
3553b49
start on static delays
sbfnk Sep 11, 2022
f075f1d
add generation time and truncation opts
sbfnk Sep 11, 2022
2c51993
update trunc_opts docs
sbfnk Sep 11, 2022
0479e32
add NEWS
sbfnk Sep 11, 2022
381ed5a
run documentation update
sbfnk Sep 11, 2022
c8f9c71
mention doc update in NEWS
sbfnk Sep 11, 2022
8e9c79d
optimise discrete pmfs
sbfnk Sep 10, 2022
cc31fc9
explicit zero for first element of gt
sbfnk Sep 11, 2022
564ac99
remove deprecated discretised gamma pmf
sbfnk Sep 11, 2022
e01a7a4
fix link
sbfnk Sep 11, 2022
84dd939
only revert after delta pmf
sbfnk Sep 11, 2022
33277f4
simplify discrete delta pmf
sbfnk Sep 11, 2022
ec536c5
implement fixed delays in stan model
sbfnk Sep 13, 2022
d2c00b7
clarify NEWS
sbfnk Sep 15, 2022
56c33ec
fix check for gt uncertainty
sbfnk Sep 16, 2022
8ae1cbf
create `gt_fixed` with the correct type
sbfnk Sep 19, 2022
63ba0ed
update simulation model for flexible delays
sbfnk Sep 19, 2022
93aa345
only calculate max fixed delay if we can
sbfnk Sep 19, 2022
f922fcf
use correct sd in gt
sbfnk Sep 30, 2022
c9b52d2
work out pmf parameters
sbfnk Sep 30, 2022
35a5838
don't reverse delay pmf
sbfnk Sep 30, 2022
f49443d
update docs
sbfnk Sep 30, 2022
8a0b944
use log parameters in lognormal
sbfnk Sep 30, 2022
32ba0b0
fix truncation parameters
sbfnk Sep 30, 2022
243cd99
fix no delay case
sbfnk Oct 5, 2022
0f79151
document missing options
sbfnk Oct 5, 2022
0361d02
adapt test to new function syntax
sbfnk Oct 5, 2022
ed003a8
change expected tests results (rounding differences)
sbfnk Oct 5, 2022
3f01a21
simplify treatment of delay distributions
sbfnk Oct 31, 2022
7c6bfa3
fixes following more thorough testing
sbfnk Nov 1, 2022
20593d0
more sensible default for max of distribution
sbfnk Nov 15, 2022
c3fc270
fix tests
sbfnk Nov 15, 2022
6b25de6
flexible generation time weights
sbfnk Nov 15, 2022
91a006c
size -> linewidth
sbfnk Nov 15, 2022
c353f3a
doc update
sbfnk Nov 15, 2022
6d031ed
bring size back for geom_point
sbfnk Nov 15, 2022
3d07292
update snaps
sbfnk Nov 15, 2022
aaf2d1e
update documentation
sbfnk Nov 15, 2022
0ede0b4
fix examples
sbfnk Nov 16, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
add generation time and truncation opts
  • Loading branch information
sbfnk committed Oct 31, 2022
commit f075f1d1b240c2ec9f8ef46f1cbce5e3711a1670
50 changes: 2 additions & 48 deletions R/create.R
Original file line number Diff line number Diff line change
Expand Up @@ -398,54 +398,8 @@ create_stan_data <- function(reported_cases, generation_time,
## make sure we have at least max_gt seeding time
delays$seeding_time <- max(delays$seeding_time, generation_time$max)

## complete generation time parameters if not all are given
if (is.null(generation_time)) {
generation_time <- list(mean = 1)
}
for (param in c("mean_sd", "sd", "sd_sd")) {
if (!(param %in% names(generation_time))) generation_time[[param]] <- 0
}
## check if generation time is fixed
if (generation_time$sd == 0 && generation_time$sd_sd == 0) {
if (generation_time$mean_sd > 0) {
stop("Error in generation time definition: if sd_mean is 0 and ",
"sd_sd is 0 then mean_sd must be 0, too.")
}
if ("max" %in% names(generation_time)) {
if (generation_time$max != generation_time$mean) {
stop("Error in generation time defintion: if max_gt(",
generation_time$max_gt,
") is given it must be equal to the mean if it is fixed (",
generation_time$mean,
")")
}
} else {
generation_time$max_gt <- generation_time$mean
}
if (round(generation_time$mean) != generation_time$mean) {
stop(("Error: if using a fixed generation time it must be integer"))
}
}

## check if delay is fixed
for (i in seq_len(delays$delays)) {
if (delays$delay_sd_mean[i] == 0 && delays$delay_sd_sd[i] == 0) {
if (delays$delay_mean_sd[i] > 0) {
stop("Error in delay distribution definition: if sd_mean is 0 and ",
"sd_sd is 0 then mean_sd must be 0, too.")
}
if (delays$max_delay[i] != delays$delay_mean_mean[i]) {
stop("Error in delay defintion: if max_delay(",
delays$max_delay[i],
") is given it must be equal to the mean if it is fixed (",
delays$delay_mean_mean[i],
")")
}
if (round(delays$delay_mean_mean[i]) != delays$delay_mean_mean[i]) {
stop(("Error: if using a fixed delay it must be integer"))
}
}
}
## for backwards compatibility call generation_time_opts internally
generation_time <- do.call(generation_time_opts, generation_time)

cases <- reported_cases[(delays$seeding_time + 1):(.N - horizon)]$confirm

Expand Down
59 changes: 26 additions & 33 deletions R/estimate_infections.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,9 @@
#'
#' @param reported_cases A data frame of confirmed cases (confirm) by date
#' (date). confirm must be integer and date must be in date format.
#' @param generation_time The generation time distribution as parameters
#' of a discretised (upper-)truncated gamma delay
#' distributions, given as a list with the following parameters:
#' "mean", the mean generation time;
#' "mean_sd", the standard deviation in the estimate of "mean" parameter
#' (assumed normally distributed); "sd", the standard
#' deviation of the generation time; "sd_sd", the standard
#' deviation of the estimate of the "sd" parameter (assumed normally
#' distributed) sd_sd"; and "max", the maximum generation time.
#' The "mean" parameter is mandatory; if it is the only one given it represents
#' a fixed generation time and must be integer-valued; if "sd" is also
#' given and greater than 0 this represents a generation time distribution and
#' "mean" can be real-valued. In that case, "max" also needs to be given.
#' The "mean_sd" and "sd_sd" parameters should be provided to represent
#' uncertainty in the parameter values of the delay but are optional.
#' If this is set to NULL, a fixed generation time of 1 will be used, modelling
#' infections as an AR(1) process.
#' @param generation_time A call to `generation_time_opts()` defining the
#' generation time distribution used. For backwards compatibility a list of
#' summary parameters can also be passed.
#' @param delays A call to `delay_opts()` defining delay distributions and
#' options. See the documentation of `delay_opts()` and the examples below for
#' details.
Expand Down Expand Up @@ -70,19 +56,22 @@
#' reported_cases <- example_confirmed[1:60]
#'
#' # set up example generation time
#' generation_time <- get_generation_time(disease = "SARS-CoV-2", source = "ganyani")
#' generation_time <- generation_time_opts(
#' disease = "SARS-CoV-2", source = "ganyani", fixed = TRUE
#' )
#' # set delays between infection and case report
#' incubation_period <- get_incubation_period(disease = "SARS-CoV-2", source = "lauer")
#' incubation_period <- get_incubation_period(
#' disease = "SARS-CoV-2", source = "lauer"
#' )
#' reporting_delay <- list(
#' mean = convert_to_logmean(2, 1), mean_sd = 0.1,
#' sd = convert_to_logsd(2, 1), sd_sd = 0.1, max = 10
#' mean = convert_to_logmean(2, 1), mean_sd = 0,
#' sd = convert_to_logsd(2, 1), sd_sd = 0, max = 10
#' )
#'
#' # default setting
#' # here we assume that the observed data is truncated by the same delay as
#' # default settings but assuming that delays are fixed rather than uncertain
#' def <- estimate_infections(reported_cases,
#' generation_time = generation_time,
#' delays = delay_opts(incubation_period, reporting_delay),
#' delays = delay_opts(incubation_period, reporting_delay, fixed = TRUE),
#' rt = rt_opts(prior = list(mean = 2, sd = 0.1)),
#' stan = stan_opts(control = list(adapt_delta = 0.95))
#' )
Expand All @@ -95,7 +84,7 @@
#' # These settings are an area of active research. See ?gp_opts for details.
#' agp <- estimate_infections(reported_cases,
#' generation_time = generation_time,
#' delays = delay_opts(incubation_period, reporting_delay),
#' delays = delay_opts(incubation_period, reporting_delay, fixed = TRUE),
#' rt = rt_opts(prior = list(mean = 2, sd = 0.1)),
#' gp = gp_opts(ls_min = 10, basis_prop = 0.1),
#' stan = stan_opts(control = list(adapt_delta = 0.95))
Expand All @@ -106,7 +95,7 @@
#' # Adjusting for future susceptible depletion
#' dep <- estimate_infections(reported_cases,
#' generation_time = generation_time,
#' delays = delay_opts(incubation_period, reporting_delay),
#' delays = delay_opts(incubation_period, reporting_delay, fixed = TRUE),
#' rt = rt_opts(
#' prior = list(mean = 2, sd = 0.1),
#' pop = 1000000, future = "latest"
Expand All @@ -118,15 +107,15 @@
#'
#' # Adjusting for truncation of the most recent data
#' # See estimate_truncation for an approach to estimating this from data
#' trunc_dist <- list(
#' trunc_dist <- trunc_opts(
#' mean = convert_to_logmean(0.5, 0.5), mean_sd = 0.1,
#' sd = convert_to_logsd(0.5, 0.5), sd_sd = 0.1,
#' max = 3
#' )
#' trunc <- estimate_infections(reported_cases,
#' generation_time = generation_time,
#' delays = delay_opts(incubation_period, reporting_delay),
#' truncation = trunc_opts(trunc_dist),
#' delays = delay_opts(incubation_period, reporting_delay, fixed = TRUE),
#' truncation = trunc_dist
#' rt = rt_opts(prior = list(mean = 2, sd = 0.1)),
#' gp = gp_opts(ls_min = 10, basis_prop = 0.1),
#' stan = stan_opts(control = list(adapt_delta = 0.95))
Expand All @@ -141,7 +130,7 @@
#' # other options
#' backcalc <- estimate_infections(reported_cases,
#' generation_time = generation_time,
#' delays = delay_opts(incubation_period, reporting_delay),
#' delays = delay_opts(incubation_period, reporting_delay, fixed = TRUE),
#' rt = NULL, backcalc = backcalc_opts(),
#' obs = obs_opts(scale = list(mean = 0.4, sd = 0.05)),
#' horizon = 0
Expand All @@ -151,7 +140,7 @@
#' # Rt projected into the future using the Gaussian process
#' project_rt <- estimate_infections(reported_cases,
#' generation_time = generation_time,
#' delays = delay_opts(incubation_period, reporting_delay),
#' delays = delay_opts(incubation_period, reporting_delay, fixed = TRUE),
#' rt = rt_opts(
#' prior = list(mean = 2, sd = 0.1),
#' future = "project"
Expand All @@ -163,12 +152,13 @@
#' snapshot_cases <- example_confirmed[80:130]
#' snapshot <- estimate_infections(snapshot_cases,
#' generation_time = generation_time,
#' delays = delay_opts(incubation_period, reporting_delay),
#' delays = delay_opts(incubation_period, reporting_delay, fixed = TRUE),
#' rt = rt_opts(prior = list(mean = 1, sd = 0.1))
#' )
#' plot(snapshot)
#'
#' # stationary Rt assumption (likely to provide biased real-time estimates)
#' # with uncertain reporting delays
#' stat <- estimate_infections(reported_cases,
#' generation_time = generation_time,
#' delays = delay_opts(incubation_period, reporting_delay),
Expand All @@ -177,6 +167,7 @@
#' plot(stat)
#'
#' # no gaussian process (i.e fixed Rt assuming no breakpoints)
#' # with uncertain reporting delays
#' fixed <- estimate_infections(reported_cases,
#' generation_time = generation_time,
#' delays = delay_opts(incubation_period, reporting_delay),
Expand All @@ -189,6 +180,7 @@
#' plot(no_delay)
#'
#' # break point but otherwise static Rt
#' # with uncertain reporting delays
#' bp_cases <- data.table::copy(reported_cases)
#' bp_cases <- bp_cases[, breakpoint := ifelse(date == as.Date("2020-03-16"), 1, 0)]
#' bkp <- estimate_infections(bp_cases,
Expand All @@ -202,6 +194,7 @@
#' plot(bkp)
#'
#' # weekly random walk
#' # with uncertain reporting delays
#' rw <- estimate_infections(reported_cases,
#' generation_time = generation_time,
#' delays = delay_opts(incubation_period, reporting_delay),
Expand All @@ -216,7 +209,7 @@
#' options(old_opts)
#' }
estimate_infections <- function(reported_cases,
generation_time,
generation_time = generation_time_opts(),
delays = delay_opts(),
truncation = trunc_opts(),
rt = rt_opts(),
Expand Down
129 changes: 100 additions & 29 deletions R/opts.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,77 @@
#' Generation Time Distribution Options
#'
#' @description `r lifecycle::badge("stable")`
#' Returns generation time parameters in a format for lower level model use.
#' @param mean Numeric, defaults to 1. If the only non-zero summary parameter
#' then this is the fixed interval of the generation time. If the `sd` is
#' non-zero then this is the mean of a gamma distribution.
#' @param sd Numeric, defaults to 0. Sets the standard deviation for a gamma
#' distribution generation time.
#' @param mean_sd Numeric, defaults to 0. The prior uncertainty for the mean
#' of the generation time.
#' @param sd_sd Numeric, defaults to 0. The prior uncertainty for the standard
#' deviation of the generation time.
#' @param ... Delay distributions as a list with the following parameters:
#' "mean", "mean_sd", "sd_mean", "sd_sd", and "max" defining a truncated log
#' normal (with all parameters except for max defined in logged form).
#' @seealso convert_to_logmean convert_to_logsd bootstrapped_dist_fit
#' @return A list summarising the input delay distributions.
#' @export
#' @examples
#' # default settings with a fixed generation time
#' generation_time_opts()
#'
#' # A fixed gamma distributed generation time
#' generation_time_opts(mean = 3, sd = 2)
#'
#' # An uncertain gamma distributed generation time
#' generation_time_opts(mean = 3, sd = 2, mean_sd = 1, sd_sd = 0.5)
generation_time_opts <- function(mean = 1, mean_sd = 0, sd = 0, sd_sd = 0,
max = 15, fixed = FALSE, disease, source) {
if (missing(disease) & missing(source)) {
gt <- list(
gt_mean = mean,
gt_mean_sd = mean_sd,
gt_sd = sd,
gt_sd_sd = sd_sd,
gt_max = max,
gt_fixed = fixed
)
}else{
gt <- get_generation_time(
disease = disease, source = source, max_value = max
)
gt$gt_fixed <- fixed
}


## check if generation time is fixed
if (gt$gt_sd == 0 && gt$gt_sd_sd == 0) {
if (gt$gt_mean %% 1 != 0) {
stop(
"When the generation time is set to a constant it must be an integer"
)
}
if (gt$gt_max != gt$gt_mean) {
gt$gt_max <- gt$gt_mean
}
if (any(gt$gt_mean_sd > 0, gt$gt_sd_sd > 0)) {
stop("Error in generation time definition: if sd_mean is 0 and ",
"sd_sd is 0 then mean_sd must be 0, too.")
}
gt$gt_pmf <- c(1, rep(0, gt$gt_max - 1))
}else{
gt$pmf <- discretised_gamma_pmf(
mean = gt$gt_mean, sd = gt$gt_sd, max_d = gt$gt_max, zero_pad = 1,
reverse = TRUE
)
}
if (gt$gt_sd_sd == 0 & gt$gt_mean_sd == 0) {
gt$fixed <- TRUE
}
return(gt)
}

#' Delay Distribution Options
#'
#' @description `r lifecycle::badge("stable")`
Expand All @@ -17,6 +91,9 @@
#' real-valued. In that case, "max" also needs to be given.
#' The "mean_sd" and "sd_sd" parameters should be provided to represent
#' uncertainty in the parameter values of the delay but are optional.
#' @param fixed Logical, defaults to `FALSE`. Should reporting delays be treated
#' as coming from fixed (vs uncertain) distributions. Making this simplification
#' drastically reduces compute requirements.
#' @seealso convert_to_logmean convert_to_logsd bootstrapped_dist_fit
#' @return A list summarising the input delay distributions.
#' @export
Expand Down Expand Up @@ -82,41 +159,35 @@ delay_opts <- function(..., fixed = FALSE) {
#' Truncation Distribution Options
#'
#' @description `r lifecycle::badge("stable")`
#' Returns a truncation distribution formatted for usage by downstream functions. See
#' `estimate_truncation` for an approach to estimate this distribution.
#' @param dist A list defining the truncation distribution as parameters of a
#' discretised (upper-)truncated lognormal density distribution; defaults to
#' `NULL` in which case no truncation is used. Otherwise it defines the
#' truncation distributions as a list with the following parameters:
#' "mean", the mu parameter or mean of the natural logarithm of the truncation;
#' "mean_sd", the standard deviation in the estimate of "mean" parameter
#' (assumed normally distributed); "sd", the sigma parameter or standard
#' deviation of the natural logarithm of the truncation; "sd_sd", the standard
#' deviation of the estimate of the "sd" parameter (assumed normally
#' distributed) sd_sd"; and "max", the maximum truncation.
#' The "mean" and "sd" and "max" parameters are mandatory;
#' the "mean_sd" and "sd_sd"
#' parameters should be provided to represent
#' uncertainty in the parameter values of the delay but are optional.
#' Returns a log-normal truncation distribution formatted for usage by
#' downstream functions. See `estimate_truncation()` for an approach to
#' estimate this distribution.
#' @param mean Numeric, defaults to 0. Mean on the log scale of the truncation
#' distribution
#' @param sd Numeric, defaults to 0. Sets the standard deviation for the log
#' normal truncation distribution
#' @param mean_sd Numeric, defaults to 0. The prior uncertainty for the log
#' normal truncation distribution.
#' @param sd_sd Numeric, defaults to 0. The prior uncertainty for the standard
#' deviation of the log normal truncation distribution.
#' @seealso convert_to_logmean convert_to_logsd bootstrapped_dist_fit
#' @return A list summarising the input truncation distribution.
#' @export
#' @examples
#' # no truncation
#' trunc_opts()
trunc_opts <- function(dist = NULL) {
#'
#' # truncation dist
#' trunc_opts(mean = 3, sd = 2)
trunc_opts <- function(mean = 0 , sd = 0, mean_sd = 0, sd_sd = 0, max = 0) {
present <- !(mean == 0 && sd == 0 && max == 0)
data <- list()
data$truncation <- ifelse(is.null(dist), 0, 1)
if (data$truncation) {
for (param in c("mean_sd", "sd_sd")) {
if (!(param %in% names(dist))) dist[[param]] <- 0
}
}
data$trunc_mean_mean <- allocate_delays(dist$mean, data$truncation)
data$trunc_mean_sd <- allocate_delays(dist$mean_sd, data$truncation)
data$trunc_sd_mean <- allocate_delays(dist$sd, data$truncation)
data$trunc_sd_sd <- allocate_delays(dist$sd_sd, data$truncation)
data$max_truncation <- allocate_delays(dist$max, data$truncation)
data$truncation <- as.numeric(present)
data$trunc_mean_mean <- ifelse(present, mean, numeric())
data$trunc_mean_sd <- ifelse(present, mean_sd, numeric())
data$trunc_sd_mean <- ifelse(present, sd, numeric())
data$trunc_sd_sd <- ifelse(present, sd_sd, numeric())
data$max_truncation <- ifelse(present, max, numeric())
return(data)
}

Expand Down Expand Up @@ -291,7 +362,7 @@ gp_opts <- function(basis_prop = 0.2,
#' model. Custom settings can be supplied which override the defaults.
#' @param family Character string defining the observation model. Options are
#' Negative binomial ("negbin"), the default, and Poisson.
#' @param phi A numeric vector of length 2, defaults to 0, 1. Indicates the
#' @param phi A numeric vector of length 2, defaults to 0, 1. Indicates the
#' mean and standard deviation of the normal prior used for the observation
#' process.
#' @param weight Numeric, defaults to 1. Weight to give the observed data in
Expand Down
Loading