add generation time and truncation opts

epiforecasts · sbfnk · Nov 18, 2022 · Jul 21, 2022 · Jul 21, 2022 · Jul 25, 2022
commit f075f1d1b240c2ec9f8ef46f1cbce5e3711a1670
diff --git a/R/create.R b/R/create.R
@@ -398,54 +398,8 @@ create_stan_data <- function(reported_cases, generation_time,
   ## make sure we have at least max_gt seeding time
   delays$seeding_time <- max(delays$seeding_time, generation_time$max)
 
-  ## complete generation time parameters if not all are given
-  if (is.null(generation_time)) {
-    generation_time <- list(mean = 1)
-  }
-  for (param in c("mean_sd", "sd", "sd_sd")) {
-    if (!(param %in% names(generation_time))) generation_time[[param]] <- 0
-  }
-  ## check if generation time is fixed
-  if (generation_time$sd == 0 && generation_time$sd_sd == 0) {
-    if (generation_time$mean_sd > 0) {
-      stop("Error in generation time definition: if sd_mean is 0 and ",
-           "sd_sd is 0 then mean_sd must be 0, too.")
-    }
-    if ("max" %in% names(generation_time)) {
-      if (generation_time$max != generation_time$mean) {
-        stop("Error in generation time defintion: if max_gt(",
-             generation_time$max_gt,
-             ") is given it must be equal to the mean if it is fixed (",
-             generation_time$mean,
-             ")")
-      }
-    } else {
-      generation_time$max_gt <- generation_time$mean
-    }
-    if (round(generation_time$mean) != generation_time$mean) {
-      stop(("Error: if using a fixed generation time it must be integer"))
-    }
-  }
-
-  ## check if delay is fixed
-  for (i in seq_len(delays$delays)) {
-    if (delays$delay_sd_mean[i] == 0 && delays$delay_sd_sd[i] == 0) {
-      if (delays$delay_mean_sd[i] > 0) {
-        stop("Error in delay distribution definition: if sd_mean is 0 and ",
-             "sd_sd is 0 then mean_sd must be 0, too.")
-      }
-      if (delays$max_delay[i] != delays$delay_mean_mean[i]) {
-        stop("Error in delay defintion: if max_delay(",
-             delays$max_delay[i],
-             ") is given it must be equal to the mean if it is fixed (",
-             delays$delay_mean_mean[i],
-             ")")
-      }
-      if (round(delays$delay_mean_mean[i]) != delays$delay_mean_mean[i]) {
-        stop(("Error: if using a fixed delay it must be integer"))
-      }
-    }
-  }
+  ## for backwards compatibility call generation_time_opts internally
+  generation_time <- do.call(generation_time_opts, generation_time)
 
   cases <- reported_cases[(delays$seeding_time + 1):(.N - horizon)]$confirm
 

diff --git a/R/estimate_infections.R b/R/estimate_infections.R
@@ -19,23 +19,9 @@
 #' 
 #' @param reported_cases A data frame of confirmed cases (confirm) by date
 #' (date). confirm must be integer and date must be in date format.
-#' @param generation_time The generation time distribution as parameters
-#' of a discretised (upper-)truncated gamma delay
-#' distributions, given as a list with the following parameters:
-#' "mean", the mean generation time;
-#' "mean_sd", the standard deviation in the estimate of "mean" parameter
-#' (assumed normally distributed); "sd", the standard
-#' deviation of the generation time; "sd_sd", the standard
-#' deviation of the estimate of the "sd" parameter (assumed normally
-#' distributed) sd_sd"; and "max", the maximum generation time.
-#' The "mean" parameter is mandatory; if it is the only one given it represents
-#' a fixed generation time and must be integer-valued; if "sd" is also
-#' given and greater than 0 this represents a generation time distribution and
-#' "mean" can be real-valued. In that case, "max" also needs to be given.
-#' The "mean_sd" and "sd_sd" parameters should be provided to represent
-#' uncertainty in the parameter values of the delay but are optional.
-#' If this is set to NULL, a fixed generation time of 1 will be used, modelling
-#' infections as an AR(1) process.
+#' @param generation_time A call to `generation_time_opts()` defining the
+#' generation time distribution used. For backwards compatibility a list of
+#' summary parameters can also be passed.
 #' @param delays A call to `delay_opts()` defining delay distributions and
 #' options. See the documentation of `delay_opts()` and the examples below for
 #' details.
@@ -70,19 +56,22 @@
 #' reported_cases <- example_confirmed[1:60]
 #'
 #' # set up example generation time
-#' generation_time <- get_generation_time(disease = "SARS-CoV-2", source = "ganyani")
+#' generation_time <- generation_time_opts(
+#'  disease = "SARS-CoV-2", source = "ganyani", fixed = TRUE
+#' )
 #' # set delays between infection and case report
-#' incubation_period <- get_incubation_period(disease = "SARS-CoV-2", source = "lauer")
+#' incubation_period <- get_incubation_period(
+#'  disease = "SARS-CoV-2", source = "lauer"
+#' )
 #' reporting_delay <- list(
-#'   mean = convert_to_logmean(2, 1), mean_sd = 0.1,
-#'   sd = convert_to_logsd(2, 1), sd_sd = 0.1, max = 10
+#'   mean = convert_to_logmean(2, 1), mean_sd = 0,
+#'   sd = convert_to_logsd(2, 1), sd_sd = 0, max = 10
 #' )
 #'
-#' # default setting
-#' # here we assume that the observed data is truncated by the same delay as
+#' # default settings but assuming that delays are fixed rather than uncertain
 #' def <- estimate_infections(reported_cases,
 #'   generation_time = generation_time,
-#'   delays = delay_opts(incubation_period, reporting_delay),
+#'   delays = delay_opts(incubation_period, reporting_delay, fixed = TRUE),
 #'   rt = rt_opts(prior = list(mean = 2, sd = 0.1)),
 #'   stan = stan_opts(control = list(adapt_delta = 0.95))
 #' )
@@ -95,7 +84,7 @@
 #' # These settings are an area of active research. See ?gp_opts for details.
 #' agp <- estimate_infections(reported_cases,
 #'   generation_time = generation_time,
-#'   delays = delay_opts(incubation_period, reporting_delay),
+#'   delays = delay_opts(incubation_period, reporting_delay, fixed = TRUE),
 #'   rt = rt_opts(prior = list(mean = 2, sd = 0.1)),
 #'   gp = gp_opts(ls_min = 10, basis_prop = 0.1),
 #'   stan = stan_opts(control = list(adapt_delta = 0.95))
@@ -106,7 +95,7 @@
 #' # Adjusting for future susceptible depletion
 #' dep <- estimate_infections(reported_cases,
 #'   generation_time = generation_time,
-#'   delays = delay_opts(incubation_period, reporting_delay),
+#'   delays = delay_opts(incubation_period, reporting_delay, fixed = TRUE),
 #'   rt = rt_opts(
 #'     prior = list(mean = 2, sd = 0.1),
 #'     pop = 1000000, future = "latest"
@@ -118,15 +107,15 @@
 #'
 #' # Adjusting for truncation of the most recent data
 #' # See estimate_truncation for an approach to estimating this from data
-#' trunc_dist <- list(
+#' trunc_dist <- trunc_opts(
 #'   mean = convert_to_logmean(0.5, 0.5), mean_sd = 0.1,
 #'   sd = convert_to_logsd(0.5, 0.5), sd_sd = 0.1,
 #'   max = 3
 #' )
 #' trunc <- estimate_infections(reported_cases,
 #'   generation_time = generation_time,
-#'   delays = delay_opts(incubation_period, reporting_delay),
-#'   truncation = trunc_opts(trunc_dist),
+#'   delays = delay_opts(incubation_period, reporting_delay, fixed = TRUE),
+#'   truncation = trunc_dist,
 #'   rt = rt_opts(prior = list(mean = 2, sd = 0.1)),
 #'   gp = gp_opts(ls_min = 10, basis_prop = 0.1),
 #'   stan = stan_opts(control = list(adapt_delta = 0.95))
@@ -141,7 +130,7 @@
 #' # other options
 #' backcalc <- estimate_infections(reported_cases,
 #'   generation_time = generation_time,
-#'   delays = delay_opts(incubation_period, reporting_delay),
+#'   delays = delay_opts(incubation_period, reporting_delay, fixed = TRUE),
 #'   rt = NULL, backcalc = backcalc_opts(),
 #'   obs = obs_opts(scale = list(mean = 0.4, sd = 0.05)),
 #'   horizon = 0
@@ -151,7 +140,7 @@
 #' # Rt projected into the future using the Gaussian process
 #' project_rt <- estimate_infections(reported_cases,
 #'   generation_time = generation_time,
-#'   delays = delay_opts(incubation_period, reporting_delay),
+#'   delays = delay_opts(incubation_period, reporting_delay, fixed = TRUE),
 #'   rt = rt_opts(
 #'     prior = list(mean = 2, sd = 0.1),
 #'     future = "project"
@@ -163,12 +152,13 @@
 #' snapshot_cases <- example_confirmed[80:130]
 #' snapshot <- estimate_infections(snapshot_cases,
 #'   generation_time = generation_time,
-#'   delays = delay_opts(incubation_period, reporting_delay),
+#'   delays = delay_opts(incubation_period, reporting_delay, fixed = TRUE),
 #'   rt = rt_opts(prior = list(mean = 1, sd = 0.1))
 #' )
 #' plot(snapshot)
 #'
 #' # stationary Rt assumption (likely to provide biased real-time estimates)
+#' # with uncertain reporting delays
 #' stat <- estimate_infections(reported_cases,
 #'   generation_time = generation_time,
 #'   delays = delay_opts(incubation_period, reporting_delay),
@@ -177,6 +167,7 @@
 #' plot(stat)
 #'
 #' # no gaussian process (i.e fixed Rt assuming no breakpoints)
+#' # with uncertain reporting delays
 #' fixed <- estimate_infections(reported_cases,
 #'   generation_time = generation_time,
 #'   delays = delay_opts(incubation_period, reporting_delay),
@@ -189,6 +180,7 @@
 #' plot(no_delay)
 #'
 #' # break point but otherwise static Rt
+#' # with uncertain reporting delays
 #' bp_cases <- data.table::copy(reported_cases)
 #' bp_cases <- bp_cases[, breakpoint := ifelse(date == as.Date("2020-03-16"), 1, 0)]
 #' bkp <- estimate_infections(bp_cases,
@@ -202,6 +194,7 @@
 #' plot(bkp)
 #'
 #' # weekly random walk
+#' # with uncertain reporting delays
 #' rw <- estimate_infections(reported_cases,
 #'   generation_time = generation_time,
 #'   delays = delay_opts(incubation_period, reporting_delay),
@@ -216,7 +209,7 @@
 #' options(old_opts)
 #' }
 estimate_infections <- function(reported_cases,
-                                generation_time,
+                                generation_time = generation_time_opts(),
                                 delays = delay_opts(),
                                 truncation = trunc_opts(),
                                 rt = rt_opts(),

diff --git a/R/opts.R b/R/opts.R
@@ -1,3 +1,77 @@
+#' Generation Time Distribution Options
+#'
+#' @description `r lifecycle::badge("stable")`
+#' Returns generation time parameters in a format for lower level model use.
+#' @param mean Numeric, defaults to 1. If the only non-zero summary parameter
+#' then this is the fixed interval of the generation time. If the `sd` is
+#' non-zero then this is the mean of a gamma distribution.
+#' @param mean_sd Numeric, defaults to 0. The prior uncertainty for the mean
+#' of the generation time.
+#' @param sd Numeric, defaults to 0. Sets the standard deviation for a gamma
+#' distribution generation time.
+#' @param sd_sd Numeric, defaults to 0. The prior uncertainty for the standard
+#' deviation of the generation time.
+#' @param max Numeric, defaults to 15. The maximum generation time.
+#' @param fixed Logical, defaults to `FALSE`. Should the generation time be
+#' treated as fixed. Always `TRUE` when `mean_sd` and `sd_sd` are both 0.
+#' @param disease,source Optional. If either is given both are passed to
+#' `get_generation_time()` and `mean`, `mean_sd`, `sd`, `sd_sd` are not used.
+#' @seealso convert_to_logmean convert_to_logsd bootstrapped_dist_fit
+#' @return A list summarising the generation time distribution.
+#' @export
+#' @examples
+#' # default settings with a fixed generation time
+#' generation_time_opts()
+#'
+#' # A fixed gamma distributed generation time
+#' generation_time_opts(mean = 3, sd = 2)
+#'
+#' # An uncertain gamma distributed generation time
+#' generation_time_opts(mean = 3, sd = 2, mean_sd = 1, sd_sd = 0.5)
+generation_time_opts <- function(mean = 1, mean_sd = 0, sd = 0, sd_sd = 0,
+                                 max = 15, fixed = FALSE, disease, source) {
+  if (missing(disease) && missing(source)) {
+    gt <- list(
+      gt_mean = mean,
+      gt_mean_sd = mean_sd,
+      gt_sd = sd,
+      gt_sd_sd = sd_sd,
+      gt_max = max,
+      gt_fixed = fixed
+    )
+  } else {
+    gt <- get_generation_time(
+      disease = disease, source = source, max_value = max
+    )
+    gt$gt_fixed <- fixed
+  }
+  ## a constant generation time (no spread) gets a point-mass pmf
+  if (gt$gt_sd == 0 && gt$gt_sd_sd == 0) {
+    if (gt$gt_mean %% 1 != 0) {
+      stop(
+        "When the generation time is set to a constant it must be an integer"
+      )
+    }
+    ## all mass sits at the mean, so the maximum is the mean
+    gt$gt_max <- gt$gt_mean
+    if (any(gt$gt_mean_sd > 0, gt$gt_sd_sd > 0)) {
+      stop("Error in generation time definition: if sd_mean is 0 and ",
+           "sd_sd is 0 then mean_sd must be 0, too.")
+    }
+    gt$gt_pmf <- c(1, rep(0, gt$gt_max - 1))
+  } else {
+    gt$gt_pmf <- discretised_gamma_pmf(
+      mean = gt$gt_mean, sd = gt$gt_sd, max_d = gt$gt_max, zero_pad = 1,
+      reverse = TRUE
+    )
+  }
+  ## no uncertainty in either parameter means the distribution is fixed
+  if (gt$gt_sd_sd == 0 && gt$gt_mean_sd == 0) {
+    gt$gt_fixed <- TRUE
+  }
+  return(gt)
+}
+
 #' Delay Distribution Options
 #'
 #' @description `r lifecycle::badge("stable")`
@@ -17,6 +91,9 @@
 #' real-valued. In that case, "max" also needs to be given.
 #' The "mean_sd" and "sd_sd" parameters should be provided to represent
 #' uncertainty in the parameter values of the delay but are optional.
+#' @param fixed Logical, defaults to `FALSE`. Should reporting delays be treated
+#' as coming from fixed (vs uncertain) distributions. Making this simplification
+#' drastically reduces compute requirements.
 #' @seealso convert_to_logmean convert_to_logsd bootstrapped_dist_fit
 #' @return A list summarising the input delay distributions.
 #' @export
@@ -82,41 +159,35 @@ delay_opts <- function(..., fixed = FALSE) {
 #' Truncation Distribution Options
 #'
 #' @description `r lifecycle::badge("stable")`
-#' Returns a truncation distribution formatted for usage by downstream functions. See
-#' `estimate_truncation` for an approach to estimate this distribution.
-#' @param dist A list defining the truncation distribution as parameters of a
-#' discretised (upper-)truncated lognormal density distribution; defaults to
-#' `NULL` in which case no truncation is used. Otherwise it defines the
-#' truncation distributions as a list with the following parameters:
-#' "mean", the mu parameter or mean of the natural logarithm of the truncation;
-#' "mean_sd", the standard deviation in the estimate of "mean" parameter
-#' (assumed normally distributed); "sd", the sigma parameter or standard
-#' deviation of the natural logarithm of the truncation; "sd_sd", the standard
-#' deviation of the estimate of the "sd" parameter (assumed normally
-#' distributed) sd_sd"; and "max", the maximum truncation.
-#' The "mean" and "sd" and "max" parameters are mandatory;
-#' the "mean_sd" and "sd_sd"
-#' parameters should be provided to represent
-#' uncertainty in the parameter values of the delay but are optional.
+#' Returns a log-normal truncation distribution formatted for usage by
+#' downstream functions; no truncation is used if `mean`, `sd` and `max` are
+#' all 0. See `estimate_truncation()` for an approach to estimate it.
+#' @param mean Numeric, defaults to 0. Mean on the log scale of the truncation
+#' distribution.
+#' @param sd Numeric, defaults to 0. Sets the standard deviation for the log
+#' normal truncation distribution.
+#' @param mean_sd,sd_sd Numeric, default to 0. The prior uncertainty for the
+#' mean and standard deviation of the log normal truncation distribution.
+#' @param max Numeric, defaults to 0. The maximum truncation.
 #' @seealso convert_to_logmean convert_to_logsd bootstrapped_dist_fit
 #' @return A list summarising the input truncation distribution.
 #' @export
 #' @examples
 #' # no truncation
 #' trunc_opts()
-trunc_opts <- function(dist = NULL) {
+#'
+#' # truncation dist
+#' trunc_opts(mean = 3, sd = 2, max = 10)
+trunc_opts <- function(mean = 0, sd = 0, mean_sd = 0, sd_sd = 0, max = 0) {
+  present <- !(mean == 0 && sd == 0 && max == 0)
   data <- list()
-  data$truncation <- ifelse(is.null(dist), 0, 1)
-  if (data$truncation) {
-    for (param in c("mean_sd", "sd_sd")) {
-      if (!(param %in% names(dist))) dist[[param]] <- 0
-    }
-  }
-  data$trunc_mean_mean <- allocate_delays(dist$mean, data$truncation)
-  data$trunc_mean_sd <- allocate_delays(dist$mean_sd, data$truncation)
-  data$trunc_sd_mean <- allocate_delays(dist$sd, data$truncation)
-  data$trunc_sd_sd <- allocate_delays(dist$sd_sd, data$truncation)
-  data$max_truncation <- allocate_delays(dist$max, data$truncation)
+  data$truncation <- as.numeric(present)
+  ## plain if/else: ifelse() would give NA, not an empty vector, when absent
+  data$trunc_mean_mean <- if (present) mean else numeric()
+  data$trunc_mean_sd <- if (present) mean_sd else numeric()
+  data$trunc_sd_mean <- if (present) sd else numeric()
+  data$trunc_sd_sd <- if (present) sd_sd else numeric()
+  data$max_truncation <- if (present) max else numeric()
   return(data)
 }
 
@@ -291,7 +362,7 @@ gp_opts <- function(basis_prop = 0.2,
 #' model. Custom settings can be supplied which override the defaults.
 #' @param family Character string defining the observation model. Options are
 #' Negative binomial ("negbin"), the default, and Poisson.
-#' @param phi A numeric vector of length 2, defaults to 0, 1. Indicates the 
+#' @param phi A numeric vector of length 2, defaults to 0, 1. Indicates the
 #' mean and standard deviation of the normal prior used for the observation
 #' process.
 #' @param weight Numeric, defaults to 1. Weight to give the observed data in