nmode's Git Repositories - Rnaught/blob - R/seq_bayes.R

   1 #' Sequential Bayes (seqB)
   2 #'
   3 #' This function implements a sequential Bayesian estimation method of R0 due to
   4 #' Bettencourt and Riberio (PloS One, 2008). See details for important
   5 #' implementation notes.
   6 #'
   7 #' The method sets a uniform prior distribution on R0 with possible values
   8 #' between `0` and `kappa`, discretized to a fine grid. The distribution of R0
   9 #' is then updated sequentially, with one update for each new case count
  10 #' observation. The final estimate of R0 is the mean of the (last) posterior
  11 #' distribution. The prior distribution is the initial belief of the
  12 #' distribution of R0, which is the uninformative uniform distribution with
  13 #' values between `0` and `kappa`. Users can change the value of `kappa` only
  14 #' (i.e., the prior distribution cannot be changed from the uniform). As more
  15 #' case counts are observed, the influence of the prior distribution should
  16 #' lessen on the final estimate.
  17 #'
  18 #' This method is based on an approximation of the SIR model, which is most
  19 #' valid at the beginning of an epidemic. The method assumes that the mean of
  20 #' the serial distribution (sometimes called the serial interval) is known. The
  21 #' final estimate can be quite sensitive to this value, so sensitivity testing
  22 #' is strongly recommended. Users should be careful about units of time (e.g.,
  23 #' are counts observed daily or weekly?) when implementing.
  24 #'
  25 #' Our code has been modified to provide an estimate even if case counts equal
  26 #' to zero are present in some time intervals. This is done by grouping the
  27 #' counts over such periods of time. Without grouping, and in the presence of
  28 #' zero counts, no estimate can be provided.
  29 #'
  30 #' @param cases Vector of case counts. The vector must only contain non-negative
  31 #'   integers, and have at least two positive integers.
  32 #' @param mu Mean of the serial distribution. This must be a positive number.
  33 #'   The value should match the case counts in time units. For example, if case
  34 #'   counts are weekly and the serial distribution has a mean of seven days,
  35 #'   then `mu` should be set to `1`. If case counts are daily and the serial
  36 #'   distribution has a mean of seven days, then `mu` should be set to `7`.
  37 #' @param kappa Largest possible value of the uniform prior (defaults to `20`).
  38 #'   This must be a number greater than or equal to `1`. It describes the prior
  39 #'   belief on the ranges of R0, and should be set to a higher value if R0 is
  40 #'   believed to be larger.
  41 #' @param post Whether to return the posterior distribution of R0 instead of the
  42 #'   estimate of R0 (defaults to `FALSE`). This must be a value identical to
  43 #'   `TRUE` or `FALSE`.
  44 #'
  45 #' @return If `post` is identical to `TRUE`, a list containing the following
  46 #'   components is returned:
  47 #'   * `supp` - the support of the posterior distribution of R0
  48 #'   * `pmf` - the probability mass function of the posterior distribution of R0
  49 #'
  50 #'   Otherwise, if `post` is identical to `FALSE`, only the estimate of R0 is
  51 #'   returned. Note that the estimate is equal to `sum(supp * pmf)` (i.e., the
  52 #'   posterior mean).
  53 #'
  54 #' @references [Bettencourt and Riberio (PloS One, 2008)](
  55 #'   https://doi.org/10.1371/journal.pone.0002185)
  56 #'
  57 #' @seealso `vignette("seq_bayes_post", package = "Rnaught")` for examples of
  58 #'   using the posterior distribution.
  59 #'
  60 #' @export
  61 #'
  62 #' @examples
  63 #' # Weekly data.
  64 #' cases <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4)
  65 #'
  66 #' # Obtain R0 when the serial distribution has a mean of five days.
  67 #' seq_bayes(cases, mu = 5 / 7)
  68 #'
  69 #' # Obtain R0 when the serial distribution has a mean of three days.
  70 #' seq_bayes(cases, mu = 3 / 7)
  71 #'
  72 #' # Obtain R0 when the serial distribution has a mean of seven days, and R0 is
  73 #' # believed to be at most 4.
  74 #' estimate <- seq_bayes(cases, mu = 1, kappa = 4)
  75 #'
  76 #' # Same as above, but return the posterior distribution of R0 instead of the
  77 #' # estimate.
  78 #' posterior <- seq_bayes(cases, mu = 1, kappa = 4, post = TRUE)
  79 #'
  80 #' # Display the support and probability mass function of the posterior.
  81 #' posterior$supp
  82 #' posterior$pmf
  83 #'
  84 #' # Note that the following always holds:
  85 #' estimate == sum(posterior$supp * posterior$pmf)
  86 seq_bayes <- function(cases, mu, kappa = 20, post = FALSE) {
  87   if (any(cases == 0)) {
  88     times <- which(cases > 0)
  89     if (length(times) < 2) {
  90       stop("Vector of case counts must contain at least two positive integers.")
  91     }
  92     cases <- cases[times]
  93   } else {
  94     times <- seq_along(cases)
  95   }
  96
  97   support <- seq(0, kappa, 0.01)
  98   tau <- diff(times)
  99
 100   prior <- rep(1, kappa / 0.01 + 1)
 101   prior <- prior / sum(prior)
 102   posterior <- seq(0, length(prior))
 103
 104   for (i in seq_len(length(cases) - 1)) {
 105     lambda <- tau[i] / mu * (support - 1) + log(cases[i])
 106     log_like <- cases[i + 1] * lambda - exp(lambda)
 107     max_log_like <- max(log_like)
 108
 109     if (max_log_like > 700) {
 110       log_like <- log_like - max_log_like + 700
 111     }
 112
 113     posterior <- exp(log_like) * prior
 114     posterior <- posterior / sum(posterior)
 115     prior <- posterior
 116   }
 117
 118   if (!post) {
 119     return(sum(support * posterior))
 120   }
 121   list(supp = support, pmf = posterior)
 122 }