nmode's Git Repositories - Rnaught/blob - R/seqB.R

   1 #' seqB method
   2 #'
   3 #' This function implements a sequential Bayesian estimation method of R0 due to
   4 #' Bettencourt and Riberio (PloS One, 2008). See details for important
   5 #' implementation notes.
   6 #'
   7 #' The method sets a uniform prior distribution on R0 with possible values
   8 #' between zero and \code{kappa}, discretized to a fine grid. The distribution
   9 #' of R0 is then updated sequentially, with one update for each new case count
  10 #' observation. The final estimate of R0 is \code{Rhat}, the mean of the (last)
  11 #' posterior distribution. The prior distribution is the initial belief of the
  12 #' distribution of R0, which is the uninformative uniform distribution with
  13 #' values between zero and \code{kappa}. Users can change the value of
  14 #' \code{kappa} only (i.e., the prior distribution cannot be changed from the
  15 #' uniform). As more case counts are observed, the influence of the prior
  16 #' distribution should lessen on the final estimate \code{Rhat}.
  17 #'
  18 #' This method is based on an approximation of the SIR model, which is most
  19 #' valid at the beginning of an epidemic. The method assumes that the mean of
  20 #' the serial distribution (sometimes called the serial interval) is known. The
  21 #' final estimate can be quite sensitive to this value, so sensitivity testing
  22 #' is strongly recommended. Users should be careful about units of time (e.g.,
  23 #' are counts observed daily or weekly?) when implementing.
  24 #'
  25 #' Our code has been modified to provide an estimate even if case counts equal
  26 #' to zero are present in some time intervals. This is done by grouping the
  27 #' counts over such periods of time. Without grouping, and in the presence of
  28 #' zero counts, no estimate can be provided.
  29 #'
  30 #' @param NT Vector of case counts.
  31 #' @param mu Mean of the serial distribution. This needs to match case counts in
  32 #'           time units. For example, if case counts are weekly and the serial
  33 #'           distribution has a mean of seven days, then \code{mu} should be set
  34 #'           to one. If case counts are daily and the serial distribution has a
  35 #'           mean of seven days, then \code{mu} should be set to seven.
  36 #' @param kappa Largest possible value of uniform prior (defaults to 20). This
  37 #'              describes the prior belief on ranges of R0, and should be set to
  38 #'              a higher value if R0 is believed to be larger.
  39 #'
  40 #' @return \code{seqB} returns a list containing the following components:
  41 #'         \code{Rhat} is the estimate of R0 (the posterior mean),
  42 #'         \code{posterior} is the posterior distribution of R0 from which
  43 #'         alternate estimates can be obtained (see examples), and \code{group}
  44 #'         is an indicator variable (if \code{group == TRUE}, zero values of NT
  45 #'         were input and grouping was done to obtain \code{Rhat}). The variable
  46 #'         \code{posterior} is returned as a list made up of \code{supp} (the
  47 #'         support of the distribution) and \code{pmf} (the probability mass
  48 #'         function).
  49 #'
  50 #' @examples
  51 #' # Weekly data.
  52 #' NT <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4)
  53 #'
  54 #' ## Obtain R0 when the serial distribution has a mean of five days.
  55 #' res1 <- seqB(NT, mu = 5 / 7)
  56 #' res1$Rhat
  57 #'
  58 #' ## Obtain R0 when the serial distribution has a mean of three days.
  59 #' res2 <- seqB(NT, mu = 3 / 7)
  60 #' res2$Rhat
  61 #'
  62 #' # Compute posterior mode instead of posterior mean and plot.
  63 #'
  64 #' Rpost <- res1$posterior
  65 #' loc <- which(Rpost$pmf == max(Rpost$pmf))
  66 #' Rpost$supp[loc] # Posterior mode.
  67 #' res1$Rhat # Compare with the posterior mean.
  68 #'
  69 #' par(mfrow = c(2, 1), mar = c(2, 2, 1, 1))
  70 #'
  71 #' plot(Rpost$supp, Rpost$pmf, col = "black", type = "l", xlab = "", ylab = "")
  72 #' abline(h = 1 / (20 / 0.01 + 1), col = "red")
  73 #' abline(v = res1$Rhat, col = "blue")
  74 #' abline(v = Rpost$supp[loc], col = "purple")
  75 #' legend("topright",
  76 #'   legend = c("Prior", "Posterior", "Posterior mean", "Posterior mode"),
  77 #'   col = c("red", "black", "blue", "purple"), lty = 1)
  78 #'
  79 #' @export
  80 seqB <- function(NT, mu, kappa = 20) {
  81   if (length(NT) < 2)
  82     print("Warning: length of NT should be at least two.")
  83   else {
  84     if (min(NT) > 0) {
  85       times <- 1:length(NT)
  86       tau <- diff(times)
  87     }
  88     group <- FALSE
  89     if (min(NT) == 0) {
  90       times <- which(NT > 0)
  91       NT <- NT[times]
  92       tau <- diff(times)
  93       group <- TRUE
  94     }
  95
  96     R <- seq(0, kappa, 0.01)
  97     prior0 <- rep(1, kappa / 0.01 + 1)
  98     prior0 <- prior0 / sum(prior0)
  99     k <- length(NT) - 1
 100     R0.post <- matrix(0, nrow = k, ncol = length(R))
 101     prior <- prior0
 102     posterior <- seq(0, length(prior0))
 103     gamma <- 1 / mu
 104
 105     for (i in 1:k) {
 106       mm1 <- NT[i]
 107       mm2 <- NT[i + 1]
 108       lambda <- tau[i] * gamma * (R - 1)
 109       lambda <- log(mm1) + lambda
 110       loglik <- mm2 * lambda - exp(lambda)
 111       maxll <- max(loglik)
 112       const <- 0
 113
 114       if (maxll > 700)
 115         const <- maxll - 700
 116
 117       loglik <- loglik - const
 118       posterior <- exp(loglik) * prior
 119       posterior <- posterior / sum(posterior)
 120       prior <- posterior
 121     }
 122
 123     Rhat <- sum(R * posterior)
 124
 125     return(list(Rhat = Rhat,
 126                 posterior = list(supp = R, pmf = posterior),
 127                 group = group))
 128   }
 129 }