nmode's Git Repositories - Rnaught/blob - R/seqB.R

   1 #' seqB method
   2 #'
   3 #' This function implements a sequential Bayesian estimation method of R0 due to Bettencourt and Riberio (PloS One, 2008).
   4 #' See details for important implementation notes.
   5 #'
   6 #' The method sets a uniform prior distribution on R0 with possible values between zero and \code{kappa}, discretized to a fine grid.
   7 #' The distribution of R0 is then updated sequentially, with one update for each new case count observation.
   8 #' The final estimate of R0 is \code{Rhat}, the mean of the (last) posterior distribution.
   9 #' The prior distribution is the initial belief of the distribution of R0; which in this implementation is the uninformative uniform
  10 #' distribution with values between zero and \code{kappa}. Users can change the value of kappa only (ie. the prior distribution
  11 #' cannot be changed from the uniform).  As more case counts are observed, the influence of the prior distribution should lessen on
  12 #' the final estimate \code{Rhat}.
  13 #'
  14 #' This method is based on an approximation of the SIR model, which is most valid at the beginning of an epidemic. The method assumes
  15 #' that the mean of the serial distribution (sometimes called the serial interval) is known. The final estimate can be quite sensitive
  16 #' to this value, so sensitivity testing is strongly recommended. Users should be careful about units of time (e.g. are counts observed
  17 #' daily or weekly?) when implementing.
  18 #'
  19 #' Our code has been modified to provide an estimate even if case counts equal to zero are present in some time intervals. This is done
  20 #' by grouping the counts over such periods of time. Without grouping, and in the presence of zero counts, no estimate can be provided.
  21 #'
  22 #' @param NT Vector of case counts
  23 #' @param mu Mean of the serial distribution (needs to match case counts in time units; for example, if case counts are
  24 #'           weekly and the serial distribution has a mean of seven days, then \code{mu} should be set to one, if case
  25 #'           counts are daily and the serial distribution has a mean of seven days, then \code{mu} should be set to seven)
  26 #' @param kappa Largest possible value of uniform prior, defaults to 20. This describes the prior belief on ranges of R0,
  27 #'              so should be set to a higher value if R0 is believed to be larger.
  28 #'
  29 #' @return secB returns a list containing the following components: \code{Rhat} is the estimate of R0 (the posterior mean),
  30 #'              \code{posterior} is the posterior distribution of R0 from which alternate estimates can be obtained (see examples),
  31 #'              \code{group} is an indicator variable (if \code{group=TRUE}, zero values of NT were input and grouping was done to
  32 #'              obtain \code{Rhat}), and \code{inputs} is a list of the original input variables \code{NT, gamma, kappa}. The variable
  33 #'              \code{posterior} is returned as a list made up of \code{supp} the support of the distribution and \code{pmf} the
  34 #'              probability mass function.
  35 #'
  36 #' @examples
  37 #' ## ===================================================== ##
  38 #' ## Illustrate on weekly data                             ##
  39 #' ## ===================================================== ##
  40 #'
  41 #' NT <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4)
  42 #' ## obtain Rhat when serial distribution has mean of five days
  43 #' res1 <- seqB(NT=NT, mu=5/7)
  44 #' res1$Rhat
  45 #' ## obtain Rhat when serial distribution has mean of three days
  46 #' res2 <- seqB(NT=NT, mu=3/7)
  47 #' res2$Rhat
  48 #'
  49 #' ## ============================================================= ##
  50 #' ## Compute posterior mode instead of posterior mean and plot     ##
  51 #' ## ============================================================= ##
  52 #'
  53 #' Rpost <-     res1$posterior
  54 #' loc <- which(Rpost$pmf == max(Rpost$pmf))
  55 #' Rpost$supp[loc] # posterior mode
  56 #' res1$Rhat # compare with posterior mean
  57 #'
  58 #' par(mfrow=c(2, 1), mar=c(2, 2, 1, 1))
  59 #' plot(Rpost$supp, Rpost$pmf, col="black", type="l", xlab="", ylab="")
  60 #' abline(h=1/(20/0.01+1), col="red")
  61 #' abline(v=res1$Rhat, col="blue")
  62 #' abline(v=Rpost$supp[loc], col="purple")
  63 #' legend("topright", legend=c("prior", "posterior", "posterior mean (Rhat)", "posterior mode"), col=c("red", "black", "blue", "purple"), lty=1)
  64 #' plot(Rpost$supp, Rpost$pmf, col="black", type="l", xlim=c(0.5, 1.5), xlab="", ylab="")
  65 #' abline(h=1/(20/0.01+1), col="red")
  66 #' abline(v=res1$Rhat, col="blue")
  67 #' abline(v=Rpost$supp[loc], col="purple")
  68 #' legend("topright", legend=c("prior", "posterior", "posterior mean (Rhat)", "posterior mode"), col=c("red", "black", "blue", "purple"), lty=1)
  69 #'
  70 #' ## ========================================================= ##
  71 #' ## Compute Rhat using only the first five weeks of data      ##
  72 #' ## ========================================================= ##
  73 #'
  74 #' res3 <- seqB(NT=NT[1:5], mu=5/7)     # serial distribution has mean of five days
  75 #' res3$Rhat
  76 #'
  77 #' @export
  78 seqB <- function(NT, mu, kappa=20) {
  79     if (length(NT) < 2)
  80         print("Warning: length of NT should be at least two.")
  81     else {
  82         if (min(NT) > 0) {
  83             times <- 1:length(NT)
  84             tau <- diff(times)
  85         }
  86             group <- FALSE
  87         if (min(NT) == 0) {
  88             times <- which(NT > 0)
  89             NT <- NT[times]
  90             tau <- diff(times)
  91             group <- TRUE
  92         }
  93
  94         R <- seq(0, kappa, 0.01)
  95         prior0 <- rep(1, kappa / 0.01 + 1)
  96         prior0 <- prior0 / sum(prior0)
  97         k <- length(NT) - 1
  98         R0.post <- matrix(0, nrow=k, ncol=length(R))
  99         prior <- prior0
 100         posterior <- seq(0, length(prior0))
 101         gamma <- 1 / mu
 102
 103         for (i in 1:k) {
 104             mm1 <- NT[i]
 105             mm2 <- NT[i+1]
 106             lambda <- tau[i] * gamma * (R - 1)
 107             lambda <- log(mm1) + lambda
 108             loglik <- mm2 * lambda - exp(lambda)
 109             maxll <- max(loglik)
 110             const <- 0
 111
 112             if (maxll > 700)
 113                 const <- maxll - 700
 114
 115             loglik <- loglik-const
 116             posterior <- exp(loglik) * prior
 117             posterior <- posterior / sum(posterior)
 118             prior <- posterior
 119         }
 120
 121         Rhat <- sum(R * posterior)
 122
 123         return(list(Rhat=Rhat, posterior=list(supp=R, pmf=posterior), group=group))
 124     }
 125 }