nmode's Git Repositories - Rnaught/blob - R/WP.R

   1 source("WP_known.R")
   2 source("WP_unknown.R")
   3
   4 #' WP method
   5 #'
   6 #' This function implements an R0 estimation due to White and Pagano (Statistics in Medicine, 2008).
   7 #' The method is based on maximum likelihood estimation in a Poisson transmission model.
   8 #' See details for important implementation notes.
   9 #'
  10 #' This method is based on a Poisson transmission model, and hence may be most most valid at the beginning
  11 #' of an epidemic. In their model, the serial distribution is assumed to be discrete with a finite number
  12 #' of posible values. In this implementation, if \code{mu} is not {NA}, the serial distribution is taken to
  13 #' be a discretized version of a gamma distribution with mean \code{mu}, shape parameter one, and largest
  14 #' possible value based on parameter \code{tol}. When \code{mu} is \code{NA}, the function implements a
  15 #' grid search algorithm to find the maximum likelihood estimator over all possible gamma distributions
  16 #' with unknown mean and variance, restricting these to a prespecified grid (see \code{search} parameter).
  17 #'
  18 #' When the serial distribution is known (i.e., \code{mu} is not \code{NA}), sensitivity testing of \code{mu}
  19 #' is strongly recommended. If the serial distribution is unknown (i.e., \code{mu} is \code{NA}), the
  20 #' likelihood function can be flat near the maximum, resulting in numerical instability of the optimizer.
  21 #' When \code{mu} is \code{NA}, the implementation takes considerably longer to run. Users should be careful
  22 #' about units of time (e.g. are counts observed daily or weekly?) when implementing.
  23 #'
  24 #' The model developed in White and Pagano (2008) is discrete, and hence the serial distribution is finite
  25 #' discrete. In our implementation, the input value \code{mu} is that of a continuous distribution. The
  26 #' algorithm discretizes this input when \code{mu} is not \code{NA}, and hence the mean of the serial
  27 #' distribution returned in the list \code{SD} will differ from \code{mu} somewhat. That is to say, if the
  28 #' user notices that the input \code{mu} and output mean of \code{SD} are different, this is to be expected,
  29 #' and is caused by the discretization.
  30 #'
  31 #' @param NT Vector of case counts
  32 #' @param mu Mean of the serial distribution (needs to match case counts in time units; for example, if case
  33 #'           counts are weekly and the serial distribution has a mean of seven days, then \code{mu} should be
  34 #'           set to one). The default value of \code{mu} is set to \code{NA}.
  35 #' @param search List of default values for the grid search algorithm; the list includes three elements: the
  36 #'               first is \code{B} which is the length of the grid in one dimension, the second is
  37 #'               \code{scale.max} which is the largest possible value of the scale parameter, and the third is
  38 #'               \code{shape.max} which is the largest possible value of the shape parameter; defaults to
  39 #'               \code{B=100, scale.max=10, shape.max=10}. For both shape and scale, the smallest possible
  40 #'               value is 1/\code{B}.
  41 #' @param tol Cutoff value for cumulative distribution function of the pre-discretization gamma serial
  42 #'            distribution, defaults to 0.999 (i.e. in the discretization, the maximum is chosen such that the
  43 #'            original gamma distribution has cumulative probability of no more than 0.999 at this maximum).
  44 #'
  45 #' @return WP returns a list containing the following components:  \code{Rhat} is the estimate of R0, \code{SD}
  46 #'            is either the discretized serial distribution (if \code{mu} is not \code{NA}) or the estimated
  47 #'            discretized serial distribution (if \code{mu} is \code{NA}), and \code{inputs} is a list of the
  48 #'            original input variables \code{NT, mu, method, search, tol}. The list also returns the variable
  49 #'            \code{check}, which is equal to the number of non-unique maximum likelihood estimators. The serial
  50 #'            distribution \code{SD} is returned as a list made up of \code{supp} the support of the distribution
  51 #'            and \code{pmf} the probability mass function.
  52 #'
  53 #' @examples
  54 #' ## ===================================================== ##
  55 #' ## Illustrate on weekly data                             ##
  56 #' ## ===================================================== ##
  57 #'
  58 #' NT <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4)
  59 #' ## obtain Rhat when serial distribution has mean of five days
  60 #' res1 <- WP(NT=NT, mu=5/7)
  61 #' res1$Rhat
  62 #' ## obtain Rhat when serial distribution has mean of three days
  63 #' res2 <- WP(NT=NT, mu=3/7)
  64 #' res2$Rhat
  65 #' ## obtain Rhat when serial distribution is unknown
  66 #' ## NOTE:  this implementation will take longer to run
  67 #' res3 <- WP(NT=NT)
  68 #' res3$Rhat
  69 #' ## find mean of estimated serial distribution
  70 #' serial <- res3$SD
  71 #' sum(serial$supp * serial$pmf)
  72 #'
  73 #' ## ========================================================= ##
  74 #' ## Compute Rhat using only the first five weeks of data      ##
  75 #' ## ========================================================= ##
  76 #'
  77 #' res4 <- WP(NT=NT[1:5], mu=5/7) # serial distribution has mean of five days
  78 #' res4$Rhat
  79 #'
  80 #' @export
  81 WP <- function(NT, mu=NA, search=list(B=100, shape.max=10, scale.max=10), tol=0.999) {
  82     if (is.na(mu)) {
  83         print("You have assumed that the serial distribution is unknown.")
  84         res <- WP_unknown(NT=NT, B=search$B, shape.max=search$shape.max, scale.max=search$scale.max, tol=tol)
  85         Rhat <- res$Rhat
  86         p <- res$p
  87         range.max <- res$range.max
  88         JJ <- res$JJ
  89     } else {
  90         print("You have assumed that the serial distribution is known.")
  91         range.max <- ceiling(qexp(tol, rate=1/mu))
  92         p <- diff(pexp(0:range.max, 1/mu))
  93         p <- p / sum(p)
  94         res <- WP_known(NT=NT, p=p)
  95         Rhat <- res
  96         JJ <- NA
  97     }
  98
  99     return(list(Rhat=Rhat, check=length(JJ), SD=list(supp=1:range.max, pmf=p)))
 100 }