nmode's Git Repositories - Rnaught/blob - R/WP.R

   1 #' WP method
   2 #'
   3 #' This function implements an R0 estimation due to White and Pagano (Statistics
   4 #' in Medicine, 2008). The method is based on maximum likelihood estimation in a
   5 #' Poisson transmission model. See details for important implementation notes.
   6 #'
   7 #' This method is based on a Poisson transmission model, and hence may be most
   8 #' most valid at the beginning of an epidemic. In their model, the serial
   9 #' distribution is assumed to be discrete with a finite number of posible
  10 #' values. In this implementation, if \code{mu} is not {NA}, the serial
  11 #' distribution is taken to be a discretized version of a gamma distribution
  12 #' with mean \code{mu}, shape parameter one, and largest possible value based on
  13 #' parameter \code{tol}. When \code{mu} is \code{NA}, the function implements a
  14 #' grid search algorithm to find the maximum likelihood estimator over all
  15 #' possible gamma distributions with unknown mean and variance, restricting
  16 #' these to a prespecified grid (see \code{search} parameter).
  17 #'
  18 #' When the serial distribution is known (i.e., \code{mu} is not \code{NA}),
  19 #' sensitivity testing of \code{mu} is strongly recommended. If the serial
  20 #' distribution is unknown (i.e., \code{mu} is \code{NA}), the likelihood
  21 #' function can be flat near the maximum, resulting in numerical instability of
  22 #' the optimizer. When \code{mu} is \code{NA}, the implementation takes
  23 #' considerably longer to run. Users should be careful about units of time
  24 #' (e.g., are counts observed daily or weekly?) when implementing.
  25 #'
  26 #' The model developed in White and Pagano (2008) is discrete, and hence the
  27 #' serial distribution is finite discrete. In our implementation, the input
  28 #' value \code{mu} is that of a continuous distribution. The algorithm
  29 #' discretizes this input when \code{mu} is not \code{NA}, and hence the mean of
  30 #' the serial distribution returned in the list \code{SD} will differ from
  31 #' \code{mu} somewhat. That is to say, if the user notices that the input
  32 #' \code{mu} and output mean of \code{SD} are different, this is to be expected,
  33 #' and is caused by the discretization.
  34 #'
  35 #' @param NT Vector of case counts.
  36 #' @param mu Mean of the serial distribution (needs to match case counts in time
  37 #'           units; for example, if case counts are weekly and the serial
  38 #'           distribution has a mean of seven days, then \code{mu} should be set
  39 #'           to one). The default value of \code{mu} is set to \code{NA}.
  40 #' @param search List of default values for the grid search algorithm. The list
  41 #'               includes three elements: the first is \code{B}, which is the
  42 #'               length of the grid in one dimension; the second is
  43 #'               \code{scale.max}, which is the largest possible value of the
  44 #'               scale parameter; and the third is \code{shape.max}, which is
  45 #'               the largest possible value of the shape parameter. Defaults to
  46 #'               \code{B = 100, scale.max = 10, shape.max = 10}. For both shape
  47 #'               and scale, the smallest possible value is 1/\code{B}.
  48 #' @param tol Cutoff value for cumulative distribution function of the
  49 #'            pre-discretization gamma serial distribution. Defaults to 0.999
  50 #'            (i.e. in the discretization, the maximum is chosen such that the
  51 #'            original gamma distribution has cumulative probability of no more
  52 #'            than 0.999 at this maximum).
  53 #'
  54 #' @return \code{WP} returns a list containing the following components:
  55 #'         \code{Rhat} is the estimate of R0, and \code{SD} is either the
  56 #'         discretized serial distribution (if \code{mu} is not \code{NA}), or
  57 #'         the estimated discretized serial distribution (if \code{mu} is
  58 #'         \code{NA}). The list also returns the variable \code{check}, which is
  59 #'         equal to the number of non-unique maximum likelihood estimators. The
  60 #'         serial distribution \code{SD} is returned as a list made up of
  61 #'         \code{supp} (the support of the distribution) and \code{pmf} (the
  62 #'         probability mass function).
  63 #'
  64 #' @examples
  65 #' # Weekly data.
  66 #' NT <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4)
  67 #'
  68 #' # Obtain R0 when the serial distribution has a mean of five days.
  69 #' res1 <- WP(NT, mu = 5 / 7)
  70 #' res1$Rhat
  71 #'
  72 #' # Obtain R0 when the serial distribution has a mean of three days.
  73 #' res2 <- WP(NT, mu = 3 / 7)
  74 #' res2$Rhat
  75 #'
  76 #' # Obtain R0 when the serial distribution is unknown.
  77 #' # NOTE: This implementation will take longer to run.
  78 #' res3 <- WP(NT)
  79 #' res3$Rhat
  80 #'
  81 #' # Find the mean of the estimated serial distribution.
  82 #' serial <- res3$SD
  83 #' sum(serial$supp * serial$pmf)
  84 #'
  85 #' @importFrom stats pexp qexp
  86 #'
  87 #' @export
  88 WP <- function(NT, mu = NA,
  89                search = list(B = 100, shape.max = 10, scale.max = 10),
  90                tol = 0.999) {
  91   if (is.na(mu)) {
  92     print("You have assumed that the serial distribution is unknown.")
  93     res <- WP_unknown(NT, B = search$B, shape.max = search$shape.max,
  94                       scale.max = search$scale.max, tol = tol)
  95     Rhat <- res$Rhat
  96     p <- res$p
  97     range.max <- res$range.max
  98     JJ <- res$JJ
  99   } else {
 100     print("You have assumed that the serial distribution is known.")
 101     range.max <- ceiling(qexp(tol, rate = 1 / mu))
 102     p <- diff(pexp(0:range.max, 1 / mu))
 103     p <- p / sum(p)
 104     res <- WP_known(NT = NT, p = p)
 105     Rhat <- res
 106     JJ <- NA
 107   }
 108
 109   return(list(Rhat = Rhat,
 110               check = length(JJ),
 111               SD = list(supp = 1:range.max, pmf = p)))
 112 }
 113
 114 #' WP method background function WP_known
 115 #'
 116 #' This is a background/internal function called by \code{WP}. It computes the
 117 #' maximum likelihood estimator of R0 assuming that the serial distribution is
 118 #' known and finite discrete.
 119 #'
 120 #' @param NT Vector of case counts.
 121 #' @param p Discretized version of the serial distribution.
 122 #'
 123 #' @return The function returns the maximum likelihood estimator of R0.
 124 #'
 125 #' @noRd
 126 WP_known <- function(NT, p) {
 127   k <- length(p)
 128   TT <- length(NT) - 1
 129   mu_t <- rep(0, TT)
 130
 131   for (i in 1:TT) {
 132     Nt <- NT[i:max(1, i - k + 1)]
 133     mu_t[i] <- sum(p[1:min(k, i)] * Nt)
 134   }
 135
 136   Rhat <- sum(NT[-1]) / sum(mu_t)
 137   return(Rhat)
 138 }
 139
 140 #' WP method background function WP_unknown
 141 #'
 142 #' This is a background/internal function called by \code{WP}. It computes the
 143 #' maximum likelihood estimator of R0 assuming that the serial distribution is
 144 #' unknown but comes from a discretized gamma distribution. The function then
 145 #' implements a simple grid search algorithm to obtain the maximum likelihood
 146 #' estimator of R0 as well as the gamma parameters.
 147 #'
 148 #' @param NT Vector of case counts.
 149 #' @param B Length of grid for shape and scale (grid search parameter).
 150 #' @param shape.max Maximum shape value (grid \code{search} parameter).
 151 #' @param scale.max Maximum scale value (grid \code{search} parameter).
 152 #' @param tol cutoff value for cumulative distribution function of the serial
 153 #'            distribution (defaults to 0.999).
 154 #'
 155 #' @return The function returns \code{Rhat}, the maximum likelihood estimator of
 156 #'         R0, as well as the maximum likelihood estimator of the discretized
 157 #'         serial distribution given by \code{p} (the probability mass function)
 158 #'         and \code{range.max} (the distribution has support on the integers
 159 #'         one to \code{range.max}). The function also returns \code{resLL} (all
 160 #'         values of the log-likelihood) at \code{shape} (grid for shape
 161 #'         parameter) and at \code{scale} (grid for scale parameter), as well as
 162 #'         \code{resR0} (the full vector of maximum likelihood estimators),
 163 #'         \code{JJ} (the locations for the likelihood for these), and \code{J0}
 164 #'         (the location for the maximum likelihood estimator \code{Rhat}). If
 165 #'         \code{JJ} and \code{J0} are not the same, this means that the maximum
 166 #'         likelihood estimator is not unique.
 167 #'
 168 #' @importFrom stats pgamma qgamma
 169 #'
 170 #' @noRd
 171 WP_unknown <- function(NT, B = 100, shape.max = 10, scale.max = 10,
 172                        tol = 0.999) {
 173   shape <- seq(0, shape.max, length.out = B + 1)
 174   scale <- seq(0, scale.max, length.out = B + 1)
 175   shape <- shape[-1]
 176   scale <- scale[-1]
 177
 178   resLL <- matrix(0, B, B)
 179   resR0 <- matrix(0, B, B)
 180
 181   for (i in 1:B)
 182     for (j in 1:B) {
 183       range.max <- ceiling(qgamma(tol, shape = shape[i], scale = scale[j]))
 184       p <- diff(pgamma(0:range.max, shape = shape[i], scale = scale[j]))
 185       p <- p / sum(p)
 186       mle <- WP_known(NT, p)
 187       resLL[i, j] <- computeLL(p, NT, mle)
 188       resR0[i, j] <- mle
 189     }
 190
 191   J0 <- which.max(resLL)
 192   R0hat <- resR0[J0]
 193   JJ <- which(resLL == resLL[J0], arr.ind = TRUE)
 194   range.max <- ceiling(qgamma(tol, shape = shape[JJ[1]], scale = scale[JJ[2]]))
 195   p <- diff(pgamma(0:range.max, shape = shape[JJ[1]], scale = scale[JJ[2]]))
 196   p <- p / sum(p)
 197
 198   return(list(Rhat = R0hat, J0 = J0, ll = resLL, Rs = resR0, scale = scale,
 199               shape = shape, JJ = JJ, p = p, range.max = range.max))
 200 }
 201
 202 #' WP method background function computeLL
 203 #'
 204 #' This is a background/internal function called by \code{WP}. It computes the
 205 #' log-likelihood.
 206 #'
 207 #' @param p Discretized version of the serial distribution.
 208 #' @param NT Vector of case counts.
 209 #' @param R0 Basic reproductive ratio.
 210 #'
 211 #' @return This function returns the log-likelihood at the input variables and
 212 #'         parameters.
 213 #'
 214 #' @noRd
 215 computeLL <- function(p, NT, R0) {
 216   k <- length(p)
 217   TT <- length(NT) - 1
 218   mu_t <- rep(0, TT)
 219
 220   for (i in 1:TT) {
 221     Nt <- NT[i:max(1, i - k + 1)]
 222     mu_t[i] <- sum(p[1:min(k, i)] * Nt)
 223   }
 224
 225   mu_t <- R0 * mu_t
 226   LL <- sum(NT[-1] * log(mu_t)) - sum(mu_t)
 227
 228   return(LL)
 229 }