Refactor WP

author Naeem Model <me@nmode.ca>

Mon, 12 Feb 2024 00:02:07 +0000 (00:02 +0000)

committer Naeem Model <me@nmode.ca>

Mon, 12 Feb 2024 00:02:07 +0000 (00:02 +0000)
author Naeem Model <me@nmode.ca>
Mon, 12 Feb 2024 00:02:07 +0000 (00:02 +0000)
committer Naeem Model <me@nmode.ca>
Mon, 12 Feb 2024 00:02:07 +0000 (00:02 +0000)
diff --git a/NAMESPACE b/NAMESPACE

index e57b2b706b41234580fb938cb654467266e52ec5..42ad2823214d7295e246c8fedd0e52e97e4d6b73 100644 (file)
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,14 +1,12 @@
  # Generated by roxygen2: do not edit by hand
  
  # Generated by roxygen2: do not edit by hand
  
-export(WP)
  export(app)
  export(id)
  export(idea)
  export(seq_bayes)
  export(app)
  export(id)
  export(idea)
  export(seq_bayes)
+export(wp)
  importFrom(methods,is)
  importFrom(methods,is)
-importFrom(stats,pexp)
  importFrom(stats,pgamma)
  importFrom(stats,pgamma)
-importFrom(stats,qexp)
  importFrom(stats,qgamma)
  importFrom(utils,install.packages)
  importFrom(utils,read.csv)
  importFrom(stats,qgamma)
  importFrom(utils,install.packages)
  importFrom(utils,read.csv)
diff --git a/R/server.R b/R/server.R

index 384b341d6f10eb7eaab383b977890390fdd345b3..a9ed5216127811c565d31acefde5dda60994309f 100644 (file)
--- a/R/server.R
+++ b/R/server.R
@@ -110,7 +110,7 @@ server <- function(input, output) {
        if (!is.na(serial)) {
          reactive$estimators[[length(reactive$estimators) + 1]] <- list(
            method = "WP", mu = serial, mu_units = input$serialWPUnits,
        if (!is.na(serial)) {
          reactive$estimators[[length(reactive$estimators) + 1]] <- list(
            method = "WP", mu = serial, mu_units = input$serialWPUnits,
-          search = list(B = 100, shape.max = 10, scale.max = 10))
+          grid_length = 100, max_shape = 10, max_scale = 10)
          reactive$est_table <- update_est_col(input, output, reactive$data_table,
            reactive$estimators[[length(reactive$estimators)]],
            reactive$est_table)
          reactive$est_table <- update_est_col(input, output, reactive$data_table,
            reactive$estimators[[length(reactive$estimators)]],
            reactive$est_table)
@@ -153,8 +153,7 @@ server <- function(input, output) {
        if (checks_passed) {
          reactive$estimators[[length(reactive$estimators) + 1]] <- list(
            method = "WP", mu = NA, mu_units = input$serialWPUnits,
        if (checks_passed) {
          reactive$estimators[[length(reactive$estimators) + 1]] <- list(
            method = "WP", mu = NA, mu_units = input$serialWPUnits,
-          search = list(B = grid_length, shape.max = max_shape,
-                        scale.max = max_scale))
+          grid_length = grid_length, max_shape = max_shape, max_scale = max_scale)
          reactive$est_table <- update_est_col(input, output, reactive$data_table,
            reactive$estimators[[length(reactive$estimators)]],
            reactive$est_table)
          reactive$est_table <- update_est_col(input, output, reactive$data_table,
            reactive$estimators[[length(reactive$estimators)]],
            reactive$est_table)
@@ -279,12 +278,15 @@ eval_estimator <- function(input, output, estimator, dataset) {
    else if (estimator$mu_units == "Weeks" && dataset[2] == "Daily")
      serial <- serial * 7
  
    else if (estimator$mu_units == "Weeks" && dataset[2] == "Daily")
      serial <- serial * 7
  
-  # White and Panago
+  # White and Pagano
    if (estimator$method == "WP") {
    if (estimator$method == "WP") {
-    estimate <- WP(unlist(dataset[3]), mu = serial, search = estimator$search)
+    estimate <- wp(unlist(dataset[3]), mu = serial, serial = TRUE,
+                   grid_length = estimator$grid_length,
+                   max_shape = estimator$max_shape,
+                   max_scale = estimator$max_scale)
  
      if (!is.na(estimator$mu))
  
      if (!is.na(estimator$mu))
-      estimate <- round(estimate$Rhat, 2)
+      estimate <- round(estimate$r0, 2)
      # Display the estimated mean of the serial distribution if mu was not
      # specified.
      else {
      # Display the estimated mean of the serial distribution if mu was not
      # specified.
      else {
@@ -292,8 +294,8 @@ eval_estimator <- function(input, output, estimator, dataset) {
          mu_units <- "days"
        else
          mu_units <- "weeks"
          mu_units <- "days"
        else
          mu_units <- "weeks"
-      MSI <- sum(estimate$SD$supp * estimate$SD$pmf)
-      estimate <- shiny::HTML(paste0(round(estimate$Rhat, 2), "<br/>(&mu; = ",
+      MSI <- sum(estimate$supp * estimate$pmf)
+      estimate <- shiny::HTML(paste0(round(estimate$r0, 2), "<br/>(&mu; = ",
                                       round(MSI, 2), " ", mu_units, ")"))
      }
    }
                                       round(MSI, 2), " ", mu_units, ")"))
      }
    }
diff --git a/R/ui.R b/R/ui.R

index 355b08ecb0b124599ce45c20c27668df3e518c78..911061ab6236b30ac774ce5a6c1a933436bef501 100644 (file)
--- a/R/ui.R
+++ b/R/ui.R
@@ -90,10 +90,10 @@ est_sidebar <- function() {
    )
  }
  
    )
  }
  
-# Collapsable entry for White & Panago (WP) method.
+# Collapsable entry for White & Pagano (WP) method.
  WP_collapse <- function() {
    shiny::tags$details(
  WP_collapse <- function() {
    shiny::tags$details(
-    shiny::tags$summary(shiny::h4("White & Panago (WP)")),
+    shiny::tags$summary(shiny::h4("White & Pagano (WP)")),
      shiny::p("Method due to White and Pagano (2008), assumes a branching process
               based model. Serial distribution can be assumed known or can be
               estimated using maximum likelihood;  When serial interval is
      shiny::p("Method due to White and Pagano (2008), assumes a branching process
               based model. Serial distribution can be assumed known or can be
               estimated using maximum likelihood;  When serial interval is
diff --git a/R/wp.R b/R/wp.R

index 04791e2b6a4e60a5c7394175a7ea601178eb0224..674a7232a33828c8185199f4993d771e42708441 100644 (file)
--- a/R/wp.R
+++ b/R/wp.R
@@ -1,4 +1,4 @@
-#' WP method
+#' White and Pagano (WP)
  #'
  #' This function implements an R0 estimation due to White and Pagano (Statistics
  #' in Medicine, 2008). The method is based on maximum likelihood estimation in a
  #'
  #' This function implements an R0 estimation due to White and Pagano (Statistics
  #' in Medicine, 2008). The method is based on maximum likelihood estimation in a
@@ -6,224 +6,218 @@
  #'
  #' This method is based on a Poisson transmission model, and hence may be most
  #' most valid at the beginning of an epidemic. In their model, the serial
  #'
  #' This method is based on a Poisson transmission model, and hence may be most
  #' most valid at the beginning of an epidemic. In their model, the serial
-#' distribution is assumed to be discrete with a finite number of posible
-#' values. In this implementation, if \code{mu} is not {NA}, the serial
-#' distribution is taken to be a discretized version of a gamma distribution
-#' with mean \code{mu}, shape parameter one, and largest possible value based on
-#' parameter \code{tol}. When \code{mu} is \code{NA}, the function implements a
-#' grid search algorithm to find the maximum likelihood estimator over all
-#' possible gamma distributions with unknown mean and variance, restricting
-#' these to a prespecified grid (see \code{search} parameter).
-#'
-#' When the serial distribution is known (i.e., \code{mu} is not \code{NA}),
-#' sensitivity testing of \code{mu} is strongly recommended. If the serial
-#' distribution is unknown (i.e., \code{mu} is \code{NA}), the likelihood
-#' function can be flat near the maximum, resulting in numerical instability of
-#' the optimizer. When \code{mu} is \code{NA}, the implementation takes
-#' considerably longer to run. Users should be careful about units of time
-#' (e.g., are counts observed daily or weekly?) when implementing.
+#' distribution is assumed to be discrete with a finite number of possible
+#' values. In this implementation, if `mu` is not `NA`, the serial distribution
+#' is taken to be a discretized version of a gamma distribution with shape
+#' parameter `1` and scale parameter `mu` (and hence mean `mu`). When `mu` is
+#' `NA`, the function implements a grid search algorithm to find the maximum
+#' likelihood estimator over all possible gamma distributions with unknown shape
+#' and scale, restricting these to a prespecified grid (see the parameters
+#' `grid_length`, `max_shape` and `max_scale`). In both cases, the largest value
+#' of the support is chosen such that the cumulative distribution function of
+#' the original (pre-discretized) gamma distribution has cumulative probability
+#' of no more than 0.999 at this value.
+#'
+#' When the serial distribution is known (i.e., `mu` is not `NA`), sensitivity
+#' testing of `mu` is strongly recommended. If the serial distribution is
+#' unknown (i.e., `mu` is `NA`), the likelihood function can be flat near the
+#' maximum, resulting in numerical instability of the optimizer. When `mu` is
+#' `NA`, the implementation takes considerably longer to run. Users should be
+#' careful about units of time (e.g., are counts observed daily or weekly?) when
+#' implementing.
  #'
  #' The model developed in White and Pagano (2008) is discrete, and hence the
  #' serial distribution is finite discrete. In our implementation, the input
  #'
  #' The model developed in White and Pagano (2008) is discrete, and hence the
  #' serial distribution is finite discrete. In our implementation, the input
-#' value \code{mu} is that of a continuous distribution. The algorithm
-#' discretizes this input when \code{mu} is not \code{NA}, and hence the mean of
-#' the serial distribution returned in the list \code{SD} will differ from
-#' \code{mu} somewhat. That is to say, if the user notices that the input
-#' \code{mu} and output mean of \code{SD} are different, this is to be expected,
-#' and is caused by the discretization.
-#'
-#' @param NT Vector of case counts.
-#' @param mu Mean of the serial distribution (needs to match case counts in time
-#'           units; for example, if case counts are weekly and the serial
-#'           distribution has a mean of seven days, then \code{mu} should be set
-#'           to one). The default value of \code{mu} is set to \code{NA}.
-#' @param search List of default values for the grid search algorithm. The list
-#'               includes three elements: the first is \code{B}, which is the
-#'               length of the grid in one dimension; the second is
-#'               \code{scale.max}, which is the largest possible value of the
-#'               scale parameter; and the third is \code{shape.max}, which is
-#'               the largest possible value of the shape parameter. Defaults to
-#'               \code{B = 100, scale.max = 10, shape.max = 10}. For both shape
-#'               and scale, the smallest possible value is 1/\code{B}.
-#' @param tol Cutoff value for cumulative distribution function of the
-#'            pre-discretization gamma serial distribution. Defaults to 0.999
-#'            (i.e. in the discretization, the maximum is chosen such that the
-#'            original gamma distribution has cumulative probability of no more
-#'            than 0.999 at this maximum).
-#'
-#' @return \code{WP} returns a list containing the following components:
-#'         \code{Rhat} is the estimate of R0, and \code{SD} is either the
-#'         discretized serial distribution (if \code{mu} is not \code{NA}), or
-#'         the estimated discretized serial distribution (if \code{mu} is
-#'         \code{NA}). The list also returns the variable \code{check}, which is
-#'         equal to the number of non-unique maximum likelihood estimators. The
-#'         serial distribution \code{SD} is returned as a list made up of
-#'         \code{supp} (the support of the distribution) and \code{pmf} (the
-#'         probability mass function).
+#' value `mu` is that of a continuous distribution. The algorithm discretizes
+#' this input, and so the mean of the estimated serial distribution returned
+#' (when `serial` is set to `TRUE`) will differ from `mu` somewhat. That is to
+#' say, if the user notices that the input `mu` and the mean of the estimated
+#' serial distribution are different, this is to be expected, and is caused by
+#' the discretization.
+#'
+#' @param cases Vector of case counts. The vector must be of length at least two
+#'   and only contain positive integers.
+#' @param mu Mean of the serial distribution. This must be a positive number or
+#'   `NA`. If a number is specified, the value should match the case counts in
+#'   time units. For example, if case counts are weekly and the serial
+#'   distribution has a mean of seven days, then `mu` should be set to `1`. If
+#'   case counts are daily and the serial distribution has a mean of seven days,
+#'   then `mu` should be set to `7`.
+#' @param serial Whether to return the estimated serial distribution in addition
+#'   to the estimate of R0. This must be a value identical to `TRUE` or `FALSE`.
+#' @param grid_length The length of the grid in the grid search (defaults to
+#'   100). This must be a positive integer. It will only be used if `mu` is set
+#'   to `NA`. The grid search will go through all combinations of the shape and
+#'   scale parameters for the gamma distribution, which are `grid_length` evenly
+#'   spaced values from `0` (exclusive) to `max_shape` and `max_scale`
+#'   (inclusive), respectively. Note that larger values will result in a longer
+#'   search time.
+#' @param max_shape The largest possible value of the shape parameter in the
+#'   grid search (defaults to 10). This must be a positive number. It will only
+#'   be used if `mu` is set to `NA`. Note that larger values will result in a
+#'   longer search time, and may cause numerical instabilities.
+#' @param max_scale The largest possible value of the scale parameter in the
+#'   grid search (defaults to 10). This must be a positive number. It will only
+#'   be used if `mu` is set to `NA`. Note that larger values will result in a
+#'   longer search time, and may cause numerical instabilities.
+#'
+#' @return If `serial` is identical to `TRUE`, a list containing the following
+#'   components is returned:
+#'   * `r0` - the estimate of R0
+#'   * `supp` - the support of the estimated serial distribution
+#'   * `pmf` - the probability mass function of the estimated serial
+#'     distribution
+#'
+#'   Otherwise, if `serial` is identical to `FALSE`, only the estimate of R0 is
+#'   returned.
+#'
+#' @references
+#' [White and Pagano (Statistics in Medicine, 2008)](
+#' https://doi.org/10.1002/sim.3136)
+#'
+#' @importFrom stats pgamma qgamma
+#'
+#' @export
  #'
  #' @examples
  #' # Weekly data.
  #'
  #' @examples
  #' # Weekly data.
-#' NT <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4)
+#' cases <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4)
  #'
  #' # Obtain R0 when the serial distribution has a mean of five days.
  #'
  #' # Obtain R0 when the serial distribution has a mean of five days.
-#' res1 <- WP(NT, mu = 5 / 7)
-#' res1$Rhat
+#' wp(cases, mu = 5 / 7)
  #'
  #' # Obtain R0 when the serial distribution has a mean of three days.
  #'
  #' # Obtain R0 when the serial distribution has a mean of three days.
-#' res2 <- WP(NT, mu = 3 / 7)
-#' res2$Rhat
+#' wp(cases, mu = 3 / 7)
  #'
  #' # Obtain R0 when the serial distribution is unknown.
  #'
  #' # Obtain R0 when the serial distribution is unknown.
-#' # NOTE: This implementation will take longer to run.
-#' res3 <- WP(NT)
-#' res3$Rhat
-#'
-#' # Find the mean of the estimated serial distribution.
-#' serial <- res3$SD
-#' sum(serial$supp * serial$pmf)
-#'
-#' @importFrom stats pexp qexp
-#'
-#' @export
-WP <- function(NT, mu = NA,
-               search = list(B = 100, shape.max = 10, scale.max = 10),
-               tol = 0.999) {
+#' # Note that this will take longer to run than when `mu` is known.
+#' wp(cases)
+#'
+#' # Same as above, but specify custom grid search parameters. The larger any of
+#' # the parameters, the longer the search will take, but with potentially more
+#' # accurate estimates.
+#' wp(cases, grid_length = 40, max_shape = 4, max_scale = 4)
+#'
+#' # Return the estimated serial distribution in addition to the estimate of R0.
+#' estimate <- wp(cases, serial = TRUE)
+#'
+#' # Display the estimate of R0, as well as the support and probability mass
+#' # function of the estimated serial distribution returned by the grid search.
+#' estimate$r0
+#' estimate$supp
+#' estimate$pmf
+wp <- function(cases, mu = NA, serial = FALSE,
+               grid_length = 100, max_shape = 10, max_scale = 10) {
    if (is.na(mu)) {
    if (is.na(mu)) {
-    print("You have assumed that the serial distribution is unknown.")
-    res <- WP_unknown(NT, B = search$B, shape.max = search$shape.max,
-                      scale.max = search$scale.max, tol = tol)
-    Rhat <- res$Rhat
-    p <- res$p
-    range.max <- res$range.max
-    JJ <- res$JJ
+    search <- wp_search(cases, grid_length, max_shape, max_scale)
+    r0 <- search$r0
+    serial_supp <- search$supp
+    serial_pmf <- search$pmf
    } else {
    } else {
-    print("You have assumed that the serial distribution is known.")
-    range.max <- ceiling(qexp(tol, rate = 1 / mu))
-    p <- diff(pexp(0:range.max, 1 / mu))
-    p <- p / sum(p)
-    res <- WP_known(NT = NT, p = p)
-    Rhat <- res
-    JJ <- NA
+    max_range <- ceiling(qgamma(0.999, shape = 1, scale = mu))
+    serial_supp <- seq_len(max_range)
+    serial_pmf <- diff(pgamma(0:max_range, shape = 1, scale = mu))
+    serial_pmf <- serial_pmf / sum(serial_pmf)
+    r0 <- sum(cases[-1]) / sum(wp_mu_t_sigma(cases, serial_pmf))
    }
  
    }
  
-  return(list(Rhat = Rhat,
-              check = length(JJ),
-              SD = list(supp = 1:range.max, pmf = p)))
+  if (!serial) {
+    return(r0)
+  }
+  list(r0 = r0, supp = serial_supp, pmf = serial_pmf)
  }
  
  }
  
-#' WP method background function WP_known
+#' White and Pagano (WP) Grid Search
  #'
  #'
-#' This is a background/internal function called by \code{WP}. It computes the
+#' This is a background/internal function called by [wp()]. It computes the
  #' maximum likelihood estimator of R0 assuming that the serial distribution is
  #' maximum likelihood estimator of R0 assuming that the serial distribution is
-#' known and finite discrete.
-#'
-#' @param NT Vector of case counts.
-#' @param p Discretized version of the serial distribution.
+#' unknown (i.e., [wp()] is called with `mu` set to `NA`) but comes from a
+#' discretized gamma distribution. The function implements a simple grid search
+#' to obtain the maximum likelihood estimator of R0 as well as the gamma
+#' parameters.
+#'
+#' @param cases Vector of case counts.
+#' @param grid_length The length of the grid in the grid search.
+#' @param max_shape The largest possible value of the shape parameter in the
+#'   grid search.
+#' @param max_scale The largest possible value of the scale parameter in the
+#'   grid search.
+#'
+#' @return A list containing the following components is returned:
+#'   * `r0` - the estimate of R0
+#'   * `supp` - the support of the estimated serial distribution
+#'   * `pmf` - the probability mass function of the estimated serial
+#'     distribution
+#'
+#' @references
+#' [White and Pagano (Statistics in Medicine, 2008)](
+#' https://doi.org/10.1002/sim.3136)
+#'
+#' @seealso [wp()] for the function in which this grid search is called.
  #'
  #'
-#' @return The function returns the maximum likelihood estimator of R0.
+#' @importFrom stats pgamma qgamma
  #'
  #' @noRd
  #'
  #' @noRd
-WP_known <- function(NT, p) {
-  k <- length(p)
-  TT <- length(NT) - 1
-  mu_t <- rep(0, TT)
+wp_search <- function(cases, grid_length, max_shape, max_scale) {
+  shapes <- seq(0, max_shape, length.out = grid_length + 1)[-1]
+  scales <- seq(0, max_scale, length.out = grid_length + 1)[-1]
  
  
-  for (i in 1:TT) {
-    Nt <- NT[i:max(1, i - k + 1)]
-    mu_t[i] <- sum(p[1:min(k, i)] * Nt)
-  }
+  best_log_like <- -Inf
+  best_serial_pmf <- NA
+  best_max_range <- NA
+  r0 <- NA
  
  
-  Rhat <- sum(NT[-1]) / sum(mu_t)
-  return(Rhat)
-}
+  for (i in seq_len(grid_length)) {
+    for (j in seq_len(grid_length)) {
+      max_range <- ceiling(qgamma(0.999, shape = shapes[i], scale = scales[j]))
  
  
-#' WP method background function WP_unknown
-#'
-#' This is a background/internal function called by \code{WP}. It computes the
-#' maximum likelihood estimator of R0 assuming that the serial distribution is
-#' unknown but comes from a discretized gamma distribution. The function then
-#' implements a simple grid search algorithm to obtain the maximum likelihood
-#' estimator of R0 as well as the gamma parameters.
-#'
-#' @param NT Vector of case counts.
-#' @param B Length of grid for shape and scale (grid search parameter).
-#' @param shape.max Maximum shape value (grid \code{search} parameter).
-#' @param scale.max Maximum scale value (grid \code{search} parameter).
-#' @param tol cutoff value for cumulative distribution function of the serial
-#'            distribution (defaults to 0.999).
-#'
-#' @return The function returns \code{Rhat}, the maximum likelihood estimator of
-#'         R0, as well as the maximum likelihood estimator of the discretized
-#'         serial distribution given by \code{p} (the probability mass function)
-#'         and \code{range.max} (the distribution has support on the integers
-#'         one to \code{range.max}). The function also returns \code{resLL} (all
-#'         values of the log-likelihood) at \code{shape} (grid for shape
-#'         parameter) and at \code{scale} (grid for scale parameter), as well as
-#'         \code{resR0} (the full vector of maximum likelihood estimators),
-#'         \code{JJ} (the locations for the likelihood for these), and \code{J0}
-#'         (the location for the maximum likelihood estimator \code{Rhat}). If
-#'         \code{JJ} and \code{J0} are not the same, this means that the maximum
-#'         likelihood estimator is not unique.
-#'
-#' @importFrom stats pgamma qgamma
-#'
-#' @noRd
-WP_unknown <- function(NT, B = 100, shape.max = 10, scale.max = 10,
-                       tol = 0.999) {
-  shape <- seq(0, shape.max, length.out = B + 1)
-  scale <- seq(0, scale.max, length.out = B + 1)
-  shape <- shape[-1]
-  scale <- scale[-1]
+      serial_pmf <- diff(
+        pgamma(0:max_range, shape = shapes[i], scale = scales[j])
+      )
+      serial_pmf <- serial_pmf / sum(serial_pmf)
  
  
-  resLL <- matrix(0, B, B)
-  resR0 <- matrix(0, B, B)
+      mu_t_sigma <- wp_mu_t_sigma(cases, serial_pmf)
+      mle <- sum(cases[-1]) / sum(mu_t_sigma)
+      mu_t <- mle * mu_t_sigma
  
  
-  for (i in 1:B)
-    for (j in 1:B) {
-      range.max <- ceiling(qgamma(tol, shape = shape[i], scale = scale[j]))
-      p <- diff(pgamma(0:range.max, shape = shape[i], scale = scale[j]))
-      p <- p / sum(p)
-      mle <- WP_known(NT, p)
-      resLL[i, j] <- computeLL(p, NT, mle)
-      resR0[i, j] <- mle
+      log_like <- sum(cases[-1] * log(mu_t)) - sum(mu_t)
+      if (log_like > best_log_like) {
+        best_log_like <- log_like
+        best_serial_pmf <- serial_pmf
+        best_max_range <- max_range
+        r0 <- mle
+      }
      }
      }
+  }
  
  
-  J0 <- which.max(resLL)
-  R0hat <- resR0[J0]
-  JJ <- which(resLL == resLL[J0], arr.ind = TRUE)
-  range.max <- ceiling(qgamma(tol, shape = shape[JJ[1]], scale = scale[JJ[2]]))
-  p <- diff(pgamma(0:range.max, shape = shape[JJ[1]], scale = scale[JJ[2]]))
-  p <- p / sum(p)
-
-  return(list(Rhat = R0hat, J0 = J0, ll = resLL, Rs = resR0, scale = scale,
-              shape = shape, JJ = JJ, p = p, range.max = range.max))
+  list(r0 = r0, supp = seq_len(best_max_range), pmf = best_serial_pmf)
  }
  
  }
  
-#' WP method background function computeLL
+#' White and Pagano (WP) Mu Function Helper
+#'
+#' This is a background/internal function called by [wp()] and [wp_search()]. It
+#' computes the sum inside the function `mu(t)`, which is present in the log
+#' likelihood function. See the referenced article for more details.
+#'
+#' @param cases Vector of case counts.
+#' @param serial_pmf The probability mass function of the serial distribution.
  #'
  #'
-#' This is a background/internal function called by \code{WP}. It computes the
-#' log-likelihood.
+#' @return The sum inside the function `mu(t)` of the log likelihood.
  #'
  #'
-#' @param p Discretized version of the serial distribution.
-#' @param NT Vector of case counts.
-#' @param R0 Basic reproductive ratio.
+#' @references
+#' [White and Pagano (Statistics in Medicine, 2008)](
+#' https://doi.org/10.1002/sim.3136)
  #'
  #'
-#' @return This function returns the log-likelihood at the input variables and
-#'         parameters.
+#' @seealso [wp()] and [wp_search()] for the functions which require this sum.
  #'
  #' @noRd
  #'
  #' @noRd
-computeLL <- function(p, NT, R0) {
-  k <- length(p)
-  TT <- length(NT) - 1
-  mu_t <- rep(0, TT)
-
-  for (i in 1:TT) {
-    Nt <- NT[i:max(1, i - k + 1)]
-    mu_t[i] <- sum(p[1:min(k, i)] * Nt)
+wp_mu_t_sigma <- function(cases, serial_pmf) {
+  mu_t_sigma <- rep(0, length(cases) - 1)
+  for (i in seq_len(length(cases) - 1)) {
+    mu_t_sigma[i] <- sum(
+      serial_pmf[seq_len(min(length(serial_pmf), i))] *
+        cases[i:max(1, i - length(serial_pmf) + 1)]
+    )
    }
    }
-
-  mu_t <- R0 * mu_t
-  LL <- sum(NT[-1] * log(mu_t)) - sum(mu_t)
-
-  return(LL)
+  mu_t_sigma
  }
  }
diff --git a/man/wp.Rd b/man/wp.Rd

index 479593bab331d1c375d4413a3f5b262a760d91d8..450c948a857ec3abc268fc794b1e2c1dab987cc2 100644 (file)
--- a/man/wp.Rd
+++ b/man/wp.Rd
@@ -1,49 +1,62 @@
  % Generated by roxygen2: do not edit by hand
  % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/WP.R
-\name{WP}
-\alias{WP}
-\title{WP method}
+% Please edit documentation in R/wp.R
+\name{wp}
+\alias{wp}
+\title{White and Pagano (WP)}
  \usage{
  \usage{
-WP(
-  NT,
+wp(
+  cases,
    mu = NA,
    mu = NA,
-  search = list(B = 100, shape.max = 10, scale.max = 10),
-  tol = 0.999
+  serial = FALSE,
+  grid_length = 100,
+  max_shape = 10,
+  max_scale = 10
  )
  }
  \arguments{
  )
  }
  \arguments{
-\item{NT}{Vector of case counts.}
+\item{cases}{Vector of case counts. The vector must be of length at least two
+and only contain positive integers.}
  
  
-\item{mu}{Mean of the serial distribution (needs to match case counts in time
-units; for example, if case counts are weekly and the serial
-distribution has a mean of seven days, then \code{mu} should be set
-to one). The default value of \code{mu} is set to \code{NA}.}
+\item{mu}{Mean of the serial distribution. This must be a positive number or
+\code{NA}. If a number is specified, the value should match the case counts in
+time units. For example, if case counts are weekly and the serial
+distribution has a mean of seven days, then \code{mu} should be set to \code{1}. If
+case counts are daily and the serial distribution has a mean of seven days,
+then \code{mu} should be set to \code{7}.}
  
  
-\item{search}{List of default values for the grid search algorithm. The list
-includes three elements: the first is \code{B}, which is the
-length of the grid in one dimension; the second is
-\code{scale.max}, which is the largest possible value of the
-scale parameter; and the third is \code{shape.max}, which is
-the largest possible value of the shape parameter. Defaults to
-\code{B = 100, scale.max = 10, shape.max = 10}. For both shape
-and scale, the smallest possible value is 1/\code{B}.}
+\item{serial}{Whether to return the estimated serial distribution in addition
+to the estimate of R0. This must be a value identical to \code{TRUE} or \code{FALSE}.}
  
  
-\item{tol}{Cutoff value for cumulative distribution function of the
-pre-discretization gamma serial distribution. Defaults to 0.999
-(i.e. in the discretization, the maximum is chosen such that the
-original gamma distribution has cumulative probability of no more
-than 0.999 at this maximum).}
+\item{grid_length}{The length of the grid in the grid search (defaults to
+100). This must be a positive integer. It will only be used if \code{mu} is set
+to \code{NA}. The grid search will go through all combinations of the shape and
+scale parameters for the gamma distribution, which are \code{grid_length} evenly
+spaced values from \code{0} (exclusive) to \code{max_shape} and \code{max_scale}
+(inclusive), respectively. Note that larger values will result in a longer
+search time.}
+
+\item{max_shape}{The largest possible value of the shape parameter in the
+grid search (defaults to 10). This must be a positive number. It will only
+be used if \code{mu} is set to \code{NA}. Note that larger values will result in a
+longer search time, and may cause numerical instabilities.}
+
+\item{max_scale}{The largest possible value of the scale parameter in the
+grid search (defaults to 10). This must be a positive number. It will only
+be used if \code{mu} is set to \code{NA}. Note that larger values will result in a
+longer search time, and may cause numerical instabilities.}
  }
  \value{
  }
  \value{
-\code{WP} returns a list containing the following components:
-        \code{Rhat} is the estimate of R0, and \code{SD} is either the
-        discretized serial distribution (if \code{mu} is not \code{NA}), or
-        the estimated discretized serial distribution (if \code{mu} is
-        \code{NA}). The list also returns the variable \code{check}, which is
-        equal to the number of non-unique maximum likelihood estimators. The
-        serial distribution \code{SD} is returned as a list made up of
-        \code{supp} (the support of the distribution) and \code{pmf} (the
-        probability mass function).
+If \code{serial} is identical to \code{TRUE}, a list containing the following
+components is returned:
+\itemize{
+\item \code{r0} - the estimate of R0
+\item \code{supp} - the support of the estimated serial distribution
+\item \code{pmf} - the probability mass function of the estimated serial
+distribution
+}
+
+Otherwise, if \code{serial} is identical to \code{FALSE}, only the estimate of R0 is
+returned.
  }
  \description{
  This function implements an R0 estimation due to White and Pagano (Statistics
  }
  \description{
  This function implements an R0 estimation due to White and Pagano (Statistics
@@ -53,51 +66,63 @@ Poisson transmission model. See details for important implementation notes.
  \details{
  This method is based on a Poisson transmission model, and hence may be most
  most valid at the beginning of an epidemic. In their model, the serial
  \details{
  This method is based on a Poisson transmission model, and hence may be most
  most valid at the beginning of an epidemic. In their model, the serial
-distribution is assumed to be discrete with a finite number of posible
-values. In this implementation, if \code{mu} is not {NA}, the serial
-distribution is taken to be a discretized version of a gamma distribution
-with mean \code{mu}, shape parameter one, and largest possible value based on
-parameter \code{tol}. When \code{mu} is \code{NA}, the function implements a
-grid search algorithm to find the maximum likelihood estimator over all
-possible gamma distributions with unknown mean and variance, restricting
-these to a prespecified grid (see \code{search} parameter).
+distribution is assumed to be discrete with a finite number of possible
+values. In this implementation, if \code{mu} is not \code{NA}, the serial distribution
+is taken to be a discretized version of a gamma distribution with shape
+parameter \code{1} and scale parameter \code{mu} (and hence mean \code{mu}). When \code{mu} is
+\code{NA}, the function implements a grid search algorithm to find the maximum
+likelihood estimator over all possible gamma distributions with unknown shape
+and scale, restricting these to a prespecified grid (see the parameters
+\code{grid_length}, \code{max_shape} and \code{max_scale}). In both cases, the largest value
+of the support is chosen such that the cumulative distribution function of
+the original (pre-discretized) gamma distribution has cumulative probability
+of no more than 0.999 at this value.
  
  
-When the serial distribution is known (i.e., \code{mu} is not \code{NA}),
-sensitivity testing of \code{mu} is strongly recommended. If the serial
-distribution is unknown (i.e., \code{mu} is \code{NA}), the likelihood
-function can be flat near the maximum, resulting in numerical instability of
-the optimizer. When \code{mu} is \code{NA}, the implementation takes
-considerably longer to run. Users should be careful about units of time
-(e.g., are counts observed daily or weekly?) when implementing.
+When the serial distribution is known (i.e., \code{mu} is not \code{NA}), sensitivity
+testing of \code{mu} is strongly recommended. If the serial distribution is
+unknown (i.e., \code{mu} is \code{NA}), the likelihood function can be flat near the
+maximum, resulting in numerical instability of the optimizer. When \code{mu} is
+\code{NA}, the implementation takes considerably longer to run. Users should be
+careful about units of time (e.g., are counts observed daily or weekly?) when
+implementing.
  
  The model developed in White and Pagano (2008) is discrete, and hence the
  serial distribution is finite discrete. In our implementation, the input
  
  The model developed in White and Pagano (2008) is discrete, and hence the
  serial distribution is finite discrete. In our implementation, the input
-value \code{mu} is that of a continuous distribution. The algorithm
-discretizes this input when \code{mu} is not \code{NA}, and hence the mean of
-the serial distribution returned in the list \code{SD} will differ from
-\code{mu} somewhat. That is to say, if the user notices that the input
-\code{mu} and output mean of \code{SD} are different, this is to be expected,
-and is caused by the discretization.
+value \code{mu} is that of a continuous distribution. The algorithm discretizes
+this input, and so the mean of the estimated serial distribution returned
+(when \code{serial} is set to \code{TRUE}) will differ from \code{mu} somewhat. That is to
+say, if the user notices that the input \code{mu} and the mean of the estimated
+serial distribution are different, this is to be expected, and is caused by
+the discretization.
  }
  \examples{
  # Weekly data.
  }
  \examples{
  # Weekly data.
-NT <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4)
+cases <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4)
  
  # Obtain R0 when the serial distribution has a mean of five days.
  
  # Obtain R0 when the serial distribution has a mean of five days.
-res1 <- WP(NT, mu = 5 / 7)
-res1$Rhat
+wp(cases, mu = 5 / 7)
  
  # Obtain R0 when the serial distribution has a mean of three days.
  
  # Obtain R0 when the serial distribution has a mean of three days.
-res2 <- WP(NT, mu = 3 / 7)
-res2$Rhat
+wp(cases, mu = 3 / 7)
  
  # Obtain R0 when the serial distribution is unknown.
  
  # Obtain R0 when the serial distribution is unknown.
-# NOTE: This implementation will take longer to run.
-res3 <- WP(NT)
-res3$Rhat
+# Note that this will take longer to run than when `mu` is known.
+wp(cases)
+
+# Same as above, but specify custom grid search parameters. The larger any of
+# the parameters, the longer the search will take, but with potentially more
+# accurate estimates.
+wp(cases, grid_length = 40, max_shape = 4, max_scale = 4)
  
  
-# Find the mean of the estimated serial distribution.
-serial <- res3$SD
-sum(serial$supp * serial$pmf)
+# Return the estimated serial distribution in addition to the estimate of R0.
+estimate <- wp(cases, serial = TRUE)
  
  
+# Display the estimate of R0, as well as the support and probability mass
+# function of the estimated serial distribution returned by the grid search.
+estimate$r0
+estimate$supp
+estimate$pmf
+}
+\references{
+\href{https://doi.org/10.1002/sim.3136}{White and Pagano (Statistics in Medicine, 2008)}
  }
  }
author	Naeem Model <me@nmode.ca>
	Mon, 12 Feb 2024 00:02:07 +0000 (00:02 +0000)
committer	Naeem Model <me@nmode.ca>
	Mon, 12 Feb 2024 00:02:07 +0000 (00:02 +0000)
NAMESPACE		patch \| blob \| blame \| history
R/server.R		patch \| blob \| blame \| history
R/ui.R		patch \| blob \| blame \| history
R/wp.R		patch \| blob \| blame \| history
man/wp.Rd		patch \| blob \| blame \| history