From e5e312d01bccbabbb21e69d269ccf0d947e5abdb Mon Sep 17 00:00:00 2001
From: Naeem Model <me@nmode.ca>
Date: Mon, 12 Feb 2024 00:02:07 +0000
Subject: Refactor WP

---
 man/wp.Rd | 159 ++++++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 92 insertions(+), 67 deletions(-)

(limited to 'man')

diff --git a/man/wp.Rd b/man/wp.Rd
index 479593b..450c948 100644
--- a/man/wp.Rd
+++ b/man/wp.Rd
@@ -1,49 +1,62 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/WP.R
-\name{WP}
-\alias{WP}
-\title{WP method}
+% Please edit documentation in R/wp.R
+\name{wp}
+\alias{wp}
+\title{White and Pagano (WP)}
 \usage{
-WP(
-  NT,
+wp(
+  cases,
   mu = NA,
-  search = list(B = 100, shape.max = 10, scale.max = 10),
-  tol = 0.999
+  serial = FALSE,
+  grid_length = 100,
+  max_shape = 10,
+  max_scale = 10
 )
 }
 \arguments{
-\item{NT}{Vector of case counts.}
+\item{cases}{Vector of case counts. The vector must be of length at least two
+and only contain positive integers.}
 
-\item{mu}{Mean of the serial distribution (needs to match case counts in time
-units; for example, if case counts are weekly and the serial
-distribution has a mean of seven days, then \code{mu} should be set
-to one). The default value of \code{mu} is set to \code{NA}.}
+\item{mu}{Mean of the serial distribution. This must be a positive number or
+\code{NA}. If a number is specified, the value should match the case counts in
+time units. For example, if case counts are weekly and the serial
+distribution has a mean of seven days, then \code{mu} should be set to \code{1}. If
+case counts are daily and the serial distribution has a mean of seven days,
+then \code{mu} should be set to \code{7}.}
 
-\item{search}{List of default values for the grid search algorithm. The list
-includes three elements: the first is \code{B}, which is the
-length of the grid in one dimension; the second is
-\code{scale.max}, which is the largest possible value of the
-scale parameter; and the third is \code{shape.max}, which is
-the largest possible value of the shape parameter. Defaults to
-\code{B = 100, scale.max = 10, shape.max = 10}. For both shape
-and scale, the smallest possible value is 1/\code{B}.}
+\item{serial}{Whether to return the estimated serial distribution in addition
+to the estimate of R0. This must be a value identical to \code{TRUE} or \code{FALSE}.}
 
-\item{tol}{Cutoff value for cumulative distribution function of the
-pre-discretization gamma serial distribution. Defaults to 0.999
-(i.e. in the discretization, the maximum is chosen such that the
-original gamma distribution has cumulative probability of no more
-than 0.999 at this maximum).}
+\item{grid_length}{The length of the grid in the grid search (defaults to
+100). This must be a positive integer. It will only be used if \code{mu} is set
+to \code{NA}. The grid search will go through all combinations of the shape and
+scale parameters for the gamma distribution, which are \code{grid_length} evenly
+spaced values from \code{0} (exclusive) to \code{max_shape} and \code{max_scale}
+(inclusive), respectively. Note that larger values will result in a longer
+search time.}
+
+\item{max_shape}{The largest possible value of the shape parameter in the
+grid search (defaults to 10). This must be a positive number. It will only
+be used if \code{mu} is set to \code{NA}. Note that larger values will result in a
+longer search time, and may cause numerical instabilities.}
+
+\item{max_scale}{The largest possible value of the scale parameter in the
+grid search (defaults to 10). This must be a positive number. It will only
+be used if \code{mu} is set to \code{NA}. Note that larger values will result in a
+longer search time, and may cause numerical instabilities.}
 }
 \value{
-\code{WP} returns a list containing the following components:
-        \code{Rhat} is the estimate of R0, and \code{SD} is either the
-        discretized serial distribution (if \code{mu} is not \code{NA}), or
-        the estimated discretized serial distribution (if \code{mu} is
-        \code{NA}). The list also returns the variable \code{check}, which is
-        equal to the number of non-unique maximum likelihood estimators. The
-        serial distribution \code{SD} is returned as a list made up of
-        \code{supp} (the support of the distribution) and \code{pmf} (the
-        probability mass function).
+If \code{serial} is identical to \code{TRUE}, a list containing the following
+components is returned:
+\itemize{
+\item \code{r0} - the estimate of R0
+\item \code{supp} - the support of the estimated serial distribution
+\item \code{pmf} - the probability mass function of the estimated serial
+distribution
+}
+
+Otherwise, if \code{serial} is identical to \code{FALSE}, only the estimate of R0 is
+returned.
 }
 \description{
 This function implements an R0 estimation due to White and Pagano (Statistics
@@ -53,51 +66,63 @@ Poisson transmission model. See details for important implementation notes.
 \details{
 This method is based on a Poisson transmission model, and hence may be most
 most valid at the beginning of an epidemic. In their model, the serial
-distribution is assumed to be discrete with a finite number of posible
-values. In this implementation, if \code{mu} is not {NA}, the serial
-distribution is taken to be a discretized version of a gamma distribution
-with mean \code{mu}, shape parameter one, and largest possible value based on
-parameter \code{tol}. When \code{mu} is \code{NA}, the function implements a
-grid search algorithm to find the maximum likelihood estimator over all
-possible gamma distributions with unknown mean and variance, restricting
-these to a prespecified grid (see \code{search} parameter).
+distribution is assumed to be discrete with a finite number of possible
+values. In this implementation, if \code{mu} is not \code{NA}, the serial distribution
+is taken to be a discretized version of a gamma distribution with shape
+parameter \code{1} and scale parameter \code{mu} (and hence mean \code{mu}). When \code{mu} is
+\code{NA}, the function implements a grid search algorithm to find the maximum
+likelihood estimator over all possible gamma distributions with unknown shape
+and scale, restricting these to a prespecified grid (see the parameters
+\code{grid_length}, \code{max_shape} and \code{max_scale}). In both cases, the largest value
+of the support is chosen such that the cumulative distribution function of
+the original (pre-discretized) gamma distribution has cumulative probability
+of no more than 0.999 at this value.
 
-When the serial distribution is known (i.e., \code{mu} is not \code{NA}),
-sensitivity testing of \code{mu} is strongly recommended. If the serial
-distribution is unknown (i.e., \code{mu} is \code{NA}), the likelihood
-function can be flat near the maximum, resulting in numerical instability of
-the optimizer. When \code{mu} is \code{NA}, the implementation takes
-considerably longer to run. Users should be careful about units of time
-(e.g., are counts observed daily or weekly?) when implementing.
+When the serial distribution is known (i.e., \code{mu} is not \code{NA}), sensitivity
+testing of \code{mu} is strongly recommended. If the serial distribution is
+unknown (i.e., \code{mu} is \code{NA}), the likelihood function can be flat near the
+maximum, resulting in numerical instability of the optimizer. When \code{mu} is
+\code{NA}, the implementation takes considerably longer to run. Users should be
+careful about units of time (e.g., are counts observed daily or weekly?) when
+implementing.
 
 The model developed in White and Pagano (2008) is discrete, and hence the
 serial distribution is finite discrete. In our implementation, the input
-value \code{mu} is that of a continuous distribution. The algorithm
-discretizes this input when \code{mu} is not \code{NA}, and hence the mean of
-the serial distribution returned in the list \code{SD} will differ from
-\code{mu} somewhat. That is to say, if the user notices that the input
-\code{mu} and output mean of \code{SD} are different, this is to be expected,
-and is caused by the discretization.
+value \code{mu} is that of a continuous distribution. The algorithm discretizes
+this input, and so the mean of the estimated serial distribution returned
+(when \code{serial} is set to \code{TRUE}) will differ from \code{mu} somewhat. That is to
+say, if the user notices that the input \code{mu} and the mean of the estimated
+serial distribution are different, this is to be expected, and is caused by
+the discretization.
 }
 \examples{
 # Weekly data.
-NT <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4)
+cases <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4)
 
 # Obtain R0 when the serial distribution has a mean of five days.
-res1 <- WP(NT, mu = 5 / 7)
-res1$Rhat
+wp(cases, mu = 5 / 7)
 
 # Obtain R0 when the serial distribution has a mean of three days.
-res2 <- WP(NT, mu = 3 / 7)
-res2$Rhat
+wp(cases, mu = 3 / 7)
 
 # Obtain R0 when the serial distribution is unknown.
-# NOTE: This implementation will take longer to run.
-res3 <- WP(NT)
-res3$Rhat
+# Note that this will take longer to run than when `mu` is known.
+wp(cases)
+
+# Same as above, but specify custom grid search parameters. The larger any of
+# the parameters, the longer the search will take, but with potentially more
+# accurate estimates.
+wp(cases, grid_length = 40, max_shape = 4, max_scale = 4)
 
-# Find the mean of the estimated serial distribution.
-serial <- res3$SD
-sum(serial$supp * serial$pmf)
+# Return the estimated serial distribution in addition to the estimate of R0.
+estimate <- wp(cases, serial = TRUE)
 
+# Display the estimate of R0, as well as the support and probability mass
+# function of the estimated serial distribution returned by the grid search.
+estimate$r0
+estimate$supp
+estimate$pmf
+}
+\references{
+\href{https://doi.org/10.1002/sim.3136}{White and Pagano (Statistics in Medicine, 2008)}
 }
-- 
cgit v1.2.3