]>
nmode's Git Repositories - Rnaught/blob - R/WP.R
6 #' This function implements an R0 estimation due to White and Pagano (Statistics in Medicine, 2008).
7 #' The method is based on maximum likelihood estimation in a Poisson transmission model.
8 #' See details for important implementation notes.
10 #' This method is based on a Poisson transmission model, and hence may be most valid at the beginning
11 #' of an epidemic. In their model, the serial distribution is assumed to be discrete with a finite number
12 #' of possible values. In this implementation, if the serial distribution is assumed known, it is taken to
13 #' be a discretized version of a gamma distribution with mean \code{mu}, shape parameter one, and largest
14 #' possible value based on parameter \code{tol}. When the serial distribution is unknown, the function
15 #' implements a grid search algorithm to find the maximum likelihood estimator over all possible gamma
16 #' distributions with unknown mean and variance, restricting these to a prespecified grid (see
17 #' \code{search} parameter).
19 #' When the serial distribution is taken to be \code{known}, sensitivity testing of the parameter \code{mu}
20 #' is strongly recommended. If the serial distribution is \code{unknown}, the likelihood function can be
21 #' flat near the maximum, resulting in numerical instability of the optimizer. When the serial distribution
22 #' is \code{unknown} the implementation takes considerably longer to run. Users should be careful about units
23 #' of time (e.g. are counts observed daily or weekly?) when implementing.
25 #' The model developed in White and Pagano (2008) is discrete, and hence the serial distribution is finite
26 #' discrete. In our implementation, the input value \code{mu} is that of a continuous distribution. The
27 #' algorithm when \code{method="known"} discretizes this input, and hence the mean of the serial distribution
28 #' returned in the list \code{SD} will differ from \code{mu} somewhat. That is to say, if the user notices that
29 #' the input \code{mu} and output mean of \code{SD} are different, this is to be expected, and is caused by
30 #' the discretization.
32 #' @param NT Vector of case counts
33 #' @param mu Mean of the serial distribution (needs to match case counts in time units; for example, if case
34 #' counts are weekly and the serial distribution has a mean of seven days, then \code{mu} should be
35 #' set to one). The default value of \code{mu} is set to \code{NA}.
36 #' @param method Variable taking one of two possible values: \code{known} or \code{unknown}. If "known", the
37 #' serial distribution is assumed to be gamma with rate 1/\code{mu} and shape equal to one, if
38 #' "unknown" then the serial distribution is gamma with unknown parameters. Defaults to "unknown"
39 #' @param search List of default values for the grid search algorithm; the list includes three elements: the
40 #' first is \code{B} which is the length of the grid in one dimension, the second is
41 #' \code{scale.max} which is the largest possible value of the scale parameter, and the third is
42 #' \code{shape.max} which is the largest possible value of the shape parameter; defaults to
43 #' \code{B=100, scale.max=10, shape.max=10}. For both shape and scale, the smallest possible
44 #' value is 1/\code{B}.
45 #' @param tol Cutoff value for cumulative distribution function of the pre-discretization gamma serial
46 #' distribution, defaults to 0.999 (i.e. in the discretization, the maximum is chosen such that the
47 #' original gamma distribution has cumulative probability of no more than 0.999 at this maximum).
49 #' @return WP returns a list containing the following components: \code{Rhat} is the estimate of R0, \code{SD}
50 #' is either the discretized serial distribution (if \code{method="known"}) or the estimated
51 #' discretized serial distribution (if \code{method="unknown"}), and \code{inputs} is a list of the
52 #' original input variables \code{NT, mu, method, search, tol}. The list also returns the variable
53 #' \code{check}, which is equal to the number of non-unique maximum likelihood estimators. The serial
54 #' distribution \code{SD} is returned as a list made up of \code{supp} the support of the distribution
55 #' and \code{pmf} the probability mass function.
58 #' ## ===================================================== ##
59 #' ## Illustrate on weekly data ##
60 #' ## ===================================================== ##
62 #' NT <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4)
63 #' ## obtain Rhat when serial distribution has mean of five days
64 #' res1 <- WP(NT=NT, mu=5/7, method="known")
66 #' ## obtain Rhat when serial distribution has mean of three days
67 #' res2 <- WP(NT=NT, mu=3/7, method="known")
69 #' ## obtain Rhat when serial distribution is unknown
70 #' ## NOTE: this implementation will take longer to run
73 #' ## find mean of estimated serial distribution
75 #' sum(serial$supp * serial$pmf)
77 #' ## ========================================================= ##
78 #' ## Compute Rhat using only the first five weeks of data ##
79 #' ## ========================================================= ##
81 #' res4 <- WP(NT=NT[1:5], mu=5/7, method="known") # serial distribution has mean of five days
85 WP
<- function(NT
, mu
="NA", method
="unknown", search
=list(B
=100, shape.max
=10, scale.max
=10), tol
=0.999) {
86 if (method
== "unknown") {
87 print("You have assumed that the serial distribution is unknown.")
88 res
<- WP_unknown(NT
=NT
, B
=search$B
, shape.max
=search$shape.max
, scale.max
=search$scale.max
, tol
=tol
)
91 range.max
<- res$range.max
95 if (method
== "known") {
98 print("For method=known, the mean of the serial distribution must be specified.")
100 print("You have assumed that the serial distribution is known.")
101 range.max
<- ceiling(qexp(tol
, rate
=1/mu
))
102 p
<- diff(pexp(0:range.max
, 1/mu
))
104 res
<- WP_known(NT
=NT
, p
=p
)
110 return(list(Rhat
=Rhat
, check
=length(JJ
), SD
=list(supp
=1:range.max
, pmf
=p
)))