]>
nmode's Git Repositories - Rnaught/blob - R/WP.R
04791e2b6a4e60a5c7394175a7ea601178eb0224
3 #' This function implements an R0 estimation due to White and Pagano (Statistics
4 #' in Medicine, 2008). The method is based on maximum likelihood estimation in a
5 #' Poisson transmission model. See details for important implementation notes.
7 #' This method is based on a Poisson transmission model, and hence may be
8 #' most valid at the beginning of an epidemic. In their model, the serial
9 #' distribution is assumed to be discrete with a finite number of possible
10 #' values. In this implementation, if \code{mu} is not \code{NA}, the serial
11 #' distribution is taken to be a discretized version of a gamma distribution
12 #' with mean \code{mu}, shape parameter one, and largest possible value based on
13 #' parameter \code{tol}. When \code{mu} is \code{NA}, the function implements a
14 #' grid search algorithm to find the maximum likelihood estimator over all
15 #' possible gamma distributions with unknown mean and variance, restricting
16 #' these to a prespecified grid (see \code{search} parameter).
18 #' When the serial distribution is known (i.e., \code{mu} is not \code{NA}),
19 #' sensitivity testing of \code{mu} is strongly recommended. If the serial
20 #' distribution is unknown (i.e., \code{mu} is \code{NA}), the likelihood
21 #' function can be flat near the maximum, resulting in numerical instability of
22 #' the optimizer. When \code{mu} is \code{NA}, the implementation takes
23 #' considerably longer to run. Users should be careful about units of time
24 #' (e.g., are counts observed daily or weekly?) when implementing.
26 #' The model developed in White and Pagano (2008) is discrete, and hence the
27 #' serial distribution is finite discrete. In our implementation, the input
28 #' value \code{mu} is that of a continuous distribution. The algorithm
29 #' discretizes this input when \code{mu} is not \code{NA}, and hence the mean of
30 #' the serial distribution returned in the list \code{SD} will differ from
31 #' \code{mu} somewhat. That is to say, if the user notices that the input
32 #' \code{mu} and output mean of \code{SD} are different, this is to be expected,
33 #' and is caused by the discretization.
35 #' @param NT Vector of case counts.
36 #' @param mu Mean of the serial distribution (needs to match case counts in time
37 #' units; for example, if case counts are weekly and the serial
38 #' distribution has a mean of seven days, then \code{mu} should be set
39 #' to one). The default value of \code{mu} is set to \code{NA}.
40 #' @param search List of default values for the grid search algorithm. The list
41 #' includes three elements: the first is \code{B}, which is the
42 #' length of the grid in one dimension; the second is
43 #' \code{scale.max}, which is the largest possible value of the
44 #' scale parameter; and the third is \code{shape.max}, which is
45 #' the largest possible value of the shape parameter. Defaults to
46 #' \code{B = 100, scale.max = 10, shape.max = 10}. For both shape
47 #' and scale, the smallest grid value is the maximum divided by \code{B}.
48 #' @param tol Cutoff value for cumulative distribution function of the
49 #' pre-discretization gamma serial distribution. Defaults to 0.999
50 #' (i.e. in the discretization, the maximum is chosen such that the
51 #' original gamma distribution has cumulative probability of at least
52 #' 0.999 at this maximum).
54 #' @return \code{WP} returns a list containing the following components:
55 #' \code{Rhat} is the estimate of R0, and \code{SD} is either the
56 #' discretized serial distribution (if \code{mu} is not \code{NA}), or
57 #' the estimated discretized serial distribution (if \code{mu} is
58 #' \code{NA}). The list also returns the variable \code{check}, which is
59 #' equal to the number of non-unique maximum likelihood estimators. The
60 #' serial distribution \code{SD} is returned as a list made up of
61 #' \code{supp} (the support of the distribution) and \code{pmf} (the
62 #' probability mass function).
66 #' NT <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4)
68 #' # Obtain R0 when the serial distribution has a mean of five days.
69 #' res1 <- WP(NT, mu = 5 / 7)
72 #' # Obtain R0 when the serial distribution has a mean of three days.
73 #' res2 <- WP(NT, mu = 3 / 7)
76 #' # Obtain R0 when the serial distribution is unknown.
77 #' # NOTE: This implementation will take longer to run.
81 #' # Find the mean of the estimated serial distribution.
83 #' sum(serial$supp * serial$pmf)
85 #' @importFrom stats pexp qexp
88 WP
<- function(NT
, mu
= NA,
89 search
= list(B
= 100, shape.max
= 10, scale.max
= 10),
# NOTE(review): listing lines 90-91 are not visible in this view; the `tol`
# argument used below and the condition that selects this branch are
# presumably declared there - confirm against the full file.
# Unknown serial distribution: announce the assumption, then run the grid
# search in WP_unknown with the user's `search` settings.
92 print("You have assumed that the serial distribution is unknown.")
93 res
<- WP_unknown(NT
, B
= search$B
, shape.max
= search$shape.max
,
94 scale.max
= search$scale.max
, tol
= tol
)
# Largest support point of the fitted serial distribution (used for `supp`
# in the returned list).
97 range.max
<- res$range.max
# Known serial distribution: announce the assumption.
100 print("You have assumed that the serial distribution is known.")
# Truncate the support at the smallest integer whose exponential CDF
# (mean `mu`) is at least `tol`.
101 range.max
<- ceiling(qexp(tol
, rate
= 1 / mu
))
# Probability mass of each unit interval (0,1], (1,2], ..., up to range.max,
# taken from the exponential CDF.
102 p
<- diff(pexp(0:range.max
, 1 / mu
))
# Estimate R0 for this fixed serial distribution.
104 res
<- WP_known(NT
= NT
, p
= p
)
# Return the estimate together with the serial distribution (support
# 1..range.max and its pmf).
# NOTE(review): listing line 110 is not visible; the roxygen text mentions a
# `check` element, presumably set there - confirm.
109 return(list(Rhat
= Rhat
,
111 SD
= list(supp
= 1:range.max
, pmf
= p
)))
114 #' WP method background function WP_known
116 #' This is a background/internal function called by \code{WP}. It computes the
117 #' maximum likelihood estimator of R0 assuming that the serial distribution is
118 #' known and finite discrete.
120 #' @param NT Vector of case counts.
121 #' @param p Discretized version of the serial distribution.
123 #' @return The function returns the maximum likelihood estimator of R0.
126 WP_known
<- function(NT
, p
) {
# NOTE(review): listing lines 127-131 are not visible in this view; `k`,
# `mu_t` and the loop over `i` are presumably set up there - confirm.
# Case counts at times i, i - 1, ..., going back at most k steps
# (most recent first).
132 Nt
<- NT
[i
:max(1, i
- k
+ 1)]
# Sum of those counts weighted by the serial probabilities p[1], p[2], ...
133 mu_t
[i
] <- sum(p
[1:min(k
, i
)] * Nt
)
# Ratio of all cases after the first observation to the summed weights;
# this is the quantity documented as the estimate of R0.
136 Rhat
<- sum(NT
[-1]) / sum(mu_t
)
140 #' WP method background function WP_unknown
142 #' This is a background/internal function called by \code{WP}. It computes the
143 #' maximum likelihood estimator of R0 assuming that the serial distribution is
144 #' unknown but comes from a discretized gamma distribution. The function then
145 #' implements a simple grid search algorithm to obtain the maximum likelihood
146 #' estimator of R0 as well as the gamma parameters.
148 #' @param NT Vector of case counts.
149 #' @param B Length of grid for shape and scale (grid search parameter).
150 #' @param shape.max Maximum shape value (grid \code{search} parameter).
151 #' @param scale.max Maximum scale value (grid \code{search} parameter).
152 #' @param tol Cutoff value for cumulative distribution function of the serial
153 #' distribution (defaults to 0.999).
155 #' @return The function returns \code{Rhat}, the maximum likelihood estimator of
156 #' R0, as well as the maximum likelihood estimator of the discretized
157 #' serial distribution given by \code{p} (the probability mass function)
158 #' and \code{range.max} (the distribution has support on the integers
159 #' one to \code{range.max}). The function also returns \code{ll} (all
160 #' values of the log-likelihood) at \code{shape} (grid for shape
161 #' parameter) and at \code{scale} (grid for scale parameter), as well as
162 #' \code{Rs} (the full matrix of maximum likelihood estimators),
163 #' \code{JJ} (the locations for the likelihood for these), and \code{J0}
164 #' (the location for the maximum likelihood estimator \code{Rhat}). If
165 #' \code{JJ} and \code{J0} are not the same, this means that the maximum
166 #' likelihood estimator is not unique.
168 #' @importFrom stats pgamma qgamma
171 WP_unknown
<- function(NT
, B
= 100, shape.max
= 10, scale.max
= 10,
# NOTE(review): listing line 172 (rest of the signature, presumably the
# `tol` argument used below) is not visible in this view.
# Evenly spaced candidate values from 0 to the maximum, B + 1 points each.
173 shape
<- seq(0, shape.max
, length.out
= B
+ 1)
174 scale
<- seq(0, scale.max
, length.out
= B
+ 1)
# B x B storage, one cell per (shape, scale) grid point; both matrices are
# returned to the caller (as `ll` and `Rs`).
178 resLL
<- matrix(0, B
, B
)
179 resR0
<- matrix(0, B
, B
)
# NOTE(review): the loop headers over `i` and `j` are not visible here.
# For grid point (i, j): truncate the gamma support at the smallest integer
# whose CDF is at least `tol`, then discretize on unit intervals.
183 range.max
<- ceiling(qgamma(tol
, shape
= shape
[i
], scale
= scale
[j
]))
184 p
<- diff(pgamma(0:range.max
, shape
= shape
[i
], scale
= scale
[j
]))
# R0 estimate for this candidate serial distribution, then the
# log-likelihood evaluated at that estimate.
186 mle
<- WP_known(NT
, p
)
187 resLL
[i
, j
] <- computeLL(p
, NT
, mle
)
# Linear (column-major) index of the first largest log-likelihood.
191 J0
<- which.max(resLL
)
# Row/column positions of every cell equal to that maximum
# (one row of JJ per tied cell).
193 JJ
<- which(resLL
== resLL
[J0
], arr.ind
= TRUE)
# Rebuild the discretized serial distribution at the selected grid point.
# NOTE(review): JJ is a matrix, so JJ[1] and JJ[2] index it column-major.
# With a single maximiser they are its row and column; with ties JJ[2] is
# the row of the second maximiser, not a column - consider JJ[1, 1] and
# JJ[1, 2].
194 range.max
<- ceiling(qgamma(tol
, shape
= shape
[JJ
[1]], scale
= scale
[JJ
[2]]))
195 p
<- diff(pgamma(0:range.max
, shape
= shape
[JJ
[1]], scale
= scale
[JJ
[2]]))
# NOTE(review): `R0hat` is assigned on a line not visible in this view.
# The returned element names (`ll`, `Rs`) differ from the local names.
198 return(list(Rhat
= R0hat
, J0
= J0
, ll
= resLL
, Rs
= resR0
, scale
= scale
,
199 shape
= shape
, JJ
= JJ
, p
= p
, range.max
= range.max
))
202 #' WP method background function computeLL
204 #' This is a background/internal function called by \code{WP}. It computes the
207 #' @param p Discretized version of the serial distribution.
208 #' @param NT Vector of case counts.
209 #' @param R0 Basic reproductive ratio.
211 #' @return This function returns the log-likelihood at the input variables and
215 computeLL
<- function(p
, NT
, R0
) {
# NOTE(review): the lines that initialise `k` and `mu_t`, the loop header
# over `i`, and everything after the `LL` assignment are not visible in
# this view.
# Case counts at times i, i - 1, ..., going back at most k steps
# (most recent first).
221 Nt
<- NT
[i
:max(1, i
- k
+ 1)]
# Sum of those counts weighted by the serial probabilities p[1], p[2], ...
222 mu_t
[i
] <- sum(p
[1:min(k
, i
)] * Nt
)
# Poisson-type log-likelihood kernel, sum(N * log(mean)) - sum(mean), over
# all counts after the first.
# NOTE(review): `R0` is not used in any visible line; presumably `mu_t` is
# scaled by `R0` on a line not shown - confirm.
226 LL
<- sum(NT
[-1] * log(mu_t
)) - sum(mu_t
)