]>
nmode's Git Repositories - Rnaught/blob - R/wp.R
1 #' White and Pagano (WP)
3 #' This function implements an R0 estimation due to White and Pagano (Statistics
4 #' in Medicine, 2008). The method is based on maximum likelihood estimation in a
5 #' Poisson transmission model. See details for important implementation notes.
7 #' This method is based on a Poisson transmission model, and hence may be most
8 #' valid at the beginning of an epidemic. In their model, the serial
9 #' distribution is assumed to be discrete with a finite number of possible
10 #' values. In this implementation, if `mu` is not `NA`, the serial distribution
11 #' is taken to be a discretized version of a gamma distribution with shape
12 #' parameter `1` and scale parameter `mu` (and hence mean `mu`). When `mu` is
13 #' `NA`, the function implements a grid search algorithm to find the maximum
14 #' likelihood estimator over all possible gamma distributions with unknown shape
15 #' and scale, restricting these to a prespecified grid (see the parameters
16 #' `grid_length`, `max_shape` and `max_scale`). In both cases, the largest value
17 #' of the support is chosen such that the cumulative distribution function of
18 #' the original (pre-discretized) gamma distribution has cumulative probability
19 #' of no more than 0.999 at this value.
21 #' When the serial distribution is known (i.e., `mu` is not `NA`), sensitivity
22 #' testing of `mu` is strongly recommended. If the serial distribution is
23 #' unknown (i.e., `mu` is `NA`), the likelihood function can be flat near the
24 #' maximum, resulting in numerical instability of the optimizer. When `mu` is
25 #' `NA`, the implementation takes considerably longer to run. Users should be
26 #' careful about units of time (e.g., are counts observed daily or weekly?) when
27 #' specifying `mu`.
29 #' The model developed in White and Pagano (2008) is discrete, and hence the
30 #' serial distribution is finite discrete. In our implementation, the input
31 #' value `mu` is that of a continuous distribution. The algorithm discretizes
32 #' this input, and so the mean of the estimated serial distribution returned
33 #' (when `serial` is set to `TRUE`) will differ from `mu` somewhat. That is to
34 #' say, if the user notices that the input `mu` and the mean of the estimated
35 #' serial distribution are different, this is to be expected, and is caused by
36 #' the discretization.
38 #' @param cases Vector of case counts. The vector must be of length at least two
39 #' and only contain positive integers.
40 #' @param mu Mean of the serial distribution. This must be a positive number or
41 #' `NA`. If a number is specified, the value should match the case counts in
42 #' time units. For example, if case counts are weekly and the serial
43 #' distribution has a mean of seven days, then `mu` should be set to `1`. If
44 #' case counts are daily and the serial distribution has a mean of seven days,
45 #' then `mu` should be set to `7`.
46 #' @param serial Whether to return the estimated serial distribution in addition
47 #' to the estimate of R0. This must be a value identical to `TRUE` or `FALSE`.
48 #' @param grid_length The length of the grid in the grid search (defaults to
49 #' 100). This must be a positive integer. It will only be used if `mu` is set
50 #' to `NA`. The grid search will go through all combinations of the shape and
51 #' scale parameters for the gamma distribution, which are `grid_length` evenly
52 #' spaced values from `0` (exclusive) to `max_shape` and `max_scale`
53 #' (inclusive), respectively. Note that larger values will result in a longer
54 #' search time.
55 #' @param max_shape The largest possible value of the shape parameter in the
56 #' grid search (defaults to 10). This must be a positive number. It will only
57 #' be used if `mu` is set to `NA`. Note that larger values will result in a
58 #' longer search time, and may cause numerical instabilities.
59 #' @param max_scale The largest possible value of the scale parameter in the
60 #' grid search (defaults to 10). This must be a positive number. It will only
61 #' be used if `mu` is set to `NA`. Note that larger values will result in a
62 #' longer search time, and may cause numerical instabilities.
64 #' @return If `serial` is identical to `TRUE`, a list containing the following
65 #' components is returned:
66 #' * `r0` - the estimate of R0
67 #' * `supp` - the support of the estimated serial distribution
68 #' * `pmf` - the probability mass function of the estimated serial
69 #' distribution
71 #' Otherwise, if `serial` is identical to `FALSE`, only the estimate of R0 is
72 #' returned.
75 #' [White and Pagano (Statistics in Medicine, 2008)](
76 #' https://doi.org/10.1002/sim.3136)
78 #' @seealso `vignette("wp_serial", package="Rnaught")` for examples of using the
79 #' serial distribution.
81 #' @importFrom stats pgamma qgamma
87 #' cases <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4)
89 #' # Obtain R0 when the serial distribution has a mean of five days.
90 #' wp(cases, mu = 5 / 7)
92 #' # Obtain R0 when the serial distribution has a mean of three days.
93 #' wp(cases, mu = 3 / 7)
95 #' # Obtain R0 when the serial distribution is unknown.
96 #' # Note that this will take longer to run than when `mu` is known.
99 #' # Same as above, but specify custom grid search parameters. The larger any of
100 #' # the parameters, the longer the search will take, but with potentially more
101 #' # accurate estimates.
102 #' wp(cases, grid_length = 40, max_shape = 4, max_scale = 4)
104 #' # Return the estimated serial distribution in addition to the estimate of R0.
105 #' estimate <- wp(cases, serial = TRUE)
107 #' # Display the estimate of R0, as well as the support and probability mass
108 #' # function of the estimated serial distribution returned by the grid search.
wp <- function(cases, mu = NA, serial = FALSE,
               grid_length = 100, max_shape = 10, max_scale = 10) {
  if (is.na(mu)) {
    # Serial distribution unknown: delegate to the grid search, which returns
    # the R0 estimate together with the best discretized gamma distribution.
    search <- wp_search(cases, grid_length, max_shape, max_scale)
    r0 <- search$r0
    serial_supp <- search$supp
    serial_pmf <- search$pmf
  } else {
    # Serial distribution known: discretize a gamma distribution with shape 1
    # and scale `mu`, truncating the support at the 0.999 quantile.
    max_range <- ceiling(qgamma(0.999, shape = 1, scale = mu))
    serial_supp <- seq_len(max_range)
    serial_pmf <- diff(pgamma(0:max_range, shape = 1, scale = mu))
    # Renormalize so that the truncated masses sum to one.
    serial_pmf <- serial_pmf / sum(serial_pmf)
    r0 <- sum(cases[-1]) / sum(wp_mu_t_sigma(cases, serial_pmf))
  }

  # Only the R0 estimate is returned unless `serial` is identical to `TRUE`.
  if (!isTRUE(serial)) {
    return(r0)
  }

  list(r0 = r0, supp = serial_supp, pmf = serial_pmf)
}
133 #' White and Pagano (WP) Grid Search
135 #' This is a background/internal function called by [wp()]. It computes the
136 #' maximum likelihood estimator of R0 assuming that the serial distribution is
137 #' unknown (i.e., [wp()] is called with `mu` set to `NA`) but comes from a
138 #' discretized gamma distribution. The function implements a simple grid search
139 #' to obtain the maximum likelihood estimator of R0 as well as the gamma
140 #' parameters (shape and scale) of the serial distribution.
142 #' @param cases Vector of case counts.
143 #' @param grid_length The length of the grid in the grid search.
144 #' @param max_shape The largest possible value of the shape parameter in the
145 #' grid search.
146 #' @param max_scale The largest possible value of the scale parameter in the
147 #' grid search.
149 #' @return A list containing the following components is returned:
150 #' * `r0` - the estimate of R0
151 #' * `supp` - the support of the estimated serial distribution
152 #' * `pmf` - the probability mass function of the estimated serial
153 #' distribution
156 #' [White and Pagano (Statistics in Medicine, 2008)](
157 #' https://doi.org/10.1002/sim.3136)
159 #' @seealso [wp()] for the function in which this grid search is called.
161 #' @importFrom stats pgamma qgamma
wp_search <- function(cases, grid_length, max_shape, max_scale) {
  # Candidate shape and scale values: `grid_length` evenly spaced points in
  # (0, max]; the leading 0 is dropped because it is not a valid parameter.
  shapes <- seq(0, max_shape, length.out = grid_length + 1)[-1]
  scales <- seq(0, max_scale, length.out = grid_length + 1)[-1]

  # Best candidate found so far.
  best_log_like <- -Inf
  best_serial_pmf <- NA
  best_max_range <- NA
  r0 <- NA

  for (i in seq_len(grid_length)) {
    for (j in seq_len(grid_length)) {
      # Truncate the support at the 0.999 quantile of the continuous gamma.
      max_range <- ceiling(qgamma(0.999, shape = shapes[i], scale = scales[j]))

      # Discretize the gamma onto 1..max_range and renormalize to sum to one.
      serial_pmf <- diff(
        pgamma(0:max_range, shape = shapes[i], scale = scales[j])
      )
      serial_pmf <- serial_pmf / sum(serial_pmf)

      # R0 estimate for this serial distribution, and the implied values of
      # mu(t) used in the log likelihood.
      mu_t_sigma <- wp_mu_t_sigma(cases, serial_pmf)
      mle <- sum(cases[-1]) / sum(mu_t_sigma)
      mu_t <- mle * mu_t_sigma

      log_like <- sum(cases[-1] * log(mu_t)) - sum(mu_t)
      if (log_like > best_log_like) {
        best_log_like <- log_like
        best_serial_pmf <- serial_pmf
        best_max_range <- max_range
        r0 <- mle
      }
    }
  }

  list(r0 = r0, supp = seq_len(best_max_range), pmf = best_serial_pmf)
}
199 #' White and Pagano (WP) Mu Function Helper
201 #' This is a background/internal function called by [wp()] and [wp_search()]. It
202 #' computes the sum inside the function `mu(t)`, which is present in the log
203 #' likelihood function. See the referenced article for more details.
205 #' @param cases Vector of case counts.
206 #' @param serial_pmf The probability mass function of the serial distribution.
208 #' @return The sum inside the function `mu(t)` of the log likelihood.
211 #' [White and Pagano (Statistics in Medicine, 2008)](
212 #' https://doi.org/10.1002/sim.3136)
214 #' @seealso [wp()] and [wp_search()] for the functions which require this sum.
217 wp_mu_t_sigma
<- function(cases
, serial_pmf
) {
218 mu_t_sigma
<- rep(0, length(cases
) - 1)
219 for (i
in seq_len(length(cases
) - 1)) {
220 mu_t_sigma
[i
] <- sum(
221 serial_pmf
[seq_len(min(length(serial_pmf
), i
))] *
222 cases
[i
:max(1, i
- length(serial_pmf
) + 1)]