]> nmode's Git Repositories - Rnaught/blob - R/wp.R
674a7232a33828c8185199f4993d771e42708441
[Rnaught] / R / wp.R
1 #' White and Pagano (WP)
2 #'
3 #' This function implements an R0 estimation due to White and Pagano (Statistics
4 #' in Medicine, 2008). The method is based on maximum likelihood estimation in a
5 #' Poisson transmission model. See details for important implementation notes.
6 #'
7 #' This method is based on a Poisson transmission model, and hence may be most
8 #' valid at the beginning of an epidemic. In their model, the serial
9 #' distribution is assumed to be discrete with a finite number of possible
10 #' values. In this implementation, if `mu` is not `NA`, the serial distribution
11 #' is taken to be a discretized version of a gamma distribution with shape
12 #' parameter `1` and scale parameter `mu` (and hence mean `mu`). When `mu` is
13 #' `NA`, the function implements a grid search algorithm to find the maximum
14 #' likelihood estimator over all possible gamma distributions with unknown shape
15 #' and scale, restricting these to a prespecified grid (see the parameters
16 #' `grid_length`, `max_shape` and `max_scale`). In both cases, the largest value
17 #' of the support is chosen such that the cumulative distribution function of
18 #' the original (pre-discretized) gamma distribution has cumulative probability
19 #' of no more than 0.999 at this value.
20 #'
21 #' When the serial distribution is known (i.e., `mu` is not `NA`), sensitivity
22 #' testing of `mu` is strongly recommended. If the serial distribution is
23 #' unknown (i.e., `mu` is `NA`), the likelihood function can be flat near the
24 #' maximum, resulting in numerical instability of the optimizer. When `mu` is
25 #' `NA`, the implementation takes considerably longer to run. Users should be
26 #' careful about units of time (e.g., are counts observed daily or weekly?) when
27 #' implementing.
28 #'
29 #' The model developed in White and Pagano (2008) is discrete, and hence the
30 #' serial distribution is finite discrete. In our implementation, the input
31 #' value `mu` is that of a continuous distribution. The algorithm discretizes
32 #' this input, and so the mean of the estimated serial distribution returned
33 #' (when `serial` is set to `TRUE`) will differ from `mu` somewhat. That is to
34 #' say, if the user notices that the input `mu` and the mean of the estimated
35 #' serial distribution are different, this is to be expected, and is caused by
36 #' the discretization.
37 #'
38 #' @param cases Vector of case counts. The vector must be of length at least two
39 #' and only contain positive integers.
40 #' @param mu Mean of the serial distribution. This must be a positive number or
41 #' `NA`. If a number is specified, the value should match the case counts in
42 #' time units. For example, if case counts are weekly and the serial
43 #' distribution has a mean of seven days, then `mu` should be set to `1`. If
44 #' case counts are daily and the serial distribution has a mean of seven days,
45 #' then `mu` should be set to `7`.
46 #' @param serial Whether to return the estimated serial distribution in addition
47 #' to the estimate of R0. This must be a value identical to `TRUE` or `FALSE`.
48 #' @param grid_length The length of the grid in the grid search (defaults to
49 #' 100). This must be a positive integer. It will only be used if `mu` is set
50 #' to `NA`. The grid search will go through all combinations of the shape and
51 #' scale parameters for the gamma distribution, which are `grid_length` evenly
52 #' spaced values from `0` (exclusive) to `max_shape` and `max_scale`
53 #' (inclusive), respectively. Note that larger values will result in a longer
54 #' search time.
55 #' @param max_shape The largest possible value of the shape parameter in the
56 #' grid search (defaults to 10). This must be a positive number. It will only
57 #' be used if `mu` is set to `NA`. Note that larger values will result in a
58 #' longer search time, and may cause numerical instabilities.
59 #' @param max_scale The largest possible value of the scale parameter in the
60 #' grid search (defaults to 10). This must be a positive number. It will only
61 #' be used if `mu` is set to `NA`. Note that larger values will result in a
62 #' longer search time, and may cause numerical instabilities.
63 #'
64 #' @return If `serial` is identical to `TRUE`, a list containing the following
65 #' components is returned:
66 #' * `r0` - the estimate of R0
67 #' * `supp` - the support of the estimated serial distribution
68 #' * `pmf` - the probability mass function of the estimated serial
69 #' distribution
70 #'
71 #' Otherwise, if `serial` is identical to `FALSE`, only the estimate of R0 is
72 #' returned.
73 #'
74 #' @references
75 #' [White and Pagano (Statistics in Medicine, 2008)](
76 #' https://doi.org/10.1002/sim.3136)
77 #'
78 #' @importFrom stats pgamma qgamma
79 #'
80 #' @export
81 #'
82 #' @examples
83 #' # Weekly data.
84 #' cases <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4)
85 #'
86 #' # Obtain R0 when the serial distribution has a mean of five days.
87 #' wp(cases, mu = 5 / 7)
88 #'
89 #' # Obtain R0 when the serial distribution has a mean of three days.
90 #' wp(cases, mu = 3 / 7)
91 #'
92 #' # Obtain R0 when the serial distribution is unknown.
93 #' # Note that this will take longer to run than when `mu` is known.
94 #' wp(cases)
95 #'
96 #' # Same as above, but specify custom grid search parameters. The larger any of
97 #' # the parameters, the longer the search will take, but with potentially more
98 #' # accurate estimates.
99 #' wp(cases, grid_length = 40, max_shape = 4, max_scale = 4)
100 #'
101 #' # Return the estimated serial distribution in addition to the estimate of R0.
102 #' estimate <- wp(cases, serial = TRUE)
103 #'
104 #' # Display the estimate of R0, as well as the support and probability mass
105 #' # function of the estimated serial distribution returned by the grid search.
106 #' estimate$r0
107 #' estimate$supp
108 #' estimate$pmf
wp <- function(cases, mu = NA, serial = FALSE,
               grid_length = 100, max_shape = 10, max_scale = 10) {
  if (!is.na(mu)) {
    # Serial mean supplied: discretize a gamma distribution with shape 1 and
    # scale `mu` on the integers 1..upper, where `upper` is the rounded-up
    # 0.999 quantile, then renormalize so the masses sum to one.
    upper <- ceiling(qgamma(0.999, shape = 1, scale = mu))
    mass <- diff(pgamma(0:upper, shape = 1, scale = mu))
    fit <- list(supp = seq_len(upper), pmf = mass / sum(mass))

    # Estimate of R0 for a fixed serial distribution: all cases after the
    # first count, divided by the summed serial-weighted earlier counts.
    fit$r0 <- sum(cases[-1]) / sum(wp_mu_t_sigma(cases, fit$pmf))
  } else {
    # Serial mean unknown: delegate to the grid search over gamma parameters.
    fit <- wp_search(cases, grid_length, max_shape, max_scale)
  }

  # Return the bare estimate unless the serial distribution was requested.
  if (!serial) fit$r0 else list(r0 = fit$r0, supp = fit$supp, pmf = fit$pmf)
}
129
130 #' White and Pagano (WP) Grid Search
131 #'
132 #' This is a background/internal function called by [wp()]. It computes the
133 #' maximum likelihood estimator of R0 assuming that the serial distribution is
134 #' unknown (i.e., [wp()] is called with `mu` set to `NA`) but comes from a
135 #' discretized gamma distribution. The function implements a simple grid search
136 #' to obtain the maximum likelihood estimator of R0 as well as the gamma
137 #' parameters.
138 #'
139 #' @param cases Vector of case counts.
140 #' @param grid_length The length of the grid in the grid search.
141 #' @param max_shape The largest possible value of the shape parameter in the
142 #' grid search.
143 #' @param max_scale The largest possible value of the scale parameter in the
144 #' grid search.
145 #'
146 #' @return A list containing the following components is returned:
147 #' * `r0` - the estimate of R0
148 #' * `supp` - the support of the estimated serial distribution
149 #' * `pmf` - the probability mass function of the estimated serial
150 #' distribution
151 #'
152 #' @references
153 #' [White and Pagano (Statistics in Medicine, 2008)](
154 #' https://doi.org/10.1002/sim.3136)
155 #'
156 #' @seealso [wp()] for the function in which this grid search is called.
157 #'
158 #' @importFrom stats pgamma qgamma
159 #'
160 #' @noRd
wp_search <- function(cases, grid_length, max_shape, max_scale) {
  # Candidate parameters: `grid_length` evenly spaced values per axis, from 0
  # (excluded by dropping the first element) up to the respective maximum.
  shapes <- seq(0, max_shape, length.out = grid_length + 1)[-1]
  scales <- seq(0, max_scale, length.out = grid_length + 1)[-1]

  # These do not depend on the grid point, so compute them once.
  later_cases <- cases[-1]
  total_later_cases <- sum(later_cases)

  best_log_like <- -Inf
  best_serial_pmf <- NA
  best_max_range <- NA
  r0 <- NA

  for (i in seq_len(grid_length)) {
    shape_i <- shapes[i]
    for (j in seq_len(grid_length)) {
      scale_j <- scales[j]

      # Discretize the candidate gamma distribution on 1..max_range, where
      # max_range is the rounded-up 0.999 quantile, and renormalize.
      max_range <- ceiling(qgamma(0.999, shape = shape_i, scale = scale_j))
      serial_pmf <- diff(pgamma(0:max_range, shape = shape_i, scale = scale_j))
      serial_pmf <- serial_pmf / sum(serial_pmf)

      # Estimate of R0 for this serial distribution, and the implied values
      # of mu(t) that enter the log-likelihood.
      mu_t_sigma <- wp_mu_t_sigma(cases, serial_pmf)
      mle <- total_later_cases / sum(mu_t_sigma)
      mu_t <- mle * mu_t_sigma

      log_like <- sum(later_cases * log(mu_t)) - sum(mu_t)

      # The log-likelihood can be NA/NaN (e.g. `0 * log(0)` or `Inf * 0` when
      # the pmf underflows). Comparing such a value inside `if` would abort
      # the whole search, so those grid points are skipped instead.
      if (!is.na(log_like) && log_like > best_log_like) {
        best_log_like <- log_like
        best_serial_pmf <- serial_pmf
        best_max_range <- max_range
        r0 <- mle
      }
    }
  }

  # Nothing was selected: every grid point had an NA/NaN or -Inf
  # log-likelihood. Fail with an explicit message rather than an obscure
  # error from `seq_len(NA)`.
  if (is.na(best_max_range)) {
    stop(
      "The grid search found no parameters with a valid log-likelihood; ",
      "check `cases` and the grid search parameters.",
      call. = FALSE
    )
  }

  list(r0 = r0, supp = seq_len(best_max_range), pmf = best_serial_pmf)
}
195
196 #' White and Pagano (WP) Mu Function Helper
197 #'
198 #' This is a background/internal function called by [wp()] and [wp_search()]. It
199 #' computes the sum inside the function `mu(t)`, which is present in the log
200 #' likelihood function. See the referenced article for more details.
201 #'
202 #' @param cases Vector of case counts.
203 #' @param serial_pmf The probability mass function of the serial distribution.
204 #'
205 #' @return The sum inside the function `mu(t)` of the log likelihood.
206 #'
207 #' @references
208 #' [White and Pagano (Statistics in Medicine, 2008)](
209 #' https://doi.org/10.1002/sim.3136)
210 #'
211 #' @seealso [wp()] and [wp_search()] for the functions which require this sum.
212 #'
213 #' @noRd
wp_mu_t_sigma <- function(cases, serial_pmf) {
  n_lags <- length(serial_pmf)

  # One value per time step 1..(length(cases) - 1): the serial probabilities
  # paired with the case counts walking backwards from the current step
  # (lag 1 = current step, lag 2 = the step before, ...), using no more lags
  # than there are steps so far.
  vapply(
    seq_len(length(cases) - 1),
    function(step) {
      lags_used <- seq_len(min(n_lags, step))
      oldest <- max(1, step - n_lags + 1)
      sum(serial_pmf[lags_used] * cases[step:oldest])
    },
    numeric(1)
  )
}