]> nmode's Git Repositories - Rnaught/blob - R/seqB.R
a64b598a4d653535a49c6c17a47f7dbacfedc9a8
[Rnaught] / R / seqB.R
1 #' seqB method
2 #'
3 #' This function implements a sequential Bayesian estimation method of R0 due to Bettencourt and Ribeiro (PLoS ONE, 2008).
4 #' See details for important implementation notes.
5 #'
6 #' The method sets a uniform prior distribution on R0 with possible values between zero and \code{kappa}, discretized to a fine grid.
7 #' The distribution of R0 is then updated sequentially, with one update for each new case count observation.
8 #' The final estimate of R0 is \code{Rhat}, the mean of the (last) posterior distribution.
9 #' The prior distribution is the initial belief of the distribution of R0, which in this implementation is the uninformative uniform
10 #' distribution with values between zero and \code{kappa}. Users can change the value of \code{kappa} only (i.e., the prior distribution
11 #' cannot be changed from the uniform). As more case counts are observed, the influence of the prior distribution should lessen on
12 #' the final estimate \code{Rhat}.
13 #'
14 #' This method is based on an approximation of the SIR model, which is most valid at the beginning of an epidemic. The method assumes
15 #' that the mean of the serial distribution (sometimes called the serial interval) is known. The final estimate can be quite sensitive
16 #' to this value, so sensitivity testing is strongly recommended. Users should be careful about units of time (e.g., are counts observed
17 #' daily or weekly?) when implementing.
18 #'
19 #' Our code has been modified to provide an estimate even if case counts equal to zero are present in some time intervals. This is done
20 #' by grouping the counts over such periods of time. Without grouping, and in the presence of zero counts, no estimate can be provided.
21 #'
22 #' @param NT Vector of case counts.
23 #' @param mu Mean of the serial distribution. This needs to match case counts in time units. For example, if case counts
24 #' are weekly and the serial distribution has a mean of seven days, then \code{mu} should be set to one. If case
25 #' counts are daily and the serial distribution has a mean of seven days, then \code{mu} should be set to seven.
26 #' @param kappa Largest possible value of uniform prior (defaults to 20). This describes the prior belief on ranges of R0,
27 #' and should be set to a higher value if R0 is believed to be larger.
28 #'
29 #' @return \code{seqB} returns a list containing the following components: \code{Rhat} is the estimate of R0 (the posterior mean),
30 #' \code{posterior} is the posterior distribution of R0 from which alternate estimates can be obtained (see examples),
31 #' and \code{group} is an indicator variable (if \code{group=TRUE}, zero values of NT were input and grouping was done
32 #' to obtain \code{Rhat}). The variable \code{posterior} is returned as a list made up of \code{supp} (the support of
33 #' the distribution) and \code{pmf} (the probability mass function).
34 #'
35 #' @examples
36 #' ## ===================================================== ##
37 #' ## Illustrate on weekly data ##
38 #' ## ===================================================== ##
39 #'
40 #' NT <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4)
41 #' ## obtain Rhat when serial distribution has mean of five days
42 #' res1 <- seqB(NT=NT, mu=5/7)
43 #' res1$Rhat
44 #' ## obtain Rhat when serial distribution has mean of three days
45 #' res2 <- seqB(NT=NT, mu=3/7)
46 #' res2$Rhat
47 #'
48 #' ## ============================================================= ##
49 #' ## Compute posterior mode instead of posterior mean and plot ##
50 #' ## ============================================================= ##
51 #'
52 #' Rpost <- res1$posterior
53 #' loc <- which(Rpost$pmf == max(Rpost$pmf))
54 #' Rpost$supp[loc] # posterior mode
55 #' res1$Rhat # compare with posterior mean
56 #'
57 #' par(mfrow=c(2, 1), mar=c(2, 2, 1, 1))
58 #' plot(Rpost$supp, Rpost$pmf, col="black", type="l", xlab="", ylab="")
59 #' abline(h=1/(20/0.01+1), col="red")
60 #' abline(v=res1$Rhat, col="blue")
61 #' abline(v=Rpost$supp[loc], col="purple")
62 #' legend("topright", legend=c("prior", "posterior", "posterior mean (Rhat)", "posterior mode"), col=c("red", "black", "blue", "purple"), lty=1)
63 #' plot(Rpost$supp, Rpost$pmf, col="black", type="l", xlim=c(0.5, 1.5), xlab="", ylab="")
64 #' abline(h=1/(20/0.01+1), col="red")
65 #' abline(v=res1$Rhat, col="blue")
66 #' abline(v=Rpost$supp[loc], col="purple")
67 #' legend("topright", legend=c("prior", "posterior", "posterior mean (Rhat)", "posterior mode"), col=c("red", "black", "blue", "purple"), lty=1)
68 #'
69 #' ## ========================================================= ##
70 #' ## Compute Rhat using only the first five weeks of data ##
71 #' ## ========================================================= ##
72 #'
73 #' res3 <- seqB(NT=NT[1:5], mu=5/7) # serial distribution has mean of five days
74 #' res3$Rhat
75 #'
76 #' @export
seqB <- function(NT, mu, kappa=20) {
  # Sequential Bayesian estimate of R0 on a grid of step 0.01 over [0, kappa],
  # starting from a uniform prior and applying one update per consecutive pair
  # of (positive) case counts.
  #
  # Returns list(Rhat, posterior = list(supp, pmf), group). If NT has fewer
  # than two elements a warning is raised and NULL is returned invisibly.
  # Invalid counts (non-numeric, NA, negative) or fewer than two positive
  # counts raise an error.

  if (length(NT) < 2) {
    # Signal a real warning condition instead of printing text to stdout.
    warning("length of NT should be at least two.", call. = FALSE)
    return(invisible(NULL))
  }

  if (!is.numeric(NT) || anyNA(NT) || any(NT < 0)) {
    stop("NT must be a numeric vector of non-negative case counts ",
         "with no missing values.", call. = FALSE)
  }

  # Zero counts cannot be used directly because log(0) is -Inf. Drop them and
  # let tau (the gap between retained observations) absorb the skipped
  # intervals. Without zeros, times is 1..length(NT) and every tau is 1.
  group <- any(NT == 0)
  times <- which(NT > 0)

  if (length(times) < 2) {
    stop("NT must contain at least two positive case counts.", call. = FALSE)
  }

  NT <- NT[times]
  tau <- diff(times)

  R <- seq(0, kappa, 0.01)
  gamma <- 1 / mu

  # Work with the unnormalised log-posterior. The uniform prior is a constant
  # in log space, so it starts at zero; sizing it from R guarantees it always
  # matches the grid length.
  log_post <- numeric(length(R))

  for (i in seq_along(tau)) {
    # Log of the expected next count: NT[i] * exp(tau * gamma * (R - 1)).
    lambda <- log(NT[i]) + tau[i] * gamma * (R - 1)

    # Poisson log-likelihood of NT[i + 1], up to a term constant in R.
    log_post <- log_post + NT[i + 1] * lambda - exp(lambda)

    # Re-centre so the largest entry is zero: exp() below can then neither
    # overflow nor underflow to an all-zero vector.
    log_post <- log_post - max(log_post)
  }

  posterior <- exp(log_post)
  posterior <- posterior / sum(posterior)

  Rhat <- sum(R * posterior)

  list(Rhat=Rhat, posterior=list(supp=R, pmf=posterior), group=group)
}