aboutsummaryrefslogtreecommitdiff
path: root/R
diff options
context:
space:
mode:
authorNaeem Model <me@nmode.ca>2024-01-13 19:31:08 +0000
committerNaeem Model <me@nmode.ca>2024-01-13 19:31:08 +0000
commit2a6de5fb5270f70eb83233a10bb84301c119cc16 (patch)
tree64fbe8d9d4738ef519c78327672738aa138f5960 /R
parentf5fcb4e1d46bfe8dc2d79cf4f3022f964b08a321 (diff)
Refactor seqB
Diffstat (limited to 'R')
-rw-r--r--R/seq_bayes.R167
-rw-r--r--R/server.R4
2 files changed, 79 insertions, 92 deletions
diff --git a/R/seq_bayes.R b/R/seq_bayes.R
index 1dcf927..107d428 100644
--- a/R/seq_bayes.R
+++ b/R/seq_bayes.R
@@ -1,19 +1,19 @@
-#' seqB method
+#' Sequential Bayes (seqB)
#'
#' This function implements a sequential Bayesian estimation method of R0 due to
#' Bettencourt and Riberio (PloS One, 2008). See details for important
#' implementation notes.
#'
#' The method sets a uniform prior distribution on R0 with possible values
-#' between zero and \code{kappa}, discretized to a fine grid. The distribution
-#' of R0 is then updated sequentially, with one update for each new case count
-#' observation. The final estimate of R0 is \code{Rhat}, the mean of the (last)
-#' posterior distribution. The prior distribution is the initial belief of the
+#' between `0` and `kappa`, discretized to a fine grid. The distribution of R0
+#' is then updated sequentially, with one update for each new case count
+#' observation. The final estimate of R0 is the mean of the (last) posterior
+#' distribution. The prior distribution is the initial belief of the
#' distribution of R0, which is the uninformative uniform distribution with
-#' values between zero and \code{kappa}. Users can change the value of
-#' \code{kappa} only (i.e., the prior distribution cannot be changed from the
-#' uniform). As more case counts are observed, the influence of the prior
-#' distribution should lessen on the final estimate \code{Rhat}.
+#' values between `0` and `kappa`. Users can change the value of `kappa` only
+#' (i.e., the prior distribution cannot be changed from the uniform). As more
+#' case counts are observed, the influence of the prior distribution should
+#' lessen on the final estimate.
#'
#' This method is based on an approximation of the SIR model, which is most
#' valid at the beginning of an epidemic. The method assumes that the mean of
@@ -27,103 +27,90 @@
#' counts over such periods of time. Without grouping, and in the presence of
#' zero counts, no estimate can be provided.
#'
-#' @param NT Vector of case counts.
-#' @param mu Mean of the serial distribution. This needs to match case counts in
-#' time units. For example, if case counts are weekly and the serial
-#' distribution has a mean of seven days, then \code{mu} should be set
-#' to one. If case counts are daily and the serial distribution has a
-#' mean of seven days, then \code{mu} should be set to seven.
-#' @param kappa Largest possible value of uniform prior (defaults to 20). This
-#' describes the prior belief on ranges of R0, and should be set to
-#' a higher value if R0 is believed to be larger.
+#' @param cases Vector of case counts. The vector must only contain non-negative
+#' integers, and have at least two positive integers.
+#' @param mu Mean of the serial distribution. This must be a positive number.
+#' The value should match the case counts in time units. For example, if case
+#' counts are weekly and the serial distribution has a mean of seven days,
+#' then `mu` should be set to `1`. If case counts are daily and the serial
+#' distribution has a mean of seven days, then `mu` should be set to `7`.
+#' @param kappa Largest possible value of the uniform prior (defaults to `20`).
+#' This must be a number greater than or equal to `1`. It describes the prior
+#' belief on the ranges of R0, and should be set to a higher value if R0 is
+#' believed to be larger.
+#' @param post Whether to return the posterior distribution of R0 instead of the
+#' estimate of R0 (defaults to `FALSE`). This must be a value identical to
+#' `TRUE` or `FALSE`.
#'
-#' @return \code{seqB} returns a list containing the following components:
-#' \code{Rhat} is the estimate of R0 (the posterior mean),
-#' \code{posterior} is the posterior distribution of R0 from which
-#' alternate estimates can be obtained (see examples), and \code{group}
-#' is an indicator variable (if \code{group == TRUE}, zero values of NT
-#' were input and grouping was done to obtain \code{Rhat}). The variable
-#' \code{posterior} is returned as a list made up of \code{supp} (the
-#' support of the distribution) and \code{pmf} (the probability mass
-#' function).
+#' @return If `post` is identical to `TRUE`, a list containing the following
+#' components is returned:
+#' * `supp` - the support of the posterior distribution of R0
+#' * `pmf` - the probability mass function of the posterior distribution
#'
-#' @examples
-#' # Weekly data.
-#' NT <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4)
+#' Otherwise, if `post` is identical to `FALSE`, only the estimate of R0 is
+#' returned. Note that the estimate is equal to `sum(supp * pmf)` (i.e., the
+#' posterior mean).
#'
-#' ## Obtain R0 when the serial distribution has a mean of five days.
-#' res1 <- seqB(NT, mu = 5 / 7)
-#' res1$Rhat
+#' @references
+#' [Bettencourt and Riberio (PloS One, 2008)](
+#' https://doi.org/10.1371/journal.pone.0002185)
#'
-#' ## Obtain R0 when the serial distribution has a mean of three days.
-#' res2 <- seqB(NT, mu = 3 / 7)
-#' res2$Rhat
+#' @export
+#'
+#' @examples
+#' # Weekly data.
+#' cases <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4)
#'
-#' # Compute posterior mode instead of posterior mean and plot.
+#' # Obtain R0 when the serial distribution has a mean of five days.
+#' seq_bayes(cases, mu = 5 / 7)
#'
-#' Rpost <- res1$posterior
-#' loc <- which(Rpost$pmf == max(Rpost$pmf))
-#' Rpost$supp[loc] # Posterior mode.
-#' res1$Rhat # Compare with the posterior mean.
+#' # Obtain R0 when the serial distribution has a mean of three days.
+#' seq_bayes(cases, mu = 3 / 7)
#'
-#' par(mfrow = c(2, 1), mar = c(2, 2, 1, 1))
+#' # Obtain R0 when the serial distribution has a mean of seven days, and R0 is
+#' # believed to be at most 4.
+#' estimate <- seq_bayes(cases, mu = 1, kappa = 4)
#'
-#' plot(Rpost$supp, Rpost$pmf, col = "black", type = "l", xlab = "", ylab = "")
-#' abline(h = 1 / (20 / 0.01 + 1), col = "red")
-#' abline(v = res1$Rhat, col = "blue")
-#' abline(v = Rpost$supp[loc], col = "purple")
-#' legend("topright",
-#' legend = c("Prior", "Posterior", "Posterior mean", "Posterior mode"),
-#' col = c("red", "black", "blue", "purple"), lty = 1)
+#' # Same as above, but return the posterior distribution instead of the
+#' # estimate.
+#' posterior <- seq_bayes(cases, mu = 1, kappa = 4, post = TRUE)
#'
-#' @export
-seqB <- function(NT, mu, kappa = 20) {
- if (length(NT) < 2)
- print("Warning: length of NT should be at least two.")
- else {
- if (min(NT) > 0) {
- times <- 1:length(NT)
- tau <- diff(times)
- }
- group <- FALSE
- if (min(NT) == 0) {
- times <- which(NT > 0)
- NT <- NT[times]
- tau <- diff(times)
- group <- TRUE
+#' # Note that the following always holds:
+#' estimate == sum(posterior$supp * posterior$pmf)
+seq_bayes <- function(cases, mu, kappa = 20, post = FALSE) {
+ if (any(cases == 0)) {
+ times <- which(cases > 0)
+ if (length(times) < 2) {
+ stop("Vector of case counts must contain at least two positive integers.")
}
+ cases <- cases[times]
+ } else {
+ times <- seq_along(cases)
+ }
- R <- seq(0, kappa, 0.01)
- prior0 <- rep(1, kappa / 0.01 + 1)
- prior0 <- prior0 / sum(prior0)
- k <- length(NT) - 1
- R0.post <- matrix(0, nrow = k, ncol = length(R))
- prior <- prior0
- posterior <- seq(0, length(prior0))
- gamma <- 1 / mu
+ support <- seq(0, kappa, 0.01)
+ tau <- diff(times)
- for (i in 1:k) {
- mm1 <- NT[i]
- mm2 <- NT[i + 1]
- lambda <- tau[i] * gamma * (R - 1)
- lambda <- log(mm1) + lambda
- loglik <- mm2 * lambda - exp(lambda)
- maxll <- max(loglik)
- const <- 0
+ prior <- rep(1, kappa / 0.01 + 1)
+ prior <- prior / sum(prior)
+ posterior <- seq(0, length(prior))
- if (maxll > 700)
- const <- maxll - 700
+ for (i in seq_len(length(cases) - 1)) {
+ lambda <- tau[i] / mu * (support - 1) + log(cases[i])
+ log_like <- cases[i + 1] * lambda - exp(lambda)
+ max_log_like <- max(log_like)
- loglik <- loglik - const
- posterior <- exp(loglik) * prior
- posterior <- posterior / sum(posterior)
- prior <- posterior
+ if (max_log_like > 700) {
+ log_like <- log_like - max_log_like + 700
}
- Rhat <- sum(R * posterior)
+ posterior <- exp(log_like) * prior
+ posterior <- posterior / sum(posterior)
+ prior <- posterior
+ }
- return(list(Rhat = Rhat,
- posterior = list(supp = R, pmf = posterior),
- group = group))
+ if (!post) {
+ return(sum(support * posterior))
}
+ list(supp = support, pmf = posterior)
}
diff --git a/R/server.R b/R/server.R
index 4cf87db..384b341 100644
--- a/R/server.R
+++ b/R/server.R
@@ -299,8 +299,8 @@ eval_estimator <- function(input, output, estimator, dataset) {
}
# Sequential Bayes
else if (estimator$method == "seqB")
- estimate <- round(seqB(unlist(dataset[3]), mu = serial,
- kappa = estimator$kappa)$Rhat, 2)
+ estimate <- round(seq_bayes(unlist(dataset[3]), mu = serial,
+ kappa = estimator$kappa), 2)
# Incidence Decay
else if (estimator$method == "ID")
estimate <- round(id(unlist(dataset[3]), mu = serial), 2)