Update docs

[Rnaught] / R / seq_bayes.R
diff --git a/R/seq_bayes.R b/R/seq_bayes.R

index 1dcf927d4071b5d4505cff56b1aa1931cf3029ef..d486d2b8e439e34436fb3919345a56c79066f543 100644 (file)
--- a/R/seq_bayes.R
+++ b/R/seq_bayes.R
@@ -1,19 +1,19 @@
-#' seqB method
+#' Sequential Bayes (seqB)
  #'
  #' This function implements a sequential Bayesian estimation method of R0 due to
  #' Bettencourt and Riberio (PloS One, 2008). See details for important
  #' implementation notes.
  #'
  #' The method sets a uniform prior distribution on R0 with possible values
-#' between zero and \code{kappa}, discretized to a fine grid. The distribution
-#' of R0 is then updated sequentially, with one update for each new case count
-#' observation. The final estimate of R0 is \code{Rhat}, the mean of the (last)
-#' posterior distribution. The prior distribution is the initial belief of the
+#' between `0` and `kappa`, discretized to a fine grid. The distribution of R0
+#' is then updated sequentially, with one update for each new case count
+#' observation. The final estimate of R0 is the mean of the (last) posterior
+#' distribution. The prior distribution is the initial belief of the
  #' distribution of R0, which is the uninformative uniform distribution with
-#' values between zero and \code{kappa}. Users can change the value of
-#' \code{kappa} only (i.e., the prior distribution cannot be changed from the
-#' uniform). As more case counts are observed, the influence of the prior
-#' distribution should lessen on the final estimate \code{Rhat}.
+#' values between `0` and `kappa`. Users can change the value of `kappa` only
+#' (i.e., the prior distribution cannot be changed from the uniform). As more
+#' case counts are observed, the influence of the prior distribution should
+#' lessen on the final estimate.
  #'
  #' This method is based on an approximation of the SIR model, which is most
  #' valid at the beginning of an epidemic. The method assumes that the mean of
@@ -27,103 +27,96 @@
  #' counts over such periods of time. Without grouping, and in the presence of
  #' zero counts, no estimate can be provided.
  #'
-#' @param NT Vector of case counts.
-#' @param mu Mean of the serial distribution. This needs to match case counts in
-#'           time units. For example, if case counts are weekly and the serial
-#'           distribution has a mean of seven days, then \code{mu} should be set
-#'           to one. If case counts are daily and the serial distribution has a
-#'           mean of seven days, then \code{mu} should be set to seven.
-#' @param kappa Largest possible value of uniform prior (defaults to 20). This
-#'              describes the prior belief on ranges of R0, and should be set to
-#'              a higher value if R0 is believed to be larger.
-#'
-#' @return \code{seqB} returns a list containing the following components:
-#'         \code{Rhat} is the estimate of R0 (the posterior mean),
-#'         \code{posterior} is the posterior distribution of R0 from which
-#'         alternate estimates can be obtained (see examples), and \code{group}
-#'         is an indicator variable (if \code{group == TRUE}, zero values of NT
-#'         were input and grouping was done to obtain \code{Rhat}). The variable
-#'         \code{posterior} is returned as a list made up of \code{supp} (the
-#'         support of the distribution) and \code{pmf} (the probability mass
-#'         function).
+#' @param cases Vector of case counts. The vector must only contain non-negative
+#'   integers, and have at least two positive integers.
+#' @param mu Mean of the serial distribution. This must be a positive number.
+#'   The value should match the case counts in time units. For example, if case
+#'   counts are weekly and the serial distribution has a mean of seven days,
+#'   then `mu` should be set to `1`. If case counts are daily and the serial
+#'   distribution has a mean of seven days, then `mu` should be set to `7`.
+#' @param kappa Largest possible value of the uniform prior (defaults to `20`).
+#'   This must be a number greater than or equal to `1`. It describes the prior
+#'   belief on the ranges of R0, and should be set to a higher value if R0 is
+#'   believed to be larger.
+#' @param post Whether to return the posterior distribution of R0 instead of the
+#'   estimate of R0 (defaults to `FALSE`). This must be a value identical to
+#'   `TRUE` or `FALSE`.
+#'
+#' @return If `post` is identical to `TRUE`, a list containing the following
+#'   components is returned:
+#'   * `supp` - the support of the posterior distribution of R0
+#'   * `pmf` - the probability mass function of the posterior distribution of R0
+#'
+#'   Otherwise, if `post` is identical to `FALSE`, only the estimate of R0 is
+#'   returned. Note that the estimate is equal to `sum(supp * pmf)` (i.e., the
+#'   posterior mean).
+#'
+#' @references [Bettencourt and Riberio (PloS One, 2008)](
+#'   https://doi.org/10.1371/journal.pone.0002185)
+#'
+#' @seealso `vignette("seq_bayes_post", package = "Rnaught")` for examples of
+#'   using the posterior distribution.
+#'
+#' @export
  #'
  #' @examples
  #' # Weekly data.
-#' NT <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4)
+#' cases <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4)
  #'
-#' ## Obtain R0 when the serial distribution has a mean of five days.
-#' res1 <- seqB(NT, mu = 5 / 7)
-#' res1$Rhat
+#' # Obtain R0 when the serial distribution has a mean of five days.
+#' seq_bayes(cases, mu = 5 / 7)
  #'
-#' ## Obtain R0 when the serial distribution has a mean of three days.
-#' res2 <- seqB(NT, mu = 3 / 7)
-#' res2$Rhat
+#' # Obtain R0 when the serial distribution has a mean of three days.
+#' seq_bayes(cases, mu = 3 / 7)
  #'
-#' # Compute posterior mode instead of posterior mean and plot.
+#' # Obtain R0 when the serial distribution has a mean of seven days, and R0 is
+#' # believed to be at most 4.
+#' estimate <- seq_bayes(cases, mu = 1, kappa = 4)
  #'
-#' Rpost <- res1$posterior
-#' loc <- which(Rpost$pmf == max(Rpost$pmf))
-#' Rpost$supp[loc] # Posterior mode.
-#' res1$Rhat # Compare with the posterior mean.
+#' # Same as above, but return the posterior distribution of R0 instead of the
+#' # estimate.
+#' posterior <- seq_bayes(cases, mu = 1, kappa = 4, post = TRUE)
  #'
-#' par(mfrow = c(2, 1), mar = c(2, 2, 1, 1))
+#' # Display the support and probability mass function of the posterior.
+#' posterior$supp
+#' posterior$pmf
  #'
-#' plot(Rpost$supp, Rpost$pmf, col = "black", type = "l", xlab = "", ylab = "")
-#' abline(h = 1 / (20 / 0.01 + 1), col = "red")
-#' abline(v = res1$Rhat, col = "blue")
-#' abline(v = Rpost$supp[loc], col = "purple")
-#' legend("topright",
-#'   legend = c("Prior", "Posterior", "Posterior mean", "Posterior mode"),
-#'   col = c("red", "black", "blue", "purple"), lty = 1)
-#'
-#' @export
-seqB <- function(NT, mu, kappa = 20) {
-  if (length(NT) < 2)
-    print("Warning: length of NT should be at least two.")
-  else {
-    if (min(NT) > 0) {
-      times <- 1:length(NT)
-      tau <- diff(times)
-    }
-    group <- FALSE
-    if (min(NT) == 0) {
-      times <- which(NT > 0)
-      NT <- NT[times]
-      tau <- diff(times)
-      group <- TRUE
+#' # Note that the following always holds:
+#' estimate == sum(posterior$supp * posterior$pmf)
+seq_bayes <- function(cases, mu, kappa = 20, post = FALSE) {
+  if (any(cases == 0)) {
+    times <- which(cases > 0)
+    if (length(times) < 2) {
+      stop("Vector of case counts must contain at least two positive integers.")
      }
+    cases <- cases[times]
+  } else {
+    times <- seq_along(cases)
+  }
  
-    R <- seq(0, kappa, 0.01)
-    prior0 <- rep(1, kappa / 0.01 + 1)
-    prior0 <- prior0 / sum(prior0)
-    k <- length(NT) - 1
-    R0.post <- matrix(0, nrow = k, ncol = length(R))
-    prior <- prior0
-    posterior <- seq(0, length(prior0))
-    gamma <- 1 / mu
+  support <- seq(0, kappa, 0.01)
+  tau <- diff(times)
  
-    for (i in 1:k) {
-      mm1 <- NT[i]
-      mm2 <- NT[i + 1]
-      lambda <- tau[i] * gamma * (R - 1)
-      lambda <- log(mm1) + lambda
-      loglik <- mm2 * lambda - exp(lambda)
-      maxll <- max(loglik)
-      const <- 0
+  prior <- rep(1, kappa / 0.01 + 1)
+  prior <- prior / sum(prior)
+  posterior <- seq(0, length(prior))
  
-      if (maxll > 700)
-        const <- maxll - 700
+  for (i in seq_len(length(cases) - 1)) {
+    lambda <- tau[i] / mu * (support - 1) + log(cases[i])
+    log_like <- cases[i + 1] * lambda - exp(lambda)
+    max_log_like <- max(log_like)
  
-      loglik <- loglik - const
-      posterior <- exp(loglik) * prior
-      posterior <- posterior / sum(posterior)
-      prior <- posterior
+    if (max_log_like > 700) {
+      log_like <- log_like - max_log_like + 700
      }
  
-    Rhat <- sum(R * posterior)
+    posterior <- exp(log_like) * prior
+    posterior <- posterior / sum(posterior)
+    prior <- posterior
+  }
  
-    return(list(Rhat = Rhat,
-                posterior = list(supp = R, pmf = posterior),
-                group = group))
+  if (!post) {
+    return(sum(support * posterior))
    }
+  list(supp = support, pmf = posterior)
  }