From 2a6de5fb5270f70eb83233a10bb84301c119cc16 Mon Sep 17 00:00:00 2001
From: Naeem Model <me@nmode.ca>
Date: Sat, 13 Jan 2024 19:31:08 +0000
Subject: Refactor seqB

---
 R/seq_bayes.R | 167 +++++++++++++++++++++++++++-------------------------------
 R/server.R    |   4 +-
 2 files changed, 79 insertions(+), 92 deletions(-)

(limited to 'R')

diff --git a/R/seq_bayes.R b/R/seq_bayes.R
index 1dcf927..107d428 100644
--- a/R/seq_bayes.R
+++ b/R/seq_bayes.R
@@ -1,19 +1,19 @@
-#' seqB method
+#' Sequential Bayes (seqB)
 #'
 #' This function implements a sequential Bayesian estimation method of R0 due to
 #' Bettencourt and Riberio (PloS One, 2008). See details for important
 #' implementation notes.
 #'
 #' The method sets a uniform prior distribution on R0 with possible values
-#' between zero and \code{kappa}, discretized to a fine grid. The distribution
-#' of R0 is then updated sequentially, with one update for each new case count
-#' observation. The final estimate of R0 is \code{Rhat}, the mean of the (last)
-#' posterior distribution. The prior distribution is the initial belief of the
+#' between `0` and `kappa`, discretized to a fine grid. The distribution of R0
+#' is then updated sequentially, with one update for each new case count
+#' observation. The final estimate of R0 is the mean of the (last) posterior
+#' distribution. The prior distribution is the initial belief of the
 #' distribution of R0, which is the uninformative uniform distribution with
-#' values between zero and \code{kappa}. Users can change the value of
-#' \code{kappa} only (i.e., the prior distribution cannot be changed from the
-#' uniform). As more case counts are observed, the influence of the prior
-#' distribution should lessen on the final estimate \code{Rhat}.
+#' values between `0` and `kappa`. Users can change the value of `kappa` only
+#' (i.e., the prior distribution cannot be changed from the uniform). As more
+#' case counts are observed, the influence of the prior distribution should
+#' lessen on the final estimate.
 #'
 #' This method is based on an approximation of the SIR model, which is most
 #' valid at the beginning of an epidemic. The method assumes that the mean of
@@ -27,103 +27,90 @@
 #' counts over such periods of time. Without grouping, and in the presence of
 #' zero counts, no estimate can be provided.
 #'
-#' @param NT Vector of case counts.
-#' @param mu Mean of the serial distribution. This needs to match case counts in
-#'           time units. For example, if case counts are weekly and the serial
-#'           distribution has a mean of seven days, then \code{mu} should be set
-#'           to one. If case counts are daily and the serial distribution has a
-#'           mean of seven days, then \code{mu} should be set to seven.
-#' @param kappa Largest possible value of uniform prior (defaults to 20). This
-#'              describes the prior belief on ranges of R0, and should be set to
-#'              a higher value if R0 is believed to be larger.
+#' @param cases Vector of case counts. The vector must only contain non-negative
+#'   integers, and have at least two positive integers.
+#' @param mu Mean of the serial distribution. This must be a positive number.
+#'   The value should match the case counts in time units. For example, if case
+#'   counts are weekly and the serial distribution has a mean of seven days,
+#'   then `mu` should be set to `1`. If case counts are daily and the serial
+#'   distribution has a mean of seven days, then `mu` should be set to `7`.
+#' @param kappa Largest possible value of the uniform prior (defaults to `20`).
+#'   This must be a number greater than or equal to `1`. It describes the prior
+#'   belief on the ranges of R0, and should be set to a higher value if R0 is
+#'   believed to be larger.
+#' @param post Whether to return the posterior distribution of R0 instead of the
+#'   estimate of R0 (defaults to `FALSE`). This must be a value identical to
+#'   `TRUE` or `FALSE`.
 #'
-#' @return \code{seqB} returns a list containing the following components:
-#'         \code{Rhat} is the estimate of R0 (the posterior mean),
-#'         \code{posterior} is the posterior distribution of R0 from which
-#'         alternate estimates can be obtained (see examples), and \code{group}
-#'         is an indicator variable (if \code{group == TRUE}, zero values of NT
-#'         were input and grouping was done to obtain \code{Rhat}). The variable
-#'         \code{posterior} is returned as a list made up of \code{supp} (the
-#'         support of the distribution) and \code{pmf} (the probability mass
-#'         function).
+#' @return If `post` is identical to `TRUE`, a list containing the following
+#'   components is returned:
+#'   * `supp` - the support of the posterior distribution of R0
+#'   * `pmf` - the probability mass function of the posterior distribution
 #'
-#' @examples
-#' # Weekly data.
-#' NT <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4)
+#'   Otherwise, if `post` is identical to `FALSE`, only the estimate of R0 is
+#'   returned. Note that the estimate is equal to `sum(supp * pmf)` (i.e., the
+#'   posterior mean).
 #'
-#' ## Obtain R0 when the serial distribution has a mean of five days.
-#' res1 <- seqB(NT, mu = 5 / 7)
-#' res1$Rhat
+#' @references
+#' [Bettencourt and Riberio (PloS One, 2008)](
+#' https://doi.org/10.1371/journal.pone.0002185)
 #'
-#' ## Obtain R0 when the serial distribution has a mean of three days.
-#' res2 <- seqB(NT, mu = 3 / 7)
-#' res2$Rhat
+#' @export
+#'
+#' @examples
+#' # Weekly data.
+#' cases <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4)
 #'
-#' # Compute posterior mode instead of posterior mean and plot.
+#' # Obtain R0 when the serial distribution has a mean of five days.
+#' seq_bayes(cases, mu = 5 / 7)
 #'
-#' Rpost <- res1$posterior
-#' loc <- which(Rpost$pmf == max(Rpost$pmf))
-#' Rpost$supp[loc] # Posterior mode.
-#' res1$Rhat # Compare with the posterior mean.
+#' # Obtain R0 when the serial distribution has a mean of three days.
+#' seq_bayes(cases, mu = 3 / 7)
 #'
-#' par(mfrow = c(2, 1), mar = c(2, 2, 1, 1))
+#' # Obtain R0 when the serial distribution has a mean of seven days, and R0 is
+#' # believed to be at most 4.
+#' estimate <- seq_bayes(cases, mu = 1, kappa = 4)
 #'
-#' plot(Rpost$supp, Rpost$pmf, col = "black", type = "l", xlab = "", ylab = "")
-#' abline(h = 1 / (20 / 0.01 + 1), col = "red")
-#' abline(v = res1$Rhat, col = "blue")
-#' abline(v = Rpost$supp[loc], col = "purple")
-#' legend("topright",
-#'   legend = c("Prior", "Posterior", "Posterior mean", "Posterior mode"),
-#'   col = c("red", "black", "blue", "purple"), lty = 1)
+#' # Same as above, but return the posterior distribution instead of the
+#' # estimate.
+#' posterior <- seq_bayes(cases, mu = 1, kappa = 4, post = TRUE)
 #'
-#' @export
-seqB <- function(NT, mu, kappa = 20) {
-  if (length(NT) < 2)
-    print("Warning: length of NT should be at least two.")
-  else {
-    if (min(NT) > 0) {
-      times <- 1:length(NT)
-      tau <- diff(times)
-    }
-    group <- FALSE
-    if (min(NT) == 0) {
-      times <- which(NT > 0)
-      NT <- NT[times]
-      tau <- diff(times)
-      group <- TRUE
+#' # Note that the following always holds:
+#' estimate == sum(posterior$supp * posterior$pmf)
+seq_bayes <- function(cases, mu, kappa = 20, post = FALSE) {
+  if (any(cases == 0)) {
+    times <- which(cases > 0)
+    if (length(times) < 2) {
+      stop("Vector of case counts must contain at least two positive integers.")
     }
+    cases <- cases[times]
+  } else {
+    times <- seq_along(cases)
+  }
 
-    R <- seq(0, kappa, 0.01)
-    prior0 <- rep(1, kappa / 0.01 + 1)
-    prior0 <- prior0 / sum(prior0)
-    k <- length(NT) - 1
-    R0.post <- matrix(0, nrow = k, ncol = length(R))
-    prior <- prior0
-    posterior <- seq(0, length(prior0))
-    gamma <- 1 / mu
+  support <- seq(0, kappa, 0.01)
+  tau <- diff(times)
 
-    for (i in 1:k) {
-      mm1 <- NT[i]
-      mm2 <- NT[i + 1]
-      lambda <- tau[i] * gamma * (R - 1)
-      lambda <- log(mm1) + lambda
-      loglik <- mm2 * lambda - exp(lambda)
-      maxll <- max(loglik)
-      const <- 0
+  prior <- rep(1, kappa / 0.01 + 1)
+  prior <- prior / sum(prior)
+  posterior <- seq(0, length(prior))
 
-      if (maxll > 700)
-        const <- maxll - 700
+  for (i in seq_len(length(cases) - 1)) {
+    lambda <- tau[i] / mu * (support - 1) + log(cases[i])
+    log_like <- cases[i + 1] * lambda - exp(lambda)
+    max_log_like <- max(log_like)
 
-      loglik <- loglik - const
-      posterior <- exp(loglik) * prior
-      posterior <- posterior / sum(posterior)
-      prior <- posterior
+    if (max_log_like > 700) {
+      log_like <- log_like - max_log_like + 700
     }
 
-    Rhat <- sum(R * posterior)
+    posterior <- exp(log_like) * prior
+    posterior <- posterior / sum(posterior)
+    prior <- posterior
+  }
 
-    return(list(Rhat = Rhat,
-                posterior = list(supp = R, pmf = posterior),
-                group = group))
+  if (!post) {
+    return(sum(support * posterior))
   }
+  list(supp = support, pmf = posterior)
 }
diff --git a/R/server.R b/R/server.R
index 4cf87db..384b341 100644
--- a/R/server.R
+++ b/R/server.R
@@ -299,8 +299,8 @@ eval_estimator <- function(input, output, estimator, dataset) {
   }
   # Sequential Bayes
   else if (estimator$method == "seqB")
-    estimate <- round(seqB(unlist(dataset[3]), mu = serial,
-                           kappa = estimator$kappa)$Rhat, 2)
+    estimate <- round(seq_bayes(unlist(dataset[3]), mu = serial,
+                           kappa = estimator$kappa), 2)
   # Incidence Decay
   else if (estimator$method == "ID")
     estimate <- round(id(unlist(dataset[3]), mu = serial), 2)
-- 
cgit v1.2.3