From 2a6de5fb5270f70eb83233a10bb84301c119cc16 Mon Sep 17 00:00:00 2001 From: Naeem Model Date: Sat, 13 Jan 2024 19:31:08 +0000 Subject: Refactor seqB --- R/seq_bayes.R | 167 +++++++++++++++++++++++++++------------------------------- R/server.R | 4 +- 2 files changed, 79 insertions(+), 92 deletions(-) (limited to 'R') diff --git a/R/seq_bayes.R b/R/seq_bayes.R index 1dcf927..107d428 100644 --- a/R/seq_bayes.R +++ b/R/seq_bayes.R @@ -1,19 +1,19 @@ -#' seqB method +#' Sequential Bayes (seqB) #' #' This function implements a sequential Bayesian estimation method of R0 due to #' Bettencourt and Riberio (PloS One, 2008). See details for important #' implementation notes. #' #' The method sets a uniform prior distribution on R0 with possible values -#' between zero and \code{kappa}, discretized to a fine grid. The distribution -#' of R0 is then updated sequentially, with one update for each new case count -#' observation. The final estimate of R0 is \code{Rhat}, the mean of the (last) -#' posterior distribution. The prior distribution is the initial belief of the +#' between `0` and `kappa`, discretized to a fine grid. The distribution of R0 +#' is then updated sequentially, with one update for each new case count +#' observation. The final estimate of R0 is the mean of the (last) posterior +#' distribution. The prior distribution is the initial belief of the #' distribution of R0, which is the uninformative uniform distribution with -#' values between zero and \code{kappa}. Users can change the value of -#' \code{kappa} only (i.e., the prior distribution cannot be changed from the -#' uniform). As more case counts are observed, the influence of the prior -#' distribution should lessen on the final estimate \code{Rhat}. +#' values between `0` and `kappa`. Users can change the value of `kappa` only +#' (i.e., the prior distribution cannot be changed from the uniform). As more +#' case counts are observed, the influence of the prior distribution should +#' lessen on the final estimate. #' #' This method is based on an approximation of the SIR model, which is most #' valid at the beginning of an epidemic. The method assumes that the mean of @@ -27,103 +27,90 @@ #' counts over such periods of time. Without grouping, and in the presence of #' zero counts, no estimate can be provided. #' -#' @param NT Vector of case counts. -#' @param mu Mean of the serial distribution. This needs to match case counts in -#' time units. For example, if case counts are weekly and the serial -#' distribution has a mean of seven days, then \code{mu} should be set -#' to one. If case counts are daily and the serial distribution has a -#' mean of seven days, then \code{mu} should be set to seven. -#' @param kappa Largest possible value of uniform prior (defaults to 20). This -#' describes the prior belief on ranges of R0, and should be set to -#' a higher value if R0 is believed to be larger. +#' @param cases Vector of case counts. The vector must only contain non-negative +#' integers, and have at least two positive integers. +#' @param mu Mean of the serial distribution. This must be a positive number. +#' The value should match the case counts in time units. For example, if case +#' counts are weekly and the serial distribution has a mean of seven days, +#' then `mu` should be set to `1`. If case counts are daily and the serial +#' distribution has a mean of seven days, then `mu` should be set to `7`. +#' @param kappa Largest possible value of the uniform prior (defaults to `20`). +#' This must be a number greater than or equal to `1`. It describes the prior +#' belief on the ranges of R0, and should be set to a higher value if R0 is +#' believed to be larger. +#' @param post Whether to return the posterior distribution of R0 instead of the +#' estimate of R0 (defaults to `FALSE`). This must be a value identical to +#' `TRUE` or `FALSE`. #' -#' @return \code{seqB} returns a list containing the following components: -#' \code{Rhat} is the estimate of R0 (the posterior mean), -#' \code{posterior} is the posterior distribution of R0 from which -#' alternate estimates can be obtained (see examples), and \code{group} -#' is an indicator variable (if \code{group == TRUE}, zero values of NT -#' were input and grouping was done to obtain \code{Rhat}). The variable -#' \code{posterior} is returned as a list made up of \code{supp} (the -#' support of the distribution) and \code{pmf} (the probability mass -#' function). +#' @return If `post` is identical to `TRUE`, a list containing the following +#' components is returned: +#' * `supp` - the support of the posterior distribution of R0 +#' * `pmf` - the probability mass function of the posterior distribution #' -#' @examples -#' # Weekly data. -#' NT <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4) +#' Otherwise, if `post` is identical to `FALSE`, only the estimate of R0 is +#' returned. Note that the estimate is equal to `sum(supp * pmf)` (i.e., the +#' posterior mean). #' -#' ## Obtain R0 when the serial distribution has a mean of five days. -#' res1 <- seqB(NT, mu = 5 / 7) -#' res1$Rhat +#' @references +#' [Bettencourt and Riberio (PloS One, 2008)]( +#' https://doi.org/10.1371/journal.pone.0002185) #' -#' ## Obtain R0 when the serial distribution has a mean of three days. -#' res2 <- seqB(NT, mu = 3 / 7) -#' res2$Rhat +#' @export +#' +#' @examples +#' # Weekly data. +#' cases <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4) #' -#' # Compute posterior mode instead of posterior mean and plot. +#' # Obtain R0 when the serial distribution has a mean of five days. +#' seq_bayes(cases, mu = 5 / 7) #' -#' Rpost <- res1$posterior -#' loc <- which(Rpost$pmf == max(Rpost$pmf)) -#' Rpost$supp[loc] # Posterior mode. -#' res1$Rhat # Compare with the posterior mean. +#' # Obtain R0 when the serial distribution has a mean of three days. +#' seq_bayes(cases, mu = 3 / 7) #' -#' par(mfrow = c(2, 1), mar = c(2, 2, 1, 1)) +#' # Obtain R0 when the serial distribution has a mean of seven days, and R0 is +#' # believed to be at most 4. +#' estimate <- seq_bayes(cases, mu = 1, kappa = 4) #' -#' plot(Rpost$supp, Rpost$pmf, col = "black", type = "l", xlab = "", ylab = "") -#' abline(h = 1 / (20 / 0.01 + 1), col = "red") -#' abline(v = res1$Rhat, col = "blue") -#' abline(v = Rpost$supp[loc], col = "purple") -#' legend("topright", -#' legend = c("Prior", "Posterior", "Posterior mean", "Posterior mode"), -#' col = c("red", "black", "blue", "purple"), lty = 1) +#' # Same as above, but return the posterior distribution instead of the +#' # estimate. +#' posterior <- seq_bayes(cases, mu = 1, kappa = 4, post = TRUE) #' -#' @export -seqB <- function(NT, mu, kappa = 20) { - if (length(NT) < 2) - print("Warning: length of NT should be at least two.") - else { - if (min(NT) > 0) { - times <- 1:length(NT) - tau <- diff(times) - } - group <- FALSE - if (min(NT) == 0) { - times <- which(NT > 0) - NT <- NT[times] - tau <- diff(times) - group <- TRUE +#' # Note that the following always holds: +#' estimate == sum(posterior$supp * posterior$pmf) +seq_bayes <- function(cases, mu, kappa = 20, post = FALSE) { + if (any(cases == 0)) { + times <- which(cases > 0) + if (length(times) < 2) { + stop("Vector of case counts must contain at least two positive integers.") } + cases <- cases[times] + } else { + times <- seq_along(cases) + } - R <- seq(0, kappa, 0.01) - prior0 <- rep(1, kappa / 0.01 + 1) - prior0 <- prior0 / sum(prior0) - k <- length(NT) - 1 - R0.post <- matrix(0, nrow = k, ncol = length(R)) - prior <- prior0 - posterior <- seq(0, length(prior0)) - gamma <- 1 / mu + support <- seq(0, kappa, 0.01) + tau <- diff(times) - for (i in 1:k) { - mm1 <- NT[i] - mm2 <- NT[i + 1] - lambda <- tau[i] * gamma * (R - 1) - lambda <- log(mm1) + lambda - loglik <- mm2 * lambda - exp(lambda) - maxll <- max(loglik) - const <- 0 + prior <- rep(1, kappa / 0.01 + 1) + prior <- prior / sum(prior) + posterior <- seq(0, length(prior)) - if (maxll > 700) - const <- maxll - 700 + for (i in seq_len(length(cases) - 1)) { + lambda <- tau[i] / mu * (support - 1) + log(cases[i]) + log_like <- cases[i + 1] * lambda - exp(lambda) + max_log_like <- max(log_like) - loglik <- loglik - const - posterior <- exp(loglik) * prior - posterior <- posterior / sum(posterior) - prior <- posterior + if (max_log_like > 700) { + log_like <- log_like - max_log_like + 700 } - Rhat <- sum(R * posterior) + posterior <- exp(log_like) * prior + posterior <- posterior / sum(posterior) + prior <- posterior + } - return(list(Rhat = Rhat, - posterior = list(supp = R, pmf = posterior), - group = group)) + if (!post) { + return(sum(support * posterior)) } + list(supp = support, pmf = posterior) } diff --git a/R/server.R b/R/server.R index 4cf87db..384b341 100644 --- a/R/server.R +++ b/R/server.R @@ -299,8 +299,8 @@ eval_estimator <- function(input, output, estimator, dataset) { } # Sequential Bayes else if (estimator$method == "seqB") - estimate <- round(seqB(unlist(dataset[3]), mu = serial, - kappa = estimator$kappa)$Rhat, 2) + estimate <- round(seq_bayes(unlist(dataset[3]), mu = serial, + kappa = estimator$kappa), 2) # Incidence Decay else if (estimator$method == "ID") estimate <- round(id(unlist(dataset[3]), mu = serial), 2) -- cgit v1.2.3