From 2a6de5fb5270f70eb83233a10bb84301c119cc16 Mon Sep 17 00:00:00 2001 From: Naeem Model Date: Sat, 13 Jan 2024 19:31:08 +0000 Subject: Refactor seqB --- NAMESPACE | 2 +- R/seq_bayes.R | 167 +++++++++++++++++++++++++------------------------------ R/server.R | 4 +- man/seq_bayes.Rd | 107 +++++++++++++++++------------------ 4 files changed, 134 insertions(+), 146 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index dc19a2c..e57b2b7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,7 +4,7 @@ export(WP) export(app) export(id) export(idea) -export(seqB) +export(seq_bayes) importFrom(methods,is) importFrom(stats,pexp) importFrom(stats,pgamma) diff --git a/R/seq_bayes.R b/R/seq_bayes.R index 1dcf927..107d428 100644 --- a/R/seq_bayes.R +++ b/R/seq_bayes.R @@ -1,19 +1,19 @@ -#' seqB method +#' Sequential Bayes (seqB) #' #' This function implements a sequential Bayesian estimation method of R0 due to #' Bettencourt and Riberio (PloS One, 2008). See details for important #' implementation notes. #' #' The method sets a uniform prior distribution on R0 with possible values -#' between zero and \code{kappa}, discretized to a fine grid. The distribution -#' of R0 is then updated sequentially, with one update for each new case count -#' observation. The final estimate of R0 is \code{Rhat}, the mean of the (last) -#' posterior distribution. The prior distribution is the initial belief of the +#' between `0` and `kappa`, discretized to a fine grid. The distribution of R0 +#' is then updated sequentially, with one update for each new case count +#' observation. The final estimate of R0 is the mean of the (last) posterior +#' distribution. The prior distribution is the initial belief of the #' distribution of R0, which is the uninformative uniform distribution with -#' values between zero and \code{kappa}. Users can change the value of -#' \code{kappa} only (i.e., the prior distribution cannot be changed from the -#' uniform). As more case counts are observed, the influence of the prior -#' distribution should lessen on the final estimate \code{Rhat}. +#' values between `0` and `kappa`. Users can change the value of `kappa` only +#' (i.e., the prior distribution cannot be changed from the uniform). As more +#' case counts are observed, the influence of the prior distribution should +#' lessen on the final estimate. #' #' This method is based on an approximation of the SIR model, which is most #' valid at the beginning of an epidemic. The method assumes that the mean of @@ -27,103 +27,90 @@ #' counts over such periods of time. Without grouping, and in the presence of #' zero counts, no estimate can be provided. #' -#' @param NT Vector of case counts. -#' @param mu Mean of the serial distribution. This needs to match case counts in -#' time units. For example, if case counts are weekly and the serial -#' distribution has a mean of seven days, then \code{mu} should be set -#' to one. If case counts are daily and the serial distribution has a -#' mean of seven days, then \code{mu} should be set to seven. -#' @param kappa Largest possible value of uniform prior (defaults to 20). This -#' describes the prior belief on ranges of R0, and should be set to -#' a higher value if R0 is believed to be larger. +#' @param cases Vector of case counts. The vector must only contain non-negative +#' integers, and have at least two positive integers. +#' @param mu Mean of the serial distribution. This must be a positive number. +#' The value should match the case counts in time units. For example, if case +#' counts are weekly and the serial distribution has a mean of seven days, +#' then `mu` should be set to `1`. If case counts are daily and the serial +#' distribution has a mean of seven days, then `mu` should be set to `7`. +#' @param kappa Largest possible value of the uniform prior (defaults to `20`). +#' This must be a number greater than or equal to `1`. It describes the prior +#' belief on the ranges of R0, and should be set to a higher value if R0 is +#' believed to be larger. +#' @param post Whether to return the posterior distribution of R0 instead of the +#' estimate of R0 (defaults to `FALSE`). This must be a value identical to +#' `TRUE` or `FALSE`. #' -#' @return \code{seqB} returns a list containing the following components: -#' \code{Rhat} is the estimate of R0 (the posterior mean), -#' \code{posterior} is the posterior distribution of R0 from which -#' alternate estimates can be obtained (see examples), and \code{group} -#' is an indicator variable (if \code{group == TRUE}, zero values of NT -#' were input and grouping was done to obtain \code{Rhat}). The variable -#' \code{posterior} is returned as a list made up of \code{supp} (the -#' support of the distribution) and \code{pmf} (the probability mass -#' function). +#' @return If `post` is identical to `TRUE`, a list containing the following +#' components is returned: +#' * `supp` - the support of the posterior distribution of R0 +#' * `pmf` - the probability mass function of the posterior distribution #' -#' @examples -#' # Weekly data. -#' NT <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4) +#' Otherwise, if `post` is identical to `FALSE`, only the estimate of R0 is +#' returned. Note that the estimate is equal to `sum(supp * pmf)` (i.e., the +#' posterior mean). #' -#' ## Obtain R0 when the serial distribution has a mean of five days. -#' res1 <- seqB(NT, mu = 5 / 7) -#' res1$Rhat +#' @references +#' [Bettencourt and Riberio (PloS One, 2008)]( +#' https://doi.org/10.1371/journal.pone.0002185) #' -#' ## Obtain R0 when the serial distribution has a mean of three days. -#' res2 <- seqB(NT, mu = 3 / 7) -#' res2$Rhat +#' @export +#' +#' @examples +#' # Weekly data. +#' cases <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4) #' -#' # Compute posterior mode instead of posterior mean and plot. +#' # Obtain R0 when the serial distribution has a mean of five days. +#' seq_bayes(cases, mu = 5 / 7) #' -#' Rpost <- res1$posterior -#' loc <- which(Rpost$pmf == max(Rpost$pmf)) -#' Rpost$supp[loc] # Posterior mode. -#' res1$Rhat # Compare with the posterior mean. +#' # Obtain R0 when the serial distribution has a mean of three days. +#' seq_bayes(cases, mu = 3 / 7) #' -#' par(mfrow = c(2, 1), mar = c(2, 2, 1, 1)) +#' # Obtain R0 when the serial distribution has a mean of seven days, and R0 is +#' # believed to be at most 4. +#' estimate <- seq_bayes(cases, mu = 1, kappa = 4) #' -#' plot(Rpost$supp, Rpost$pmf, col = "black", type = "l", xlab = "", ylab = "") -#' abline(h = 1 / (20 / 0.01 + 1), col = "red") -#' abline(v = res1$Rhat, col = "blue") -#' abline(v = Rpost$supp[loc], col = "purple") -#' legend("topright", -#' legend = c("Prior", "Posterior", "Posterior mean", "Posterior mode"), -#' col = c("red", "black", "blue", "purple"), lty = 1) +#' # Same as above, but return the posterior distribution instead of the +#' # estimate. +#' posterior <- seq_bayes(cases, mu = 1, kappa = 4, post = TRUE) #' -#' @export -seqB <- function(NT, mu, kappa = 20) { - if (length(NT) < 2) - print("Warning: length of NT should be at least two.") - else { - if (min(NT) > 0) { - times <- 1:length(NT) - tau <- diff(times) - } - group <- FALSE - if (min(NT) == 0) { - times <- which(NT > 0) - NT <- NT[times] - tau <- diff(times) - group <- TRUE +#' # Note that the following always holds: +#' estimate == sum(posterior$supp * posterior$pmf) +seq_bayes <- function(cases, mu, kappa = 20, post = FALSE) { + if (any(cases == 0)) { + times <- which(cases > 0) + if (length(times) < 2) { + stop("Vector of case counts must contain at least two positive integers.") } + cases <- cases[times] + } else { + times <- seq_along(cases) + } - R <- seq(0, kappa, 0.01) - prior0 <- rep(1, kappa / 0.01 + 1) - prior0 <- prior0 / sum(prior0) - k <- length(NT) - 1 - R0.post <- matrix(0, nrow = k, ncol = length(R)) - prior <- prior0 - posterior <- seq(0, length(prior0)) - gamma <- 1 / mu + support <- seq(0, kappa, 0.01) + tau <- diff(times) - for (i in 1:k) { - mm1 <- NT[i] - mm2 <- NT[i + 1] - lambda <- tau[i] * gamma * (R - 1) - lambda <- log(mm1) + lambda - loglik <- mm2 * lambda - exp(lambda) - maxll <- max(loglik) - const <- 0 + prior <- rep(1, kappa / 0.01 + 1) + prior <- prior / sum(prior) + posterior <- seq(0, length(prior)) - if (maxll > 700) - const <- maxll - 700 + for (i in seq_len(length(cases) - 1)) { + lambda <- tau[i] / mu * (support - 1) + log(cases[i]) + log_like <- cases[i + 1] * lambda - exp(lambda) + max_log_like <- max(log_like) - loglik <- loglik - const - posterior <- exp(loglik) * prior - posterior <- posterior / sum(posterior) - prior <- posterior + if (max_log_like > 700) { + log_like <- log_like - max_log_like + 700 } - Rhat <- sum(R * posterior) + posterior <- exp(log_like) * prior + posterior <- posterior / sum(posterior) + prior <- posterior + } - return(list(Rhat = Rhat, - posterior = list(supp = R, pmf = posterior), - group = group)) + if (!post) { + return(sum(support * posterior)) } + list(supp = support, pmf = posterior) } diff --git a/R/server.R b/R/server.R index 4cf87db..384b341 100644 --- a/R/server.R +++ b/R/server.R @@ -299,8 +299,8 @@ eval_estimator <- function(input, output, estimator, dataset) { } # Sequential Bayes else if (estimator$method == "seqB") - estimate <- round(seqB(unlist(dataset[3]), mu = serial, - kappa = estimator$kappa)$Rhat, 2) + estimate <- round(seq_bayes(unlist(dataset[3]), mu = serial, + kappa = estimator$kappa), 2) # Incidence Decay else if (estimator$method == "ID") estimate <- round(id(unlist(dataset[3]), mu = serial), 2) diff --git a/man/seq_bayes.Rd b/man/seq_bayes.Rd index 0864294..41445a9 100644 --- a/man/seq_bayes.Rd +++ b/man/seq_bayes.Rd @@ -1,34 +1,41 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/seqB.R -\name{seqB} -\alias{seqB} -\title{seqB method} +% Please edit documentation in R/seq_bayes.R +\name{seq_bayes} +\alias{seq_bayes} +\title{Sequential Bayes (seqB)} \usage{ -seqB(NT, mu, kappa = 20) +seq_bayes(cases, mu, kappa = 20, post = FALSE) } \arguments{ -\item{NT}{Vector of case counts.} +\item{cases}{Vector of case counts. The vector must only contain non-negative +integers, and have at least two positive integers.} -\item{mu}{Mean of the serial distribution. This needs to match case counts in -time units. For example, if case counts are weekly and the serial -distribution has a mean of seven days, then \code{mu} should be set -to one. If case counts are daily and the serial distribution has a -mean of seven days, then \code{mu} should be set to seven.} +\item{mu}{Mean of the serial distribution. This must be a positive number. +The value should match the case counts in time units. For example, if case +counts are weekly and the serial distribution has a mean of seven days, +then \code{mu} should be set to \code{1}. If case counts are daily and the serial +distribution has a mean of seven days, then \code{mu} should be set to \code{7}.} -\item{kappa}{Largest possible value of uniform prior (defaults to 20). This -describes the prior belief on ranges of R0, and should be set to -a higher value if R0 is believed to be larger.} +\item{kappa}{Largest possible value of the uniform prior (defaults to \code{20}). +This must be a number greater than or equal to \code{1}. It describes the prior +belief on the ranges of R0, and should be set to a higher value if R0 is +believed to be larger.} + +\item{post}{Whether to return the posterior distribution of R0 instead of the +estimate of R0 (defaults to \code{FALSE}). This must be a value identical to +\code{TRUE} or \code{FALSE}.} } \value{ -\code{seqB} returns a list containing the following components: - \code{Rhat} is the estimate of R0 (the posterior mean), - \code{posterior} is the posterior distribution of R0 from which - alternate estimates can be obtained (see examples), and \code{group} - is an indicator variable (if \code{group == TRUE}, zero values of NT - were input and grouping was done to obtain \code{Rhat}). The variable - \code{posterior} is returned as a list made up of \code{supp} (the - support of the distribution) and \code{pmf} (the probability mass - function). +If \code{post} is identical to \code{TRUE}, a list containing the following +components is returned: +\itemize{ +\item \code{supp} - the support of the posterior distribution of R0 +\item \code{pmf} - the probability mass function of the posterior distribution +} + +Otherwise, if \code{post} is identical to \code{FALSE}, only the estimate of R0 is +returned. Note that the estimate is equal to \code{sum(supp * pmf)} (i.e., the +posterior mean). } \description{ This function implements a sequential Bayesian estimation method of R0 due to @@ -37,15 +44,15 @@ implementation notes. } \details{ The method sets a uniform prior distribution on R0 with possible values -between zero and \code{kappa}, discretized to a fine grid. The distribution -of R0 is then updated sequentially, with one update for each new case count -observation. The final estimate of R0 is \code{Rhat}, the mean of the (last) -posterior distribution. The prior distribution is the initial belief of the +between \code{0} and \code{kappa}, discretized to a fine grid. The distribution of R0 +is then updated sequentially, with one update for each new case count +observation. The final estimate of R0 is the mean of the (last) posterior +distribution. The prior distribution is the initial belief of the distribution of R0, which is the uninformative uniform distribution with -values between zero and \code{kappa}. Users can change the value of -\code{kappa} only (i.e., the prior distribution cannot be changed from the -uniform). As more case counts are observed, the influence of the prior -distribution should lessen on the final estimate \code{Rhat}. +values between \code{0} and \code{kappa}. Users can change the value of \code{kappa} only +(i.e., the prior distribution cannot be changed from the uniform). As more +case counts are observed, the influence of the prior distribution should +lessen on the final estimate. This method is based on an approximation of the SIR model, which is most valid at the beginning of an epidemic. The method assumes that the mean of @@ -61,31 +68,25 @@ zero counts, no estimate can be provided. } \examples{ # Weekly data. -NT <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4) +cases <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4) -## Obtain R0 when the serial distribution has a mean of five days. -res1 <- seqB(NT, mu = 5 / 7) -res1$Rhat +# Obtain R0 when the serial distribution has a mean of five days. +seq_bayes(cases, mu = 5 / 7) -## Obtain R0 when the serial distribution has a mean of three days. -res2 <- seqB(NT, mu = 3 / 7) -res2$Rhat +# Obtain R0 when the serial distribution has a mean of three days. +seq_bayes(cases, mu = 3 / 7) -# Compute posterior mode instead of posterior mean and plot. +# Obtain R0 when the serial distribution has a mean of seven days, and R0 is +# believed to be at most 4. +estimate <- seq_bayes(cases, mu = 1, kappa = 4) -Rpost <- res1$posterior -loc <- which(Rpost$pmf == max(Rpost$pmf)) -Rpost$supp[loc] # Posterior mode. -res1$Rhat # Compare with the posterior mean. - -par(mfrow = c(2, 1), mar = c(2, 2, 1, 1)) - -plot(Rpost$supp, Rpost$pmf, col = "black", type = "l", xlab = "", ylab = "") -abline(h = 1 / (20 / 0.01 + 1), col = "red") -abline(v = res1$Rhat, col = "blue") -abline(v = Rpost$supp[loc], col = "purple") -legend("topright", - legend = c("Prior", "Posterior", "Posterior mean", "Posterior mode"), - col = c("red", "black", "blue", "purple"), lty = 1) +# Same as above, but return the posterior distribution instead of the +# estimate. +posterior <- seq_bayes(cases, mu = 1, kappa = 4, post = TRUE) +# Note that the following always holds: +estimate == sum(posterior$supp * posterior$pmf) +} +\references{ +\href{https://doi.org/10.1371/journal.pone.0002185}{Bettencourt and Riberio (PloS One, 2008)} } -- cgit v1.2.3