From 80df3ed7a280f86a3b9b5443309487d428f4fe95 Mon Sep 17 00:00:00 2001 From: Naeem Model Date: Thu, 29 Jun 2023 23:47:01 +0000 Subject: Re-style code and enforce 80 character line limit --- R/ID.R | 56 ++++++++-------- R/IDEA.R | 72 ++++++++++---------- R/WP.R | 164 ++++++++++++++++++++++++--------------------- R/WP_internal.R | 132 ++++++++++++++++++++----------------- R/seqB.R | 201 ++++++++++++++++++++++++++++---------------------------- 5 files changed, 324 insertions(+), 301 deletions(-) diff --git a/R/ID.R b/R/ID.R index 0e3cc35..7e8a04d 100644 --- a/R/ID.R +++ b/R/ID.R @@ -1,48 +1,46 @@ #' ID method #' -#' This function implements a least squares estimation method of R0 due to Fisman et al. (PloS One, 2013). -#' See details for implementation notes. +#' This function implements a least squares estimation method of R0 due to +#' Fisman et al. (PloS One, 2013). See details for implementation notes. #' -#' The method is based on a straightforward incidence decay model. The estimate of R0 is the value which -#' minimizes the sum of squares between observed case counts and cases counts 'expected' under the model. +#' The method is based on a straightforward incidence decay model. The estimate +#' of R0 is the value which minimizes the sum of squares between observed case +#' counts and cases counts 'expected' under the model. #' -#' This method is based on an approximation of the SIR model, which is most valid at the beginning of an epidemic. -#' The method assumes that the mean of the serial distribution (sometimes called the serial interval) is known. -#' The final estimate can be quite sensitive to this value, so sensitivity testing is strongly recommended. -#' Users should be careful about units of time (e.g., are counts observed daily or weekly?) when implementing. +#' This method is based on an approximation of the SIR model, which is most +#' valid at the beginning of an epidemic. The method assumes that the mean of +#' the serial distribution (sometimes called the serial interval) is known. The +#' final estimate can be quite sensitive to this value, so sensitivity testing +#' is strongly recommended. Users should be careful about units of time (e.g., +#' are counts observed daily or weekly?) when implementing. #' #' @param NT Vector of case counts. -#' @param mu Mean of the serial distribution. This needs to match case counts in time units. For example, if case counts -#' are weekly and the serial distribution has a mean of seven days, then \code{mu} should be set to one If case -#' counts are daily and the serial distribution has a mean of seven days, then \code{mu} should be set to seven. +#' @param mu Mean of the serial distribution. This needs to match case counts +#' in time units. For example, if case counts are weekly and the +#' serial distribution has a mean of seven days, then \code{mu} should +#' be set to one. If case counts are daily and the serial distribution +#' has a mean of seven days, then \code{mu} should be set to seven. #' #' @return \code{ID} returns a single value, the estimate of R0. #' #' @examples -#' ## ===================================================== ## -#' ## Illustrate on weekly data ## -#' ## ===================================================== ## -#' +#' # Weekly data: #' NT <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4) -#' ## obtain Rhat when serial distribution has mean of five days -#' ID(NT=NT, mu=5/7) -#' ## obtain Rhat when serial distribution has mean of three days -#' ID(NT=NT, mu=3/7) #' -#' ## ========================================================= ## -#' ## Compute Rhat using only the first five weeks of data ## -#' ## ========================================================= ## +#' # Obtain R0 when the serial distribution has a mean of five days. +#' ID(NT, mu = 5 / 7) #' -#' ID(NT=NT[1:5], mu=5/7) # serial distribution has mean of five days +#' # Obtain R0 when the serial distribution has a mean of three days. +#' ID(NT, mu = 3 / 7) #' #' @export ID <- function(NT, mu) { - NT <- as.numeric(NT) - TT <- length(NT) - s <- (1:TT) / mu - y <- log(NT) / s + NT <- as.numeric(NT) + TT <- length(NT) + s <- (1:TT) / mu + y <- log(NT) / s - R0_ID <- exp(sum(y) / TT) + R0_ID <- exp(sum(y) / TT) - return(R0_ID) + return(R0_ID) } diff --git a/R/IDEA.R b/R/IDEA.R index 854acd7..53fa653 100644 --- a/R/IDEA.R +++ b/R/IDEA.R @@ -1,58 +1,56 @@ #' IDEA method #' -#' This function implements a least squares estimation method of R0 due to Fisman et al. (PloS One, 2013). -#' See details for implementation notes. +#' This function implements a least squares estimation method of R0 due to +#' Fisman et al. (PloS One, 2013). See details for implementation notes. #' -#' This method is closely related to that implemented in \code{ID}. The method is based on an incidence decay model. -#' The estimate of R0 is the value which minimizes the sum of squares between observed case counts and cases counts +#' This method is closely related to that implemented in \code{ID}. The method +#' is based on an incidence decay model. The estimate of R0 is the value which +#' minimizes the sum of squares between observed case counts and cases counts #' expected under the model. #' -#' This method is based on an approximation of the SIR model, which is most valid at the beginning of an epidemic. -#' The method assumes that the mean of the serial distribution (sometimes called the serial interval) is known. -#' The final estimate can be quite sensitive to this value, so sensitivity testing is strongly recommended. -#' Users should be careful about units of time (e.g., are counts observed daily or weekly?) when implementing. +#' This method is based on an approximation of the SIR model, which is most +#' valid at the beginning of an epidemic. The method assumes that the mean of +#' the serial distribution (sometimes called the serial interval) is known. The +#' final estimate can be quite sensitive to this value, so sensitivity testing +#' is strongly recommended. Users should be careful about units of time (e.g., +#' are counts observed daily or weekly?) when implementing. #' #' @param NT Vector of case counts. -#' @param mu Mean of the serial distribution. This needs to match case counts in time units. For example, if case counts -#' are weekly and the serial distribution has a mean of seven days, then \code{mu} should be set to one. If case -#' counts are daily and the serial distribution has a mean of seven days, then \code{mu} should be set to seven. +#' @param mu Mean of the serial distribution. This needs to match case counts in +#' time units. For example, if case counts are weekly and the serial +#' distribution has a mean of seven days, then \code{mu} should be set +#' to one. If case counts are daily and the serial distribution has a +#' mean of seven days, then \code{mu} should be set to seven. #' #' @return \code{IDEA} returns a single value, the estimate of R0. #' #' @examples -#' ## ===================================================== ## -#' ## Illustrate on weekly data ## -#' ## ===================================================== ## -#' +#' # Weekly data. #' NT <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4) -#' ## obtain Rhat when serial distribution has mean of five days -#' IDEA(NT=NT, mu=5/7) -#' ## obtain Rhat when serial distribution has mean of three days -#' IDEA(NT=NT, mu=3/7) #' -#' ## ========================================================= ## -#' ## Compute Rhat using only the first five weeks of data ## -#' ## ========================================================= ## +#' # Obtain R0 when the serial distribution has a mean of five days. +#' IDEA(NT, mu = 5 / 7) #' -#' IDEA(NT=NT[1:5], mu=5/7) # serial distribution has mean of five days +#' # Obtain R0 when the serial distribution has a mean of three days. +#' IDEA(NT, mu = 3 / 7) #' #' @export IDEA <- function(NT, mu) { - if (length(NT) < 2) - print("Warning: length of NT should be at least two.") - else { - NT <- as.numeric(NT) - TT <- length(NT) - s <- (1:TT) / mu + if (length(NT) < 2) + print("Warning: length of NT should be at least two.") + else { + NT <- as.numeric(NT) + TT <- length(NT) + s <- (1:TT) / mu - y1 <- log(NT) / s - y2 <- s^2 - y3 <- log(NT) + y1 <- log(NT) / s + y2 <- s^2 + y3 <- log(NT) - IDEA1 <- sum(y2) * sum(y1) - sum(s) * sum(y3) - IDEA2 <- TT * sum(y2) - sum(s)^2 - IDEA <- exp(IDEA1 / IDEA2) + IDEA1 <- sum(y2) * sum(y1) - sum(s) * sum(y3) + IDEA2 <- TT * sum(y2) - sum(s)^2 + IDEA <- exp(IDEA1 / IDEA2) - return(IDEA) - } + return(IDEA) + } } diff --git a/R/WP.R b/R/WP.R index c8178b7..a6e5354 100644 --- a/R/WP.R +++ b/R/WP.R @@ -1,98 +1,112 @@ #' WP method #' -#' This function implements an R0 estimation due to White and Pagano (Statistics in Medicine, 2008). -#' The method is based on maximum likelihood estimation in a Poisson transmission model. -#' See details for important implementation notes. +#' This function implements an R0 estimation due to White and Pagano (Statistics +#' in Medicine, 2008). The method is based on maximum likelihood estimation in a +#' Poisson transmission model. See details for important implementation notes. #' -#' This method is based on a Poisson transmission model, and hence may be most most valid at the beginning -#' of an epidemic. In their model, the serial distribution is assumed to be discrete with a finite number -#' of posible values. In this implementation, if \code{mu} is not {NA}, the serial distribution is taken to -#' be a discretized version of a gamma distribution with mean \code{mu}, shape parameter one, and largest -#' possible value based on parameter \code{tol}. When \code{mu} is \code{NA}, the function implements a -#' grid search algorithm to find the maximum likelihood estimator over all possible gamma distributions -#' with unknown mean and variance, restricting these to a prespecified grid (see \code{search} parameter). +#' This method is based on a Poisson transmission model, and hence may be most +#' most valid at the beginning of an epidemic. In their model, the serial +#' distribution is assumed to be discrete with a finite number of posible +#' values. In this implementation, if \code{mu} is not {NA}, the serial +#' distribution is taken to be a discretized version of a gamma distribution +#' with mean \code{mu}, shape parameter one, and largest possible value based on +#' parameter \code{tol}. When \code{mu} is \code{NA}, the function implements a +#' grid search algorithm to find the maximum likelihood estimator over all +#' possible gamma distributions with unknown mean and variance, restricting +#' these to a prespecified grid (see \code{search} parameter). #' -#' When the serial distribution is known (i.e., \code{mu} is not \code{NA}), sensitivity testing of \code{mu} -#' is strongly recommended. If the serial distribution is unknown (i.e., \code{mu} is \code{NA}), the -#' likelihood function can be flat near the maximum, resulting in numerical instability of the optimizer. -#' When \code{mu} is \code{NA}, the implementation takes considerably longer to run. Users should be careful -#' about units of time (e.g., are counts observed daily or weekly?) when implementing. +#' When the serial distribution is known (i.e., \code{mu} is not \code{NA}), +#' sensitivity testing of \code{mu} is strongly recommended. If the serial +#' distribution is unknown (i.e., \code{mu} is \code{NA}), the likelihood +#' function can be flat near the maximum, resulting in numerical instability of +#' the optimizer. When \code{mu} is \code{NA}, the implementation takes +#' considerably longer to run. Users should be careful about units of time +#' (e.g., are counts observed daily or weekly?) when implementing. #' -#' The model developed in White and Pagano (2008) is discrete, and hence the serial distribution is finite -#' discrete. In our implementation, the input value \code{mu} is that of a continuous distribution. The -#' algorithm discretizes this input when \code{mu} is not \code{NA}, and hence the mean of the serial -#' distribution returned in the list \code{SD} will differ from \code{mu} somewhat. That is to say, if the -#' user notices that the input \code{mu} and output mean of \code{SD} are different, this is to be expected, +#' The model developed in White and Pagano (2008) is discrete, and hence the +#' serial distribution is finite discrete. In our implementation, the input +#' value \code{mu} is that of a continuous distribution. The algorithm +#' discretizes this input when \code{mu} is not \code{NA}, and hence the mean of +#' the serial distribution returned in the list \code{SD} will differ from +#' \code{mu} somewhat. That is to say, if the user notices that the input +#' \code{mu} and output mean of \code{SD} are different, this is to be expected, #' and is caused by the discretization. #' #' @param NT Vector of case counts. -#' @param mu Mean of the serial distribution (needs to match case counts in time units; for example, if case -#' counts are weekly and the serial distribution has a mean of seven days, then \code{mu} should be -#' set to one). The default value of \code{mu} is set to \code{NA}. -#' @param search List of default values for the grid search algorithm. The list includes three elements: the -#' first is \code{B}, which is the length of the grid in one dimension; the second is -#' \code{scale.max}, which is the largest possible value of the scale parameter; and the third -#' is \code{shape.max}, which is the largest possible value of the shape parameter. Defaults to -#' \code{B=100, scale.max=10, shape.max=10}. For both shape and scale, the smallest possible -#' value is 1/\code{B}. -#' @param tol Cutoff value for cumulative distribution function of the pre-discretization gamma serial -#' distribution. Defaults to 0.999 (i.e. in the discretization, the maximum is chosen such that the -#' original gamma distribution has cumulative probability of no more than 0.999 at this maximum). +#' @param mu Mean of the serial distribution (needs to match case counts in time +#' units; for example, if case counts are weekly and the serial +#' distribution has a mean of seven days, then \code{mu} should be set +#' to one). The default value of \code{mu} is set to \code{NA}. +#' @param search List of default values for the grid search algorithm. The list +#' includes three elements: the first is \code{B}, which is the +#' length of the grid in one dimension; the second is +#' \code{scale.max}, which is the largest possible value of the +#' scale parameter; and the third is \code{shape.max}, which is +#' the largest possible value of the shape parameter. Defaults to +#' \code{B=100, scale.max=10, shape.max=10}. For both shape and +#' scale, the smallest possible value is 1/\code{B}. +#' @param tol Cutoff value for cumulative distribution function of the +#' pre-discretization gamma serial distribution. Defaults to 0.999 +#' (i.e. in the discretization, the maximum is chosen such that the +#' original gamma distribution has cumulative probability of no more +#' than 0.999 at this maximum). #' -#' @return \code{WP} returns a list containing the following components: \code{Rhat} is the estimate of R0, -#' and \code{SD} is either the discretized serial distribution (if \code{mu} is not \code{NA}), or the -#' estimated discretized serial distribution (if \code{mu} is \code{NA}). The list also returns the -#' variable \code{check}, which is equal to the number of non-unique maximum likelihood estimators. -#' The serial distribution \code{SD} is returned as a list made up of \code{supp} (the support of -#' the distribution) and \code{pmf} (the probability mass function). +#' @return \code{WP} returns a list containing the following components: +#' \code{Rhat} is the estimate of R0, and \code{SD} is either the +#' discretized serial distribution (if \code{mu} is not \code{NA}), or +#' the estimated discretized serial distribution (if \code{mu} is +#' \code{NA}). The list also returns the variable \code{check}, which is +#' equal to the number of non-unique maximum likelihood estimators. The +#' serial distribution \code{SD} is returned as a list made up of +#' \code{supp} (the support of the distribution) and \code{pmf} (the +#' probability mass function). #' #' @examples -#' ## ===================================================== ## -#' ## Illustrate on weekly data ## -#' ## ===================================================== ## -#' +#' # Weekly data. #' NT <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4) -#' ## obtain Rhat when serial distribution has mean of five days -#' res1 <- WP(NT=NT, mu=5/7) +#' +#' # Obtain R0 when the serial distribution has a mean of five days. +#' res1 <- WP(NT, mu = 5 / 7) #' res1$Rhat -#' ## obtain Rhat when serial distribution has mean of three days -#' res2 <- WP(NT=NT, mu=3/7) +#' +#' # Obtain R0 when the serial distribution has a mean of three days. +#' res2 <- WP(NT, mu = 3 / 7) #' res2$Rhat -#' ## obtain Rhat when serial distribution is unknown -#' ## NOTE: this implementation will take longer to run -#' res3 <- WP(NT=NT) +#' +#' # Obtain R0 when the serial distribution is unknown. +#' # NOTE: This implementation will take longer to run. +#' res3 <- WP(NT) #' res3$Rhat -#' ## find mean of estimated serial distribution +#' +#' # Find the mean of the estimated serial distribution. #' serial <- res3$SD #' sum(serial$supp * serial$pmf) #' -#' ## ========================================================= ## -#' ## Compute Rhat using only the first five weeks of data ## -#' ## ========================================================= ## -#' -#' res4 <- WP(NT=NT[1:5], mu=5/7) # serial distribution has mean of five days -#' res4$Rhat -#' #' @importFrom stats pexp qexp #' #' @export -WP <- function(NT, mu=NA, search=list(B=100, shape.max=10, scale.max=10), tol=0.999) { - if (is.na(mu)) { - print("You have assumed that the serial distribution is unknown.") - res <- WP_unknown(NT=NT, B=search$B, shape.max=search$shape.max, scale.max=search$scale.max, tol=tol) - Rhat <- res$Rhat - p <- res$p - range.max <- res$range.max - JJ <- res$JJ - } else { - print("You have assumed that the serial distribution is known.") - range.max <- ceiling(qexp(tol, rate=1/mu)) - p <- diff(pexp(0:range.max, 1/mu)) - p <- p / sum(p) - res <- WP_known(NT=NT, p=p) - Rhat <- res - JJ <- NA - } +WP <- function(NT, mu = NA, + search = list(B = 100, shape.max = 10, scale.max = 10), + tol = 0.999) { + if (is.na(mu)) { + print("You have assumed that the serial distribution is unknown.") + res <- WP_unknown(NT, B = search$B, shape.max = search$shape.max, + scale.max = search$scale.max, tol = tol) + Rhat <- res$Rhat + p <- res$p + range.max <- res$range.max + JJ <- res$JJ + } else { + print("You have assumed that the serial distribution is known.") + range.max <- ceiling(qexp(tol, rate = 1 / mu)) + p <- diff(pexp(0:range.max, 1 / mu)) + p <- p / sum(p) + res <- WP_known(NT = NT, p = p) + Rhat <- res + JJ <- NA + } - return(list(Rhat=Rhat, check=length(JJ), SD=list(supp=1:range.max, pmf=p))) + return(list(Rhat = Rhat, + check = length(JJ), + SD = list(supp = 1:range.max, pmf = p))) } diff --git a/R/WP_internal.R b/R/WP_internal.R index 54744b9..420d0c0 100644 --- a/R/WP_internal.R +++ b/R/WP_internal.R @@ -1,7 +1,8 @@ #' WP method background function WP_known #' -#' This is a background/internal function called by \code{WP}. It computes the maximum -#' likelihood estimator of R0 assuming that the serial distribution is known and finite discrete. +#' This is a background/internal function called by \code{WP}. It computes the +#' maximum likelihood estimator of R0 assuming that the serial distribution is +#' known and finite discrete. #' #' @param NT Vector of case counts. #' @param p Discretized version of the serial distribution. @@ -10,97 +11,106 @@ #' #' @keywords internal WP_known <- function(NT, p) { - k <- length(p) - TT <- length(NT) - 1 - mu_t <- rep(0, TT) + k <- length(p) + TT <- length(NT) - 1 + mu_t <- rep(0, TT) - for (i in 1:TT) { - Nt <- NT[i:max(1, i-k+1)] - mu_t[i] <- sum(p[1:min(k, i)] * Nt) - } + for (i in 1:TT) { + Nt <- NT[i:max(1, i - k + 1)] + mu_t[i] <- sum(p[1:min(k, i)] * Nt) + } - Rhat <- sum(NT[-1]) / sum(mu_t) - return(Rhat) + Rhat <- sum(NT[-1]) / sum(mu_t) + return(Rhat) } #' WP method background function WP_unknown #' -#' This is a background/internal function called by \code{WP}. It computes the maximum likelihood estimator -#' of R0 assuming that the serial distribution is unknown but comes from a discretized gamma distribution. -#' The function then implements a simple grid search algorithm to obtain the maximum likelihood estimator -#' of R0 as well as the gamma parameters. +#' This is a background/internal function called by \code{WP}. It computes the +#' maximum likelihood estimator of R0 assuming that the serial distribution is +#' unknown but comes from a discretized gamma distribution. The function then +#' implements a simple grid search algorithm to obtain the maximum likelihood +#' estimator of R0 as well as the gamma parameters. #' #' @param NT Vector of case counts. #' @param B Length of grid for shape and scale (grid search parameter). #' @param shape.max Maximum shape value (grid \code{search} parameter). #' @param scale.max Maximum scale value (grid \code{search} parameter). -#' @param tol cutoff value for cumulative distribution function of the serial distribution (defaults to 0.999). +#' @param tol cutoff value for cumulative distribution function of the serial +#' distribution (defaults to 0.999). #' -#' @return The function returns \code{Rhat}, the maximum likelihood estimator of R0, as well as the maximum -#' likelihood estimator of the discretized serial distribution given by \code{p} (the probability mass -#' function) and \code{range.max} (the distribution has support on the integers one to \code{range.max}). -#' The function also returns \code{resLL} (all values of the log-likelihood) at \code{shape} (grid for -#' shape parameter) and at \code{scale} (grid for scale parameter), as well as \code{resR0} (the full -#' vector of maximum likelihood estimators), \code{JJ} (the locations for the likelihood for these), and -#' \code{J0} (the location for the maximum likelihood estimator \code{Rhat}). If \code{JJ} and \code{J0} -#' are not the same, this means that the maximum likelihood estimator is not unique. +#' @return The function returns \code{Rhat}, the maximum likelihood estimator of +#' R0, as well as the maximum likelihood estimator of the discretized +#' serial distribution given by \code{p} (the probability mass function) +#' and \code{range.max} (the distribution has support on the integers +#' one to \code{range.max}). The function also returns \code{resLL} (all +#' values of the log-likelihood) at \code{shape} (grid for shape +#' parameter) and at \code{scale} (grid for scale parameter), as well as +#' \code{resR0} (the full vector of maximum likelihood estimators), +#' \code{JJ} (the locations for the likelihood for these), and \code{J0} +#' (the location for the maximum likelihood estimator \code{Rhat}). If +#' \code{JJ} and \code{J0} are not the same, this means that the maximum +#' likelihood estimator is not unique. #' #' @importFrom stats pgamma qgamma #' #' @keywords internal -WP_unknown <- function(NT, B=100, shape.max=10, scale.max=10, tol=0.999) { - shape <- seq(0, shape.max, length.out=B+1) - scale <- seq(0, scale.max, length.out=B+1) - shape <- shape[-1] - scale <- scale[-1] +WP_unknown <- function(NT, B = 100, shape.max = 10, scale.max = 10, + tol = 0.999) { + shape <- seq(0, shape.max, length.out = B + 1) + scale <- seq(0, scale.max, length.out = B + 1) + shape <- shape[-1] + scale <- scale[-1] - resLL <- matrix(0,B,B) - resR0 <- matrix(0,B,B) + resLL <- matrix(0, B, B) + resR0 <- matrix(0, B, B) - for (i in 1:B) { - for (j in 1:B) { - range.max <- ceiling(qgamma(tol, shape=shape[i], scale=scale[j])) - p <- diff(pgamma(0:range.max, shape=shape[i], scale=scale[j])) - p <- p / sum(p) - mle <- WP_known(NT, p) - resLL[i,j] <- computeLL(p, NT, mle) - resR0[i,j] <- mle - } + for (i in 1:B) + for (j in 1:B) { + range.max <- ceiling(qgamma(tol, shape = shape[i], scale = scale[j])) + p <- diff(pgamma(0:range.max, shape = shape[i], scale = scale[j])) + p <- p / sum(p) + mle <- WP_known(NT, p) + resLL[i, j] <- computeLL(p, NT, mle) + resR0[i, j] <- mle } - - J0 <- which.max(resLL) - R0hat <- resR0[J0] - JJ <- which(resLL == resLL[J0], arr.ind=TRUE) - range.max <- ceiling(qgamma(tol, shape=shape[JJ[1]], scale=scale[JJ[2]])) - p <- diff(pgamma(0:range.max, shape=shape[JJ[1]], scale=scale[JJ[2]])) - p <- p / sum(p) - - return(list(Rhat=R0hat, J0=J0, ll=resLL, Rs=resR0, scale=scale, shape=shape, JJ=JJ, p=p, range.max=range.max)) + + J0 <- which.max(resLL) + R0hat <- resR0[J0] + JJ <- which(resLL == resLL[J0], arr.ind = TRUE) + range.max <- ceiling(qgamma(tol, shape = shape[JJ[1]], scale = scale[JJ[2]])) + p <- diff(pgamma(0:range.max, shape = shape[JJ[1]], scale = scale[JJ[2]])) + p <- p / sum(p) + + return(list(Rhat = R0hat, J0 = J0, ll = resLL, Rs = resR0, scale = scale, + shape = shape, JJ = JJ, p = p, range.max = range.max)) } #' WP method background function computeLL #' -#' This is a background/internal function called by \code{WP}. It computes the log-likelihood. +#' This is a background/internal function called by \code{WP}. It computes the +#' log-likelihood. #' #' @param NT Vector of case counts. #' @param p Discretized version of the serial distribution. #' @param R0 Basic reproductive ratio. #' -#' @return This function returns the log-likelihood at the input variables and parameters. +#' @return This function returns the log-likelihood at the input variables and +#' parameters. #' #' @keywords internal computeLL <- function(p, NT, R0) { - k <- length(p) - TT <- length(NT) - 1 - mu_t <- rep(0, TT) + k <- length(p) + TT <- length(NT) - 1 + mu_t <- rep(0, TT) - for (i in 1:TT) { - Nt <- NT[i:max(1, i-k+1)] - mu_t[i] <- sum(p[1:min(k, i)] * Nt) - } + for (i in 1:TT) { + Nt <- NT[i:max(1, i - k + 1)] + mu_t[i] <- sum(p[1:min(k, i)] * Nt) + } - mu_t <- R0 * mu_t - LL <- sum(NT[-1] * log(mu_t)) - sum(mu_t) + mu_t <- R0 * mu_t + LL <- sum(NT[-1] * log(mu_t)) - sum(mu_t) - return(LL) + return(LL) } diff --git a/R/seqB.R b/R/seqB.R index 8685f39..e51117f 100644 --- a/R/seqB.R +++ b/R/seqB.R @@ -1,126 +1,129 @@ #' seqB method #' -#' This function implements a sequential Bayesian estimation method of R0 due to Bettencourt and Riberio (PloS One, 2008). -#' See details for important implementation notes. +#' This function implements a sequential Bayesian estimation method of R0 due to +#' Bettencourt and Riberio (PloS One, 2008). See details for important +#' implementation notes. #' -#' The method sets a uniform prior distribution on R0 with possible values between zero and \code{kappa}, discretized to a fine grid. -#' The distribution of R0 is then updated sequentially, with one update for each new case count observation. -#' The final estimate of R0 is \code{Rhat}, the mean of the (last) posterior distribution. -#' The prior distribution is the initial belief of the distribution of R0; which in this implementation is the uninformative uniform -#' distribution with values between zero and \code{kappa}. Users can change the value of /code{kappa} only (i.e., the prior distribution -#' cannot be changed from the uniform). As more case counts are observed, the influence of the prior distribution should lessen on -#' the final estimate \code{Rhat}. +#' The method sets a uniform prior distribution on R0 with possible values +#' between zero and \code{kappa}, discretized to a fine grid. The distribution +#' of R0 is then updated sequentially, with one update for each new case count +#' observation. The final estimate of R0 is \code{Rhat}, the mean of the (last) +#' posterior distribution. The prior distribution is the initial belief of the +#' distribution of R0, which is the uninformative uniform distribution with +#' values between zero and \code{kappa}. Users can change the value of +#' /code{kappa} only (i.e., the prior distribution cannot be changed from the +#' uniform). As more case counts are observed, the influence of the prior +#' distribution should lessen on the final estimate \code{Rhat}. #' -#' This method is based on an approximation of the SIR model, which is most valid at the beginning of an epidemic. The method assumes -#' that the mean of the serial distribution (sometimes called the serial interval) is known. The final estimate can be quite sensitive -#' to this value, so sensitivity testing is strongly recommended. Users should be careful about units of time (e.g., are counts observed -#' daily or weekly?) when implementing. +#' This method is based on an approximation of the SIR model, which is most +#' valid at the beginning of an epidemic. The method assumes that the mean of +#' the serial distribution (sometimes called the serial interval) is known. The +#' final estimate can be quite sensitive to this value, so sensitivity testing +#' is strongly recommended. Users should be careful about units of time (e.g., +#' are counts observed daily or weekly?) when implementing. #' -#' Our code has been modified to provide an estimate even if case counts equal to zero are present in some time intervals. This is done -#' by grouping the counts over such periods of time. Without grouping, and in the presence of zero counts, no estimate can be provided. +#' Our code has been modified to provide an estimate even if case counts equal +#' to zero are present in some time intervals. This is done by grouping the +#' counts over such periods of time. Without grouping, and in the presence of +#' zero counts, no estimate can be provided. #' #' @param NT Vector of case counts. -#' @param mu Mean of the serial distribution. This needs to match case counts in time units. For example, if case counts -#' are weekly and the serial distribution has a mean of seven days, then \code{mu} should be set to one. If case -#' counts are daily and the serial distribution has a mean of seven days, then \code{mu} should be set to seven. -#' @param kappa Largest possible value of uniform prior (defaults to 20). This describes the prior belief on ranges of R0, -#' and should be set to a higher value if R0 is believed to be larger. +#' @param mu Mean of the serial distribution. This needs to match case counts in +#' time units. For example, if case counts are weekly and the serial +#' distribution has a mean of seven days, then \code{mu} should be set +#' to one. If case counts are daily and the serial distribution has a +#' mean of seven days, then \code{mu} should be set to seven. +#' @param kappa Largest possible value of uniform prior (defaults to 20). This +#' describes the prior belief on ranges of R0, and should be set to +#' a higher value if R0 is believed to be larger. #' -#' @return \code{secB} returns a list containing the following components: \code{Rhat} is the estimate of R0 (the posterior mean), -#' \code{posterior} is the posterior distribution of R0 from which alternate estimates can be obtained (see examples), -#' and \code{group} is an indicator variable (if \code{group=TRUE}, zero values of NT were input and grouping was done -#' to obtain \code{Rhat}). The variable \code{posterior} is returned as a list made up of \code{supp} (the support of -#' the distribution) and \code{pmf} (the probability mass function). +#' @return \code{seqB} returns a list containing the following components: +#' \code{Rhat} is the estimate of R0 (the posterior mean), +#' \code{posterior} is the posterior distribution of R0 from which +#' alternate estimates can be obtained (see examples), and \code{group} +#' is an indicator variable (if \code{group=TRUE}, zero values of NT +#' were input and grouping was done to obtain \code{Rhat}). The variable +#' \code{posterior} is returned as a list made up of \code{supp} (the +#' support of the distribution) and \code{pmf} (the probability mass +#' function). #' #' @examples -#' ## ===================================================== ## -#' ## Illustrate on weekly data ## -#' ## ===================================================== ## -#' +#' # Weekly data. #' NT <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4) -#' ## obtain Rhat when serial distribution has mean of five days -#' res1 <- seqB(NT=NT, mu=5/7) +#' +#' ## Obtain R0 when the serial distribution has a mean of five days. +#' res1 <- seqB(NT, mu = 5 / 7) #' res1$Rhat -#' ## obtain Rhat when serial distribution has mean of three days -#' res2 <- seqB(NT=NT, mu=3/7) +#' +#' ## Obtain R0 when the serial distribution has a mean of three days. +#' res2 <- seqB(NT, mu = 3 / 7) #' res2$Rhat #' -#' ## ============================================================= ## -#' ## Compute posterior mode instead of posterior mean and plot ## -#' ## ============================================================= ## +#' # Compute posterior mode instead of posterior mean and plot. #' -#' Rpost <- res1$posterior +#' Rpost <- res1$posterior #' loc <- which(Rpost$pmf == max(Rpost$pmf)) -#' Rpost$supp[loc] # posterior mode -#' res1$Rhat # compare with posterior mean +#' Rpost$supp[loc] # Posterior mode. +#' res1$Rhat # Compare with the posterior mean. #' -#' par(mfrow=c(2, 1), mar=c(2, 2, 1, 1)) -#' plot(Rpost$supp, Rpost$pmf, col="black", type="l", xlab="", ylab="") -#' abline(h=1/(20/0.01+1), col="red") -#' abline(v=res1$Rhat, col="blue") -#' abline(v=Rpost$supp[loc], col="purple") -#' legend("topright", legend=c("prior", "posterior", "posterior mean (Rhat)", "posterior mode"), -#' col=c("red", "black", "blue", "purple"), lty=1) -#' plot(Rpost$supp, Rpost$pmf, col="black", type="l", xlim=c(0.5, 1.5), xlab="", ylab="") -#' abline(h=1/(20/0.01+1), col="red") -#' abline(v=res1$Rhat, col="blue") -#' abline(v=Rpost$supp[loc], col="purple") -#' legend("topright", legend=c("prior", "posterior", "posterior mean (Rhat)", "posterior mode"), -#' col=c("red", "black", "blue", "purple"), lty=1) +#' par(mfrow = c(2, 1), mar = c(2, 2, 1, 1)) #' -#' ## ========================================================= ## -#' ## Compute Rhat using only the first five weeks of data ## -#' ## ========================================================= ## -#' -#' res3 <- seqB(NT=NT[1:5], mu=5/7) # serial distribution has mean of five days -#' res3$Rhat +#' plot(Rpost$supp, Rpost$pmf, col = "black", type = "l", xlab = "", ylab = "") +#' abline(h = 1 / (20 / 0.01 + 1), col = "red") +#' abline(v = res1$Rhat, col = "blue") +#' abline(v = Rpost$supp[loc], col = "purple") +#' legend("topright", +#' legend = c("Prior", "Posterior", "Posterior mean", "Posterior mode"), +#' col = c("red", "black", "blue", "purple"), lty = 1) #' #' @export -seqB <- function(NT, mu, kappa=20) { - if (length(NT) < 2) - print("Warning: length of NT should be at least two.") - else { - if (min(NT) > 0) { - times <- 1:length(NT) - tau <- diff(times) - } - group <- FALSE - if (min(NT) == 0) { - times <- which(NT > 0) - NT <- NT[times] - tau <- diff(times) - group <- TRUE - } +seqB <- function(NT, mu, kappa = 20) { + if (length(NT) < 2) { + print("Warning: length of NT should be at least two.") + } else { + if (min(NT) > 0) { + times <- 1:length(NT) + tau <- diff(times) + } + group <- FALSE + if (min(NT) == 0) { + times <- which(NT > 0) + NT <- NT[times] + tau <- diff(times) + group <- TRUE + } - R <- seq(0, kappa, 0.01) - prior0 <- rep(1, kappa / 0.01 + 1) - prior0 <- prior0 / sum(prior0) - k <- length(NT) - 1 - R0.post <- matrix(0, nrow=k, ncol=length(R)) - prior <- prior0 - posterior <- seq(0, length(prior0)) - gamma <- 1 / mu + R <- seq(0, kappa, 0.01) + prior0 <- rep(1, kappa / 0.01 + 1) + prior0 <- prior0 / sum(prior0) + k <- length(NT) - 1 + R0.post <- matrix(0, nrow = k, ncol = length(R)) + prior <- prior0 + posterior <- seq(0, length(prior0)) + gamma <- 1 / mu - for (i in 1:k) { - mm1 <- NT[i] - mm2 <- NT[i+1] - lambda <- tau[i] * gamma * (R - 1) - lambda <- log(mm1) + lambda - loglik <- mm2 * lambda - exp(lambda) - maxll <- max(loglik) - const <- 0 + for (i in 1:k) { + mm1 <- NT[i] + mm2 <- NT[i + 1] + lambda <- tau[i] * gamma * (R - 1) + lambda <- log(mm1) + lambda + loglik <- mm2 * lambda - exp(lambda) + maxll <- max(loglik) + const <- 0 - if (maxll > 700) - const <- maxll - 700 + if (maxll > 700) + const <- maxll - 700 - loglik <- loglik-const - posterior <- exp(loglik) * prior - posterior <- posterior / sum(posterior) - prior <- posterior - } + loglik <- loglik - const + posterior <- exp(loglik) * prior + posterior <- posterior / sum(posterior) + prior <- posterior + } - Rhat <- sum(R * posterior) + Rhat <- sum(R * posterior) - return(list(Rhat=Rhat, posterior=list(supp=R, pmf=posterior), group=group)) - } + return(list(Rhat = Rhat, + posterior = list(supp = R, pmf = posterior), + group = group)) + } } -- cgit v1.2.3