From 9c1a5668803e735f034700c55028ffc0146f1e93 Mon Sep 17 00:00:00 2001 From: Naeem Model Date: Wed, 21 Jun 2023 05:38:42 +0000 Subject: Fix code formatting and remove unnecessary comments --- R/ID.R | 47 ++++++++++--------- R/IDEA.R | 69 ++++++++++++++-------------- R/WP.R | 118 +++++++++++++++++++++++++++-------------------- R/WP_known.R | 30 ++++++------ R/WP_unknown.R | 71 +++++++++++++++------------- R/computeLL.R | 31 ++++++------- R/seqB.R | 143 ++++++++++++++++++++++++++++----------------------------- 7 files changed, 263 insertions(+), 246 deletions(-) diff --git a/R/ID.R b/R/ID.R index 679b442..7b0a1b9 100644 --- a/R/ID.R +++ b/R/ID.R @@ -1,49 +1,54 @@ #' ID method #' -#' This function implements a least squares estimation method of R0 due to Fisman et al. (PloS One, 2013). See details for implementation notes. +#' This function implements a least squares estimation method of R0 due to Fisman et al. (PloS One, 2013). +#' See details for implementation notes. #' -#'The method is based on a straightforward incidence decay model. The estimate of R0 is the value which minimizes the sum of squares between observed case counts and cases counts 'expected' under the model. +#' The method is based on a straightforward incidence decay model. The estimate of R0 is the value which +#' minimizes the sum of squares between observed case counts and cases counts 'expected' under the model. #' -#' This method is based on an approximation of the SIR model, which is most valid at the beginning of an epidemic. The method assumes that the mean of the serial distribution (sometimes called the serial interval) is known. The final estimate can be quite sensitive to this value, so sensitivity testing is strongly recommended. Users should be careful about units of time (e.g. are counts observed daily or weekly?) when implementing. +#' This method is based on an approximation of the SIR model, which is most valid at the beginning of an epidemic. +#' The method assumes that the mean of the serial distribution (sometimes called the serial interval) is known. +#' The final estimate can be quite sensitive to this value, so sensitivity testing is strongly recommended. +#' Users should be careful about units of time (e.g. are counts observed daily or weekly?) when implementing. #' #' @param NT Vector of case counts -#' @param mu Mean of the serial distribution (needs to match case counts in time units; for example, if case counts are weekly and the serial distribution has a mean of seven days, then \code{mu} should be set to one, if case counts are daily and the serial distribution has a mean of seven days, then \code{mu} should be set to seven). +#' @param mu Mean of the serial distribution (needs to match case counts in time units; for example, if case counts are +#' weekly and the serial distribution has a mean of seven days, then \code{mu} should be set to one, if case +#' counts are daily and the serial distribution has a mean of seven days, then \code{mu} should be set to seven) #' -#' @return \code{ID} returns a list containing the following components: \code{Rhat} is the estimate of R0 and \code{inputs} is a list of the original input variables \code{NT, mu}. +#' @return \code{ID} returns a list containing the following components: \code{Rhat} is the estimate of R0 and +#' \code{inputs} is a list of the original input variables \code{NT, mu}. #' #' @examples -#' +#' #' ## ===================================================== ## #' ## Illustrate on weekly data ## #' ## ===================================================== ## #' -#' NT <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4) +#' NT <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4) #' ## obtain Rhat when serial distribution has mean of five days -#' res1 <- ID(NT=NT, mu=5/7) +#' res1 <- ID(NT=NT, mu=5/7) #' res1$Rhat #' ## obtain Rhat when serial distribution has mean of three days -#' res2 <- ID(NT=NT, mu=3/7) +#' res2 <- ID(NT=NT, mu=3/7) #' res2$Rhat #' #' ## ========================================================= ## #' ## Compute Rhat using only the first five weeks of data ## #' ## ========================================================= ## #' -#' +#' #' res3 <- ID(NT=NT[1:5], mu=5/7) # serial distribution has mean of five days #' res3$Rhat -#' @export #' +#' @export +ID <- function(NT, mu) { + NT <- as.numeric(NT) + TT <- length(NT) + s <- (1:TT) / mu + y <- log(NT) / s + R0_ID <- exp(sum(y) / TT) -ID <- function(NT, mu){ - - NT <- as.numeric(NT) - TT <- length(NT) - s <- (1:TT)/mu - y <- log(NT)/s - - R0_ID <- exp(sum(y)/TT) - - return(list=c(Rhat=R0_ID, inputs=list(NT=NT, mu=mu))) + return(list=c(Rhat=R0_ID, inputs=list(NT=NT, mu=mu))) } diff --git a/R/IDEA.R b/R/IDEA.R index fdc83dc..7668540 100644 --- a/R/IDEA.R +++ b/R/IDEA.R @@ -1,63 +1,64 @@ #' IDEA method #' -#' This function implements a least squares estimation method of R0 due to Fisman et al. (PloS One, 2013). See details for implementation notes. +#' This function implements a least squares estimation method of R0 due to Fisman et al. (PloS One, 2013). +#' See details for implementation notes. #' -#'This method is closely related to that implemented in \code{ID}. The method is based on an incidence decay model. The estimate of R0 is the value which minimizes the sum of squares between observed case counts and cases counts 'expected' under the model. +#' This method is closely related to that implemented in \code{ID}. The method is based on an incidence decay model. +#' The estimate of R0 is the value which minimizes the sum of squares between observed case counts and cases counts +#' expected under the model. #' -#' This method is based on an approximation of the SIR model, which is most valid at the beginning of an epidemic. The method assumes that the mean of the serial distribution (sometimes called the serial interval) is known. The final estimate can be quite sensitive to this value, so sensitivity testing is strongly recommended. Users should be careful about units of time (e.g. are counts observed daily or weekly?) when implementing. +#' This method is based on an approximation of the SIR model, which is most valid at the beginning of an epidemic. +#' The method assumes that the mean of the serial distribution (sometimes called the serial interval) is known. +#' The final estimate can be quite sensitive to this value, so sensitivity testing is strongly recommended. +#' Users should be careful about units of time (e.g. are counts observed daily or weekly?) when implementing. #' #' @param NT Vector of case counts -#' @param mu Mean of the serial distribution (needs to match case counts in time units; for example, if case counts are weekly and the serial distribution has a mean of seven days, then \code{mu} should be set to one, if case counts are daily and the serial distribution has a mean of seven days, then \code{mu} should be set to seven) +#' @param mu Mean of the serial distribution (needs to match case counts in time units; for example, if case counts are +#' weekly and the serial distribution has a mean of seven days, then \code{mu} should be set to one, if case +#' counts are daily and the serial distribution has a mean of seven days, then \code{mu} should be set to seven) #' -#' @return \code{IDEA} returns a list containing the following components: \code{Rhat} is the estimate of R0 and \code{inputs} is a list of the original input variables \code{NT, mu}. +#' @return \code{IDEA} returns a list containing the following components: \code{Rhat} is the estimate of R0 and +#' \code{inputs} is a list of the original input variables \code{NT, mu}. #' #' @examples -#' +#' #' ## ===================================================== ## #' ## Illustrate on weekly data ## #' ## ===================================================== ## #' -#' NT <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4) +#' NT <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4) #' ## obtain Rhat when serial distribution has mean of five days -#' res1 <- IDEA(NT=NT, mu=5/7) +#' res1 <- IDEA(NT=NT, mu=5/7) #' res1$Rhat #' ## obtain Rhat when serial distribution has mean of three days -#' res2 <- IDEA(NT=NT, mu=3/7) +#' res2 <- IDEA(NT=NT, mu=3/7) #' res2$Rhat #' #' ## ========================================================= ## #' ## Compute Rhat using only the first five weeks of data ## #' ## ========================================================= ## #' -#' +#' #' res3 <- IDEA(NT=NT[1:5], mu=5/7) # serial distribution has mean of five days #' res3$Rhat -#' @export #' +#' @export +IDEA <- function(NT, mu) { + if (length(NT) < 2) + print("Warning: length of NT should be at least two.") + else { + NT <- as.numeric(NT) + TT <- length(NT) + s <- (1:TT) / mu + y1 <- log(NT) / s + y2 <- s^2 + y3 <- log(NT) -IDEA <- function(NT, mu){ - - if(length(NT)<2) { - print("Warning: length of NT should be at least two.") - } - else{ - NT <- as.numeric(NT) - TT <- length(NT) - s <- (1:TT)/mu - - y1 <- log(NT)/s - y2 <- s^2 - y3 <- log(NT) -# IDEA1 <- cumsum(y2)*cumsum(y1)-cumsum(s)*cumsum(y3) -# IDEA2 <- (1:TT)*cumsum(y2)-(cumsum(s))^2 -# IDEA <- exp(IDEA1/IDEA2) -# Rhat <- tail(IDEA,1) - IDEA1 <- sum(y2)*sum(y1)-sum(s)*sum(y3) - IDEA2 <- TT*sum(y2)-(sum(s))^2 - IDEA <- exp(IDEA1/IDEA2) - + IDEA1 <- sum(y2) * sum(y1) - sum(s) * sum(y3) + IDEA2 <- TT * sum(y2) - sum(s)^2 + IDEA <- exp(IDEA1 / IDEA2) - return(list(Rhat=IDEA, inputs=list(NT=NT, mu=mu))) - } + return(list(Rhat=IDEA, inputs=list(NT=NT, mu=mu))) + } } diff --git a/R/WP.R b/R/WP.R index d65972e..9e44bf9 100644 --- a/R/WP.R +++ b/R/WP.R @@ -1,24 +1,57 @@ #' WP method #' -#' This function implements an R0 estimation due to White and Pagano (Statistics in Medicine, 2008). The method is based on maximum likelihood estimation in a Poisson transmission model. See details for important implementation notes. +#' This function implements an R0 estimation due to White and Pagano (Statistics in Medicine, 2008). +#' The method is based on maximum likelihood estimation in a Poisson transmission model. +#' See details for important implementation notes. #' -#' This method is based on a Poisson transmission model, and hence may be most most valid at the beginning of an epidemic. In their model, the serial distribution is assumed to be discrete with a finite number of posible values. In this implementation, if the serial distribution is assumed known, it is taken to be a discretized version of a gamma distribution with mean \code{mu}, shape parameter one, and largest possible value based on parameter \code{tol}. When the serial distribution is unknown, the function implements a grid search algorithm to find the maximum likelihood estimator over all possible gamma distributions with unknown mean and variance, restricting these to a prespecified grid (see \code{search} parameter). +#' This method is based on a Poisson transmission model, and hence may be most most valid at the beginning +#' of an epidemic. In their model, the serial distribution is assumed to be discrete with a finite number +#' of posible values. In this implementation, if the serial distribution is assumed known, it is taken to +#' be a discretized version of a gamma distribution with mean \code{mu}, shape parameter one, and largest +#' possible value based on parameter \code{tol}. When the serial distribution is unknown, the function +#' implements a grid search algorithm to find the maximum likelihood estimator over all possible gamma +#' distributions with unknown mean and variance, restricting these to a prespecified grid (see +#' \code{search} parameter). #' -#' When the serial distribution is taken to be \code{known}, sensitivity testing of the parameter \code{mu} is strongly recommended. If the serial distribution is \code{unknown}, the likelihood function can be flat near the maximum, resulting in numerical instability of the optimizer. When the serial distribution is \code{unkown} the implementation takes considerably longer to run. Users should be careful about units of time (e.g. are counts observed daily or weekly?) when implementing. -#' -#' The model developed in White and Pagano (2008) is discrete, and hence the serial distribution is finite discrete. In our implementation, the input value \code{mu} is that of a continuous distribution. The algorithm when \code{method="known"} disretizes this input, and hence the mean of the serial distribution returned in the list \code{SD} will differ from \code{mu} somewhat. That is to say, if the user notices that the input \code{mu} and out put mean of \code{SD} are different, this is to be expected, and is caused by the discretization. +#' When the serial distribution is taken to be \code{known}, sensitivity testing of the parameter \code{mu} +#' is strongly recommended. If the serial distribution is \code{unknown}, the likelihood function can be +#' flat near the maximum, resulting in numerical instability of the optimizer. When the serial distribution +#' is \code{unkown} the implementation takes considerably longer to run. Users should be careful about units +#' of time (e.g. are counts observed daily or weekly?) when implementing. #' +#' The model developed in White and Pagano (2008) is discrete, and hence the serial distribution is finite +#' discrete. In our implementation, the input value \code{mu} is that of a continuous distribution. The +#' algorithm when \code{method="known"} disretizes this input, and hence the mean of the serial distribution +#' returned in the list \code{SD} will differ from \code{mu} somewhat. That is to say, if the user notices that +#' the input \code{mu} and out put mean of \code{SD} are different, this is to be expected, and is caused by +#' the discretization. #' #' @param NT Vector of case counts -#' @param mu Mean of the serial distribution (needs to match case counts in time units; for example, if case counts are weekly and the serial distribution has a mean of seven days, then \code{mu} should be set to one). The default value of \code{mu} is set to \code{NA}. -#' @param method Variable taking one of two possible values: \code{known} or \code{unknown}. If "known", the serial distribution is assumed to be gamma with rate 1/\code{mu} and shape equal to one, if "unknown" then the serial distribution is gamma with unknown parameters. Defaults to "unknown" -#' @param search List of default values for the grid search algorithm; the list includes three elements: the first is \code{B} which is the length of the grid in one dimension, the second is \code{scale.max} which is the largest possible value of the scale parameter, and the third is \code{shape.max} which is the largest possible value of the shape parameter; defaults to \code{B=100, scale.max=10, shape.max=10}. For both shape and scale, the smallest possible value is 1/\code{B}. -#' @param tol Cutoff value for cumulative distribution function of the pre-discretization gamma serial distribution, defaults to 0.999 (i.e. in the discretization, the maximum is chosen such that the original gamma distribution has cumulative probability of no more than 0.999 at this maximum). +#' @param mu Mean of the serial distribution (needs to match case counts in time units; for example, if case +#' counts are weekly and the serial distribution has a mean of seven days, then \code{mu} should be +#' set to one). The default value of \code{mu} is set to \code{NA}. +#' @param method Variable taking one of two possible values: \code{known} or \code{unknown}. If "known", the +#' serial distribution is assumed to be gamma with rate 1/\code{mu} and shape equal to one, if +#' "unknown" then the serial distribution is gamma with unknown parameters. Defaults to "unknown" +#' @param search List of default values for the grid search algorithm; the list includes three elements: the +#' first is \code{B} which is the length of the grid in one dimension, the second is +#' \code{scale.max} which is the largest possible value of the scale parameter, and the third is +#' \code{shape.max} which is the largest possible value of the shape parameter; defaults to +#' \code{B=100, scale.max=10, shape.max=10}. For both shape and scale, the smallest possible +#' value is 1/\code{B}. +#' @param tol Cutoff value for cumulative distribution function of the pre-discretization gamma serial +#' distribution, defaults to 0.999 (i.e. in the discretization, the maximum is chosen such that the +#' original gamma distribution has cumulative probability of no more than 0.999 at this maximum). #' -#' @return WP returns a list containing the following components: \code{Rhat} is the estimate of R0, \code{SD} is either the discretized serial distribution (if \code{method="known"}) or the estimated discretized serial distribution (if \code{method="unknown"}), and \code{inputs} is a list of the original input variables \code{NT, mu, method, search, tol}. The list also returns the variable \code{check}, which is equal to the number of non-unique maximum likelihood estimators. The serial distribution \code{SD} is returned as a list made up of \code{supp} the support of the distribution and \code{pmf} the probability mass function. +#' @return WP returns a list containing the following components: \code{Rhat} is the estimate of R0, \code{SD} +#' is either the discretized serial distribution (if \code{method="known"}) or the estimated +#' discretized serial distribution (if \code{method="unknown"}), and \code{inputs} is a list of the +#' original input variables \code{NT, mu, method, search, tol}. The list also returns the variable +#' \code{check}, which is equal to the number of non-unique maximum likelihood estimators. The serial +#' distribution \code{SD} is returned as a list made up of \code{supp} the support of the distribution +#' and \code{pmf} the probability mass function. #' #' @examples -#' #' ## ===================================================== ## #' ## Illustrate on weekly data ## #' ## ===================================================== ## @@ -37,54 +70,39 @@ #' ## find mean of estimated serial distribution #' serial <- res3$SD #' sum(serial$supp*serial$pmf) -#' TODO - talk to Jane about this example - should we have tested SD in our simulations as well in the paper? #' #' ## ========================================================= ## #' ## Compute Rhat using only the first five weeks of data ## #' ## ========================================================= ## -#' #' #' res4 <- WP(NT=NT[1:5], mu=5/7, method="known") # serial distribution has mean of five days #' res4$Rhat #' -#' #' @export +WP <- function(NT, mu="NA", method="unknown", search=list(B=100, shape.max=10, scale.max=10), tol=0.999) { + if (method == "unknown") { + print("You have assumed that the serial distribution is unknown.") + res <- WP_unknown(NT=NT, B=search$B, shape.max=search$shape.max, scale.max=search$scale.max, tol=tol) + Rhat <- res$Rhat + p <- res$p + range.max <- res$range.max + JJ <- res$JJ + } -WP <- function(NT, mu="NA", method="unknown", search=list(B=100, shape.max=10, scale.max=10), tol=0.999){ - - if(method=="unknown"){ - - print("You have assumed that the serial distribution is unknown.") - res <- WP_unknown(NT=NT, B=search$B, shape.max=search$shape.max, scale.max=search$scale.max, tol=tol) - Rhat <- res$Rhat - p <- res$p - range.max <- res$range.max - JJ <- res$JJ - - - } - - if(method=="known"){ - - if(mu=="NA"){ + if (method == "known") { + if (mu=="NA") { + res <- "NA" + print("For method=known, the mean of the serial distribution must be specified.") + } else { + print("You have assumed that the serial distribution is known.") + range.max <- ceiling(qexp(tol, rate=1/mu)) + p <- diff(pexp(0:range.max, 1/mu)) + p <- p / sum(p) + res <- WP_known(NT=NT, p=p) + Rhat <- res$Rhat + JJ <- NA + } + } - res <- "NA" - print("For method=known, the mean of the serial distribution must be specified.") - - } else { - - print("You have assumed that the serial distribution is known.") - - range.max <- ceiling(qexp(tol, rate=1/mu)) - p <- diff(pexp(0:range.max, 1/mu)) - p <- p/sum(p) - res <- WP_known(NT=NT, p=p) - Rhat <- res$Rhat - JJ <- NA - } - - } - -return(list(Rhat=Rhat, check=length(JJ), SD=list(supp=1:range.max, pmf=p), inputs=list(NT=NT, mu=mu, method=method, search=search, tol=tol))) - + return(list(Rhat=Rhat, check=length(JJ), SD=list(supp=1:range.max, pmf=p), inputs=list(NT=NT, mu=mu, method=method, search=search, tol=tol))) } diff --git a/R/WP_known.R b/R/WP_known.R index 40f1ded..6b0e2ea 100644 --- a/R/WP_known.R +++ b/R/WP_known.R @@ -1,27 +1,23 @@ #' WP method background function WP_known #' -#' This is a background/internal function called by \code{WP}. It computes the maximum likelihood estimator of R0 assuming that the serial distribution is known and finite discrete. +#' This is a background/internal function called by \code{WP}. It computes the maximum +#' likelihood estimator of R0 assuming that the serial distribution is known and finite discrete. #' #' @param NT vector of case counts #' @param p discretized version of the serial distribution #' @return The function returns \code{Rhat}, the maximum likelihood estimator of R0. +#' #' @export -# +WP_known <- function(NT, p) { + k <- length(p) + TT <- length(NT) - 1 + mu_t <- rep(0, TT) + + for (i in 1:TT) { + Nt <- NT[i:max(1, i-k+1)] + mu_t[i] <- sum(p[1:min(k, i)] * Nt) + } -WP_known <- function(NT, p){ - - k <- length(p) - TT <- length(NT)-1 - mu_t <- rep(0, TT) - for(i in 1:TT){ - Nt <- NT[i:max(1,i-k+1)] -# print(Nt) -# print(p[1:min(k,i)]) - mu_t[i] <- sum(p[1:min(k,i)]*Nt) - } - Rhat <- sum(NT[-1])/sum(mu_t) + Rhat <- sum(NT[-1]) / sum(mu_t) return(list(Rhat=Rhat)) - } - - diff --git a/R/WP_unknown.R b/R/WP_unknown.R index a450b54..c5b0a35 100644 --- a/R/WP_unknown.R +++ b/R/WP_unknown.R @@ -1,47 +1,52 @@ #' WP method background function WP_unknown #' -#' This is a background/internal function called by \code{WP}. It computes the maximum likelihood estimator of R0 assuming that the serial distribution is unknown but comes from a discretized gamma distribution. The function then implements a simple grid search algorithm to obtain the maximum likelihood estimator of R0 as well as the gamma parameters. +#' This is a background/internal function called by \code{WP}. It computes the maximum likelihood estimator +#' of R0 assuming that the serial distribution is unknown but comes from a discretized gamma distribution. +#' The function then implements a simple grid search algorithm to obtain the maximum likelihood estimator +#' of R0 as well as the gamma parameters. #' #' @param NT vector of case counts #' @param B length of grid for shape and scale (grid search parameter) #' @param shape.max maximum shape value (grid search parameter) #' @param scale.max maximum scale value (grid search parameter) #' @param tol cutoff value for cumulative distribution function of the serial distribution, defaults to 0.999 -#' @return The function returns \code{Rhat}, the maximum likelihood estimator of R0, as well as the maximum likelihood estimator of the discretized serial distribution given by \code{p} (the probability mass function) and \code{range.max} (the distribution has support on the integers one to \code{range.max}). The function also returns \code{resLL} (all values of the log-likelihood) at \code{shape} (grid for shape parameter) and at \code{scale} (grid for scale parameter), as well as \code{resR0} (the full vector of maximum likelihood estimators), \code{JJ} (the locations for the likelihood for these), and \code{J0} (the location for the maximum likelihood estimator \code{Rhat}). If \code{JJ} and \code{J0} are not the same, this means that the maximum likelihood estimator is not unique. +#' +#' @return The function returns \code{Rhat}, the maximum likelihood estimator of R0, as well as the maximum +#' likelihood estimator of the discretized serial distribution given by \code{p} (the probability mass +#' function) and \code{range.max} (the distribution has support on the integers one to \code{range.max}). +#' The function also returns \code{resLL} (all values of the log-likelihood) at \code{shape} (grid for +#' shape parameter) and at \code{scale} (grid for scale parameter), as well as \code{resR0} (the full +#' vector of maximum likelihood estimators), \code{JJ} (the locations for the likelihood for these), and +#' \code{J0} (the location for the maximum likelihood estimator \code{Rhat}). If \code{JJ} and \code{J0} +#' are not the same, this means that the maximum likelihood estimator is not unique. #' #' @export +WP_unknown <- function(NT, B=100, shape.max=10, scale.max=10, tol=0.999) { + shape <- seq(0, shape.max, length.out=B+1) + scale <- seq(0, scale.max, length.out=B+1) + shape <- shape[-1] + scale <- scale[-1] -WP_unknown <- function(NT, B=100, shape.max=10, scale.max=10, tol=0.999){ - - shape <- seq(0, shape.max, length.out=B+1) - scale <- seq(0, scale.max, length.out=B+1) - shape <- shape[-1] - scale <- scale[-1] - - resLL <- matrix(0,B,B) - resR0 <- matrix(0,B,B) - - for(i in 1:B){ - for(j in 1:B){ - range.max <- ceiling(qgamma(tol, shape=shape[i], scale=scale[j])) - p <- diff(pgamma(0:range.max, shape=shape[i], scale=scale[j])) - p <- p/sum(p) - mle <- WP_known(NT, p) - resLL[i,j] <- computeLL(p, NT, mle$R) - resR0[i,j] <- mle$R - } -# print(i) - } + resLL <- matrix(0,B,B) + resR0 <- matrix(0,B,B) + + for (i in 1:B) { + for (j in 1:B) { + range.max <- ceiling(qgamma(tol, shape=shape[i], scale=scale[j])) + p <- diff(pgamma(0:range.max, shape=shape[i], scale=scale[j])) + p <- p / sum(p) + mle <- WP_known(NT, p) + resLL[i,j] <- computeLL(p, NT, mle$R) + resR0[i,j] <- mle$R + } + } - J0 <- which.max(resLL) - R0hat <- resR0[J0] - JJ <- which(resLL==resLL[J0], arr.ind=TRUE) -# JJ <- which(resLL==max(resLL), arr.ind=TRUE) - range.max <- ceiling(qgamma(tol, shape=shape[JJ[1]], scale=scale[JJ[2]])) - p <- diff(pgamma(0:range.max, shape=shape[JJ[1]], scale=scale[JJ[2]])) - p <- p/sum(p) + J0 <- which.max(resLL) + R0hat <- resR0[J0] + JJ <- which(resLL == resLL[J0], arr.ind=TRUE) + range.max <- ceiling(qgamma(tol, shape=shape[JJ[1]], scale=scale[JJ[2]])) + p <- diff(pgamma(0:range.max, shape=shape[JJ[1]], scale=scale[JJ[2]])) + p <- p / sum(p) - return(list(Rhat=R0hat, J0=J0, ll=resLL, Rs=resR0, scale=scale, shape=shape, JJ=JJ, p=p, range.max=range.max)) + return(list(Rhat=R0hat, J0=J0, ll=resLL, Rs=resR0, scale=scale, shape=shape, JJ=JJ, p=p, range.max=range.max)) } - - diff --git a/R/computeLL.R b/R/computeLL.R index 0730399..6acfc78 100644 --- a/R/computeLL.R +++ b/R/computeLL.R @@ -3,28 +3,23 @@ #' This is a background/internal function called by \code{WP}. It computes the log-likelihood. #' #' @param NT vector of case counts -#' @param p discretized version of the serial distribution +#' @param p discretized version of the serial distribution #' @param R0 basic reproductive ratio #' @return The function returns the variable \code{LL} which is the log-likelihood at the input variables and parameters. - +#' #' @export -# -computeLL <- function(p, NT, R0){ +computeLL <- function(p, NT, R0) { + k <- length(p) + TT <- length(NT) - 1 + mu_t <- rep(0, TT) + + for (i in 1:TT) { + Nt <- NT[i:max(1, i-k+1)] + mu_t[i] <- sum(p[1:min(k, i)] * Nt) + } - k <- length(p) - TT <- length(NT)-1 - mu_t <- rep(0, TT) - for(i in 1:TT){ - Nt <- NT[i:max(1,i-k+1)] -# print(Nt) -# print(p[1:min(k,i)]) - mu_t[i] <- sum(p[1:min(k,i)]*Nt) - } - mu_t <- R0*mu_t - LL <- -sum(mu_t)+sum(NT[-1]*log(mu_t)) + mu_t <- R0 * mu_t + LL <- sum(NT[-1] * log(mu_t)) - sum(mu_t) return(LL) - } - - diff --git a/R/seqB.R b/R/seqB.R index 2223f08..0f8a1b9 100644 --- a/R/seqB.R +++ b/R/seqB.R @@ -1,22 +1,39 @@ #' seqB method #' -#' This function implements a sequential Bayesian estimation method of R0 due to Bettencourt and Riberio (PloS One, 2008). See details for important implementation notes. +#' This function implements a sequential Bayesian estimation method of R0 due to Bettencourt and Riberio (PloS One, 2008). +#' See details for important implementation notes. #' -#'The method sets a uniform prior distribution on R0 with possible values between zero and \code{kappa}, discretized to a fine grid. The distribution of R0 is then updated sequentially, with one update for each new case count observation. The final estimate of R0 is \code{Rhat}, the mean of the (last) posterior distribution. -#' The prior distribution is the initial belief of the distribution of R0; which in this implementation is the uninformative uniform distribution with values between zero and kappa. Users can change the value of kappa only (ie. the prior distribution cannot be changed from the uniform). As more case counts are observed, the influence of the prior distribution should lessen on the final estimate \code{Rhat}. +#' The method sets a uniform prior distribution on R0 with possible values between zero and \code{kappa}, discretized to a fine grid. +#' The distribution of R0 is then updated sequentially, with one update for each new case count observation. +#' The final estimate of R0 is \code{Rhat}, the mean of the (last) posterior distribution. +#' The prior distribution is the initial belief of the distribution of R0; which in this implementation is the uninformative uniform +#' distribution with values between zero and \code{kappa}. Users can change the value of kappa only (ie. the prior distribution +#' cannot be changed from the uniform). As more case counts are observed, the influence of the prior distribution should lessen on +#' the final estimate \code{Rhat}. #' -#' This method is based on an approximation of the SIR model, which is most valid at the beginning of an epidemic. The method assumes that the mean of the serial distribution (sometimes called the serial interval) is known. The final estimate can be quite sensitive to this value, so sensitivity testing is strongly recommended. Users should be careful about units of time (e.g. are counts observed daily or weekly?) when implementing. +#' This method is based on an approximation of the SIR model, which is most valid at the beginning of an epidemic. The method assumes +#' that the mean of the serial distribution (sometimes called the serial interval) is known. The final estimate can be quite sensitive +#' to this value, so sensitivity testing is strongly recommended. Users should be careful about units of time (e.g. are counts observed +#' daily or weekly?) when implementing. #' -#' Our code has been modified to provide an estimate even if case counts equal to zero are present in some time intervals. This is done by grouping the counts over such periods of time. Without grouping, and in the presence of zero counts, no estimate can be provided. +#' Our code has been modified to provide an estimate even if case counts equal to zero are present in some time intervals. This is done +#' by grouping the counts over such periods of time. Without grouping, and in the presence of zero counts, no estimate can be provided. #' #' @param NT Vector of case counts -#' @param mu Mean of the serial distribution (needs to match case counts in time units; for example, if case counts are weekly and the serial distribution has a mean of seven days, then mu should be set to 1=7/7, if case counts are dailty and the serial distribution has a mean of seven days, then mu should be set to seven). -#' @param kappa Largest possible value of uniform prior, defaults to 20. This describes the prior belief on ranges of R0, so should be set to a higher value if R0 is believed to be larger. +#' @param mu Mean of the serial distribution (needs to match case counts in time units; for example, if case counts are +#' weekly and the serial distribution has a mean of seven days, then \code{mu} should be set to one, if case +#' counts are daily and the serial distribution has a mean of seven days, then \code{mu} should be set to seven) +#' @param kappa Largest possible value of uniform prior, defaults to 20. This describes the prior belief on ranges of R0, +#' so should be set to a higher value if R0 is believed to be larger. #' -#' @return secB returns a list containing the following components: \code{Rhat} is the estimate of R0 (the posterior mean), \code{posterior} is the posterior distribution of R0 from which alternate estimates can be obtained (see examples), \code{group} is an indicator variable (if \code{group=TRUE}, zero values of NT were input and grouping was done to obtain \code{Rhat}), and \code{inputs} is a list of the original input variables \code{NT, gamma, kappa}. The variable \code{posterior} is returned as a list made up of \code{supp} the support of the distribution and \code{pmf} the probability mass function. +#' @return secB returns a list containing the following components: \code{Rhat} is the estimate of R0 (the posterior mean), +#' \code{posterior} is the posterior distribution of R0 from which alternate estimates can be obtained (see examples), +#' \code{group} is an indicator variable (if \code{group=TRUE}, zero values of NT were input and grouping was done to +#' obtain \code{Rhat}), and \code{inputs} is a list of the original input variables \code{NT, gamma, kappa}. The variable +#' \code{posterior} is returned as a list made up of \code{supp} the support of the distribution and \code{pmf} the +#' probability mass function. #' #' @examples -#' #' ## ===================================================== ## #' ## Illustrate on weekly data ## #' ## ===================================================== ## @@ -57,73 +74,53 @@ #' #' res3 <- seqB(NT=NT[1:5], mu=5/7) # serial distribution has mean of five days #' res3$Rhat +#' #' @export +seqB <- function(NT, mu, kappa=20) { + if (length(NT) < 2) + print("Warning: length of NT should be at least two.") + else { + if (min(NT) > 0) { + times <- 1:length(NT) + tau <- diff(times) + } + group <- FALSE + if (min(NT) == 0) { + times <- which(NT > 0) + NT <- NT[times] + tau <- diff(times) + group <- TRUE + } -seqB <- function(NT, mu, kappa=20){ - - gamma <- 1/mu - - if(length(NT)<2) { - print("Warning: length of NT should be at least two.") - } - else{ - if(min(NT)>0){ - times <- 1:length(NT) - tau <- diff(times) - } - - group <- FALSE - - if(min(NT)==0){ - times <- which(NT>0) - NT <- NT[times] - tau <- diff(times) - group <- TRUE - } - - R <- seq(0, kappa, 0.01) - prior0 <- rep(1,kappa/0.01+1) - prior0 <- prior0/sum(prior0) - - k <- length(NT)-1 - R0.post <- matrix(0, nrow=k, ncol=length(R)) - - prior <- prior0 - posterior <- seq(0, length(prior0)) - - for(i in 1:k){ - - mm1 <- NT[i] - mm2 <- NT[i+1] - lambda <- tau[i]*gamma*(R-1) - lambda <- log(mm1)+lambda - loglik <- mm2*lambda-exp(lambda) - -# numerical issues solve??? + R <- seq(0, kappa, 0.01) + prior0 <- rep(1, kappa / 0.01 + 1) + prior0 <- prior0 / sum(prior0) + k <- length(NT) - 1 + R0.post <- matrix(0, nrow=k, ncol=length(R)) + prior <- prior0 + posterior <- seq(0, length(prior0)) + gamma <- 1 / mu -# if((mm1==0)*(mm2==0)==0){ - maxll <- max(loglik) - const <- 0 - if(maxll>700){ - const <- maxll-700 - } - loglik <- loglik-const -# } - -# end numerical issues solve??? - - posterior <- exp(loglik)*prior - posterior <- posterior/sum(posterior) + for (i in 1:k) { + mm1 <- NT[i] + mm2 <- NT[i+1] + lambda <- tau[i] * gamma * (R - 1) + lambda <- log(mm1) + lambda + loglik <- mm2 * lambda - exp(lambda) + maxll <- max(loglik) + const <- 0 - - prior <- posterior - - } - - Rhat <- sum(R*posterior) - -return(list(Rhat=Rhat, posterior=list(supp=R, pmf=posterior), group=group, inputs=list(NT=NT, mu=mu, kappa=kappa))) -} -} + if (maxll > 700) + const <- maxll - 700 + loglik <- loglik-const + posterior <- exp(loglik) * prior + posterior <- posterior / sum(posterior) + prior <- posterior + } + Rhat <- sum(R * posterior) + + return(list(Rhat=Rhat, posterior=list(supp=R, pmf=posterior), group=group, inputs=list(NT=NT, mu=mu, kappa=kappa))) + } +} -- cgit v1.2.3