Testing testing 123

author: hannajankowski <hkj@yorku.ca> 2022-06-09 17:57:31 -0400
committer: hannajankowski <hkj@yorku.ca> 2022-06-09 17:57:31 -0400
commit: 14ec98c9bfc49f963e30159dc7db9f554088fb44 (patch)
tree: ccdd49f5dd1cda78357bf7c9d0a7c80b25ca3130 /R/WP.R
1 files changed, 89 insertions, 0 deletions
diff --git a/R/WP.R b/R/WP.R
new file mode 100644
index 0000000..3096bc4
--- /dev/null
+++ b/R/WP.R
@@ -0,0 +1,89 @@
+#' WP method
+#'
+#' This function implements an R0 estimation due to White and Pagano (Statistics in Medicine, 2008).  The method is based on maximum likelihood estimation in a Poisson transmission model.  See details for important implementation notes.
+#'
+#' The model developed in White and Pagano (2008) requires knowledge of the serial distribution.   In their model, the serial distribution is assumed to be finite discrete.  In this implementation, if the serial distribution is assumed known, it is taken to be a discretized version of a gamma distribution with mean \code{mu}, shape parameter one, and largest possible value based on parameter \code{tol}.  When the serial distribution is unknown, the function implements a grid search algorithm to find the maximum likelihood estimator over all possible gamma distributions with unknown mean and variance, restricting these to a prespecified grid (see \code{search} parameter).  
+#'
+#' This method is based on a Poisson transmission model, which may be most most valid at the beginning of an epidemic.  When the serial distribution is taken to be \code{known}, sensitivity testing is recommended. If the serial distribution is \code{unknown}, the likelihood function can be flat near the maximum, resulting in numerical instability of the optimizer.  Users should be careful about units of time (e.g. are counts observed daily or weekly?) when implementing. 
+#'
+#' The model developed in White and Pagano (2008) is discrete, and hence the serial distribution is finite discrete.  Due to this implementation (inspired by that suggested in White and Pagano (2008)), the input value \code{mu} is that of a continuous distribution.   The algorithm when \code{method="known"} disretizes this input, and hence the mean of the serial distribution returned in the list \code{SD} will differ from \code{mu} somewhat. 
+#'
+#'
+#' @param NT Vector of case counts
+#' @param mu Mean of the serial distribution (needs to match case counts in time units; for example, if case counts are weekly and the serial distribution has a mean of seven days, then \code{mu} should be set to one).  The default value of \code{mu} is set to \code{NA}.
+#' @param  method  Variable taking one of two possible values: \code{known} or \code{unknown}. If "known", the serial distribution is assumed to be gamma with  rate 1/\code{mu} and shape equal to one, if "unknown" then the serial distribution is gamma with unknown parameters.   Defaults to "unknown" 
+#' @param search List of default values for the grid search algorithm; the list includes three elements: the first is \code{B} which is the length of the grid in one dimension, the second is \code{scale.max} which is the largest possible value of the scale parameter, and the third is \code{shape.max} which is the largest possible value of the shape parameter; defaults to \code{B=100, scale.max=10, shape.max=10}.  For both shape and scale, the smallest possible value is 1/\code{B}.  
+#' @param tol Cutoff value for cumulative distribution function of the pre-discretization gamma serial distribution, defaults to 0.999 (i.e. in the discretization, the maximum is chosen such that the original gamma distribution has cumulative probability of no more than 0.999 at this maximum).
+#'
+#' @return WP returns a list containing the following components:  \code{Rhat} is the estimate of R0, \code{SD} is either the discretized serial distribution (if \code{method="known"}) or the estimated discretized serial distribution (if \code{method="unknown"}), and \code{inputs} is a list of the original input variables \code{NT, mu, method, search, tol}.   The list also returns the variable \code{check}, which is equal to the number of non-unique maximum likelihood estimators.  The serial distribution \code{SD} is returned as a list made up of \code{supp} the support of the distribution and \code{pmf} the probability mass function.  
+#'
+#' @examples
+#' 
+#' ## ===================================================== ##
+#' ## Illustrate on weekly data                             ##
+#' ## ===================================================== ##
+#'
+#' NT <- c(1, 4, 10, 5, 3, 4, 19, 3, 3, 14, 4)	
+#' ## obtain Rhat when serial distribution has mean of five days
+#' res1 <- WP(NT=NT, mu=5/7, method="known")	
+#' res1$Rhat
+#' ## obtain Rhat when serial distribution has mean of three days
+#' res2	<- WP(NT=NT, mu=3/7, method="known")	
+#' res2$Rhat
+#' ## obtain Rhat when serial distribution is unknown
+#' res3	<- WP(NT=NT)	
+#' res3$Rhat
+#' ## find mean of estimated serial distribution
+#' serial	<-	res3$SD
+#' sum(serial$supp*serial$pmf)
+#' TODO - talk to Jane about this example - should we have tested SD in our simulations as well in the paper?
+#'
+#' ## ========================================================= ##
+#' ## Compute Rhat using only the first five weeks of data      ##
+#' ## ========================================================= ##
+#'
+#' 
+#' res4 <- WP(NT=NT[1:5], mu=5/7, method="known")	# serial distribution has mean of five days
+#' res4$Rhat
+#'
+#' 
+#' @export
+
+WP	<-	function(NT, mu="NA", method="unknown", search=list(B=100, shape.max=10, scale.max=10), tol=0.999){
+	
+	if(method=="unknown"){
+		
+		print("You have assumed that the serial distribution is unknown.")	
+		res				<-	WP_unknown(NT=NT, B=search$B, shape.max=search$shape.max, scale.max=search$scale.max, tol=tol)
+		Rhat			<-	res$Rhat
+		p				<-	res$p
+		range.max		<-	res$range.max
+		JJ				<-	res$JJ
+		
+		
+	}
+	
+	if(method=="known"){
+		
+		if(mu=="NA"){
+
+			res		<-	"NA"
+			print("For method=known, the mean of the serial distribution must be specified.")
+						
+		} else {
+			
+		print("You have assumed that the serial distribution is known.")	
+		
+		range.max	<-	ceiling(qexp(tol, rate=1/mu))
+		p			<-	diff(pexp(0:range.max, 1/mu))
+		p			<-	p/sum(p)
+		res			<-	WP_known(NT=NT, p=p)
+		Rhat		<-	res$Rhat
+		JJ			<-	NA
+		}
+		
+	}
+	
+return(list(Rhat=Rhat, check=length(JJ), SD=list(supp=1:range.max, pmf=p), inputs=list(NT=NT, mu=mu, method=method, search=search, tol=tol)))
+	
+}
+\ No newline at end of file
author	hannajankowski <hkj@yorku.ca>	2022-06-09 17:57:31 -0400
committer	hannajankowski <hkj@yorku.ca>	2022-06-09 17:57:31 -0400
commit	14ec98c9bfc49f963e30159dc7db9f554088fb44 (patch)
tree	ccdd49f5dd1cda78357bf7c9d0a7c80b25ca3130 /R/WP.R