# Constants specifying the experiment to be designed.

# Read length.
L <- 200

# Expected number of species in the pool.
S.lambda <- 100

# Lower and upper uniform bounds for genome lengths.
B.lo <- 50000
B.hi <- 350000

# Search interval for the shape parameter of the Pareto distribution of
# abundances; the shape is solved for within this interval so that the
# maximum abundance equals tgt.abundance.
kappa.A.lo <- 1
kappa.A.hi <- 10
tgt.abundance <- 0.025

# Which genomes are most abundant: 'smallest' or 'largest'.
abund.direction <- 'smallest'

# Desired length of contig (in reads).
k.star <- 4

# Observance probability.
p <- 0.95

# Range of total read counts searched when solving for R.
# (Previous experimental designs help with choosing this.)
R.min <- 0
R.max <- 200000

# Genome coverage above which the probability of obtaining a contig is
# assumed to be 100%.
coverage.lim <- 2


# Objective functions: kappa.solver (Pareto shape) and equality.function (R*)

# Objective function for solving the Pareto shape parameter kappa.A.
#
# Builds S unnormalised abundances from evenly spaced Pareto quantiles
# (shape = kappa.A, scale = 1, shifted by + 1), normalises them so they sum
# to 1, and returns the largest relative abundance minus the target.  A root
# of this function in kappa.A is therefore a shape for which the most
# abundant species has relative abundance tgt.abundance.
#
# Arguments:
#   kappa.A          Pareto shape parameter being solved for.
#   S                number of species in the pool; must be a single value >= 1.
#   abund.direction  'smallest' (quantile probabilities run from high to low,
#                    so abundances are in decreasing order) or 'largest'
#                    (abundances in increasing order).
#   tgt.abundance    target value for the maximum relative abundance.
#
# Returns: max(A) - tgt.abundance.
#
# Stops with an explicit error when S or abund.direction is invalid, instead
# of failing later with "object 'A.0' not found" or a NaN objective.
#
# Relies on qpareto() being available when called (this file attaches the
# actuar package before solving).
kappa.solver <- function(kappa.A,S,abund.direction,tgt.abundance)  {
  if (length(S) != 1 || is.na(S) || S < 1) {
    stop("S must be a single number >= 1", call. = FALSE)
  }
  if (!is.character(abund.direction) || length(abund.direction) != 1 ||
      !(abund.direction %in% c('smallest', 'largest'))) {
    stop("abund.direction must be 'smallest' or 'largest'", call. = FALSE)
  }

  # Evenly spaced probabilities strictly inside (0, 1).
  probs <- seq_len(S) / (S + 1)
  if (abund.direction == 'smallest') {
    # Reverse the ordering so the first species gets the largest quantile.
    probs <- 1 - probs
  }

  A.0 <- qpareto(probs, shape = kappa.A, scale = 1) + 1
  A <- A.0 / sum(A.0)

  max(A) - tgt.abundance
}

# Root function used to solve for the total number of reads R.
#
# For every pool size between the 0.0001 and 0.9999 quantiles of a Poisson
# distribution with mean S.lambda, the function:
#   1. places genome lengths on evenly spaced uniform quantiles in
#      [B.lo, B.hi] (floored),
#   2. solves kappa.solver for the Pareto shape on [kappa.A.lo, kappa.A.hi]
#      and builds normalised abundances ordered by abund.direction,
#   3. splits the R reads across species in proportion to abundance times
#      genome length,
#   4. evaluates a per-species term 1 - exp(-2 * beta^(k.star - theta)),
#      replaced by 1 wherever coverage exceeds coverage.lim,
#   5. multiplies the per-species terms together.
# The products are averaged with Poisson weights.
#
# Returns: the Poisson-weighted expectation minus p, so a root in R is the
# read count at which the expectation equals p.
#
# Relies on kappa.solver() and qpareto() being available when called.
equality.function <- function(R,L,S.lambda,B.lo,B.hi,kappa.A.lo,kappa.A.hi,tgt.abundance,abund.direction,k.star,coverage.lim,p)
{
  # Range of pool sizes carrying essentially all of the Poisson mass.
  pool.range <- qpois(c(0.0001, 0.9999), lambda = S.lambda)

  expectation <- 0
  for (n.species in pool.range[1]:pool.range[2]) {
    grid <- (1:n.species) / (n.species + 1)

    # Genome lengths on evenly spaced uniform quantiles.
    genome.len <- floor(qunif(grid, min = B.lo, max = B.hi))

    # Pareto shape for this pool size.
    shape <- uniroot(
      kappa.solver,
      c(kappa.A.lo, kappa.A.hi),
      S = n.species,
      abund.direction = abund.direction,
      tgt.abundance = tgt.abundance
    )$root

    if (abund.direction == 'smallest') {
      A.0 <- qpareto(1 - grid, shape = shape, scale = 1) + 1
    }
    if (abund.direction == 'largest') {
      A.0 <- qpareto(grid, shape = shape, scale = 1) + 1
    }
    abund <- A.0 / sum(A.0)

    # Reads allotted to each species, proportional to abundance * length.
    reads <- R * (abund * genome.len / sum(abund * genome.len))
    coverage <- reads * L / genome.len

    alpha <- 2 * genome.len / L - 1
    beta <- 1 - (1 - 1 / alpha)^reads
    theta <- log((genome.len / L - 1) * (1 - beta) + 1, base = 1 / beta)

    contig.term <- 1 - exp(-2 * beta^(k.star - theta))

    # Species covered beyond the limit are treated as certain.
    if (max(coverage) > coverage.lim) {
      contig.term[coverage > coverage.lim] <- 1
    }

    expectation <- expectation +
      prod(contig.term) * dpois(n.species, lambda = S.lambda)
  }

  expectation - p
}



# Compute R.

library(actuar)

# Solve equality.function for its root in R over [R.min, R.max]; every other
# argument is forwarded unchanged from the experiment constants.
slv <- uniroot(
  f = equality.function,
  interval = c(R.min, R.max),
  L = L,
  S.lambda = S.lambda,
  B.lo = B.lo,
  B.hi = B.hi,
  kappa.A.lo = kappa.A.lo,
  kappa.A.hi = kappa.A.hi,
  tgt.abundance = tgt.abundance,
  abund.direction = abund.direction,
  k.star = k.star,
  coverage.lim = coverage.lim,
  p = p
)

# Report the root, i.e. the total number of reads required.
print(paste('Total Reads Required=', slv$root))
