# Constants specifying the experiment to be designed.
# They are passed to equality.function() / uniroot() further down in this file.

# Read length (same units as the genome lengths B.lo / B.hi).
L <- 200

# Expected number of species in the pool.
S <- 100

# Lower and upper bounds of the uniform distribution of genome lengths.
# Species genome lengths are taken as evenly spaced quantiles between them.
B.lo <- 50000
B.hi <- 350000

# Shape parameter for the Pareto distribution of species abundances.
kappa.A <- 3.5

# Controls whether the 'smallest' genomes or the 'largest' genomes are the
# most abundant. Only these two values are recognised by equality.function().
abund.direction <- 'smallest'

# Desired length of contig (in reads).
k.star <- 4
# Observance probability: the target value the objective is solved against.
p <- 0.95

# Search interval for the total number of reads handed to uniroot().
# (I use the fixed genome size and equal abundance design to help with this.)
R.min <- 0
R.max <- 300000

# Genome coverage above which a species is assumed to yield a contig with
# 100% probability (its term in the objective is set to 1).
coverage.lim <- 2



# R* objective function

# Objective whose root in R is the total number of reads required.
#
# Builds a deterministic pool of S species (genome lengths from uniform
# quantiles, relative abundances from Pareto quantiles), splits the R reads
# across species in proportion to abundance * genome length, evaluates a
# per-species term for obtaining a contig of at least k.star reads, and
# returns the product of those terms minus the target p.
#
# Arguments:
#   R             total number of reads (the quantity being solved for)
#   L             read length
#   S             number of species in the pool
#   B.lo, B.hi    bounds of the uniform distribution of genome lengths
#   kappa.A       shape parameter of the Pareto abundance distribution
#   k.star        desired contig length, in reads
#   coverage.lim  coverage above which a species' term is taken to be 1
#   p             target probability
#   direction     'smallest' or 'largest': which genomes are most abundant.
#                 Defaults to the variable abund.direction found in the
#                 enclosing environment, which is what this function
#                 previously read implicitly.
#
# Value: prod(per-species terms) - p; zero when the target is met exactly.
#
# Note: qpareto() is not base R; it must be available when this is called
# (this file attaches the actuar package before solving).
equality.function <- function(R,L,S,B.lo,B.hi,kappa.A,k.star,coverage.lim,p,
                              direction=abund.direction)
{
  # Fail early with a clear message; an unrecognised value previously left
  # A.0 undefined and surfaced as "object 'A.0' not found".
  if (!is.character(direction) || length(direction) != 1 ||
      !(direction %in% c('smallest','largest'))) {
    stop("abund.direction must be 'smallest' or 'largest'", call. = FALSE)
  }

  # Evenly spaced probability points; seq_len() keeps S = 0 from producing
  # the bogus sequence c(1, 0).
  prob.pts <- seq_len(S)/(S+1)

  # Genome lengths, increasing with species index.
  B <- floor(qunif(prob.pts,min=B.lo,max=B.hi))

  # Raw abundances from Pareto quantiles, shifted by 1. Reversing the
  # probability points gives the small genomes (low index) the upper
  # quantiles, i.e. makes them the most abundant.
  pareto.pts <- if (direction == 'smallest') 1 - prob.pts else prob.pts
  A.0 <- qpareto(pareto.pts,shape=kappa.A,scale=1) + 1
  A <- A.0 / sum(A.0)

  # Reads landing on each species: proportional to abundance * genome length.
  R.s <- R * (A*B / sum(A*B))

  coverage <- R.s*L/B

  alpha <- 2*B/L - 1
  beta <- 1 - (1-1/alpha)^R.s
  theta <- log((B/L - 1)*(1-beta)+1,base=1/beta)

  individual.term <- 1 - exp(-2*beta^(k.star - theta))

  # Above the coverage limit a contig is assumed certain. This also
  # overwrites the NaN that theta produces once beta rounds to exactly 1.
  individual.term[coverage > coverage.lim] <- 1

  prod(individual.term) - p
}



# Compute R.

library(actuar)

# Search [R.min, R.max] for the read count at which equality.function()
# crosses zero. uniroot() needs the objective to have opposite signs at the
# two interval ends; the remaining named arguments are forwarded to the
# objective unchanged.
slv <- uniroot(
	f = equality.function,
	interval = c(R.min, R.max),
	L = L,
	S = S,
	B.lo = B.lo,
	B.hi = B.hi,
	kappa.A = kappa.A,
	k.star = k.star,
	coverage.lim = coverage.lim,
	p = p
)

# Report the (possibly fractional) root found by the solver.
print(paste('Total Reads Required=',slv$root))
