# these parameters define the grid of read counts used to plot both sides of the equation

# Grid of total read counts. R.min and R.max also serve as the bracketing
# interval handed to uniroot() further down in this script.
R.min <- 0       # smallest total read count on the grid
R.max <- 200000  # largest total read count on the grid
R.int <- 1       # spacing between neighbouring grid points
R <- seq(from = R.min, to = R.max, by = R.int)  # every read count evaluated



# these parameters specify the experiment to be designed

# read length
L <- 200

# number of known species in the pool
S <- 100
# length of genomes
B <- 200000

# desired length of contig, counted in reads
k.star <- 4
# observance probability
p <- 0.95



# plot curves

# Evaluate both sides of the equation at every read count on the grid R.
# Reads per species: the total read count divided by the number of species.
R.s <- R / S

# Intermediate terms of the left-hand side; each is a vector along R.s
# (alpha is a scalar because it depends only on B and L).
alpha <- 2 * B / L - 1
beta <- 1 - (1 - 1 / alpha)^R.s
theta <- log((B / L - 1) * (1 - beta) + 1, base = 1 / beta)

# The right-hand side does not depend on R, so it is repeated to the length
# of R and drawn as a flat line.
rhs <- rep(-log(1 - p^(1 / S)) / 2, times = length(R))
lhs <- beta^(k.star - theta)

# Left-hand side in red, right-hand side in green; the curves cross where
# the two sides are equal.
plot(x = R, y = lhs, type = "l", col = "red", xlab = "Total reads", ylab = "")
lines(x = R, y = rhs, col = "green")



# explicit solution

# Signed gap between the two sides of the equation, in a form suitable as the
# objective for a root finder: the result is zero where the sides are equal.
#
# Arguments:
#   R       total read count(s); the arithmetic is elementwise, so a vector
#           of read counts yields a vector of gaps
#   L       read length
#   S       number of known species in the pool
#   B       length of genomes
#   k.star  desired length of contig, in reads
#   p       observance probability
#
# Returns:
#   rhs - lhs, where rhs = -log(1 - p^(1/S)) / 2 and
#   lhs = beta^(k.star - theta), with beta and theta as computed below.
equality.function <- function(R, L, S, B, k.star, p) {
  reads.per.species <- R / S

  # Intermediate terms of the left-hand side.
  alpha <- 2 * B / L - 1
  beta <- 1 - (1 - 1 / alpha)^reads.per.species
  theta <- log((B / L - 1) * (1 - beta) + 1, base = 1 / beta)

  # Right-hand side: depends only on p and S, not on the read count.
  target <- -log(1 - p^(1 / S)) / 2

  target - beta^(k.star - theta)
}

# Search [R.min, R.max] for the total read count at which equality.function
# returns zero. The experiment parameters are forwarded to it by name through
# uniroot()'s `...`.
slv <- uniroot(
  f = equality.function,
  interval = c(R.min, R.max),
  L = L, S = S, B = B, k.star = k.star, p = p
)

# Report the root and the same value divided across the S species.
print(paste("Total Reads Required=", slv$root))
print(paste("Reads Allocated per Species=", slv$root / S))
