library(actuar)


# ---- Simulation parameters ----
# Everything in this section configures the assembly that is simulated below.

# Number of species.
S <- 100

# Evenly spaced probability levels i / (S + 1), i = 1..S, shared by the
# genome-length and abundance quantile calculations.
species.probs <- seq_len(S) / (S + 1)

# Genome lengths: quantiles of a uniform distribution on [B.lo, B.hi],
# rounded down to whole numbers.
B.lo <- 50000
B.hi <- 350000
B <- floor(qunif(species.probs, min = B.lo, max = B.hi))

# Abundances: Pareto quantiles (shape kappa.A, scale 1) taken at the
# complementary levels and shifted up by 1, then normalised to sum to 1.
kappa.A <- 3.5
A.0 <- 1 + qpareto(1 - species.probs, shape = kappa.A, scale = 1)
A <- A.0 / sum(A.0)

# Number of reads.
R <- 67109

# Read length.
L <- 200

# Number of simulation iterations.
its <- 100

# Abundance-weighted genome lengths; their total is B.S and their normalised
# values pr.R are the weights used to allocate reads to species.
weighted.length <- A * B
B.S <- sum(weighted.length)
pr.R <- weighted.length / B.S

# One result slot per iteration, filled in by the simulation loop.
min.max.contig <- numeric(its)

# Longest contig formed by one species' reads, in units of read length.
#
# positions:     1-based start positions of the reads on the genome
# genome.length: number of bases in the genome
# read.length:   bases per read; a read that would run past the end of the
#                genome is truncated at the last base
#
# Returns the longest run of covered bases divided by read.length, or 0 when
# no base is covered (for example when the species received no reads).
longest.contig <- function(positions, genome.length, read.length)  {
	# Difference array: +1 where a read starts, -1 one past where it ends.
	# The extra slot at genome.length + 1 absorbs the ends of truncated reads.
	slots <- genome.length + 1
	read.ends <- pmin(positions + read.length, slots)
	delta <- tabulate(positions, nbins = slots) - tabulate(read.ends, nbins = slots)

	# Running sum of the differences is the per-base coverage depth.
	coverage <- cumsum(delta)[seq_len(genome.length)]

	runs <- rle(coverage > 0)
	covered <- runs$lengths[runs$values]
	if (length(covered) == 0)  {
		# max() of an empty vector would give -Inf with a warning.
		return(0)
		}
	max(covered) / read.length
	}

# Each iteration allocates the R reads to species, places them on the genomes,
# and records the smallest of the per-species longest contigs.
for (it in seq_len(its))  {
	# Draw a species for every read with weights pr.R, then count the reads
	# per species once instead of rescanning the assignment for each species.
	assignment <- sample.int(S, R, replace = TRUE, prob = pr.R)
	reads.per.species <- tabulate(assignment, nbins = S)

	max.contig <- numeric(S)
	for (s in seq_len(S))  {
		# Read start positions are uniform over the whole genome.
		position.s <- sample.int(B[s], reads.per.species[s], replace = TRUE)
		max.contig[s] <- longest.contig(position.s, B[s], L)
		}
	min.max.contig[it] <- min(max.contig)

	print(paste('Iteration',it,min.max.contig[it]))
	}



# ---- Summary report ----
# Echo the inputs of the run, then the spread of the per-iteration results.

# Print a label line followed by the values it describes.
show.labelled <- function(label, values)  {
	print(label)
	print(values)
	invisible(NULL)
	}

print('')
print(paste('S=', S))
show.labelled('B=', B)
show.labelled('A=', A)
print(paste('R=', R))
show.labelled('pr.R', pr.R)

# Quantiles of min.max.contig in 5% steps from 0% to 100%.
report.probs <- seq(0, 1, by = 0.05)
print(quantile(min.max.contig, probs = report.probs))
