#!/bin/awk -f
# Quik Change primer design program by JL
# The user supplies the DNA sequence as input and edits the settings in the BEGIN block by hand
# Things to do
#  Read in the codon table
#  Read in the user input sequence
#  Translate the sequence
#  Find the mutation site and the closest codon to mutate
#  Design the primer based on user input and calculating the Tm

BEGIN {
# User settings here - Based on Stratagene QC protocol
#  Each *_min / *_max pair is the accepted window for that property.
#  Each *_opt is the preferred value inside the window; put 0 to turn off
#  the optimum value in the scoring scheme (any in-window value then earns
#  full points).
#  Each *_score is the number of points that criterion can contribute.

#  Mutation site (1-based residue number), and residue to change to
Res_num = 228
Mut_aa = "C"

#  Melting temp max and min
Tm_min = 75
Tm_max = 85
Tm_opt = 75
Tm_score = 10

#  Primer length
len_min = 25
len_max = 45
len_opt = 0
len_score = 10

#  % GC content
GC_min = 40 
GC_max = 50
GC_opt = 0
GC_score = 10

#  Flanking length (nucleotides in front of / behind the mutated codon)
Front_min = 10
Front_max = 15
Front_opt = 0
Behind_min = 10
Behind_max = 15
Behind_opt = 0
Flank_score = 5

#  End in GC 0 = NO, 1 = YES
GC_End = 1
End_score = 10

# Read in the codon table from codon.txt: field 1 of every line is used as
# the key and field 2 as the value of the codon[] array
 while ((codon_status = (getline < "codon.txt")) > 0) {codon[$1] = $2}
 # getline returns -1 when the file cannot be opened or read; report it
 # instead of silently continuing with an empty or partial table
 if (codon_status < 0) {
  print "Warning: could not read codon.txt - the codon table may be empty or incomplete" > "/dev/stderr"
 }
 close("codon.txt")
 DNA_seq = ""
}

# Functions used in the program are written here
#  Translate the DNA sequence for mutation analysis
function abs(num) {
 # Absolute value: negative input is negated, anything else is returned as is
 return (num < 0) ? -num : num
}

# Translate a DNA sequence into a protein string, codon by codon
#  seq     : nucleotide sequence, read in frame from position 1;
#            an incomplete trailing codon is ignored
#  returns : concatenation of codon[<triplet>] for every complete triplet.
#            A triplet that is not a key of codon[] becomes "X" so that the
#            residue numbering stays aligned with the DNA positions.
#  The names after the extra spaces (pos, triplet, prot) are local variables
#  (awk idiom), not arguments; callers keep passing one argument.
function translate(seq,    pos, triplet, prot) {
 prot = ""
 for (pos = 1; pos <= (length(seq) - 2); pos += 3) {
  triplet = substr(seq, pos, 3)
  # "in" tests membership without creating an empty entry in codon[]
  if (triplet in codon) {prot = prot codon[triplet]}
  else {prot = prot "X"}
 }
 return prot
}

# Display the DNA sequence with the protein sequence aligned underneath
#  DNA : nucleotide sequence
#  AA  : its translation, one character per residue
#  Output is written in rows of 20 residues (60 nucleotides) split into four
#  columns of 15 characters. Every residue is padded to three characters so
#  it lines up with its codon; the residue at Res_num is bracketed as
#  "->X<-" (the "->" is attached to the residue before it).
#  The names after the extra spaces are local variables (awk idiom), not
#  arguments, so the caller's loop counters are left untouched.
function print_seq(DNA, AA,    n_res, res, first_nt, col, marker, format_aa) {
 n_res = length(AA)

 # Format aa seq to match DNA seq (<= so the last residue is included)
 format_aa = ""
 for (res = 1; res <= n_res; res++) {
  if (res == Res_num) {marker = "<-"}
  else if (res == Res_num - 1) {marker = "->"}
  else {marker = "  "}
  format_aa = format_aa substr(AA, res, 1) marker
 }

 # One pass per row; <= so a final row holding a single residue is printed
 for (res = 1; res <= n_res; res += 20) {
  first_nt = res * 3 - 2
  printf ("%5i - %s ", first_nt, substr(DNA, first_nt, 15))
  for (col = 1; col <= 3; col++) {printf ("%s ", substr(DNA, first_nt + col * 15, 15))}
  printf ("- %i\n", res * 3 + 57)
  printf ("%5i - %s ", res, substr(format_aa, first_nt, 15))
  for (col = 1; col <= 3; col++) {printf ("%s ", substr(format_aa, first_nt + col * 15, 15))}
  printf ("- %i\n\n", res + 19)
 }
}

# Print the program banner, the active design settings, the formatted
# sequence (via print_seq) and a one-line description of the mutation
#  DNA     : nucleotide sequence
#  AA      : translated sequence
#  DNA_num : 1-based position of the first nucleotide of the target codon
function tell_mut_info(DNA, AA, DNA_num) {
 # Banner
 print "QC Mutagenesis primer design program by JL\n------------------------------------------"

 # Accepted windows taken from the user settings
 print " Melting temperature = " Tm_min " - " Tm_max
 print " Primer length = " len_min " - " len_max
 print " % GC content = " GC_min " - " GC_max
 print " Flanking nucleotides\n  Front : " Front_min " - " Front_max
 print "  Behind : " Behind_min " - " Behind_max
 print " End in GC = " ((GC_End == 0) ? "FALSE" : "TRUE")

 # Sequence listing framed by blank lines
 print ""
 print_seq(DNA, AA)
 print ""

 # Original residue and codon, followed by the requested replacement
 print "Mutation made : " substr(AA, Res_num, 1) " " Res_num " (" substr(DNA, DNA_num, 3) " " DNA_num ") -> " Mut_aa
}

#  Build a primer around the target codon
#   templ   : template DNA sequence
#   mutcod  : replacement codon placed at the mutation site
#   frontl  : number of template nucleotides taken in front of the codon
#   behindl : number of template nucleotides taken behind the codon
#   Reads the global DNA_num (position of the codon's first nucleotide).
function design_primer(templ, mutcod, frontl, behindl) {
 # upstream flank
 newprimer = substr(templ, DNA_num - frontl, frontl)
 # mutated codon
 newprimer = newprimer mutcod
 # downstream flank starts right after the original codon
 newprimer = newprimer substr(templ, DNA_num + 3, behindl)
 return newprimer
}

#  Calculates % of GC content
#   seq     : nucleotide sequence (only upper-case "G" and "C" are counted)
#   returns : 100 * (number of G or C) / length(seq), or 0 for an empty
#             sequence so callers never hit a division by zero
#   The names after the extra spaces are local variables (awk idiom), not
#   arguments, so the caller's loop counters are left untouched.
function GC_calc(seq,    total, gc_count) {
 total = length(seq)
 if (total == 0) {return 0}
 # gsub returns the number of replacements; seq is a by-value copy here,
 # so the caller's string is not modified
 gc_count = gsub(/[GC]/, "", seq)
 return gc_count / total * 100
}

#  Calculates Tm of primer
#   seq     : primer sequence
#   mismat  : number of mismatched bases in the primer
#   returns : 81.5 + 0.41 * %GC - 675 / N - %mismatch, where N is the primer
#             length and %mismatch is mismat / N * 100.
#             An empty primer yields 0 instead of a division by zero.
#   n is a local variable (awk idiom), not an argument.
function calctm(seq, mismat,    n) {
 n = length(seq)
 if (n == 0) {return 0}
 return 81.5 + (0.41 * GC_calc(seq)) - (675 / n) - (mismat / n * 100)
}

#  Prints primer statistics for output
#   primer : primer sequence to report on
#   mismat : number of mismatched bases, passed on to calctm()
#   Reports length, Tm and %GC, each checked against the configured
#   min/max window, and (when GC_End is 1) whether the last base is G or C.
#   The names after the extra spaces are local variables (awk idiom), not
#   arguments.
function print_stat(primer, mismat,    plen, tm_val, gc_val, last_nt) {
 print "Primer designed : " primer
 print "  Mismatches : " mismat

 plen = length(primer)
 if (plen == 0) {
  # Tm and %GC both divide by the primer length, so there is nothing
  # meaningful to report for an empty primer
  print "  No primer to evaluate"
  return
 }

 # Compute each property once and reuse it for printing and checking
 tm_val = calctm(primer, mismat)
 gc_val = GC_calc(primer)
 last_nt = substr(primer, plen, 1)

 printf ("  Length : %i      ", plen)
 if ((plen >= len_min) && (plen <= len_max)) {printf ("Length OK!\n")}
 else {printf ("Length NOT OK!\n")}

 printf ("  Tm : %.2f    ", tm_val)
 if ((tm_val >= Tm_min) && (tm_val <= Tm_max)) {printf ("Melting temp OK!\n")}
 else {printf ("Melting temp NOT OK!\n")}

 printf ("  GC : %i    ", gc_val)
 if ((gc_val >= GC_min) && (gc_val <= GC_max)) {printf ("GC content OK!\n")}
 else {printf ("GC content NOT OK!\n")}

 # The 3' end check is only reported when the user asked for a GC end
 if (GC_End == 1) {
  if ((last_nt == "G") || (last_nt == "C")) {print "  Ends in GC!"}
  else {print "  NOT Ends in GC!"}
 }
}

#  Scoring scheme used to find optimal primer
#   value : measured property
#   opt   : preferred value, or 0 to disable the optimum
#   max   : upper bound of the accepted window
#   min   : lower bound of the accepted window
#   pt    : points available for this criterion
#   Outside [min, max] the result is 0. Inside the window the result is pt
#   when opt is 0 or value equals opt; otherwise it falls off linearly from
#   pt at opt towards 0 at the window edge on the same side as value.
function score(value, opt, max, min, pt) {
 pt_earned = 0
 # Guard clause: nothing earned outside the accepted window
 if (!((value >= min) && (value <= max))) {return pt_earned}

 if (opt == 0) {pt_earned = pt}
 else if (value > opt) {pt_earned = (1 - (value - opt)/(max - opt)) * pt}
 else if (value < opt) {pt_earned = (1 - (opt - value)/(opt - min)) * pt}
 else if (value == opt) {pt_earned = pt}
 return pt_earned
}

# Read in the DNA sequence
#  For every input record whose first field does not start with ">"
#  (e.g. a FASTA-style header line), append the first field to DNA_seq
$1 !~ /^>/ {
 DNA_seq = DNA_seq $1
}


# Main program here
#  Runs after all input has been read. Steps:
#   1. translate the sequence and print the settings / mutation summary
#   2. collect every codon that encodes Mut_aa and pick the one with the
#      fewest mismatches against the original codon
#   3. build a starting primer and grow / shrink it symmetrically to meet
#      the Tm window, then extend the 3' side until it ends in G or C
#   4. scan front / behind flank lengths, score every candidate and report
#      the three best ones
#  Loop counters that stay live across calls to the helper functions are
#  deliberately not named i or j: awk variables are global unless declared
#  as function parameters, so short common names are easy to clobber.
END {
 DNA_num = Res_num * 3 - 2

 # Translate DNA sequence
 AA_seq = translate(DNA_seq)

 # Stop early when the codon to mutate is not completely inside the sequence
 if ((Res_num < 1) || (DNA_num + 2 > length(DNA_seq))) {
  print ("Error: residue " Res_num " is outside the input sequence (" length(DNA_seq) " nt read)") > "/dev/stderr"
  exit 1
 }

 # Tell which amino acid will be mutated  
 tell_mut_info(DNA_seq, AA_seq, DNA_num)

 #  Scan through codon database to find the available codons
 #  (for-in order is unspecified in awk, so the listing order may vary)
 codon_found = 0
 for (i in codon) {
  if (codon[i] == Mut_aa) {
    codon_found++
    codon_match[codon_found] = i
  }
 }
 if (codon_found == 0) {
  print ("Error: no codon for \"" Mut_aa "\" found in the codon table") > "/dev/stderr"
  exit 1
 }

 #  Match the codon and try to find the mismatch number, start, and end
 for (i = 1; i <= codon_found; i++) {
  mismatches[i] = 0
  mismatch_begin[i] = 99999
  mismatch_end[i] = 0
  for (j = 0; j <= 2; j++) {
   if (substr(DNA_seq, DNA_num + j, 1) != substr(codon_match[i], j + 1, 1)) {
    mismatches[i]++
    if (mismatch_begin[i] > (DNA_num + j)) {mismatch_begin[i] = DNA_num + j}
    else {mismatch_end[i] = DNA_num + j}
   }
  }
  if (mismatches[i] == 1) {mismatch_end[i] = mismatch_begin[i]}
 } 

 #  Choose the codon based on minimum number of mismatches
 #  (a codon has 3 bases, so 4 is larger than any possible count)
 lowest_mismatch = 4
 print "Available codons for mutation to " Mut_aa
 for (i = 1; i <= codon_found; i++) {
  if (mismatches[i] < lowest_mismatch) {
   lowest_mismatch = mismatches[i]
   codon_chosen = i
  }
  print "   " codon_match[i] " Mismatches : " mismatches[i]
 }
 print ""
 print "Codon selected"
 print " ->" codon_match[codon_chosen] " Mismatches : " mismatches[codon_chosen]

 new_codon = codon_match[codon_chosen]
 mm = mismatches[codon_chosen]

 #  Template nucleotides actually available on each side of the codon
 avail_front = DNA_num - 1
 avail_behind = length(DNA_seq) - (DNA_num + 2)

 #  Make a primer based on a minimum criteria given
 extras = 0
 primer = design_primer(DNA_seq, new_codon, Front_min, Behind_min)

 #  Satisfy minimum Tm first by adding nucleotides front and end.
 #  The loop also stops once both flanks have used up the template,
 #  otherwise a template that can never reach Tm_min would loop forever.
 if (calctm(primer, mm) < Tm_min) {
  do {
   extras++
   primer = design_primer(DNA_seq, new_codon, Front_min + extras, Behind_min + extras)
  } while ((calctm(primer, mm) < Tm_min) &&
           ((Front_min + extras < avail_front) || (Behind_min + extras < avail_behind)))
  print ""
  print "Primer satisfying minimum Tm"
  print_stat(primer, mm)
 } 

 #  Satisfy maximum Tm by removing nucleotides front and end if too long.
 #  The primer always keeps the 3-base codon, and such a short primer has a
 #  Tm far below any sensible Tm_max, so this loop terminates on its own.
 if (calctm(primer, mm) > Tm_max) { 
  do {
   extras--
   primer = design_primer(DNA_seq, new_codon, Front_min + extras, Behind_min + extras)
  } while (calctm(primer, mm) > Tm_max)
  print ""
  print "Primer satisfying maximum Tm"
  print_stat(primer, mm)
 }

 #  Fix the GC ends if needed: extend the 3' side one base at a time until
 #  the primer ends in G or C, or the template runs out
 if ((GC_End == 1) && (primer !~ /[GC]$/)) {
  end_extras = 0
  do {
   end_extras++
   primer = design_primer(DNA_seq, new_codon, Front_min + extras, Behind_min + extras + end_extras)
  } while ((primer !~ /[GC]$/) && (Behind_min + extras + end_extras < avail_behind))
  print ""
  print "Primer might satisfy Tm and ends in GC"
  print_stat(primer, mm)
 }

 #  Minimum length primer that satisfies most criterias as possible
 #  Scanning through variations that gives highest score.
 #  top_score / top_diff / top_primer hold the three best candidates in rank
 #  order; an empty primer marks an unused slot. A candidate outranks a slot
 #  when it scores higher, or scores the same with more balanced flanks
 #  (smaller |front - back|). Candidates scoring 0 are never kept.
 for (rank = 1; rank <= 3; rank++) {
  top_score[rank] = 0
  top_diff[rank] = 0
  top_primer[rank] = ""
 }

 #  Do not ask for more flank than the template holds: such requests would
 #  only repeat the same clipped primer under a different flank score
 front_limit = Front_max + 20
 if (front_limit > avail_front) {front_limit = avail_front}
 back_limit = Behind_max + 20
 if (back_limit > avail_behind) {back_limit = avail_behind}

 for (front = 1; front <= front_limit; front++) {
  for (back = 1; back <= back_limit; back++) {
   cand = design_primer(DNA_seq, new_codon, front, back)
   cur_sco = 0
   cur_sco += score(length(cand), len_opt, len_max, len_min, len_score)
   cur_sco += score(calctm(cand, mm), Tm_opt, Tm_max, Tm_min, Tm_score)
   cur_sco += score(GC_calc(cand), GC_opt, GC_max, GC_min, GC_score)
   cur_sco += score(front, Front_opt, Front_max, Front_min, Flank_score)
   cur_sco += score(back, Behind_opt, Behind_max, Behind_min, Flank_score)
   if ((GC_End == 1) && (cand ~ /[GC]$/)) {cur_sco += End_score}

   #  Insert the candidate at the first rank it beats, shifting the rest down
   cur_diff = abs(front - back)
   for (rank = 1; rank <= 3; rank++) {
    if ((cur_sco > top_score[rank]) || ((cur_sco == top_score[rank]) && (cur_diff < top_diff[rank]))) {
     for (slot = 3; slot > rank; slot--) {
      top_score[slot] = top_score[slot - 1]
      top_diff[slot] = top_diff[slot - 1]
      top_primer[slot] = top_primer[slot - 1]
     }
     top_score[rank] = cur_sco
     top_diff[rank] = cur_diff
     top_primer[rank] = cand
     break
    }
   }
  }
 }

 #  Maximum attainable score; the GC-end bonus only counts when it is enabled
 totpt = Tm_score + GC_score + len_score + Flank_score * 2
 if (GC_End == 1) {totpt += End_score}

 title[1] = "Primer that satisfy most criterias"
 title[2] = "Second primer"
 title[3] = "Third primer"
 score_tag[1] = " Score : "
 score_tag[2] = " Score : "
 score_tag[3] = " score : "
 for (rank = 1; rank <= 3; rank++) {
  print ""
  print title[rank]
  print score_tag[rank] top_score[rank] " / " totpt
  #  An unused slot has no primer, so there are no statistics to print
  if (top_primer[rank] != "") {print_stat(top_primer[rank], mm)}
  else {print " No primer found"}
 }
}
