{melting.pas - AnnHyb
 	Copyright (C) 1997-2005 Olivier Friard
        (annhyb @ gmail.com)
 
  This file is part of AnnHyb.

Copyright (C) 1997-2012 Olivier Friard

AnnHyb is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

AnnHyb is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with AnnHyb; see the file COPYING.TXT.  If not, write to
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
}
unit u_biotools;

interface

uses graphics,classes;
type TFeature=record
              f_init,f_end:integer;
              color:Tcolor;
              name:shortstring;
              end;

function strtofloat2(s:shortstring):extended;
function floattostr2(e:extended):shortstring;
function xytopos(x,y:integer):integer;
function featuresExtr(s:string):Tfeature;
function featuresToSl(s:string):tstringlist;
function wordextr(s:string;c:char;b:integer):string;
function rev_(c:char):char;
function revcompseq(s:shortstring):shortstring;
function compseq(s:shortstring):shortstring;
function group(seq:shortstring):shortstring;
function format_iupac(s:string;flaggroup:boolean):string;
function melting(s,comp_primer:shortstring;saltcc,primercc:double;bibTm:shortstring):shortstring;
function clearseq(s:string):string;
function gcg_checksum(seq:string):string;
function zfill(s:shortstring;i:integer):shortstring;
function gcp(s:string):double;
function ndegen(s:string):int64;
function long_seq_melting(salt,gc,fa:double;seq_len:integer):shortstring;

const validbase=['A','C','G','T','U','Y','R','W','S','K','M','B','D','H','V','N'];
      max_degen:int64=16384;
      base:shortstring='ACGTX';
      iupac:shortstring='ACMGRSVTWYHKDBN';
      CRLF:shortstring=#13#10;
      LF:char=#10;
      color_rsf:array[0..15] of Tcolor=(clBlack,clMaroon,clgreen,clolive,clnavy,clpurple,clteal,clgray,clSilver,clred,cllime,clyellow,clblue,clfuchsia,claqua,clwhite);
      month_:array[1..12] of string[20]=('January','February','March','April','May','June','July','August','September','October','November','December');
      TmBreslauer1986='Breslauer, 1986';
      TmAllawi1997='Allawi, 1997';
      cg1:array[1..21] of string[25]=('GCT GCC GCA GCG','TGT TGC','GAT GAC','GAA GAG','TTT TTC',
                                     'GGT GGC GGA GGG','CAT CAC','ATT ATC ATA','AAA AAG',
                                     'TTG TTA CTT CTC CTA CTG','ATG','AAT AAC','CCT CCC CCA CCG',
                                     'CAA CAG','CGT CGC CGA CGG AGA AGG','TCT TCC TCA TCG AGT AGC',
                                     'ACT ACC ACA ACG','GTT GTC GTA GTG','TGG','TAT TAC','TAA TAG TGA');
      cg2:array[1..21] of string[3]=('Ala','Cys','Asp','Glu','Phe',
                                     'Gly','His','Ile','Lys',
                                     'Leu','Met','Asn','Pro',
                                     'Gln','Arg','Ser',
                                     'Thr','Val','Trp','Tyr',' * ');
      cg3:array[1..21] of string[1]=('A','C','D','E',
                                     'F','G','H','I',
                                     'K','L','M','N',
                                     'P','Q','R',
                                     'S','T','V','W','Y','*');


implementation
uses math,sysutils;

function strtofloat2(s:shortstring):extended;
begin
decimalseparator:='.';
strtofloat2:=strtofloat(s);
end;

function floattostr2(e:extended):shortstring;
//var s:shortstring;
begin
decimalseparator:='.';
//str(e:0:32,s);
floattostr2:=floattostr(e);
end;

function long_seq_melting(salt,gc,fa:double;seq_len:integer):shortstring;
begin
//formula from Howley, 1979
long_seq_melting:=floattostr2(roundto(81.5+16.6*ln(salt)/ln(10)+0.41*gc-0.62*fa-500/seq_len,-1))
end;

function xytopos(x,y:integer):integer;
var i:integer;
begin
i:=x;
if i>10 then
   begin
   i:=i-10;
   if x>=22 then
      dec(i);
   if x>=33 then
      dec(i);
   if x>=44 then
      dec(i);
   if x>=55 then
      dec(i);
   if x>64 then
      begin
      inc(y);
      i:=1;
      end;
   end
else
   i:=1;
result:=(y-1)*50+i;
end;

function featuresExtr(s:string):Tfeature;
var fe:Tfeature;
begin
fe.f_init:=strtoint(wordextr(s,' ',2));
fe.f_end:=strtoint(wordextr(s,' ',3));
fe.color:=strtoint(wordextr(s,' ',4));
fe.name:=trim(copy(s,pos('square solid',s)+12,65535));
if pos('///',fe.name)<>0 then
   fe.name:=trim(copy(fe.name,1,pos('///',fe.name)-1));
featuresExtr:=fe;
end;

function featuresToSl(s:string):tstringlist;
var sl:tstringlist;
begin
sl:=tstringlist.Create;
sl.Text:=s;
(*
while pos(LF,s)<>0 do
      begin
      sl.Add(copy(s,1,pos(LF,s)-1));
      delete(s,1,pos(LF,s));
      end;

result:=sl;
*)
result:=sl;
//sl.Free;
end;

function wordextr(s:string;c:char;b:integer):string;
var i,compt:integer;
    s2:string;
begin
//remove repetitions
s:=trim(s);
while pos(c+c,s)<>0 do
   delete(s,pos(c+c,s),1);
compt:=0;
for i:=1 to length(s) do
    if s[i]=c then
       inc(compt);
inc(compt);
if compt<b then
   result:=''
else
   begin
   for i:=1 to b do
       begin
       s2:=copy(s+' ',1,pos(c,s+' ')-1);
       delete(s,1,pos(c,s));
       end;
   result:=s2;
   end;
end; //wordextr

function rev_(c:char):char;
var crev:char;
begin
case c of 'a':crev:='t';                'A':crev:='T';
          'c':crev:='g';                'C':crev:='G';
          'g':crev:='c';                'G':crev:='C';
          't':crev:='a';                'T':crev:='A';
          'b':crev:='v';                'B':crev:='V';
          'd':crev:='h';                'D':crev:='H';
          'h':crev:='d';                'H':crev:='D';
          'k':crev:='m';                'K':crev:='M';
          'm':crev:='k';                'M':crev:='K';
          'n':crev:='n';                'N':crev:='N';
          'r':crev:='y';                'R':crev:='Y';
          's':crev:='s';                'S':crev:='S';
          'u':crev:='a';                'U':crev:='A';
          'v':crev:='b';                'V':crev:='B';
          'w':crev:='w';                'W':crev:='W';
          'y':crev:='r';                'Y':crev:='R';
          ' ':crev:=' ';
          else
          crev:='?'
          end;
rev_:=crev;
end;

function revcompseq(s:shortstring):shortstring;
var revs:shortstring;
    t:byte;
begin
s:=format_iupac(s,false);
revs:='';
for t:=1 to length(s) do
    revs:=rev_(s[t])+revs;
result:=revs;
end;

function compseq(s:shortstring):shortstring;
var comps:shortstring;
    t:byte;
begin
s:=uppercase(stringreplace(s,' ','',[rfReplaceAll]));
comps:='';
t:=1;
while t<=length(s) do
    begin
    if (pos(s[t],iupac)<>0) or (s[t]='I') then
        if (s[t+1]='/') then
           begin
           comps:=comps+s[t+2];
           inc(t,2);
           end
        else if s[t]='I' then
           comps:=comps+'N'
        else   
           comps:=comps+rev_(s[t]);
    inc(t);
    end;
result:=comps;
end;

function group(seq:shortstring):shortstring;
var t:byte;
    s:shortstring;
begin
s:='';
for t:=1 to length(seq) do
    begin
    s:=s+seq[t];
    if t mod 3=0 then
       s:=s+' ';
    end;
group:=s;
end;

function format_iupac(s:string;flaggroup:boolean):string;
var t,t1,v:integer;
    s_iupac:string;
begin
s:=ansiuppercase(s);
s_iupac:='';
//degenerated bases
for t:=1 to length(s) do
    if s[t]='(' then  //init denerate position
        begin
        t1:=t;
        v:=0;
        repeat
	          inc(t1);
        	  case s[t1] of 'A':v:=v or 1;
                          'C':v:=v or 2;
                          'G':v:=v or 4;
                          'T':v:=v or 8;
		                      end;
        until (s[t1]=')') or (t1>=length(s));
        if s[t1]=')' then
            begin
            s[t]:=iupac[v];
            repeat
                delete(s,t+1,1);
                if s[t+1]=')' then
                    break;
            until false;
            delete(s,t+1,1);
            end;
        end;
s:=stringreplace(s,' ','',[rfReplaceAll]);
for t:=1 to length(s) do
    if (pos(s[t],iupac)<>0) or (s[t]='I') then
        if (s[t-1]<>'/') then
           s_iupac:=s_iupac+s[t];

if flaggroup then
    s_iupac:=group(s_iupac);
format_iupac:=s_iupac;
end;

function melting(s,comp_primer:shortstring;saltcc,primercc:double;bibTm:shortstring):shortstring;
const bp:array[1..4] of char=('A','C','G','T');

      //parameters from Breslauer, 1986 (http://www.pubmedcentral.gov/articlerender.fcgi?tool=pubmed&pubmedid=3459152)
      //delta H (kcal/mol)
      dHo_Breslauer1986:array[1..4,1..4] of double=((9.1,5.8,5.6,6.0),(6.5,11.0,11.1,5.6),
                                  (7.8,11.9,11.0,5.8),(8.6,7.8,6.5,9.1));
      //delta S (e.u.)
      dSo_Breslauer1986:array[1..4,1..4] of double=((24.0,12.9,13.5,16.9),(17.3,26.6,26.7,13.5),
                                  (20.8,27.8,26.6,12.9),(23.9,20.8,17.3,24.0));

      //parameters from Santalucia, 1998 (http://www.pubmedcentral.gov/articlerender.fcgi?tool=pubmed&pubmedid=9465037)
      //delta H (kcal/mol)

      prop_seq:array[1..10] of string=
      ('AA/TT','AT/TA','TA/AT','CA/GT','GT/CA','CT/GA','GA/CT','CG/GC','GC/CG','GG/CC');
      dHo_Santalucia1998_exp:array[1..10] of double=(-7.9,-7.2,-7.2,-8.5,-8.4,-7.8,-8.2,-10.6,-9.8,-8.0);
      dSo_Santalucia1998_exp:array[1..10] of double=(-22.2,-20.4,-21.3,-22.7,-22.4,-21.0,-22.2,-27.2,-24.4,-19.9);

      prop_seq_I:array[1..36] of string=
      ('AI/TC','TI/AC','AC/TI','TC/AI','CI/GC','GI/CC','CC/GI','GC/CI',
      'AI/TA','TI/AA','AA/TI','TA/AI','CI/GA','GI/CA','CA/GI','GA/CI',
      'AI/TT','TI/AT','AT/TI','TT/AI','CI/GT','GI/CT','CT/GI','GT/CI',
      'AI/TG','TI/AG','AG/TI','TG/AI','CI/GG','GI/CG','CG/GI','GG/CI',
      'AI/TI','TI/AI','CI/GI','GI/CI');

      dHo_Watkins2005:array[1..36] of double=
      (-8.9,-5.9,-8.8,-4.9,-5.4,-6.8,-8.3,-5.0,
      -8.3,-3.4,-0.7,-1.3,2.6,-7.8,-7.0,-7.6,
      0.49,-6.5,-5.6,-0.8,-1.0,-3.5,0.1,-4.3,
      -4.9,-1.9,0.1,1.0,7.1,-1.1,5.8,-7.6,
      -3.3,0.1,1.3,-0.5);
      dSo_Watkins2005:array[1..36] of double=
      (-25.5,-17.4,-25.4,-13.9,-13.7,-19.1,-23.8,-12.6,
      -25.0,-11.2,-2.6,-4.6,8.9,-21.1,-20.0,-20.2,
      -0.7,-22.0,-18.7,-4.3,-2.4,-10.6,-1.0,-12.1,
      -15.8,-8.5,-1.8,1.0,21.3,-3.2,16.9,-22.0,
      -11.9,-2.3,3.0,-1.3);

var s1,memmin,memmax:shortstring;
    melt,meltmin,meltmax,enth,entr,gc:double;
    t:byte;
    compt:integer;
    flag_degen:boolean;

function pb(c:char):byte;
begin
case upcase(c) of 'A':pb:=1;  'C':pb:=2;  'G':pb:=3;  'T':pb:=4;
                  else pb:=0;
                  end;
end;

function melting_Santalucia1998_acgt_exp(primer,comp_primer:string):real;
var t1,b:byte;
begin
enth:=0; entr:=0;
for t1:=1 to length(primer)-1 do
    begin
    if (primer[t1]='I') or (primer[t1+1]='I') then
        begin
        for b:=1 to length(prop_seq_I) do
            if ((copy(prop_seq_I[b],1,2)=primer[t1]+primer[t1+1]) and (copy(prop_seq_I[b],4,2)=comp_primer[t1]+comp_primer[t1+1]))
             or ((copy(prop_seq_I[b],4,2)=primer[t1+1]+primer[t1]) and (copy(prop_seq_I[b],1,2)=comp_primer[t1+1]+comp_primer[t1])) then
                begin
                enth:=enth+dHo_Watkins2005[b];
                entr:=entr+dSo_Watkins2005[b];
                end;
         end

    else
        for b:=1 to length(prop_seq) do
            if (copy(prop_seq[b],1,2)=primer[t1]+primer[t1+1]) or (copy(prop_seq[b],4,2)=primer[t1+1]+primer[t1]) then
                begin
                enth:=enth+dHo_Santalucia1998_exp[b];
                entr:=entr+dSo_Santalucia1998_exp[b];
                end;
    end;

if primer[1] in ['A','T'] then
   begin
      enth:=enth+2.3;    entr:=entr+4.1;
   //enth:=enth+2.2;    entr:=entr+6.9;
   end;
if primer[1] in ['C','G'] then
   begin
      enth:=enth+0.1;    entr:=entr-2.8;
   //enth:=enth+0.2;   entr:=entr-5.7;
   end;
if primer[length(primer)] in ['A','T'] then
   begin
   enth:=enth+2.3;     entr:=entr+4.1;
//enth:=enth+2.2;   entr:=entr+6.9;
   end;
if primer[length(primer)] in ['C','G'] then
   begin
   enth:=enth+0.1;     entr:=entr-2.8;
//   enth:=enth+0.2;   entr:=entr-5.7;
   end;

enth:=enth*4.18*1000;
entr:=(entr+0.368*(length(primer)-1)*ln(saltcc))*4.18;

melting_Santalucia1998_acgt_exp:=enth/(entr+1.987*4.18*ln(primercc/4))-273.15;        //2
end;

function melting_Breslauer1986_acgt(primer:string):real;
var t1:byte;
begin
enth:=0; entr:=0;
for t1:=1 to length(primer)-1 do
    begin
    enth:=enth+dHo_Breslauer1986[pb(primer[t1+1]),pb(primer[t1])];
    entr:=entr+dSo_Breslauer1986[pb(primer[t1+1]),pb(primer[t1])];
    end;
entr:=entr+10.8;
melting_Breslauer1986_acgt:=-enth*1000/(-entr+1.987*ln(primercc/4))-273.15+16.6*log10(saltcc);
end;

procedure base;   //
var tt:byte;
begin
inc(t);
for tt:=1 to 4 do
    begin
    if pos(s[t],iupac) and round(exp((tt-1)*ln(2)))=round(exp((tt-1)*ln(2))) then
       begin
       s1:=s1+bp[tt];
       base;
       dec(t);
       s1:=copy(s1,1,t-1);
       end;
    end;
if length(s1)=length(s) then
   begin
   if bibTm='Breslauer, 1986' then
      melt:=melting_Breslauer1986_acgt(s1);
   if bibTm='Allawi, 1997' then
      melt:=melting_Santalucia1998_acgt_exp(s1,compseq(s1));

   if melt<meltmin then
      begin
      meltmin:=melt;
      memmin:=s1;
      end;
   if melt>meltmax then
      begin
      meltmax:=melt;
      memmax:=s1;
      end;
   inc(compt);
   end;
end; //base

begin //melting
s:=format_iupac(s,false);
comp_primer:=format_iupac(comp_primer,false);
if (s='')  then
   begin
   melting:='';
   exit;
   end;
if (length(s)<10)  then
   begin
   melting:='oligo too short';
   exit;
   end;

if ndegen(s)>16384 then
   begin
   melting:='oligo too degenerated';
   exit;
   end;

flag_degen:=false;
gc:=0;
for t:=1 to length(s) do
    begin
    if s[t] in ['C','G'] then //calcul GC %
       gc:=gc+1;
    if not (s[t] in ['A','C','G','T','I']) then
       flag_degen:=true;
    end;
gc:=gc/length(s)*100;
//length >50
if length(s)>50 then
   begin
   str(81.5+16.6*log10(saltcc)+0.41*gc-500/length(s):0:1,s1);
   melting:=s1;
   exit;
   end;
if flag_degen then
   begin
   t:=0;
   s1:='';
   meltmin:=1000;
   meltmax:=-1000;
   base;
   if bibTm='Breslauer, 1986' then
      begin
      str(melting_Breslauer1986_acgt(memmin):0:1,s);
      str(melting_Breslauer1986_acgt(memmax):0:1,s1);
      end;
   if bibTm='Allawi, 1997' then
      begin
      str(melting_Santalucia1998_acgt_exp(memmin,compseq(memmin)):0:1,s);
      str(melting_Santalucia1998_acgt_exp(memmax,compseq(memmax)):0:1,s1);
      end;

   melting:=s+' - '+s1;
   end
else if not flag_degen then
   begin
   if bibTm='Breslauer, 1986' then
      str(melting_Breslauer1986_acgt(s):0:1,s1)
   else if bibTm='Allawi, 1997' then
      str(melting_Santalucia1998_acgt_exp(s,comp_primer):0:1,s1)
   else
      s1:='Unknow parameter: '+bibTm;
   melting:=s1;
   end
else
   melting:='not determined';
end;

function clearseq(s:string):string;
var s1:string;
    t:cardinal;
begin
s1:='';
for t:=1 to length(s) do
    if not (s[t] in ['0'..'9',' ',#10,#13]) then
       s1:=s1+s[t];
clearseq:=s1;
end;

function gcg_checksum(seq:string):string;     //calculate GCG checksum of sequence
var s:string[80];
    cs:cardinal;
    w,compt:cardinal;

begin
seq:=uppercase(clearseq(seq));
cs:=0;
compt:=0;
for w:=1 to length(seq) do
    begin
    if seq[w] in validbase then
       begin
       inc(compt);
       cs:=cs+compt*ord(seq[w]);
       if compt=57 then
          compt:=0;
       end;
    end;
cs:=cs-(cs div 10000)*10000;
str(cs,s);
gcg_checksum:=s;
end;

function zfill(s:shortstring;i:integer):shortstring;
begin
while length(s)<i do
   s:='0'+s;
zfill:=s;
end;

function gcp(s:string):double;    //calcul of GC percent
var i,compt_tot:integer;
    compt_gc:double;
begin
compt_gc:=0;
compt_tot:=0;
for i:=1 to length(s) do
    begin
    case upcase(s[i]) of 'C','G','S','N':compt_gc:=compt_gc+1;
                         'M','R','Y','K':compt_gc:=compt_gc+0.5;
                         'V','B':compt_gc:=compt_gc+0.67;
                         'H','D':compt_gc:=compt_gc+0.33;
                         end;

    if upcase(s[i]) in validbase then
       compt_tot:=compt_tot+1;
    end;
gcp:=compt_gc/compt_tot;
end; //gcp

function ndegen(s:string):int64; //number of different oligonucleotides
var i:integer;
    nd:int64;
begin
nd:=1;
for i:=1 to length(s) do
    begin
    case s[i] of 'M','R','S','W','Y','K':nd:=nd*2;
                 'V','B','D','H':nd:=nd*3;
                 'N':nd:=nd*4;
		          end;
    if nd>max_degen then
        break;
    end;
ndegen:=nd;
end;


end.
