function [ b, c ] = align( ds1,ds2 )
%ALIGN Pair up features that two proteomic data sets have in common.
%   [B, C] = ALIGN(DS1, DS2) returns index vectors B and C such that row
%   B(k) of DS1 and row C(k) of DS2 have the same charge state and the same
%   modified peptide sequence.
%
%   Inputs
%     DS1, DS2 - structs with the fields
%                  anno     : cell array of annotation strings
%                  annoHead : cell array of column names for anno
%                The columns 'Peak Centroid Time', 'Monoisotopic m/z',
%                'Charge State' and 'Modified Peptide Sequence' are
%                required in both data sets.
%
%   Outputs
%     B - row indices into DS1 of the matched features
%     C - row indices into DS2 of the matched features (same length as B)
%
%   Matched pairs are filtered twice with dropOutliers: first on the
%   difference in peak centroid time, then on the difference in m/z.
%
%   Errors
%     align:missingColumn - a required annotation column is absent.

needed = {'Peak Centroid Time', 'Monoisotopic m/z', ...
          'Charge State', 'Modified Peptide Sequence'};
sets = {ds1, ds2};
% Placeholder written over the '-' (no sequence) entries.  It differs per
% data set so that entries without a sequence never match each other.
tags = {'ds1-', 'ds2-'};

t = cell(1, 2);
mz = cell(1, 2);
keys = cell(1, 2);

for s = 1:2
    ds = sets{s};

    % Locate every required column up front; a missing column would
    % otherwise produce an empty selection and meaningless match keys.
    col = zeros(1, numel(needed));
    for k = 1:numel(needed)
        hit = find(strcmp(ds.annoHead, needed{k}), 1);
        if isempty(hit)
            error('align:missingColumn', ...
                'Annotation column ''%s'' not found in ds%d.annoHead.', ...
                needed{k}, s);
        end
        col(k) = hit;
    end

    % Strip double quotes from every annotation string.
    anno = strrep(ds.anno, '"', '');

    % Only the text before the first comma of the time entry is converted.
    t{s} = c2n(strtok(anno(:, col(1)), ','));
    mz{s} = c2n(anno(:, col(2)));
    e = c2n(anno(:, col(3)));

    pep = strtrim(anno(:, col(4)));
    pep(strcmp(pep, '-')) = tags(s);

    % Build one "<charge>|<peptide>" key per feature.  Each charge is
    % formatted on its own: num2str applied to the whole vector pads every
    % row to the widest entry, so the same charge could be rendered
    % differently in the two data sets and genuine matches would be lost.
    chargeStr = arrayfun(@(v) num2str(v), e(:), 'UniformOutput', false);
    keys{s} = cellfun(@(q, p) [q '|' p], chargeStr, pep(:), ...
        'UniformOutput', false);
end

t1 = t{1};   t2 = t{2};
mz1 = mz{1}; mz2 = mz{2};

% b and c index the rows of ds1 and ds2 that share a key.
[shared, b, c] = intersect(keys{1}, keys{2}); %#ok<ASGLU>

% Keep only the pairs retained by dropOutliers on the time difference ...
[unused, kp] = dropOutliers(t1(b) - t2(c)); %#ok<ASGLU>
b = b(kp); c = c(kp);
% ... and then on the m/z difference.
[unused, kp] = dropOutliers(mz1(b) - mz2(c)); %#ok<ASGLU>
b = b(kp); c = c(kp);
% 
% ds1.mz=ds1.mz-mean(ds1.mz(b)-ds2.mz(c));
% dfm=ds1.mz(b)-ds2.mz(c);
% ds1.t=ds1.t-mean(ds1.t(b)-ds2.t(c));
% dft=ds1.t(b)-ds2.t(c);
% sig=cov(dfm,dft);
% 
% sz=10;
% n=length(ds1.mz);
% pbl=zeros(n,3);
% pbl(:,1)=1:size(pbl,1);
% pbl(b,2)=1;
% pbl(b,3)=c;
% loc=zeros(n,sz);
% scr=zeros(n,sz);
% for(i=1:n)
%     if(pbl(i,2)==0)
%         mz=ds1.mz(i);
%         t=ds1.t(i);
%         kp=find(abs(ds2.mz-mz)<4*std(dfm) & abs(ds2.t-t)<4*std(dft) & ds2.e==ds1.e(i));
%         ln=min(length(kp), sz);
%         loc(i,1:ln)=kp(1:ln);
%         scr(i,1:ln)=mvnpdf([ds2.mz(kp(1:ln))-mz ds2.t(kp(1:ln))-t],zeros(1,2),sig);
%     end;
%     if(mod(i,1000)==0)
%         i
%     end;
% end;
% 
% [ll ord]=sort(scr(:),'descend');
% cutoff=40;
% for(i=1:sum(ll>cutoff))
%     lr=mod(ord(i)-1,n)+1;
%     lc=ceil(ord(i)/n);
%     if(pbl(lr,2)==0 & sum(loc(lr,lc)==pbl(:,3))==0)
%         pbl(lr,2)=1;
%         pbl(lr,3)=loc(lr,lc);
%     end;
%     if(sum(pbl(:,2))+1~=length(unique(pbl(:,3))))
%         break;
%     end;
%     if(mod(i,1000)==0)
%         i
%     end;
% end;
% kp=find(pbl(:,2)); 
% b=pbl(kp,1);
% c=pbl(kp,3);
% 
% [jnk kp]=dropOutliers(ds1.t(b)-ds2.t(c));
% b=b(kp); c=c(kp);
% [jnk kp]=dropOutliers(ds1.mz(b)-ds2.mz(c));
% b=b(kp); c=c(kp);

% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 
% %%%%%%%%%%% exploratory plots %%%%%%%%%%%%%%%%%%%%%%%
% % ta=40; tb=60; mza=600; mzb=800; e=3;
% % kpq=find(ds2.t>ta & ds2.t<tb & ds2.mz>mza & ds2.mz<mzb & ds2.e==e);
% % kpdb=find(ds1.t>ta & ds1.t<tb & ds1.mz>mza & ds1.mz<mzb & ds1.e==e);
% % plot(ds2.t(kpq),ds2.mz(kpq),'.');
% % hold on;
% % plot(ds1.t(kpdb)-.35,ds1.mz(kpdb),'.r');
% % hold off;
% % xlim([ta tb]); ylim([mza mzb]);
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 
% %%%%%%%%%%%% offset by matching ID %%%%%%%%%%%%%
% % [a b c]=intersect(ds1.pep,ds2.pep);
% % at=ds1.t(b); am=ds1.mz(b);
% % bt=ds2.t(c); bm=ds2.mz(c);
% % dt=ds1.t(b)-ds2.t(c); dm=ds1.mz(b)-ds2.mz(c);
% % kp=find(abs(dt-median(dt))<3 & abs(dm-median(dm))<.3);
% % figure(1)
% % plot(at,bt,'.')
% % figure(2)
% % plot(am,bm,'.')
% % figure(3)
% % scatterHeat(at,am,dm);
% % figure(4)
% % scatterHeat(at,am,dt);
% % 
% % mzShift=-median(ds1.mz(b)-ds2.mz(c))
% % thetaM=var(dm(kp))
% % tShift=-median(ds1.t(b)-ds2.t(c))
% % thetaT=var(dt(kp))
% % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% % 
% % % facPth='d:\bfrm\pbmcMcCarthy\validation180\trainAll1\';
% % % 
% % % % key=importdata('L:\lucas\dataRepository\tissueSamples\pbmc\mccarthy\dateKey.txt');
% % % key=scanfile('d:\dataRepository\tissueSamples\serum\mccarthy\dateKey.txt');
% % % keyHeader=key(1,:);
% % % key=key(2:end,:);
% % % 
% % % isoID=1; retentionTime=2; mzRatio=3; chargeState=4; peptide=5; protein=6;
% % % runDate=find(strcmp(keyHeader,'runDate')==1);
% % % genotype1=find(strcmp(keyHeader,'genotype1')==1);
% % % responder=find(strcmp(keyHeader,'responder')==1);
% % % lTrans=find(strcmp(keyHeader,'liverTransplant')==1);
% % % runblock=find(strcmp(keyHeader,'runblock')==1);
% % 
% % baseShift=0;
% % mzShift=0;
% % 
% % n=length(ds1.t);
% % top=NaN*ones(n,1);
% % top5=NaN*ones(n,5);
% % topLoc=NaN*ones(n,1);
% % testShift=(tShift-.5):.1:(tShift+.5);
% % bestM=NaN*ones(n,1);
% % bestT=NaN*ones(n,1);
% % bestN=NaN*ones(n,1);
% % shiftLLTrace=NaN*zeros(length(testShift),1);
% % for(cnt=1:length(testShift))
% %     baseShift=testShift(cnt);
% %     for(i=1:n)
% %         tst=find(ds1.e(i)==ds2.e & abs(ds1.t(i)+baseShift-ds2.t)<5 & abs(ds1.mz(i)+mzShift-ds2.mz)<.1);
% %         if(length(tst)>0)
% %             scr=(ds1.t(i)+baseShift-ds2.t(tst)).^2/(2*thetaT) +(ds1.mz(i)+mzShift-ds2.mz(tst)).^2/(2*thetaM);
% % %             dff=(repmat(c1.data(i,mtchLst(:,2)),length(tst),1)-c2.data(tst,mtchLst(:,1)))./sqrt(thetaN);
% % %             dff=nanmean(dff,2);
% % %             scr=scr+dff.^2;
% %             [jnk ord]=sort(scr);
% %             top(i)=scr(ord(1));
% %             ln=min(length(scr),5);
% %             top5(i,1:ln)=scr(ord(1:ln));
% %             topLoc(i)=tst(ord(1));
% % %             bestM(i)=ds1.mz(i)-ds2.mz(tst(ord(1)));
% % %             bestT(i)=ds1.t(i)-ds2.t(tst(ord(1)));
% % %             bestN(cnt)=dff(ord(1));
% %         end;
% %     end;
% %     shiftLLTrace(cnt)=nanmedian(top5(:,1));
% %     figure(1)
% %     plot(ds1.t,-log(top5(:,1)),'.');
% %     figure(2)
% %     plot(ds1.mz,-log(top5(:,1)),'.');
% %     figure(3)
% %     kp=find(top5(:,1)<quantile(top5(:,1),.1));
% %     scatterHeat(ds1.t(kp), ds1.mz(kp), -log(top5(kp,1)));
% %     figure(4)
% %     plot(testShift(1:cnt),shiftLLTrace(1:cnt));
% %     xlim([min(testShift)-.1 max(testShift)+.1])
% %     pause(.01);
% % end;
% % baseTShift=testShift(shiftLLTrace==min(shiftLLTrace));
% % 
% % %%%%% now that the shifts are set, find the best matches
% % % thetaM=1.2e-4;
% % for(i=1:n)
% %     tst=find(ds1.e(i)==ds2.e & abs(ds1.t(i)+baseTShift-ds2.t)<5 & abs(ds1.mz(i)+mzShift-ds2.mz)<.1);
% %     if(length(tst)>0)
% %         scr=(ds1.t(i)+baseTShift-ds2.t(tst)).^2/(2*thetaT) +(ds1.mz(i)+mzShift-ds2.mz(tst)).^2/(2*thetaM);
% %         [jnk ord]=sort(scr);
% %         topLoc(i)=tst(ord(1));
% %         top(i)=scr(ord(1));
% %         bestM(i)=ds1.mz(i)-ds2.mz(tst(ord(1)));
% %         bestT(i)=ds1.t(i)-ds2.t(tst(ord(1)));
% %     end;
% % end;
% % cutoff=1.1;
% % plot(bestM,bestT,'.');
% % hold on;
% % plot(bestM(top<cutoff),bestT(top<cutoff),'.r');
% % hold off;
% % 
% % kp=find(~isnan(topLoc) & top<cutoff);
% % dbLoc=1:length(ds1.pep);
% % ds2.newPep=cellstr(repmat('ds2-',length(ds2.pep),1)); ds2.newPep(topLoc(kp))=ds1.pep(dbLoc(kp));
% % ds2.newPCode=cellstr(repmat('ds2-',length(ds2.pCode),1)); ds2.newPCode(topLoc(kp))=ds1.pCode(dbLoc(kp));
% % 
% % ct=[sum(strcmp(ds2.newPep,ds2.pep) & ~strcmp(ds2.newPep,'ds2-') & ~strcmp(ds2.pep,'ds2-')) sum(strcmp(ds2.newPep,'ds2-') & ~strcmp(ds2.pep,'ds2-'));
% %     sum(~strcmp(ds2.newPep,'ds2-') & strcmp(ds2.pep,'ds2-')) sum(~strcmp(ds2.newPep,ds2.pep) & ~strcmp(ds2.newPep,'ds2-') & ~strcmp(ds2.pep,'ds2-'))]
% % 
% % oldID=find(strcmp(ds2.newPep,'-') & ~strcmp(ds2.pep,'-'));
% % ds2.newPep(oldID)=ds2.pep(oldID);
% % ds2.newPCode(oldID)=ds2.pCode(oldID);
% % 
% % m=length(ds2.id);
% % newAnnotation=[char(ds2.id) repmat(' ',m,1) num2str(ds2.t,6) repmat(' ',m,1) num2str(ds2.mz,6) repmat(' ',m,1) num2str(ds2.e,2) repmat(' ',m,1) char(ds2.newPep) repmat(' ',m,1) char(ds2.newPCode)];
% % dlmwrite('newAnnotation.txt',newAnnotation,'newline','pc','delimiter','');
% % 
% % % dat=readDataframe('D:\dataRepository\tissueSamples\serum\mccarthy\hcv-180.txt','D:\dataRepository\tissueSamples\serum\mccarthy\dateKey.txt','D:\bfrm\pbmcMcCarthy\validation135\leaveOut357');
% % % [res vlist]=sss(cell2mat(dat.key(:,4)),dat.mf);
% % 
% % useID=ds2.id(strcmp(ds2.newPep,'-')~=1);
% % dlmwrite('annotatedIDs.txt',char(useID),'delimiter','','newline','pc');
% % 
% 
% end

