function [ dat ] = processProteomics( dat, filter, imputeNaN )
%PROCESSPROTEOMICS Clean, centre and collapse an intensity matrix.
%   DAT = PROCESSPROTEOMICS(DAT) processes DAT.xpr in place and returns the
%   updated struct.  DAT = PROCESSPROTEOMICS(DAT, FILTER, IMPUTENAN) also
%   controls the optional filtering and imputation steps.
%
%   Inputs
%     dat        struct with the fields used below:
%                  xpr      numeric matrix; rows are aligned with pids/anno,
%                           columns are aligned with the rows of key, with
%                           sids and with the rows of mf
%                  pids     one entry per row of xpr
%                  anno     (optional) cell array, one row per row of xpr
%                  annoHead column names of anno (used only when FILTER is on)
%                  key      cell array, one row per column of xpr
%                  keyHead  column names of key
%                  sids     one entry per column of xpr
%                  mf       (optional) numeric matrix, one row per column of xpr
%     filter     nonzero to remove rows that are both unidentified
%                ('Modified Peptide Sequence' equal to '-') and have a
%                median centred log intensity <= 0.  Default 0.
%     imputeNaN  nonzero to replace remaining NaNs by the row mean.  Default 1.
%
%   Processing steps, in order
%     1. values below 100 become NaN, then the natural log is taken
%     2. rows that are NaN in at least half of the columns are removed
%     3. every column is centred on its mean over the fully observed rows
%     4. two passes of outlier removal: entries whose standardised value
%        (as returned by STANDARDIZE(xpr,2)) exceeds 3 in magnitude become NaN
%     5. columns sharing a 'Storage ID' in key are averaged (NaNs ignored);
%        key and sids keep the entry of the first column of each group
%     6. optional filtering (FILTER)
%     7. optional row-mean imputation (IMPUTENAN)
%
%   Requires NANMEAN / NANMEDIAN and the helper STANDARDIZE on the path.

if nargin < 2
    filter = 0;
end
if nargin < 3
    imputeNaN = 1;
end

% 1. Treat intensities below 100 as missing and move to log scale.
dat.xpr(dat.xpr < 100) = NaN;
dat.xpr = log(dat.xpr);

% 2. Remove rows that are missing in at least half of the columns.
dropRow = sum(isnan(dat.xpr), 2) >= size(dat.xpr, 2) / 2;
dat.xpr(dropRow, :) = [];
dat.pids(dropRow) = [];
if isfield(dat, 'anno')
    dat.anno(dropRow, :) = [];
end

% 3. Centre each column on its mean over the rows without any NaN.
%    The explicit dimension matters: with a single complete row, mean()
%    without it would collapse the 1-by-n row to a scalar.
%    NOTE: if no row is complete the column means are NaN and the whole
%    matrix becomes NaN.
completeRow = ~any(isnan(dat.xpr), 2);
colMean = mean(dat.xpr(completeRow, :), 1);
dat.xpr = dat.xpr - repmat(colMean, size(dat.xpr, 1), 1);

% 4. Clear outliers.  The second pass re-standardises after the first
%    pass has removed the most extreme values.
for pass = 1:2
    z = standardize(dat.xpr, 2);   % project helper; presumably row-wise z-scores - confirm
    dat.xpr(abs(z) > 3) = NaN;
end

% 5. Combine replicates: average all columns that share a Storage ID.
%    Only the first matching header is used; several matches would make
%    the column indices below meaningless.
storCol = find(strcmp(dat.keyHead, 'Storage ID'), 1);
if ~isempty(storCol)
    storIds = dat.key(:, storCol);
    ustor = unique(storIds);
    nGroups = numel(ustor);
    hasMf = isfield(dat, 'mf');

    firstIdx = zeros(nGroups, 1);
    xprCols = cell(1, nGroups);
    mfRows = cell(nGroups, 1);
    for i = 1:nGroups
        members = find(strcmp(ustor(i), storIds));
        firstIdx(i) = members(1);
        xprCols{i} = nanmean(dat.xpr(:, members), 2);
        if hasMf
            mfRows{i} = nanmean(dat.mf(members, :), 1);
        end
    end

    % Concatenate once instead of growing the arrays inside the loop.
    dat.xpr = [xprCols{:}];
    dat.key = dat.key(firstIdx, :);
    firstSids = dat.sids(firstIdx);
    dat.sids = firstSids(:);          % column vector, one entry per group
    if hasMf
        dat.mf = vertcat(mfRows{:});
    end
end

% 6. Filter out low intensity, unidentified peaks: a row is kept when it
%    is identified OR its median is above zero.
if filter
    keep = nanmedian(dat.xpr, 2) > 0;

    seqCol = [];
    if isfield(dat, 'anno') && isfield(dat, 'annoHead')
        seqCol = find(strcmp(dat.annoHead, 'Modified Peptide Sequence'), 1);
    end

    if ~isempty(seqCol)
        identified = ~strcmp(dat.anno(:, seqCol), '-');
        keep = keep | identified(:);
    else
        % Without the sequence column the identification status is unknown;
        % fall back to the intensity criterion instead of failing or
        % silently discarding every row.
        warning('processProteomics:noSequenceAnnotation', ...
            ['No ''Modified Peptide Sequence'' annotation available; ' ...
             'filtering on intensity only.']);
    end

    dat.pids = dat.pids(keep);
    dat.xpr = dat.xpr(keep, :);
    if isfield(dat, 'anno')
        dat.anno = dat.anno(keep, :);
    end
end

% 7. Set missing data to the row mean (rows that are entirely NaN stay NaN).
if imputeNaN
    missing = isnan(dat.xpr);
    rowMean = nanmean(dat.xpr, 2);
    fillValues = repmat(rowMean, 1, size(dat.xpr, 2));
    dat.xpr(missing) = fillValues(missing);
end

end

