function [beta,li,alli,XX]=Mbinregsvdnew(x,Z,group,ngene,nfac,nmc,cvi,ci,version)
%=====================================================================
%
% Fits the full posterior in the binary regression model with Bayesian
% stochastic regularisation using iterative simulation (MCMC) methods.
%
% INPUTS:
% x is the expression data to be used -- genes are rows, arrays are columns
% Z is the binary indicator of class membership (2 == unknown)
% group is the per-sample role indicator:
%    version 1: samples with group<3 enter the SVD and the regression
%               (group==1 are training samples; the responses of the other
%               group<3 samples are re-drawn inside the sampler), and
%               samples with group==3 are predicted afterwards
%    version 2: samples with group==1 are used for the SVD and regression,
%               samples with group>1 are predicted afterwards
% ngene is the number of genes to be selected -- those most highly
%   correlated with outcome on the training arrays
% nfac is the number of supergene factors to include, the rest are ignored
% nmc is a 3 vector [nit nbi nskip]:
%    nit is the number of iterates to summarise the MCMC
%    nbi is the initial number to burn-in and discard
%    nskip is the number of skips between saves
% cvi is 1 if you want '1 at a time' cross-validation analyses too, 0 if not
% ci is percent alpha used for a two-sided CI
% version is 1 if SVD factors are regressed in all samples,
%            2 if SVD factors are regressed only in training data
%
% OUTPUTS:
% beta is the estimated regression vector for genes; its first element is
%   the intercept term
% li is the top ngene genes selected in the training set analysis
% alli is the indices of all genes selected in the CV optimisations
%   (empty when cvi is not 1)
% XX is the mean-centred expression matrix of the selected genes that was
%   passed to the SVD
%
% SIDE EFFECTS:
% Writes 'trainingcases.txt', optionally 'validationcases.txt' and
% 'crossvalidation.txt', and saves the struct Silent to 'plotdata'.
%
% Relies on select_genes_cor, svd_mw, pnorm and qnorm, which are defined
% outside this file.
%

%% Edited by WTB 6-12-07
%% Noteworthy changes -
%%   - Both versions of BinReg allowed
%%   - plots suppressed until after algorithm
%%   - CV of version-1 redefined

 % Fail early with a clear message; otherwise an unsupported version only
 % shows up later as an undefined-variable error.
 if ~(isequal(version,1) || isequal(version,2))
    error('Mbinregsvdnew:badVersion','version must be 1 or 2');
 end;

% data &  parameters
 [N,n]=size(x);
 k=2; nit=nmc(1); nbi=nmc(2); nskip=nmc(3);
 % k enters the gamma draw used to update tau2 inside the sampler

 itrain = find(group==1); ntrain=length(itrain);

 % one output row per sample: index, class, probability, lower, upper, fit
 rowfmt = '%i\t%i\t%10.7f\t%10.7f\t%10.7f\t%10.7f\n';

% reduce to top most correlated ngene  ...
%% always use only training samples in gene selection
 z=Z(itrain);  X=x(:,itrain);
 ind=select_genes_cor(X,z,ngene);
 li=ind(1:ngene);  alli=[];

%% perform SVD on all training and standardising samples (version 1),
%% or training only (version 2)  --WTB 5.21.07
 if(version==1)
   itrain2 = find(group<3); ntrain2 = length(itrain2);
   % ifill: positions within itrain2 whose response is re-drawn each sweep
   ifill = 1:ntrain2; ifill(group(itrain2)==1) = []; nfill = length(ifill);
   ivalid = find(group==3); nvalid = length(ivalid);
 end;
 if(version==2)
   itrain2 = find(group==1); ntrain2 = length(itrain2);
   ifill = []; nfill = length(ifill);
   ivalid = find(group>1);  nvalid = length(ivalid);
 end;
 X=x(li,itrain2)-repmat(mean(x(li,itrain2),2),1,ntrain2);
 [A,D,F]=svd_mw(X); XX=X;
 D=D';
 [p,n]=size(F);
 j=1:min(min(ngene,nfac),p);
 A=A(:,j); D=D(j); F=F(j,:);
 %% p = number of metagenes/factors to be used in model
 %% n = number of samples entering the regression (ntrain2)

 [p,n]=size(F);
 %% add intercept
 ln=1/sqrt(n); F=([ln*ones(1,n);F]);
 D2 = D; D=[sqrt(n);D];
 p=p+1;

 %% y = X'b+e; latent variable
 %% gamma = coefficients on factors
 %% my = fitted regression model (zhat)
 % NOTE: the initial randn draw for y is kept so the random stream is
 % consumed exactly as before, even though y is overwritten in the loop.
 y=randn(n,1); gamma=zeros(p,1); tau2=ones(p,1);

 vi=tau2./(1+tau2);
 my=F'*gamma;
 m=[]; fit=zeros(n,1); pfit=[];

%% Response vectors of all or training data
 data=reshape(Z(itrain2),n,1);  Fit=zeros(n,1); Pfit=zeros(n,nit);

 fprintf(' Starting MCMC algorithm\n')

 for j=1:(nbi+nskip*nit)
     % pnorm/qnorm are external helpers (presumably the normal CDF and
     % quantile function -- confirm against their definitions)
     phi=pnorm(-my,0,1);
     pr=1-phi;
     r=mean(pr); r=r/(1-r);
     pr=pr./(pr+(1-pr)*r);

     % re-draw the responses of the non-training samples in the model
     if (nfill>0)
       data(ifill)=rand(nfill,1)<pr(ifill);
     end;

     y=my+(qnorm(data.*phi+rand(n,1).*(data+(1-2*data).*phi),0,1));
     gamma=vi.*(F*y)+sqrt(vi).*randn(p,1);
     my=F'*gamma;
     tau2=1./gamrnd((k+1)/2,2./(k+gamma.^2),p,1);
     vi=tau2./(1+tau2);
     % keep every nskip-th sweep after burn-in
     if (j>nbi && mod(j-nbi,nskip)==0)
        fit=fit+my;
        pfit=[pfit, pr];
        m=[m,gamma];
     end;
     if (mod(j,1000)==0)
       fprintf('    %d iterations complete\n',j)
     end;
 end;

 Fit=fit/nit; Pfit=pfit;
 %% coefficients on factors

 gamma=mean(m,2); mu = gamma(1) / sqrt(n);
 gamma=gamma(2:p);
 %% coefficients on genes
 %beta=A*gamma;    %% incorrect WTB
 beta=A * diag(1./D2) * gamma;
 beta = [mu - (beta' * mean(x(li,itrain2),2)) ; beta];
 %% reset p to number of factors
 F=F(2:p,:); p=p-1;
 fit=Fit; pfit=mean(Pfit,2);
 sl=prctile(Pfit',ci/2)'; su=prctile(Pfit',100 - ci/2)';

 %% now apply above results to predict validation samples

if (nvalid>0)
    % Centre with the same mean (over itrain2) that was used when the model
    % was fitted, so A and D here belong to the same decomposition as the
    % saved coefficients. (Previously the mean over itrain was used, which
    % differs in version 1 when non-training samples are in the model.)
    X=x(li,itrain2)-repmat(mean(x(li,itrain2),2),1,ntrain2);
    [A,D,F]=svd_mw(X);
    %D=D';
    D=diag(D);  % D is square matrix with elements of D on diagonal
    % generate factor matrix for new samples
    FF=inv(D)*A'*(x(li,ivalid)-repmat(mean(x(li,itrain2),2),1,nvalid));
    [p,n]=size(FF);
    j=1:min(min(ngene,nfac),p);
    FF=FF(j,:);
    %% p = number of metagenes/factors to be used in model
    %% n = number of validation samples
    [p,n]=size(FF);
    %% add intercept: must be the same constant as in the fitted design,
    %% i.e. 1/sqrt(number of samples in the regression) = 1/sqrt(ntrain2)
    ln=1/sqrt(ntrain2); FF=([ln*ones(1,nvalid);FF]); p=p+1;
    %% analogous to fit and pfit, Fit and Pfit from above
    pred=zeros(nvalid,1); ppred=[];
    Pred=zeros(nvalid,1); Ppred=zeros(nvalid,nit);
    for j=1:nit
        my=FF'*m(:,j);
        phi=pnorm(-my,0,1);
        pr=1-phi;
        % use pr from training for correct mean centering of probabilities
        r=mean(Pfit(:,j)); r=r/(1-r);
        pr=pr./(pr+(1-pr)*r);
        pred=pred+my;
        ppred=[ppred, pr];
    end
    Pred=pred/nit; Ppred=ppred;
    FF=FF(2:p,:); p=p-1;
    pred=Pred; ppred=mean(Ppred,2);
    slp=prctile(Ppred',ci/2)'; sup=prctile(Ppred',100 - ci/2)';
%% place ivalid results in variables indexed by original sample number
    n=size(x,2); F=F(1:p,:);
    F2=zeros(p,n); F2(:,itrain2)=F; F2(:,ivalid)=FF; F=F2; clear FF F2;
    fit2=zeros(n,1); fit2(itrain2)=fit; fit2(ivalid)=pred; fit=fit2; clear pred fit2;
    pfit2=zeros(n,1); pfit2(itrain2)=pfit; pfit2(ivalid)=ppred; pfit=pfit2; clear ppred pfit2;
    sl2=zeros(n,1); sl2(itrain2)=sl; sl2(ivalid)=slp; sl=sl2; clear slp sl2;
    su2=zeros(n,1); su2(itrain2)=su; su2(ivalid)=sup; su=su2; clear sup su2;
 end;

 % Silent collects everything the (external) plotting code needs; it is
 % saved to 'plotdata' at the end of the function.
 Silent.f1.use = 1;
 Silent.f1.gamma = gamma; Silent.f1.F = F; Silent.f1.itrain = itrain;
 Silent.f1.Z = Z; Silent.f1.nfac = nfac; Silent.f1.ntrain = ntrain;
 Silent.f2.use = 1;
 Silent.f2.gamma = gamma; Silent.f2.beta = beta; Silent.f2.beta(1) = [];
 Silent.f2.ngene = ngene; Silent.f2.p = p;
 Silent.f3.use = 1;
 Silent.f3.fit = fit; Silent.f3.n = n; Silent.f3.ivalid = 1:n; Silent.f3.ivalid(itrain) = [];
 Silent.f3.Z =Z; Silent.f3.pfit = pfit; Silent.f3.sl = sl; Silent.f3.su = su;

 fprintf('\n Writing training results to file\n\n')
 fidfit = fopen('trainingcases.txt','w');
 if (fidfit < 0)
    error('Mbinregsvdnew:fileOpen','Could not open trainingcases.txt for writing');
 end;
 % itrain(:)' forces a row so the loop visits one sample per iteration
 % even when group (and hence find's result) is a column vector
 for zi = itrain(:)'
   fprintf(fidfit,rowfmt, ...
         zi, Z(zi), pfit(zi), sl(zi), su(zi), fit(zi));
 end;
 fclose(fidfit);

 if (n>ntrain)
    fprintf(' Writing validation results to file\n\n')
    z=1:n; z(itrain)=[];
    Silent.f4.use = 1;
    Silent.f4.fit = fit; Silent.f4.ivalid = z; Silent.f4.Z = Z;
    Silent.f4.pfit = pfit; Silent.f4.sl = sl; Silent.f4.su = su;
    fidcv = fopen('validationcases.txt','w');
    if (fidcv < 0)
       error('Mbinregsvdnew:fileOpen','Could not open validationcases.txt for writing');
    end;
    for zi = z
      fprintf(fidcv,rowfmt, ...
              zi, Z(zi), pfit(zi), sl(zi), su(zi), fit(zi));
    end;
    fclose(fidcv);
 else
     Silent.f4.use = 0;
 end;


%%% Cross validation ...
 if (cvi==1)
    fprintf(' Starting Cross Validation\n')
    alli = li; Fitcv=zeros(ntrain,1); Pfitcv=zeros(ntrain,nit);
    for cv=1:ntrain
       fprintf('    Sample %d complete\n',cv)
       % hold out the cv-th training sample by moving it to group 3
       groupcv = group; groupcv(itrain(cv)) = 3;
       itraincv = find(groupcv==1); ntraincv=length(itraincv);
    %% always use only training samples in gene selection
       z=Z(itraincv);  X=x(:,itraincv);  ind=select_genes_cor(X,z,ngene);
       li_cv=ind(1:ngene);  alli=[alli;li_cv];
       if(version==1)
          itrain2 = find(groupcv<3); ntrain2 = length(itrain2);
          ifill = 1:ntrain2; ifill(groupcv(itrain2)==1) = []; nfill = length(ifill);
       end;
       if(version==2)
          itrain2 = itraincv; ntrain2 = length(itrain2);
          ifill = []; nfill = length(ifill);
       end;

       % refit the model without the held-out sample (same steps as above)
       X=x(li_cv,itrain2)-repmat(mean(x(li_cv,itrain2),2),1,ntrain2);
       [A,D,F]=svd_mw(X); D=D'; [p,n]=size(F);
       j=1:min(min(ngene,nfac),p); A=A(:,j); D=D(j); F=F(j,:);
       [p,n]=size(F); ln=1/sqrt(n); F=([ln*ones(1,n);F]);
       D=[sqrt(n);D]; p=p+1;
       y=randn(n,1); gamma=zeros(p,1); tau2=ones(p,1);
       vi=tau2./(1+tau2); my=F'*gamma; m=[]; fit=zeros(n,1); pfit=[];
       data=reshape(Z(itrain2),n,1);  Fit=zeros(n,1); Pfit=zeros(n,nit);

       for j=1:(nbi+nskip*nit)
          phi=pnorm(-my,0,1);  pr=1-phi; r=mean(pr); r=r/(1-r);  pr=pr./(pr+(1-pr)*r);
          if (nfill>0)
            data(ifill)=rand(nfill,1)<pr(ifill);
          end;
          y=my+(qnorm(data.*phi+rand(n,1).*(data+(1-2*data).*phi),0,1));
          gamma=vi.*(F*y)+sqrt(vi).*randn(p,1); my=F'*gamma;
          tau2=1./gamrnd((k+1)/2,2./(k+gamma.^2),p,1); vi=tau2./(1+tau2);
          if (j>nbi && mod(j-nbi,nskip)==0)
            fit=fit+my; pfit=[pfit, pr]; m=[m,gamma];
          end;
       end;

       % project the held-out sample onto the factors and predict it
       [A,D,F]=svd_mw(X); ID=diag(1./D); Pfit=pfit; pfit=[]; fit=0;
       FF=ID*A'*(x(li_cv,itrain(cv))-repmat(mean(x(li_cv,itrain2),2),1,1));
       [p,n]=size(FF); j=1:min(min(ngene,nfac),p); FF=FF(j,:); [p,n]=size(FF);
       ln=1/sqrt(size(X,2)); FF=([ln*ones(1,1);FF]); p=p+1;
       for j=1:nit
          my=FF'*m(:,j); phi=pnorm(-my,0,1);  pr=1-phi;
          r=mean(Pfit(:,j)); r=r/(1-r); pr=pr./(pr+(1-pr)*r);
          fit=fit+my; pfit=[pfit, pr];
       end;
       Fitcv(cv)=fit/nit; Pfitcv(cv,:) = pfit;
    end;

    pfitcv=mean(Pfitcv,2);
    slcv=prctile(Pfitcv',ci/2)';
    sucv=prctile(Pfitcv',100 - ci/2)';

    Silent.f5.use = 1; Silent.f5.ci = ci;
    Silent.f5.Fitcv = Fitcv; Silent.f5.Pfitcv = Pfitcv;
    Silent.f5.itrain = itrain;Silent.f5.Z = Z;

    fprintf('\n Writing cross validation results to file\n\n')
    fidfit2 = fopen('crossvalidation.txt','w');
    if (fidfit2 < 0)
       error('Mbinregsvdnew:fileOpen','Could not open crossvalidation.txt for writing');
    end;
    % The CV summaries have one row per training sample in the order of
    % itrain, so they are indexed by position cv, not by sample number zi.
    for cv=1:ntrain
      zi = itrain(cv);
      fprintf(fidfit2,rowfmt, ...
            zi, Z(zi), pfitcv(cv), slcv(cv), sucv(cv), Fitcv(cv));
    end;
    fclose(fidfit2);
 else
    Silent.f5.use = 0;
 end;
 save plotdata Silent;



















