function [beta,li,alli]=Multregsvd(x,y,ivalid,ngene,nfac,nmc,cvi)
%=====================================================================
%
% Fits full posterior in the linear regression model with Bayesian stochastic regularisation
% using iterative similation MCMC methods
%
% 
% INPUTS: 
% x is the expression data to be used -- genes are rows, arrays are columns
% y is response 
% ivalid is a vector of array numbers of validation cases
% ngene is the number of genes to be selected -- those most highly correlated with 
%   outcome on the training arrays
% nfac is the number of supergene factors to include, the rest are ignored
% nmc is a 3 vector: 
%    nit is the number of iterates to summarise the MCMC
%    nbi is the initial number to burn-in and discard
%    nskip is the number of skips between saves 
% cvi is 1 if you want '1 at a time' cross-validation analyses too, 0 if not
% 
% OUTPUTS: 
% beta is the estimated regression vector for genes
% li the top ngene genes selected in the training set analysis 
% alli the indices of all genes selected in the CV optimisations
%
 
 
% data &  parameters 
 [N,n]=size(x); nvalid=length(ivalid); itrain=1:n; itrain(ivalid)=[]; ntrain=length(itrain); 
 k=2; nit=nmc(1); nbi=nmc(2); nskip=nmc(3); 
 
% reduce to top most correlated ngene  ... 
 Y=y; Y(ivalid)=[]; X=x; X(:,ivalid)=[];  
 ind=select_genes_cor(X,Y,ngene); li=ind(1:ngene);  alli=[]; 
 
 'running optimised training set analysis '
 X=x(li,:)-repmat(mean(x(li,:),2),1,n); [A,D,F]=svd_mw(X);
      D=D'; [p,n]=size(F); 
      j=1:min(min(ngene,nfac),p); A=A(:,j); D=D(j); F=F(j,:); [p,n]=size(F); 
      ln=1/sqrt(n); F=([ln*ones(1,n);F]); D=[sqrt(n);D]; D2=D.*D; p=p+1; 
 
 gamma=zeros(p,1); tau2=ones(p,1); sigma2=var(y); sigma=sqrt(sigma2); 
 vi=tau2./(sigma2+tau2);  my=F'*gamma;
 m=[]; fit=[]; data=reshape(y,n,1); 
 
 for j=1:(nbi+nskip*nit)
     if (nvalid>0), data(ivalid)=my(ivalid)+sigma*randn(nvalid,1); end
     gamma=vi.*(F*data)+sqrt(vi.*sigma).*randn(p,1); my=F'*gamma;
     e=data-my; sigma2=1/gamrnd(n/2,2/(e'*e)); sigma=sqrt(sigma2); 
     tau2=1./gamrnd((k+1)/2,2./(k+gamma.^2),p,1); vi=tau2./(sigma2+tau2); 
     if (j>nbi & floor((j-nbi)/nskip)==(j-nbi)/nskip)
        fit=[fit,my]; m=[m,gamma]; 
     end;
     if (floor(j/1000)==j/1000) 
        j
     end;
 end;
 gamma=mean(m,2); gamma=gamma(2:p); beta=A*gamma; F=F(2:p,:); p=p-1; 
 
 
 figure(2); set(gca,'FontSize',14);
 subplot(2,1,1); 
 plot(gamma,'--bs','MarkerSize',6,'LineWidth',1.0); hold on; 
 title('Expression weights: Metagenes');  xlabel('Metagene'); ylabel('gamma')
 plot([0 p],[0 0],'black--','LineWidth',1); hold off
 subplot(2,1,2);
 plot(beta,'LineWidth',1.0); hold on; plot([0 ngene],[0 0],'black--','LineWidth',1.0); hold off
 title('Expression weights: Genes')
 xlabel('Gene');  ylabel('beta')
 hold off;  
 
 
 figure(4); clf; set(gca,'FontSize',14); 
 mfit=mean(fit')'; sl=prctile(fit',2.5)'; su=prctile(fit',97.5)'; 
 scatter(mfit,y,'k');  hold on 
 scatter(mfit(ivalid),y(ivalid),'r');  hold on 
 % a= [ min([min(mfit),min(y)]), max([max(mfit),max(y)])   ]; line(a,a); hold on
 scatter(mfit,mfit,'b*'); hold on;
 for j=itrain
   plot([mfit(j) mfit(j)],[sl(j),su(j)],'b:','LineWidth',2.0)
 end;
 for j=ivalid
   plot([mfit(j) mfit(j)],[sl(j),su(j)],'r:','LineWidth',2.0)
 end;
 ylabel('Outcome'); 
 xlabel('Predicted values'); 
 hold off;
 
 
li=reshape(li,ngene,1); 
 
if (cvi==1) 
 
%%% training cases ...
 
 [N,n]=size(x); fitcv=fit; 
 
 for ij=1:ntrain 
    ix=itrain(ij); 
    ['running analysis ',int2str(ix) ]
    % reduce to most correlated ngene ignoring the hold out and validation cases 
 
    iv=sort(unique([ix ivalid])); Y=y; Y(iv)=[]; X=x; X(:,iv)=[]; s=[];  
    ind=select_genes_cor(X,Y,ngene);  si=ind(1:ngene);  alli=unique([alli si]); 
    X=x(si,:)-repmat(mean(x(si,:),2),1,n); [A,D,F]=svd_mw(X);
        D=D'; [p,n]=size(F); 
        j=1:min(min(ngene,nfac),p); A=A(:,j); D=D(j); F=F(j,:); [p,n]=size(F); 
        F=([ln*ones(1,n);F]); D=[sqrt(n);D]; D2=D.*D; p=p+1; 
    gamma=zeros(p,1); tau2=ones(p,1); sigma2=var(y); sigma=sqrt(sigma2); 
    vi=tau2./(sigma2+tau2);  my=F'*gamma; fit=[]; 
    data=reshape(y,n,1); 
 
    for j=1:(nbi+nskip*nit)
       data(iv)=my(iv)+sigma*randn(nvalid+1,1); 
       gamma=vi.*(F*data)+sqrt(vi.*sigma).*randn(p,1);  my=F'*gamma; 
       tau2=1./gamrnd((k+1)/2,2./(k+gamma.^2),p,1); vi=tau2./(sigma2+tau2); 
       if (j>nbi & floor((j-nbi)/nskip)==(j-nbi)/nskip)
          fit=[fit, my(ix)]; 
       end;
       if (floor(j/1000)==j/1000) 
          j
       end;
    end;
    fitcv(ix,:)=fit;
 end;
 
 
 figure(5); clf; set(gca,'FontSize',14); 
 mfit=mean(fitcv')'; sl=prctile(fitcv',5)'; su=prctile(fitcv',95)'; 
 scatter(mfit(itrain),y(itrain),'k');  hold on 
% a=[min([y,mfit']), max([y,mfit'])]; line(a,a); hold on
 scatter(mfit(itrain),mfit(itrain),'b*'); hold on;
 for j=itrain
   plot([mfit(j) mfit(j)],[sl(j),su(j)],'b:','LineWidth',2.0)
 end;
 title('CV predicted outcomes')
 ylabel('Outcome'); 
 xlabel('Predicted values'); 
 hold off;
 
%
 
end;
 
alli=reshape(alli,length(alli),1); 
 
 