%    SVD followed by fuzzy K-means clustering
%    Copyright (C) 2007 Ken Chen, William A. McLaughlin, and Wei Wang 
 
%    This program is free software: you can redistribute it and/or modify
%    it under the terms of the GNU General Public License as published by
%    the Free Software Foundation, either version 3 of the License, or
%    (at your option) any later version.

%    This program is distributed in the hope that it will be useful,
%    but WITHOUT ANY WARRANTY; without even the implied warranty of
%    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%    GNU General Public License for more details.

%    You should have received a copy of the GNU General Public License
%    along with this program.  If not, see <http://www.gnu.org/licenses/>.

% Driver script: for a growing window of SVD components (columns ldim..dim
% of U and V) cluster the proteins with K-means, refine the cluster variances
% with learnSMG, label proteins and domains per cluster, and write one
% protein file and one domain file per cluster.
%
% Expected in the workspace before running (not defined in this file):
%   U, V        - matrices indexed as U(:,ldim:dim), V(:,ldim:dim)
%   SD          - vector indexed as SD(ldim:dim); must be a row so that
%                 repmat(...,[N_Proteins 1]) matches the shape of x
%   A           - matrix indexed as A(proteins,domains)
%   N_Proteins  - number of rows used for U / Genelabels
%   N_Domains   - number of rows used for V / TFlabels
% Helpers defined elsewhere: learnKmean, learnSMG.
%
% Output per dim and cluster k (dim is zero-padded to three digits):
%   Protein_out<ldim>-<dim>.<k>  lines: protein index <TAB> magnitude
%   Domain_out<ldim>-<dim>.<k>   lines: domain index <TAB> count <TAB> magnitude
% The whole workspace is checkpointed to 'currentrun' once per dim.

ldim=1;            % first component of the window (kept fixed)
k_opt=2;           % best cluster count so far; the search range follows it
mu_old=[];         % centroids of the previous dim, used to warm-start K-means
var_old=[];        % variances of the previous dim (buffered, not read below)
centerclip=.45;    % magnitude threshold factor (in units of std(mag_Gene))
mu_d={}; var_d={}; % per-dim history of centroids and variances

% Seed the generator ONCE. Reseeding inside the trial loop from the clock
% (0.01 s resolution here) gives identical seeds to trials that finish within
% the same tick, which makes those trials exact duplicates of each other.
rand('state',sum(100*clock));

% Never index past the components that actually exist.
max_dim=min([300 size(U,2) size(V,2) numel(SD)]);

for(dim=2:max_dim)
    %ldim=max(2,dim-2);
    info=sprintf('%d-%d\n',ldim,dim);
    disp(info);

    % Select proteins whose weighted magnitude in the current window is
    % large enough; only those rows (oc) take part in the clustering.
    x=U(:,ldim:dim);
    xw=x.*repmat(SD(ldim:dim),[N_Proteins 1]);
    mag_Gene=sum(xw.*xw,2);
    Genevec=mag_Gene>std(mag_Gene)*centerclip;
    oc=find(Genevec>0);
    xc=x(oc,:);
    %N_trials=round(dim*0.1)+20;
    N_trials=10;

    D=size(xc,2);
    N=size(xc,1);

    %K-mean clustering; find optimal number of clusters;
    mu_k={};CH_k=[];
    ik=0;
    k_low=max(k_opt-2,2);
    for(k=k_low:k_opt+5)  %possible number of clusters
        % Reset the per-k best so a k without any valid trial can never
        % inherit centroids (with the wrong row count) from a previous k.
        CHtrial=-Inf;
        mu_M=[];
        for(trial=1:N_trials)  %trials search for max CH index
            % Random centroids centred on zero, scaled by half the
            % per-column standard deviation of x.
            mu=(rand(k,D)-0.5).*repmat(std(x)*.5,[k 1]);

            %initialize mu with mu_old
            if(size(mu_old,1)>0)
                %[o,p]=sort(var_old(:,1));
                %smaller variance clusters are more trustable?
                %mu_old=mu_old(p);
                % Overwrite the leading rows/columns with the previous
                % centroids; any extra rows or columns stay random.
                n_keep=min(k,size(mu_old,1));
                mu(1:n_keep,1:size(mu_old,2))=mu_old(1:n_keep,:);
            end

            % learnKmean returns refined centroids and a score (CHidx).
            % NOTE(review): meaning of the third argument (5) is defined in
            % learnKmean - presumably an iteration count; confirm there.
            [mu, CHidx]=learnKmean(xc,mu,5);
            if(CHidx>CHtrial)   % NaN scores never pass this comparison
                mu_M=mu;
                CHtrial=CHidx;
            end
        end
        if(isempty(mu_M))
            warning('No valid CH index for k=%d at dim=%d; skipping this k.',k,dim);
            continue;
        end
        ik=ik+1;
        mu_k{ik}=mu_M; CH_k=[CH_k CHtrial];
    end

    if(isempty(CH_k))
        error('K-means produced no valid clustering for dim=%d.',dim);
    end

    % Keep the candidate with the highest score; its row count is the new k_opt.
    [v,p]=max(CH_k);
    mu=mu_k{p};
    k_opt=size(mu,1);

    %spherical MG learning: only update var
    w=ones(k_opt,1)/k_opt;
    var1=ones(k_opt,D)*.01;
    [w,mu,var1]=learnSMG(xc,w,mu,var1);
    %buffer centroid
    mu_d{dim}=mu;
    mu_old=mu;
    var_d{dim}=var1;
    var_old=var1;

    %label Proteins
    % A selected protein belongs to cluster k when its Euclidean distance to
    % the centroid is below 3*sqrt(var1(k,1)); a protein may get several labels.
    Genelabels=zeros(N_Proteins,k_opt);
    dist=zeros(N,k_opt);
    for(k=1:k_opt)
        delta=xc-repmat(mu(k,:),[N 1]);
        dist(:,k)=sqrt(sum(delta.*delta,2));
        o=find(dist(:,k)<sqrt(var1(k,1))*3);
        Genelabels(oc(o),k)=1;
    end

    %label Domains
    y=V(:,ldim:dim);
    % Row-wise squared norm; same values as diag(y*y') without building the
    % full N_Domains-by-N_Domains product.
    mag_TF=sum(y.*y,2);

    % TFlabels(l,k) = sum of A(p,l) over the proteins p labelled with cluster k.
    TFlabels=zeros(N_Domains,k_opt);
    for(k=1:k_opt)
        op=find(Genelabels(:,k)>0);
        TFlabels(:,k)=sum(A(op,1:N_Domains),1).';
    end

    %output
    finame=strcat('out',int2str(ldim));
    iname=sprintf('%03d',dim);   % zero-pad to three digits
    fgname=strcat('Protein_',finame,'-',iname);
    ftname=strcat('Domain_',finame,'-',iname);

    for(k=1:k_opt)
        % Proteins of cluster k, largest magnitude first.
        fname=strcat(fgname,'.',int2str(k));
        fid=fopen(fname,'wt');
        if(fid<0)
            error('Cannot open output file %s for writing.',fname);
        end
        op=find(Genelabels(:,k)>0);
        [vs,ps]=sort(mag_Gene(op));
        for(l=length(ps):-1:1)
            fprintf(fid,'%d\t%.6f\n',op(ps(l)),vs(l));
        end
        fclose(fid);

        % Domains with a non-zero count for cluster k, largest count first.
        fname=strcat(ftname,'.',int2str(k));
        fid=fopen(fname,'wt');
        if(fid<0)
            error('Cannot open output file %s for writing.',fname);
        end
        op=find(TFlabels(:,k)>0);
        [vs,ps]=sort(TFlabels(op,k));
        for(l=length(ps):-1:1)
            fprintf(fid,'%d\t%d\t%.6f\n',op(ps(l)),vs(l),mag_TF(op(ps(l))));
        end
        fclose(fid);
    end

    % Checkpoint the workspace once per dim, after all files of this dim
    % are written (previously this ran once per cluster).
    save('currentrun');
    %break;
end
    
    
