%Spherical mixture-Gaussian learner
%input: x, w_initial, mu_initial, var_initial
%output: w, mu, var
function [w,mu,var1]=learnSMG(x,w,mu,var1)
%LEARNSMG  Iteratively re-estimate the variances of a spherical Gaussian mixture.
%
%   [w,mu,var1] = learnSMG(x,w,mu,var1)
%
%   Inputs
%     x    - N-by-D data matrix, one data point per row.
%     w    - mixture weights, indexed as w(k) for k = 1..K.
%     mu   - K-by-D matrix of component means.
%     var1 - K-by-D matrix of per-dimension variances (initial values).
%
%   Outputs
%     w    - returned unchanged (the weight update is disabled, see below).
%     mu   - returned unchanged (the mean update is disabled, see below).
%     var1 - K-by-D matrix; every row holds one variance value repeated D
%            times (spherical covariance), clamped to [var_floor, var_ceil(k)].
%
%   The loop stops when the value returned by lp_mg changes by no more than
%   lpthresh between iterations, or after maxIter iterations.
%
%   Requires lp_mg (defined outside this file) and mvnpdf.

maxIter=20;
LZERO=1e-50;      % initial "previous" value for the convergence test
lpthresh=1e-3;
N=size(x,1);      % number of data points
D=size(x,2);      % dimension
K=size(mu,1);     % number of clusters

% Variance floor: (smallest per-dimension data range / 100)^2.
% The explicit dimension argument keeps the reduction over rows even when
% x has a single row (max(x) would otherwise reduce across columns).
range_x=max(x,[],1)-min(x,[],1);
var_floor=(min(range_x)/100)^2;

% Variance ceiling per cluster: restrict the standard deviation to one
% third of the distance to the closest other mean, so that a cluster
% won't cross the center of its closest neighbor. A cluster with no
% neighbor (K==1) gets an infinite ceiling, i.e. no restriction.
var_ceil=inf(K,1);
for k=1:K
    nearest=Inf;
    for l=1:K
        if l~=k
            delta=mu(k,:)-mu(l,:);
            nearest=min(nearest,sqrt(sum(delta.*delta)));
        end
    end
    var_ceil(k)=(nearest/3)^2;
end

iter=0;
lpold=LZERO;
lpnew=lp_mg(x,w,mu,var1);
gamma=zeros(N,K);
while (abs(lpnew-lpold)>lpthresh) && (iter<maxIter)

    % Responsibilities: weighted component densities, normalized per point.
    for k=1:K
        gamma(:,k)=w(k)*mvnpdf(x,mu(k,:),diag(var1(k,:),0));
    end
    rowsum=sum(gamma,2);
    % A point whose density underflows to 0 under every component would
    % yield 0/0 = NaN; leave its responsibilities at 0 instead so it simply
    % does not contribute to the update.
    rowsum(rowsum==0)=1;
    gamma=gamma./repmat(rowsum,[1 K]);

    % Mean update is intentionally disabled (var_ceil above is computed
    % once from the input means):
    %   mu(k,:)=sum(repmat(gamma(:,k),[1 D]).*x)/sum(gamma(:,k));

    % Variance update.
    for k=1:K
        % Responsibility-weighted sum of squared deviations per dimension;
        % this equals the diagonal of sum_l gamma(l,k)*delta_l'*delta_l.
        delta=x-repmat(mu(k,:),[N 1]);
        sq=sum(repmat(gamma(:,k),[1 D]).*(delta.*delta),1);
        denom=sum(gamma(:,k));
        if denom>0
            % spherical: use the largest per-dimension variance
            vs=max(sq)/denom;
        else
            % no point is assigned to this cluster: keep its current
            % variance rather than reusing another cluster's value
            vs=max(var1(k,:));
        end
        % apply ceiling first, then floor (the floor wins on conflict)
        vs=min(vs,var_ceil(k));
        vs=max(vs,var_floor);
        var1(k,:)=vs*ones(1,D);
    end

    % Weight update is intentionally disabled:
    %   w(k)=sum(gamma(:,k))/sum(sum(gamma));

    lpold=lpnew;
    lpnew=lp_mg(x,w,mu,var1);
    iter=iter+1;
end

