/*  
 * 	lasso/lars.c
 * 
 *  $Author: baptiste $, $Date: 2008-05-13 15:22:08 $, $Version$
 *
 *  Libgdl : a C library for statistical genetics
 * 
 *  Copyright (C) 2003-2008  Jean-Baptiste Veyrieras, INRA, France.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * 
 */

#include <math.h>

#include <gdl/gdl_common.h>
#include <gdl/gdl_errno.h>
#include <gdl/gdl_mode.h>
#include <gdl/gdl_string.h>
#include <gdl/gdl_math.h>
#include <gdl/gdl_permutation.h>
#include <gdl/gdl_vector.h>
#include <gdl/gdl_matrix.h>
#include <gdl/gdl_blas.h>
#include <gdl/gdl_cblas.h>
#include <gdl/gdl_statistics_double.h>

#include <gdl/gdl_lasso.h>
#include <gdl/gdl_elastic_net.h>

gdl_enet_workspace *
gdl_enet_workspace_alloc (const size_t n, const size_t m)
{
	gdl_enet_workspace * w = gdl_lasso_workspace_alloc (n, m);
    // U and residuals are different from lasso
 	gdl_vector_free (w->residuals);
 	gdl_vector_free (w->U);
	w->residuals = gdl_vector_alloc (n + m);
	w->U         = gdl_vector_alloc (n + m);
 	return w;
}

void
gdl_enet_workspace_free (gdl_enet_workspace * w)
{
	gdl_lasso_workspace_free (w);
}

#define VECTOR_SET(X,i,y)(*((X)->data+(i)*(X)->stride)=(y))
#define VECTOR_GET(X,i)(*((X)->data+(i)*(X)->stride))
#define VECTOR_PTR(X,i)((X)->data+(i)*(X)->stride)
#define MATRIX_SET(X,i,j,y)(*((X)->data+((i)*(X)->tda+(j)))=(y))
#define MATRIX_GET(X,i,j)(*((X)->data+((i)*(X)->tda+(j))))

typedef struct
{
	gdl_matrix * matrix;
	size_t size;
	size_t rank;
} gdl_enet_R;

static gdl_enet_R * 
gdl_enet_R_alloc (const size_t n, const size_t m, const size_t r)
{
	gdl_enet_R * R;
	
	R = GDL_CALLOC (gdl_enet_R, 1);
	
	R->matrix = gdl_matrix_calloc (n, n);
	R->size   = n;
	R->matrix->size1 = R->matrix->size2 = m;
	R->rank   = r;
	
	return R;
}

static void
gdl_enet_R_free (gdl_enet_R * R)
{
	if (R)
	{
		R->matrix->size1 = R->matrix->size2 = R->size;
		gdl_matrix_free (R->matrix);
		GDL_FREE (R);
	}	
}
static void
gdl_enet_R_up (gdl_enet_R * R, const size_t r)
{
	(R->matrix->size1)++;
	(R->matrix->size2)++;
	R->rank = r;	
}

static void
gdl_enet_R_down (gdl_enet_R * R, const size_t r)
{
	(R->matrix->size1)--;
	(R->matrix->size2)--;
	R->rank = r;
}

static gdl_enet_R *
gdl_enet_updateR (gdl_matrix * X,
         gdl_enet_R * R,
         const size_t inew,
         const size_t * active,
         const size_t nactive,
         const double eps,
         const double lambda)
{
	size_t i,j,rank;
	double norm_xnew, xnew, rpp;
	double * dptr;
	gdl_vector * Xtx;
	
	gdl_vector_view xc = gdl_matrix_column (X, inew);
	gdl_blas_ddot (&(xc.vector), &(xc.vector), &xnew);
	xnew += lambda;
	xnew /= (1.0 + lambda);
	norm_xnew = sqrt (xnew);
	
	//printf ("updateR::norm_xnew = %g\n", norm_xnew);
	
	if (!R)
	{
		// m = X->size2 in any cases ;-)
        R = gdl_enet_R_alloc (X->size2,1,1);
        MATRIX_SET (R->matrix, 0, 0, norm_xnew);
        return R;
    }
    //printf ("nactive = %d\n", nactive);
    Xtx = gdl_vector_alloc (nactive); 
    gdl_vector_view xc1 = gdl_matrix_column (X, inew);
	for(j = 0; j < nactive; j++)
	{
		gdl_vector_view xc2 = gdl_matrix_column (X, active[j]);
		dptr = VECTOR_PTR(Xtx, j);
		gdl_blas_ddot (&(xc1.vector), &(xc2.vector), dptr);
		(*dptr) /= (1.+lambda);		
	}
    //r <- backsolve(R, Xtx, transpose=TRUE)
    gdl_blas_dtrsv (CblasUpper, CblasTrans, CblasNonUnit, R->matrix, Xtx);
//    printf("r (%d) = ", nactive);
//    for(j = 0; j < nactive; j++)
//    {
//    	printf(" %g", VECTOR_GET(Xtx, j));	
//    }
//    printf("\n");
    //  rpp <- norm.xnew^2 - sum(r^2)
    gdl_blas_ddot (Xtx, Xtx, &rpp);
    //printf("updateR::sum(r^2) = %g\n", rpp);
    rpp = norm_xnew*norm_xnew - rpp;
    //printf("updateR::rpp = %g\n", rpp);
    rank = R->rank;
   
    if (rpp <= eps)
    {
        rpp = eps;
    }
    else
    {
        rpp  = sqrt(rpp);
        rank = rank + 1;
    }
    
    gdl_enet_R_up (R, rank);
    for(i = 0; i < Xtx->size; i++)
    {
    	//printf ("xt %d %g\n", i, VECTOR_GET (Xtx, i));
    	MATRIX_SET(R->matrix, i, R->matrix->size1-1, VECTOR_GET (Xtx, i));
    }
    MATRIX_SET(R->matrix, i, R->matrix->size1-1, rpp);
    
    gdl_vector_free (Xtx);
    
    return (R);
}

static void
gdl_enet_delcol_internal (gdl_matrix * r,
		                   const size_t p,
		                   const size_t k)
//		                   gdl_matrix * z,
//		                   const size_t n, // p = r->size1
//		                   const size_t nz) // nz = 1
{
  size_t p1,i,j;
  double a,b,c,s,tau;
  
  p1 = p-1;
  i  = k;
  while (i < p1)
  {
      a = MATRIX_GET(r, i, i);
      b = MATRIX_GET(r, (i+1), i);
      if (b != 0.)
	  {
	  	  if (fabs(b) > fabs(a))
	  	  {
	  	  	 tau = -a/b;
	      	 s = 1.0/sqrt(1.0+tau*tau);
	      	 c = s * tau;
	  	  }
	  	  else
	  	  {
		      tau = -b/a;
		      c = 1.0/sqrt(1.0+tau*tau);
		      s = c * tau;
	  	  }
	      MATRIX_SET(r, i, i, c*a - s*b);
	      MATRIX_SET(r, (i+1), i, s*a + c*b);
	      j = i+1;
		  while (j < p1)
		  {
		  	 a = MATRIX_GET(r, i, j);
	     	 b = MATRIX_GET(r, (i+1), j);
	     	 MATRIX_SET(r, i, j, c*a - s*b);
	      	 MATRIX_SET(r, (i+1), j, s*a + c*b);
	      	 j = j+1;
		  }
//	      j = 0;
//	      while(j < nz)
//	      {
//		      a = MATRIX_GET(z, i, j);
//		      b = MATRIX_GET(z, (i+1), j);
//		      MATRIX_SET(z, i, j, c*a - s*b);
//		      MATRIX_SET(z, (i+1), j, s*a + c*b);
//		      j = j+1;
//	      }
	  }
  	  i = i+1;
  }
}
/**
 * r and z are modified by the routine 
 */
static void
gdl_enet_delcol (gdl_matrix * r, gdl_matrix * z, const size_t k)
{
	size_t i, j, p = r->size1;
	//  r <- r[, -k, drop = FALSE]
	for(j = k; j < r->size2-1; j++)
	{
		for(i = 0; i < r->size1; i++)
		{
			MATRIX_SET(r, i, j, MATRIX_GET(r, i, (j+1)));
		}
	}
	gdl_enet_delcol_internal (r, p, k);//, z, z->size1, z->size2);
//	printf("DELCOL(Z) = ");
//	for(i = 0; i < z->size1; i++)
//	{
//		printf(" %g",MATRIX_GET (z, i, 0));
//	}
//	printf("\nDELCOL(r, k = %d)\n", k);
//	for(i = 0; i < r->size1; i++)
//	{
//		for(j = 0; j < r->size2-1; j++)
//		{
//			printf (" %g", MATRIX_GET(r, i, j));
//		}
//		printf("\n");
//	}
//	printf("\n");
//	//(r->size2++);
}
/**
 * R is modified by the routine
 */
static gdl_enet_R * 
gdl_enet_R_downdate (gdl_enet_R * R, const size_t k)
{
    size_t p = R->matrix->size1;
    if (p == 1)
    {
        return 0;
    }    
    // remove line -p from R 
    //gdl_matrix * Z = gdl_matrix_alloc (p, 1);
    //gdl_matrix_set_all (Z, 1.0);
    gdl_enet_delcol (R->matrix, 0, k);
    gdl_enet_R_down (R, p-1);
    //gdl_matrix_free (Z);
    return R;
}

/**
 * gdl_enet_perform
 * 
 * Elastic Net method
 * 
 * References:
 *   
 *    Zou and Hastie (2005) "Regularization and Variable Selection via
 *    the Elastic Net", Journal of the Royal Statistical Society, Series
 *    B, 67, 301-320.
 * 
 * Arguments:
 * 
 *  X: matrix of predictors
 *
 *  y: response
 * 
 *  lambda: Quadratic penalty parameter. lambda=0 performs the Lasso fit.
 *
 *  center: If = gdl_true, each variable is centered to have 0 mean. Default is = gdl_true
 * 
 *  normalize: If = gdl_true, each variable is standardized to have unit L2 norm,
 *         otherwise it is left alone. Default is = gdl_true.
 *  
 *  eps: An effective zero
 *
 *  max_steps: Limit the number of steps taken; the default is '8 * min(m,
 *         n-intercept)', with m the number of variables, and n the
 *         number of samples. For 'algo="lar"' or 'algo="stepwise"', the
 *         maximum number of steps is 'min(m,n-intercept)'. For
 *         'algo="lasso"' and especially 'algo="forward.stagewise"',
 *         there can be many more terms, because although no more than
 *         'min(m,n-intercept)' variables can be active during any step,
 *         variables are frequently droppped and added as the algorithm
 *         proceeds. Although the default usually guarantees that the
 *         algorithm has proceeded to the saturated fit, users should
 *         check.
 *
 *  store_beta: If = gdl_true, store the estimate of the regression coefficients at each iteration
 *              For large data set, setting store_beta = gdl_false can help saving both memory and time
 * 
 *  logger: If not NULL, the function prints out its progress on the given stream
 *
 **/
int
gdl_enet_perform (gdl_lasso_workspace * work,
                  gdl_matrix * X,                   
                  gdl_vector * y,
                  const double lambda,
                  const gdl_boolean center,
                  const gdl_boolean normalize,
                  const double eps,
                  size_t max_steps,
                  const gdl_boolean store_beta,
                  FILE * logger)
{
	const size_t n = X->size1;
	const size_t m = X->size2;
    size_t i,j,ja,ji,aj,k,jj,inew,nnewx,anydrop=0,free_Gram=0;
    int * df, kdf, status = GDL_CONTINUE;
    size_t * newx, * newxi;
    double a, b, c, g, w, A, t, * normx, tmp, min, zmin, Cmax, ssy, gamhat, * dptr, * RSS;
    gdl_enet_R * R = 0;
   	gdl_matrix * beta_store = 0;
   	gdl_vector_view Xc;
	gdl_vector * C, * Cvec, * residuals, * Sign, * Gi1, * directions, * W, * U, * av, * beta, * beta_new, * beta_swap;
	gdl_lasso_model * model;
	
	/**********************************************************
	 * Check size
	 **********************************************************/
	if (y->size > work->N)
	{
		GDL_ERROR_VAL ("gdl_enet_perform [ ERROR ]\t[ LARS-EN workspace is too small ]\t[ EXIT]\n", GDL_FAILURE, GDL_FAILURE); 
	}
	if (X->size2 > work->M)
	{
		GDL_ERROR_VAL ("gdl_enet_perform [ ERROR ]\t[ LARS-EN workspace is too small ]\t[ EXIT]\n", GDL_FAILURE, GDL_FAILURE);
	}
	/**********************************************************
	 * Workspace allocation
	 **********************************************************/
	model     = work->model;
	newx      = work->newx;nnewx=0;
	normx     = work->normx;
	Sign      = work->Sign;work->Sign->size=X->size2;
	Cvec      = work->Cvec;work->Cvec->size=X->size2;
	C         = work->C;work->C->size=X->size2;
	W         = work->W;work->W->size=X->size2;
	Gi1       = work->Gi1;work->Gi1->size=X->size2;
	av        = work->av;work->av->size=X->size2;
	beta      = work->beta;work->beta->size=X->size2;
	beta_new  = work->beta_new;work->beta_new->size=X->size2;
	residuals = work->residuals;work->residuals->size=y->size+X->size2;
	U         = work->U;work->U->size=y->size+X->size2;
	df        = work->df;
	RSS       = work->RSS;
	gdl_lasso_model_reset (model);
	gdl_vector_set_zero (U);
	gdl_vector_set_zero (Sign);
	gdl_vector_set_zero (Cvec);
	gdl_vector_set_zero (C);
	gdl_vector_set_zero (W);
	gdl_vector_set_zero (Gi1);
	gdl_vector_set_zero (av);
	gdl_vector_set_zero (beta);
	gdl_vector_set_zero (beta_new);
	/**********************************************************
	 * PREPROCESSING
	 **********************************************************/
	if (center)
	{
		/******************************************************
		 * Center the predictors 
		 * X = X - mu(X)
		 ******************************************************/
		for(j = 0; j < m; j++)
		{
			Xc = gdl_matrix_column (X, j);
			work->mux[j] = gdl_stats_mean (Xc.vector.data, Xc.vector.stride, Xc.vector.size);
			gdl_vector_add_constant (&(Xc.vector), (-1.0)*work->mux[j]);
			//printf ("%d %g\n", j, mu);
		}
		/******************************************************
		 * Center the response
		 * y = y - mu(y)
		 ******************************************************/
		 work->muy = gdl_stats_mean (y->data, y->stride, y->size);
		 gdl_vector_add_constant (y, (-1.0)*work->muy);
	}
	if (normalize)
    {
    	//double sd;
    	/******************************************************
		 * normalize the predictors
		 ******************************************************/
		for(j = 0; j < m; j++)
		{
			Xc = gdl_matrix_column (X, j);
			gdl_blas_ddot (&(Xc.vector), &(Xc.vector), normx+j);
			normx[j]=sqrt(normx[j]);
			if (normx[j]/sqrt(Xc.vector.size) < eps)
			{
				gdl_lasso_ignore_predictor (model, j);
				if (logger)
				{
					fprintf (logger, "gdl_enet_perform [ Step 0 ] Variable %d with standard deviation = %g < eps = %g [ dropped for good ]\n", j, normx[j], eps);
					fflush (logger);
				}
			}
			else
			{
				gdl_lasso_inactive_predictor (model, j);
			}
			gdl_vector_scale (&(Xc.vector), 1.0/normx[j]);
		}
    }
    else
    {
    	gdl_lasso_inactive_all (model);
    }
   /*******************************************************
    * Compute some summary satistics
	******************************************************/
	double d1 = sqrt (lambda);
	double d2 = 1.0/sqrt(1.0 + lambda);
	size_t maxvars = (lambda == 0.0) ? GDL_MIN (m, n-1) : m;
	
	if (!max_steps)
    {
    	max_steps = 50 * maxvars;
    }
    /********************************************
     * Additional Workspace
     *********************************************/
    if (store_beta)
    {
    	if (work->beta_pure)
    	{
    	 	for(i = 0; i < work->max_steps; i++)
    	 		GDL_FREE (work->beta_pure[i]);	
    	 	GDL_FREE (work->beta_pure);
    	}
    	if (work->beta_pure_idx)
    		GDL_FREE (work->beta_pure_idx);
    	if (work->beta_pure_size)
    		GDL_FREE (work->beta_pure_size);
    	work->beta_pure      = GDL_MALLOC (double *, max_steps);
    	work->beta_pure_idx  = GDL_MALLOC (size_t *, max_steps);
    	work->beta_pure_size = GDL_MALLOC (size_t, max_steps);
    }
    
    work->max_steps = max_steps;
    
    if (work->df) 
    	GDL_FREE (work->df);
    work->df = df = GDL_CALLOC (int, max_steps);
	if (work->RSS)
		GDL_FREE (work->RSS);
	work->RSS = RSS = GDL_CALLOC (double, max_steps);
	if (work->L1norm) 
    	GDL_FREE (work->L1norm);
    work->L1norm = GDL_CALLOC (double, max_steps);
    if (work->penalty)
    	GDL_FREE (work->penalty);
    work->penalty =	GDL_CALLOC (double, max_steps+1);
    
    for(j = 0; j < m; j++)
	{
		Xc      = gdl_matrix_column (X, j);
		dptr    = VECTOR_PTR (Cvec, j);
		(*dptr) = 0;
		gdl_blas_ddot (&(Xc.vector), y, dptr);
		(*dptr) *= d2; 
		if (fabs(*dptr) > work->penalty[0]) 
			 work->penalty[0] = fabs(*dptr);
	}
	work->ssy=0;
	gdl_blas_ddot (y, y, &(work->ssy));
	// copy y into residuals
	gdl_vector_set_zero (residuals);
	residuals->size = y->size;
	gdl_vector_memcpy (residuals, y);
	residuals->size = y->size + X->size2;
	/********************************************
     * Initialization
     ******************************************/
    status = GDL_CONTINUE;
    k = 0;
    /********************************************
     * Iterations
     ******************************************/
    while(status == GDL_CONTINUE)
    {
//    	printf("********\n");
//    	printf("** Step %d\n", k);
//    	printf("********\n");
    	kdf = 0;
    	Cmax = 0.0;
    	for(j = 0; j < model->ninactive; j++)
    	{
    		t = VECTOR_GET (Cvec, model->inactives[j]);
    		VECTOR_SET (C, j, t);
    		t = fabs(t);
			if (t > Cmax)
			{
				Cmax = t;
			}
    	}
    	//printf ("Cmax = %g\n", Cmax);
    	C->size = j;
    	if (anydrop==0) //
        {
        	t = Cmax - eps;
        	for(j = nnewx = 0; j < C->size; j++)
        	{
        		if (fabs(VECTOR_GET (C, j)) >= t)
        		{
        			newx[nnewx++] = model->inactives[j];
        		}
        	}
            for(j = 0; j < nnewx; j++)
            {
            	R = gdl_enet_updateR (X, R, newx[j], model->actives, model->nactive, eps, lambda);
                if (R->rank == model->nactive)
                {
                	// nR = seq(length(active))
                	// R <- R[nR, nR, drop = FALSE]
                	// come back to previous R
                	gdl_enet_R_down (R, model->nactive);
                	// ignore this variable
                	gdl_lasso_ignore_predictor (model, newx[j]);
                	//_add_to_ordered_array (ignores, &nignore, newx[i]);
                	if (logger)
                	{
                       fprintf (logger, "gdl_enet_perform [ Step %d ]\t[ Variable %d is collinear ]\t[ DROPPED ]\n", k, newx[j]);
                	}
                	kdf--;
                }
                else
                {
                  	VECTOR_SET(Sign, model->nactive, GDL_SIGN(VECTOR_GET(Cvec, newx[j])));
                  	Sign->size = model->nactive+1;
                  	// active this variable
                  	gdl_lasso_active_predictor (model, newx[j]);
                  	if (logger)
                    {
                    	fprintf (logger, "gdl_enet_perform [ Step %d ]\t[ Variable %d ]\t[ ADDED ]\n", k, newx[j]);
                    }
                    kdf++;
                    if (model->nactive >= GDL_MIN(m - model->nignore, n - center))
    	 				break;    
                }
            }
        }
        else 
        {
        	kdf=-anydrop;
        	anydrop=0;
        }
        Gi1->size = Sign->size;
        gdl_vector_memcpy (Gi1, Sign);
        gdl_blas_dtrsv (CblasUpper, CblasTrans, CblasNonUnit, R->matrix, Gi1);
        gdl_blas_dtrsv (CblasUpper, CblasNoTrans, CblasNonUnit, R->matrix, Gi1);
        gdl_blas_ddot (Gi1, Sign, &A);
        A = 1.0/sqrt(A);
        W->size = Gi1->size;
        gdl_vector_memcpy (W, Gi1);
        gdl_vector_scale (W, A);
        //printf ("u1 ");
        for(i = 0; i < n; i++)
    	{
    		dptr = VECTOR_PTR (U, i);
    		(*dptr) = 0.;
        	for(j = 0; j < model->nactive; j++)
        	{
    			(*dptr) += MATRIX_GET (X, i, model->actives[j]) * VECTOR_GET (W, j) * d2;
    		}
    		//if (i < 10) printf (" %g", *dptr);
    	}
    	//printf ("\n");
    	//printf ("\nu2 ");
    	for(j = 0; j < model->nactive; j++)
        {
    		dptr    = VECTOR_PTR (U, n+model->actives[j]);
    		(*dptr) = d1 * d2 * VECTOR_GET (W, j);
    	    //if (j < 10) printf (" %g", *dptr);     	
    	}
    	//printf ("\n");
    	if (lambda > 0)
    	{
    		maxvars = m - model->nignore;	
    	}
    	if (lambda == 0)
    	{
    		maxvars = GDL_MIN (m - model->nignore, n - 1);	
    	} 
    	//printf (">>>>> %d >= %d ??\n", model->nactive, maxvars);
    	if (model->nactive >= maxvars)
        {
        	gamhat = Cmax/A;
        }
        else
        {
        	//printf ("a = ");
            for(j = 0; j < model->ninactive; j++)
            {
            	dptr    = VECTOR_PTR (av, j);
                (*dptr) = 0;
                for(i = 0; i < n; i++)
                {
                	(*dptr) += VECTOR_GET (U, i) * MATRIX_GET (X, i, model->inactives[j]);  
                }
                (*dptr) += d1 * VECTOR_GET (U, n + model->inactives[j]);
                (*dptr) *= d2;
                //if (j < 10) printf(" %g", *dptr);
            }
            //printf ("\n");
            av->size = model->ninactive;
            //printf ("Cmax = %g\nA = %g min = %g\n", Cmax, A, Cmax/A);
            min = Cmax/A;
            for(j = 0; j < model->ninactive; j++)
            {
	        	c = VECTOR_GET (Cvec, model->inactives[j]);
	        	a = VECTOR_GET (av, j);
	        	//printf (" (%g, %g)", c, a);
	            t = (Cmax - c)/(A - a);
	            if (t > eps && t < min)
	            {
	            	min = t;
	            }
	            t = (Cmax + c)/(A + a);
	            if (t > eps && t < min)
	            {
	            	min = t;
	            }
            }
            //printf ("\nMIN %g\n", min);
            gamhat = min;
        }
        zmin = gamhat;
        for(i = 0; i < model->nactive; i++)
        {
        	b = VECTOR_GET (beta, model->actives[i]);
        	w = VECTOR_GET (W, i);
        	t = (-1.0)*(b/w);
        	if (t > eps && t < gamhat)
        	{
        		zmin = t;
        	}
        }
        if (zmin < gamhat)
        {
        	gamhat = zmin;
        	for(i = 0; i < model->nactive; i++)
        	{
        		b = VECTOR_GET (beta, model->actives[i]);
        		w = VECTOR_GET (W, i);
        		t = (-1.0)*(b/w);
        		if (t == zmin)
        		{
        			gdl_lasso_drop_cache_predictor (model, model->actives[i]);
        		} 	
        	}
        }
        //printf("gamhat = %g\n", gamhat);
        for(i = 0; i < model->nactive; i++)
        {
        	b = VECTOR_GET (beta, model->actives[i]);
        	w = VECTOR_GET (W, i);
        	//printf (" %g (%d)", b  + gamhat * w, model->actives[i]);
        	VECTOR_SET(beta_new, model->actives[i], (b  + gamhat * w));
        	work->L1norm[k] += fabs(VECTOR_GET(beta_new, model->actives[i]));
        }
        work->L1norm[k]   /= d2;
        work->penalty[k+1] = work->penalty[k] - fabs(gamhat * A);
        //printf("residuals ");
        for(i = 0; i < n; i++)
    	{
    		t = VECTOR_GET (residuals, i);
    		t -= gamhat * VECTOR_GET (U, i);
    		VECTOR_SET (residuals, i, t);
    		//if (i < 10) printf (" %g", t);
    		//if (i < 10) printf(" %g", VECTOR_GET (residuals, i));
    	}
    	//printf("\n");
    	for(j = 0; j < m; j++)
    	{
    		t = VECTOR_GET (residuals, n + j);
    		t -= gamhat * VECTOR_GET (U, n + j);
    		VECTOR_SET (residuals, n + j, t);
    	}
    	//printf("Cvec ");
    	for(i = 0; i < m; i++)
		{
			//Xc = gdl_matrix_column (X, i);
			dptr = gdl_vector_ptr (Cvec, i);
			//gdl_blas_ddot (&(Xc.vector), residuals, dptr);
			t = 0;
			for(j = 0; j < n; j++)
			{
				t += MATRIX_GET (X, j, i)*VECTOR_GET (residuals, j);
			}
			//printf ("left term = %g\n", t);
		    t += d1 * VECTOR_GET (residuals, n + i);
		    t *= d2;
		    *dptr = t;
			//printf(" %g", *dptr);
		}
		//printf("\n");
		anydrop=0;
        for(i = 0; i < model->ndrop; i++)
        {
        	if (logger)
        	{
        		fprintf(logger, "gdl_enet_perform [ LARS-EN Step %d ]\t[ Variable %d ]\t[ DROPPED ]\n",  k+1, model->actives[model->drops[i]]);
        		fflush (logger);	
        	}
        	
        	R = gdl_enet_R_downdate (R, model->drops[i]);
        	
        	VECTOR_SET (beta, model->actives[model->drops[i]], 0.0);
        	// remove them from Sign
			for(ji = model->drops[i]; ji < model->nactive-1; ji++)
			{
				VECTOR_SET (Sign, ji, VECTOR_GET (Sign, ji+1));
			}
			(Sign->size)--;
        	anydrop++;
        }
        gdl_lasso_drop_apply_predictor (model);            	           
        /***********************************************
         * TERMINATION
         ***********************************************/
         // RSS
         for(i = 0; i < n; i++)
    	 {
    	 	t = 0;
    	 	for(j = 0; j < model->nactive; j++)
    	 	{
    	 		 t += MATRIX_GET (X, i, model->actives[j]) * VECTOR_GET (beta_new, model->actives[j]) / d2; 
    	 	}
    	 	t = VECTOR_GET (y, i) - t;
    	 	RSS[k] += t * t;
    	 }
    	 // DF
    	 df[k] = model->nactive;
    	 // Swap beta and beta_new
    	 if (store_beta)
    	 {
    	 	 work->beta_pure_size[k] = model->nactive;
    	 	 work->beta_pure[k]      = GDL_MALLOC (double, model->nactive);
    	 	 work->beta_pure_idx[k]  = GDL_MALLOC (size_t, model->nactive);
    	 	 for(i = 0; i < model->nactive; i++)
    	 	 {
    	 	 	work->beta_pure[k][i]     = VECTOR_GET (beta_new, model->actives[i]) / (normx[model->actives[i]] * d2);
    	 	 	work->beta_pure_idx[k][i] = model->actives[i];
    	 	 }    	 	  
    	 }
    	 beta_swap = beta;
    	 beta      = beta_new;
    	 beta_new  = beta_swap;
    	 /***********************************************
    	 * Stopping rules
    	 ***********************************************/
    	 if (k == max_steps-1)
    	 	break;
    	 if (model->nactive >= maxvars)
    	 	break;
    	 k = k + 1;
    }
    if (logger)
    {
       fprintf(logger, "gdl_enet_perform [ LARS-EN Termination ]\t[ Computing residuals, RSS etc...]\t[ wait... ]\n");
       fflush(logger);
    }
    work->max_steps = k+1;
    for(i = 0; i < m; i++)
    {
    	VECTOR_SET (beta, i, VECTOR_GET (beta, i) / (normx[i] * d2));
    	//if (VECTOR_GET (beta, i)!=0) printf ("%d %g\n", i, VECTOR_GET (beta, i));
    }
    for(i = 0; i <= k; i++) df[i] = df[i] + center;
    if (RSS[k] > work->ssy || RSS[k] < eps || df[k] == 0)
    	work->sigma2 = GDL_NAN;
    else
    	work->sigma2 = RSS[k]/(n-df[k]);
//    printf ("sigma2 = %g (%d)\n", work->sigma2, model->nactive);
	if (logger)
	{
//    for(i = 0; i <= k; i++)
//    {
//    	fprintf(logger, "%d rss = %g r2 = %g df = %d Cp = %g\n", i, RSS[i], 1.0-RSS[i]/work->ssy, df[i], (RSS[i]/work->sigma2) - n + 2.0*df[i]);
//    }
	}
    /**
     * Clean workspace
     */
    gdl_enet_R_free (R);
    if (logger)
    {
       fprintf(logger, "\b\b\b\b\b\b\b\b\bDONE ]\n");
       fflush(logger);
    }
    return GDL_SUCCESS;
}

#undef VECTOR_SET
#undef VECTOR_PTR
#undef VECTOR_GET
#undef MATRIX_SET
#undef MATRIX_GET
