/*  
 * 	lasso/lars.c
 * 
 *  $Author: baptiste $, $Date: 2008-05-13 15:22:08 $, $Version$
 *
 *  Libgdl : a C library for statistical genetics
 * 
 *  Copyright (C) 2003-2008  Jean-Baptiste Veyrieras, INRA, France.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * 
 */

#include <math.h>

#include <gdl/gdl_common.h>
#include <gdl/gdl_errno.h>
#include <gdl/gdl_mode.h>
#include <gdl/gdl_string.h>
#include <gdl/gdl_math.h>
#include <gdl/gdl_permutation.h>
#include <gdl/gdl_vector.h>
#include <gdl/gdl_matrix.h>
#include <gdl/gdl_blas.h>
#include <gdl/gdl_cblas.h>
#include <gdl/gdl_statistics_double.h>

#include <gdl/gdl_lasso.h>

#define VECTOR_SET(X,i,y)(*((X)->data+(i)*(X)->stride)=(y))
#define VECTOR_GET(X,i)(*((X)->data+(i)*(X)->stride))
#define VECTOR_PTR(X,i)((X)->data+(i)*(X)->stride)
#define MATRIX_SET(X,i,j,y)(*((X)->data+((i)*(X)->tda+(j)))=(y))
#define MATRIX_GET(X,i,j)(*((X)->data+((i)*(X)->tda+(j))))

typedef struct
{
	gdl_matrix * matrix;
	size_t size;
	size_t rank;
} gdl_lasso_R;

static gdl_lasso_R * 
gdl_lasso_R_alloc (const size_t n, const size_t m, const size_t r)
{
	gdl_lasso_R * R;
	
	R = GDL_CALLOC (gdl_lasso_R, 1);
	
	R->matrix = gdl_matrix_calloc (n, n);
	R->size   = n;
	R->matrix->size1 = R->matrix->size2 = m;
	R->rank   = r;
	
	return R;
}

static void
gdl_lasso_R_free (gdl_lasso_R * R)
{
	if (R)
	{
		R->matrix->size1 = R->matrix->size2 = R->size;
		gdl_matrix_free (R->matrix);
		GDL_FREE (R);
	}	
}

static gdl_lasso_R * 
gdl_lasso_R_clone (const gdl_lasso_R * R)
{
	gdl_lasso_R * Rx;
	
	Rx = GDL_CALLOC (gdl_lasso_R, 1);
	Rx->size = R->size;
	Rx->matrix = gdl_matrix_calloc (R->size, R->size);
	gdl_matrix_memcpy (Rx->matrix, R->matrix);
	Rx->matrix->size1  = Rx->matrix->size2 = R->matrix->size1;
	Rx->rank   = R->rank;
	
	return Rx;
}

static void
gdl_lasso_R_up (gdl_lasso_R * R, const size_t r)
{
	(R->matrix->size1)++;
	(R->matrix->size2)++;
	R->rank = r;	
}

static void
gdl_lasso_R_down (gdl_lasso_R * R, const size_t r)
{
	(R->matrix->size1)--;
	(R->matrix->size2)--;
	R->rank = r;	
}

static gdl_lasso_R *
gdl_lasso_updateR (gdl_matrix * X,
         gdl_lasso_R * R,
         const size_t inew,
         const size_t * active,
         const size_t nactive,
         const double eps,
         const gdl_boolean Gram)
{
	size_t i,j,rank;
	double norm_xnew, xnew, rpp;
	double * dptr;
	gdl_vector * Xtx;
	
	if (Gram)
	{
		xnew = MATRIX_GET (X, inew, inew);
	}
	else
	{
		gdl_vector_view xc = gdl_matrix_column (X, inew);
		gdl_blas_ddot (&(xc.vector), &(xc.vector), &xnew);
	}
	
	norm_xnew = sqrt (xnew);
	
	//printf ("updateR::norm_xnew = %g\n", norm_xnew);
	
	if (!R)
	{
		// m = X->size2 in any cases ;-)
        R = gdl_lasso_R_alloc (X->size2,1,1);
        MATRIX_SET (R->matrix, 0, 0, norm_xnew);
        return R;
    }
    /**
     * Xtx <- if (Gram)
     *   xold
     * else drop(t(xnew) %*% xold)
     */ 
    //printf ("nactive = %d\n", nactive);
    Xtx = gdl_vector_alloc (nactive); 
    if (Gram)
    {
    	for(j = 0; j < nactive; j++)
    	{
    		VECTOR_SET (Xtx, j, MATRIX_GET (X, inew, active[j]));
    	}	
    }
    else
    {
    	gdl_vector_view xc1 = gdl_matrix_column (X, inew);
    	for(j = 0; j < nactive; j++)
    	{
    		gdl_vector_view xc2 = gdl_matrix_column (X, active[j]);
    		dptr = VECTOR_PTR(Xtx, j);
    		gdl_blas_ddot (&(xc1.vector), &(xc2.vector), dptr);
    		
    	}
    }
    //r <- backsolve(R, Xtx, transpose=TRUE)
    gdl_blas_dtrsv (CblasUpper, CblasTrans, CblasNonUnit, R->matrix, Xtx);
//    printf("r (%d) = ", nactive);
//    for(j = 0; j < nactive; j++)
//    {
//    	printf(" %g", VECTOR_GET(Xtx, j));	
//    }
//    printf("\n");
    //  rpp <- norm.xnew^2 - sum(r^2)
    gdl_blas_ddot (Xtx, Xtx, &rpp);
    //printf("updateR::sum(r^2) = %g\n", rpp);
    rpp = norm_xnew*norm_xnew - rpp;
    //printf("updateR::rpp = %g\n", rpp);
    rank = R->rank;
   
    if (rpp <= eps)
    {
        rpp = eps;
    }
    else
    {
        rpp  = sqrt(rpp);
        rank = rank + 1;
    }
    
    gdl_lasso_R_up (R, rank);
    for(i = 0; i < Xtx->size; i++)
    {
    	//printf ("xt %d %g\n", i, VECTOR_GET (Xtx, i));
    	MATRIX_SET(R->matrix, i, R->matrix->size1-1, VECTOR_GET (Xtx, i));
    }
    MATRIX_SET(R->matrix, i, R->matrix->size1-1, rpp);
    
    gdl_vector_free (Xtx);
    
    return (R);
}

static void
gdl_lasso_delcol_internal (gdl_matrix * r,
		                   const size_t p,
		                   const size_t k)
//		                   gdl_matrix * z,
//		                   const size_t n, // p = r->size1
//		                   const size_t nz) // nz = 1
{
  size_t p1,i,j;
  double a,b,c,s,tau;
  
  p1 = p-1;
  i  = k;
  while (i < p1)
  {
      a = MATRIX_GET(r, i, i);
      b = MATRIX_GET(r, (i+1), i);
      if (b != 0.)
	  {
	  	  if (fabs(b) > fabs(a))
	  	  {
	  	  	 tau = -a/b;
	      	 s = 1.0/sqrt(1.0+tau*tau);
	      	 c = s * tau;
	  	  }
	  	  else
	  	  {
		      tau = -b/a;
		      c = 1.0/sqrt(1.0+tau*tau);
		      s = c * tau;
	  	  }
	      MATRIX_SET(r, i, i, c*a - s*b);
	      MATRIX_SET(r, (i+1), i, s*a + c*b);
	      j = i+1;
		  while (j < p1)
		  {
		  	 a = MATRIX_GET(r, i, j);
	     	 b = MATRIX_GET(r, (i+1), j);
	     	 MATRIX_SET(r, i, j, c*a - s*b);
	      	 MATRIX_SET(r, (i+1), j, s*a + c*b);
	      	 j = j+1;
		  }
//	      j = 0;
//	      while(j < nz)
//	      {
//		      a = MATRIX_GET(z, i, j);
//		      b = MATRIX_GET(z, (i+1), j);
//		      MATRIX_SET(z, i, j, c*a - s*b);
//		      MATRIX_SET(z, (i+1), j, s*a + c*b);
//		      j = j+1;
//	      }
	  }
  	  i = i+1;
  }
}
/**
 * r and z are modified by the routine 
 */
static void
gdl_lasso_delcol (gdl_matrix * r, gdl_matrix * z, const size_t k)
{
	size_t i, j, p = r->size1;
	//  r <- r[, -k, drop = FALSE]
	for(j = k; j < r->size2-1; j++)
	{
		for(i = 0; i < r->size1; i++)
		{
			MATRIX_SET(r, i, j, MATRIX_GET(r, i, (j+1)));
		}
	}
	gdl_lasso_delcol_internal (r, p, k);//, z, z->size1, z->size2);
//	printf("DELCOL(Z) = ");
//	for(i = 0; i < z->size1; i++)
//	{
//		printf(" %g",MATRIX_GET (z, i, 0));
//	}
//	printf("\nDELCOL(r, k = %d)\n", k);
//	for(i = 0; i < r->size1; i++)
//	{
//		for(j = 0; j < r->size2-1; j++)
//		{
//			printf (" %g", MATRIX_GET(r, i, j));
//		}
//		printf("\n");
//	}
//	printf("\n");
//	//(r->size2++);
}
/**
 * R is modified by the routine
 */
static gdl_lasso_R * 
gdl_lasso_R_downdate (gdl_lasso_R * R, const size_t k)
{
    size_t p = R->matrix->size1;
    if (p == 1)
    {
        return 0;
    }    
    // remove line -p from R 
    //gdl_matrix * Z = gdl_matrix_alloc (p, 1);
    //gdl_matrix_set_all (Z, 1.0);
    gdl_lasso_delcol (R->matrix, 0, k);
    gdl_lasso_R_down (R, p-1);
    //gdl_matrix_free (Z);
    return R;
}
/**
 * R is not modified by the routine
 */
static gdl_lasso_R * 
gdl_lasso_R_downdate_const (const gdl_lasso_R * R, const size_t k)
{
	gdl_lasso_R * Rx;
	
	Rx = gdl_lasso_R_clone (R); 
	
    Rx = gdl_lasso_R_downdate (Rx, k);
    
    return Rx;
}

static gdl_boolean
gdl_lasso_all_positive (const gdl_vector * beta, size_t positive[], size_t npositive)
{
	size_t i;
	// stopping rule
    for(i = 0; i < npositive; i++)
    {
    	if (VECTOR_GET(beta, positive[i])<=0.)
    		break;
    }
    return (i < npositive) ? gdl_false : gdl_true;	
}

static gdl_boolean
gdl_lasso_all_negzero (const gdl_vector * beta, size_t positive[], size_t npositive)
{
	size_t i;
	// stopping rule
    for(i = 0; i < npositive; i++)
    {
    	if (VECTOR_GET(beta, positive[i])>0.)
    		break;
    }
    return (i < npositive) ? gdl_false : gdl_true;
}

//// nnls.lars
////function (active, Sign, R, beta, Gram, eps = 1e-10, trace = FALSE,
////    use.Gram = TRUE)
//static int
//gdl_lasso_nnls_lars (size_t active[],
//                     size_t *nactive,
//                     gdl_vector  * Sign,
//                     gdl_lasso_R * R,
//                     gdl_vector  * beta,
//                     gdl_matrix  * X,
//                     const double eps,
//                     const gdl_boolean use_Gram,
//                     FILE * logger)
//{
//	const size_t M = *nactive;
//	int i,j,m;
//	gdl_matrix * GX;
//	
//	if (use_Gram)
//    {
//       GX = gdl_matrix_alloc (M, M);
//       for(i = 0; i < M; i++)
//       {
//          for(j = 0; j < M; j++)
//          {
//          	  MATRIX_SET (GX, i, j, MATRIX_GET (X, active[i], active[j]));
//          }
//       }
//    }
//    else
//    {
//       GX = gdl_matrix_alloc (X->size1, M);
//       for(i = 0; i < X->size1; i++)
//       {
//          for(j = 0; j < M; j++)
//          {
//          	  MATRIX_SET (GX, i, j, MATRIX_GET (X, i, active[j]));
//          }
//       }
//    }
//    double b;
//    gdl_vector * Sign_old, * Sign_tmp, * beta_tmp;
//    gdl_lasso_R * R_old, * R_tmp;
//    size_t npos, * positive, * im, * zero = 0, * zero_old;
//    
//    Sign_old = gdl_vector_alloc (Sign->size);
//    m = M-1;
//    while (m > 0)
//    {
//    	gdl_lasso_R_downdate_const (R_old, R, m);
//        gdl_vector_memcpy (Sign_old, Sign);
//        Sign_old->size = Sign->size = m - 1;
//        //beta0 <- backsolve(R.old, backsolvet(R.old, Sign[-zero.old])) * Sign[-zero.old])
//        gdl_blas_dtrsv (CblasUpper, CblasTrans, CblasNonUnit, R_old->matrix, Sign_old);
//        gdl_blas_dtrsv (CblasUpper, CblasNoTrans, CblasNonUnit, R_old->matrix, Sign_old);
//        gdl_vector_mul (Sign_old, Sign);
//        //beta.old <- c(beta0, rep(0, length(zero.old)))
//        for(i = npos = 0; i < m; i++)
//        {
//        	b = VECTOR_GET (Sign_old, i);
//        	VECTOR_SET (beta_old, i, b);
//        	if (b > 0) npos++;
//        }
//        if (npos == m)
//        {
//        	break;
//        }
//        //for(; i < M; i++) VECTOR_SET (beta_old, 0);
//        m = m - 1;
//        //zero <- zero.old
//        //positive <- im[-zero]
//        // swap R and R_old
//        R_tmp = R;
//        R = R_old;
//        R_old = tmp;
//        // swap beta and beta_old
//        beta_tmp = beta;
//        beta     = beta_old;
//        beta_old = beta_tmp;
//    }
//	for(i = m+1; i < M; i++)
//    {
//    	VECTOR_SET (beta, 0);
//    }
//    npositive = m;
//    positive  = GDL_CALLOC (gdl_boolean, m);
//    for(i = 0; i <= m; i++)
//    {
//    	positive[i] = i;
//    }
//    Sign->size = M;
//    while (0==0)
//    {
//    	while (!gdl_lasso_all_positive(beta, positive, npositive))
//        {
//            //alpha0 <- beta.old/(beta.old - beta)
//            //alpha <- min(alpha0[positive][(beta <= 0)[positive]])
//            alpha = GDL_POSINF;
//            for(i = 0; i < npositive; i++)
//            {
//            	bo = VECTOR_GET (beta_old, positive[i]);
//            	b  = VECTOR_GET (beta, positive[i]);
//            	bo = bo/(bo-b);
//            	VECTOR_SET (alpha0, positive[i], bo);
//            	if (b <= 0 && bo < alpha)
//            	{
//            		alpha = bo;
//            	}
//            }
//            //beta.old <- beta.old + alpha * (beta - beta.old)
//            for(i = npositive-1; i >= 0; i--)
//            {
//            	bo = VECTOR_GET (beta_old, positive[i]);
//            	b  = VECTOR_GET (beta, positive[i]);
//            	bo = bo + alpha * (b - bo);
//            	VECTOR_SET (beta_old, positive[i], bo);
//            	a0 = VECTOR_GET (alpha0, positive[i]); 
//            	//dropouts <- match(alpha, alpha0[positive], 0)
//            	if (a0 == alpha)
//            	{
//            		//for (i in rev(dropouts))
//		            //{
//		            //	R <- downdateR(R, i)
//		            //}
//            		gdl_lasso_R_downdate (R, positive[i]);
//            		positive[i] = -1;
//            	}
//            }
//            for(i = 0; i < npositive; i++)
//            {
//            	if (positive[i]==-1)
//            	{
//            		for(ip = i; ip < npositive-1; ip++)
//            		{
//            			positive[i]=positive[i+1];
//            		}
//            		npositive--;
//            		i--;
//            	}		
//            }
//            //positive <- positive[-dropouts]
//            //zero <- im[-positive]
//            //Sign[positive]
//            for(i = nzero = ip = 0; i < M; i++)
//            {
//            	if (ip < npositive && positive[ip] == i)
//            	{
//            		VECTOR_SET (Sign_old, ip, VECTOR_GET (Sign, i));
//            		ip++;
//            		Sign_old->size=ip;
//            	}
//            	else
//            	{
//            		zero[nzero++] = i;
//            		VECTOR_SET (beta, i, 0.0);
//            	}
//            }
//            // beta0 <- backsolve(R, backsolvet(R, Sign[positive])) * Sign[positive]
//            // beta[positive] <- beta0
//            gdl_blas_dtrsv (CblasUpper, CblasTrans, CblasNonUnit, R->matrix, Sign_old);
//       		gdl_blas_dtrsv (CblasUpper, CblasNoTrans, CblasNonUnit, R->matrix, Sign_old);
//            for(i = 0; i < npositive; i++)
//            {
//            	VECTOR_SET (beta, positive[i], VECTOR_GET (Sign_old, i));	
//            }
//        }
//        // Sign_old = Sign * beta
//        for(i = 0; i < M; i++)
//        {
//        	VECTOR_SET (Sign_old, i, VECTOR_GET (Sign, i) * VECTOR_GET (beta, i));
//        }
//        size_t add = 0;
//        double wmax = GDL_NEGINF;
//        if (use_Gram)
//        {
//        	//w <- 1 - Sign * drop(Gram %*% (Sign * beta));
//        	gsl_blas_dgemv (CblasNoTrans, 1.0, GX, Sign_old, 0.0, Vect);
//        	for(i = 0; i < Vect->size; i++)
//        	{
//        		const double s = VECTOR_GET (Sign, i);
//        		double       w = VECTOR_GET (Vect, i);
//        		w = 1.0 - s * w;
//        		VECTOR_SET (W, i, w);
//        		if (w > wmax)
//        		{
//        			wmax = w;
//        			add = i;	
//        		}
//        	}
//        }
//        else
//        {
//        	// jw <- x %*% (Sign * beta)
//        	gsl_blas_dgemv (CblasNoTrans, 1.0, GX, Sign_old, 0.0, Vect);
//        	//w <- 1 - Sign * drop(t(jw) %*% x)
//            for(i = 0; i < M; i++)
//            {
//            	gdl_vector_view xc = gdl_matrix_column (X, i);
//				double * w = VECTOR_PTR (W, i);
//            	gdl_blas_dddot (&(xc.vector), Vect, w);
//				(*w) *= VECTOR_GET (Sign, i);
//				(*w) = 1.0 - (*w);   
//				if ((*w) > wmax)
//        		{
//        			wmax = (*w);
//        			add = i;
//        		}
//            }
//        }
//        if (nzero == 0 || gdl_lasso_all_negzero(W, zero, nzero))
//        {
//            break;
//        }
//        // Get the max in W
//        // add <- order(w)[M]
//        if (use_Gram)
//        {
//            gdl_lasso_R_update (GX, R, add, positive, npositive, eps, use_Gram);
//        }
//        else
//        {
//            gdl_lasso_R_update (GX, R, add, positive, npositive, eps, use_Gram); 
//        }
//        // positive <- c(positive, add)
//        positive[npositive] = add;
//        npositive++;
//        // zero <- setdiff(zero, add) ???
//        for(i = 0; i < npositive; i++)
//        {
//        	VECTOR_SET (Sign_old, i, VECTOR_GET (Sign, positive[i]));	
//        }
//        // beta0 <- backsolve(R, backsolvet(R, Sign[positive])) * Sign[positive]
//        // beta[positive] <- beta0
//        gdl_blas_dtrsv (CblasUpper, CblasTrans, CblasNonUnit, R->matrix, Sign_old);
//   		gdl_blas_dtrsv (CblasUpper, CblasNoTrans, CblasNonUnit, R->matrix, Sign_old);
//        for(i = 0; i < npositive; i++)
//        {
//        	VECTOR_SET (beta, positive[i], VECTOR_GET (Sign_old, i));	
//        }
//    }
//    if (logger)
//    {
////        dropouts <- active[-positive]
////        for (i in dropouts)
////        {
////            cat("NNLS Step:\t Variable", i, "\tdropped\n")
////        }
//    }
//    // clean workspace
//    gdl_matrix_free (GX);
//    
//    list(active = active[positive], R = R, beta = beta, positive = positive)
//}
/**
 * gdl_lasso_perform
 * 
 * This function fit the LASSO model to the given data.
 * This is a C implementation of the lars() function
 * of Brad Efron and Trevor Hastie availabe in the R 
 * package lars.
 *
 * References:
 *    Efron, Hastie, Johnstone and Tibshirani (2003) "Least Angle
 *    Regression" (with discussion) Annals of Statistics
 *    See also: 
 *       http://www-stat.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf
 *       Hastie, Tibshirani and Friedman (2002) Elements of Statistical Learning, Springer, NY.
 * 
 * The function provides all variants of Lasso, and provide the entire sequence
 * of coefficients and fits, starting from zero, to the least squares
 * fit.
 * 
 * For Bayesian LASSO, see gdl_bayesian_lasso.h
 * 
 * Arguments:
 * 
 *  X: matrix of predictors
 *
 *  y: response
 *
 *  algo: One of
 *           - gdl_lasso_algorithm_lasso,
 *           - gdl_lasso_algorithm_lar,
 *           - gdl_lasso_algorithm_frwstpw,
 *           - gdl_lasso_algorithm_stpw
 *
 *  center: If = gdl_true, each variable is centered to have 0 mean. Default is = gdl_true
 * 
 *  normalize: If = gdl_true, each variable is standardized to have unit L2 norm,
 *         otherwise it is left alone. Default is = gdl_true.
 *  
 *  Gram:    The X'X matrix; useful for repeated runs (bootstrap) where a
 *        large X'X stays the same.
 *
 *  eps: An effective zero
 *
 *  max_steps: Limit the number of steps taken; the default is '8 * min(m,
 *         n-intercept)', with m the number of variables, and n the
 *         number of samples. For 'algo="lar"' or 'algo="stepwise"', the
 *         maximum number of steps is 'min(m,n-intercept)'. For
 *         'algo="lasso"' and especially 'algo="forward.stagewise"',
 *         there can be many more terms, because although no more than
 *         'min(m,n-intercept)' variables can be active during any step,
 *         variables are frequently droppped and added as the algorithm
 *         proceeds. Although the default usually guarantees that the
 *         algorithm has proceeded to the saturated fit, users should
 *         check.
 *
 *  use_Gram: When the number m of variables is very large, i.e. larger
 *         than N, then you may not want the program to precompute the Gram
 *         matrix. Default is use.Gram=TRUE
 * 
 *  store_beta: If = gdl_true, store the estimate of the regression coefficients at each iteration
 *              For large data set, setting store_beta = gdl_false can help saving both memory and time
 * 
 *  logger: If not NULL, the function prints out its progress on the given stream
 *
 **/
int
gdl_lasso_perform (gdl_lasso_workspace * work,
                   gdl_matrix * X,                   
                   gdl_vector * y,
                   const gdl_lasso_algorithm * algo,
                   const gdl_boolean center,
                   const gdl_boolean normalize,
                   gdl_matrix * Gram,
                   const double eps,
                   size_t max_steps,
                   const gdl_boolean use_Gram,
                   const gdl_boolean store_beta,
                   gdl_vector * weights,
                   FILE * logger)
{
	const size_t n = X->size1;
	const size_t m = X->size2;
    size_t i,j,ja,ji,aj,k,jj,inew,nnewx,anydrop=0,free_Gram=0;
    int * df, kdf, status = GDL_CONTINUE;
    size_t * newx, * newxi;
    double a, b, c, g, w, A, t, * normx, tmp, min, zmin, Cmax, ssy, gamhat, * dptr, * RSS;
    gdl_lasso_R * R = 0;
   	gdl_vector_view Xc;
	gdl_vector * C, * Cvec, * residuals, * lambda, * Sign, * Gi1, * directions, * W, * U, * av, * beta, * beta_new, * beta_swap;
	gdl_lasso_model * model;
	
	/**********************************************************
	 * Check size
	 **********************************************************/
	if (y->size > work->N)
	{
		GDL_ERROR_VAL ("gdl_lasso_perform [ ERROR ]\t[ LASSO workspace is too small ]\t[ EXIT]\n", GDL_FAILURE, GDL_FAILURE); 
	}
	if (X->size2 > work->M)
	{
		GDL_ERROR_VAL ("gdl_lasso_perform [ ERROR ]\t[ LASSO workspace is too small ]\t[ EXIT]\n", GDL_FAILURE, GDL_FAILURE);
	}
	/**********************************************************
	 * Workspace allocation
	 **********************************************************/
	model     = work->model;
	newx      = work->newx;nnewx=0;
	normx     = work->normx;
	Sign      = work->Sign;work->Sign->size=X->size2;
	Cvec      = work->Cvec;work->Cvec->size=X->size2;
	C         = work->C;work->C->size=X->size2;
	W         = work->W;work->W->size=X->size2;
	Gi1       = work->Gi1;work->Gi1->size=X->size2;
	av        = work->av;work->av->size=X->size2;
	beta      = work->beta;work->beta->size=X->size2;
	beta_new  = work->beta_new;work->beta_new->size=X->size2;
	residuals = work->residuals;work->residuals->size=y->size;
	U         = work->U;work->U->size=y->size;
	df        = work->df;
	RSS       = work->RSS;
	gdl_lasso_model_reset (model);
	gdl_vector_set_zero (beta);
	gdl_vector_set_zero (beta_new);
	/**********************************************************
	 * PREPROCESSING
	 **********************************************************/
	if (center)
	{
		/******************************************************
		 * Center the predictors 
		 * X = X - mu(X)
		 ******************************************************/
		for(j = 0; j < m; j++)
		{
			Xc = gdl_matrix_column (X, j);
			work->mux[j] = gdl_stats_mean (Xc.vector.data, Xc.vector.stride, Xc.vector.size);
			gdl_vector_add_constant (&(Xc.vector), (-1.0)*work->mux[j]);
		}
		/******************************************************
		 * Center the response
		 * y = y - mu(y)
		 ******************************************************/
		 work->muy = gdl_stats_mean (y->data, y->stride, y->size);
		 gdl_vector_add_constant (y, (-1.0)*work->muy);
	}
	if (normalize)
    {
    	//double sd;
    	/******************************************************
		 * normalize the predictors
		 ******************************************************/
		for(j = 0; j < m; j++)
		{
			Xc = gdl_matrix_column (X, j);
			gdl_blas_ddot (&(Xc.vector), &(Xc.vector), normx+j);
			normx[j]=sqrt(normx[j]);
			if (normx[j]/sqrt(Xc.vector.size) < eps)
			{
				gdl_lasso_ignore_predictor (model, j);
				if (logger)
				{
					fprintf (logger, "gdl_lasso_perform [ Step 0 ] Variable %d with standard deviation = %g < eps = %g [ dropped for good ]\n", j, normx[j], eps);
					fflush (logger);
				}
			}
			else
			{
				gdl_lasso_inactive_predictor (model, j);
			}
			if (algo == gdl_lasso_algorithm_adaptive)
			{
				normx[j] *= sqrt(n);
			}
			gdl_vector_scale (&(Xc.vector), 1.0/normx[j]);
		}
    }
    else
    {
    	gdl_lasso_inactive_all (model);
    }
    /*******************************************************
     * Adaptive LASSO
     *******************************************************/
    if (algo == gdl_lasso_algorithm_adaptive)
    {
        if (weights == 0)
        {
        	weights = gdl_vector_alloc (m);
       		for(j = 0; j < m; j++)
			{
				Xc   = gdl_matrix_column (X, j);
				gdl_blas_ddot (&(Xc.vector), y, &a);
				gdl_blas_ddot (&(Xc.vector), &(Xc.vector), &b);
				VECTOR_SET (weights, j, fabs(a/b));
			}
        }
       	for(j = 0; j < m; j++)
		{
			Xc = gdl_matrix_column (X, j);
			gdl_vector_scale (&(Xc.vector), (VECTOR_GET(weights,j)));
		}
    } 
   /*******************************************************
    * Compute the X'X matrix if not defined an use_Gram=true
	******************************************************/
	if (use_Gram && Gram==0)
	{
		if (m > 500 && n < m)
		{
            GDL_WARNING ("There are more than 500 variables and n<m\nYou may wish to restart and set use_Gram=gdl_false\n", GDL_EINVAL);
		}
        if (logger)
        {
            fprintf (logger, "gdl_lasso_perform [ Step 0 ]\t[ Computing Gram = X'X ]\t[ wait...]");
            fflush(logger);
        }
        Gram = gdl_matrix_alloc (m, m);
        gdl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, X, X, 0.0, Gram);
        free_Gram=1;
        if (logger)
        {
            fprintf (logger, "\b\b\b\b\b\b\b\bOK ]\n");
            fflush(logger);
        }
    }
    /*******************************************************
    * Compute some summary satistics
	******************************************************/
	for(j = 0; j < m; j++)
	{
		Xc = gdl_matrix_column (X, j);
		dptr = VECTOR_PTR (Cvec, j);
		gdl_blas_ddot (&(Xc.vector), y, dptr);
	}
	gdl_blas_ddot (y, y, &(work->ssy));
	if (!max_steps)
    {
    	max_steps = 8 * GDL_MIN (m, n - center);
    }
    // copy y into residuals
	gdl_vector_memcpy (residuals, y);
    /********************************************
     * Additional Workspace
     *********************************************/
    work->max_steps = max_steps;
    if (work->lambda) gdl_vector_free (work->lambda);
    work->lambda = lambda = gdl_vector_calloc (max_steps);
    if (store_beta)
    {
    	if (work->beta_pure)
    	{
    	 	for(i = 0; i < work->max_steps; i++)
    	 		GDL_FREE (work->beta_pure[i]);	
    	 	GDL_FREE (work->beta_pure);
    	}
    	if (work->beta_pure_idx)
    		GDL_FREE (work->beta_pure_idx);
    	if (work->beta_pure_size)
    		GDL_FREE (work->beta_pure_size);
    	work->beta_pure      = GDL_MALLOC (double *, max_steps);
    	work->beta_pure_idx  = GDL_MALLOC (size_t *, max_steps);
    	work->beta_pure_size = GDL_MALLOC (size_t, max_steps);
    }
    if (work->df) 
    	GDL_FREE (work->df);
    work->df = df = GDL_CALLOC (int, max_steps);
	if (work->RSS)
		GDL_FREE (work->RSS);
	work->RSS = RSS = GDL_CALLOC (double, max_steps);
    /********************************************
     * Initialization
     ******************************************/
    status = GDL_CONTINUE;
    k = 0;
    /********************************************
     * Iterations
     ******************************************/
    while(status == GDL_CONTINUE)
    {
    	kdf = 0;
    	Cmax = 0.0;
    	for(j = 0; j < model->ninactive; j++)
    	{
    		t = VECTOR_GET (Cvec, model->inactives[j]);
    		VECTOR_SET (C, j, t);
    		t = fabs(t);
			if (t > Cmax)
			{
				Cmax = t;
			}
    	}
    	C->size = j;
    	/**
    	 * Check if Cmax >> 0
    	 */
    	if (Cmax < eps * 100)
    	{
            if (logger)
            {
                fprintf (logger, "gdl_lasso_perform [ Step 1 ]\t[Max corr = 0 (%g)]\t[ EXIT ]\n", Cmax);
                fflush (logger);
            }
            break;
        }
        VECTOR_SET (lambda, k, Cmax);
        if (anydrop==0) //
        {
        	t = Cmax - eps;
        	for(j = nnewx = 0; j < C->size; j++)
        	{
        		if (fabs(VECTOR_GET (C, j)) >= t)
        		{
        			newx[nnewx++] = model->inactives[j];
        		}
        	}
            for(j = 0; j < nnewx; j++)
            {
            	if (use_Gram)
            	{
            		R = gdl_lasso_updateR (Gram, R, newx[j], model->actives, model->nactive, eps, use_Gram);
            	}
                else
                {
                	 R = gdl_lasso_updateR (X, R, newx[j], model->actives, model->nactive, eps, use_Gram);
                }
                if (R->rank == model->nactive)
                {
                	// come back to previous R
                	gdl_lasso_R_down (R, model->nactive);
                	// ignore this variable
                	gdl_lasso_ignore_predictor (model, newx[j]);
                	//_add_to_ordered_array (ignores, &nignore, newx[i]);
                	if (logger)
                	{
                       fprintf (logger, "gdl_lasso_perform [ Step %d ]\t[ Variable %d is collinear ]\t[ DROPPED ]\n", k, newx[j]);
                	}
                	kdf--;
                }
                else
                {
                  	VECTOR_SET(Sign, model->nactive, GDL_SIGN(VECTOR_GET(Cvec, newx[j])));
                  	Sign->size = model->nactive+1;
                  	// active this variable
                  	gdl_lasso_active_predictor (model, newx[j]);
                  	if (logger)
                    {
                    	fprintf (logger, "gdl_lasso_perform [ Step %d ]\t[ Variable %d ]\t[ ADDED ]\n", k, newx[j]);
                    }
                    kdf++;
                    if (model->nactive >= GDL_MIN(m - model->nignore, n - center))
    	 				break;    
                }
            }
        }
        else 
        {
        	kdf=-anydrop;
        	anydrop=0;
        }
        Gi1->size = Sign->size;
        gdl_vector_memcpy (Gi1, Sign);
        gdl_blas_dtrsv (CblasUpper, CblasTrans, CblasNonUnit, R->matrix, Gi1);
        gdl_blas_dtrsv (CblasUpper, CblasNoTrans, CblasNonUnit, R->matrix, Gi1);
//        if (algo == gdl_lasso_algorithm_frwstpw)
//        {
//        }
        gdl_blas_ddot (Gi1, Sign, &A);
        A = 1.0/sqrt(A);
        W->size = Gi1->size;
        gdl_vector_memcpy (W, Gi1);
        gdl_vector_scale (W, A);
        if (!use_Gram)
        {
        	for(i = 0; i < n; i++)
        	{
        		dptr = VECTOR_PTR (U, i);
        		(*dptr) = 0.;
	        	for(j = 0; j < model->nactive; j++)
	        	{
        			(*dptr) += MATRIX_GET (X, i, model->actives[j]) * VECTOR_GET (W, j);
        		}
        	}
        }
        if (model->nactive >= GDL_MIN (n - center, m - model->nignore) || algo == gdl_lasso_algorithm_stpw)
        {
        	gamhat = Cmax/A;
        }
        else
        {
            if (use_Gram)
            {
                for(j = 0; j < model->ninactive; j++)
                {
                	dptr = VECTOR_PTR (av, j);
	                (*dptr) = 0;
	                for(i = 0; i < model->nactive; i++)
	                {
	                	(*dptr) += VECTOR_GET (W, i) * MATRIX_GET (Gram, model->actives[i], model->inactives[j]);
	                }	
                }
            }
            else
            {
                for(j = 0; j < model->ninactive; j++)
                {
                	dptr    = VECTOR_PTR (av, j);
	                (*dptr) = 0;
	                for(i = 0; i < n; i++)
	                {
	                	(*dptr) += VECTOR_GET (U, i) * MATRIX_GET (X, i, model->inactives[j]);  
	                }
                }
            }
            av->size = model->ninactive;
            min = Cmax/A;
            for(j = 0; j < model->ninactive; j++)
            {
	        	c = VECTOR_GET (Cvec, model->inactives[j]);
	        	a = VECTOR_GET (av, j);
	            t = (Cmax - c)/(A - a);
	            if (t > eps && t < min)
	            {
	            	min = t;
	            }
	            t = (Cmax + c)/(A + a);
	            if (t > eps && t < min)
	            {
	            	min = t;
	            }
            }
            gamhat = min;
        }
        if (algo == gdl_lasso_algorithm_lasso)
        {
            zmin = gamhat;
            for(i = 0; i < model->nactive; i++)
            {
            	b = VECTOR_GET (beta, model->actives[i]);
            	w = VECTOR_GET (W, i);
            	t = (-1.0)*(b/w);
            	if (t > eps && t < gamhat)
            	{
            		zmin = t;
            	}
            }
            if (zmin < gamhat)
            {
            	gamhat = zmin;
            	for(i = 0; i < model->nactive; i++)
            	{
            		b = VECTOR_GET (beta, model->actives[i]);
            		w = VECTOR_GET (W, i);
            		t = (-1.0)*(b/w);
            		if (t == zmin)
            		{
            			gdl_lasso_drop_cache_predictor (model, model->actives[i]);
            		} 	
            	}
            }
        }
        for(i = 0; i < model->nactive; i++)
        {
        	b = VECTOR_GET (beta, model->actives[i]);
        	w = VECTOR_GET (W, i);
        	//printf (" %g (%d)", b  + gamhat * w, model->actives[i]);
        	VECTOR_SET(beta_new, model->actives[i], (b  + gamhat * w));
        }
        //printf ("\n");
        if (use_Gram)
        {
        	for(i = 0; i < m; i++)
        	{
        		t = 0;
        		for(j = 0; j < model->nactive; j++)
        		{
        			w = VECTOR_GET (W, j);
        			g = MATRIX_GET (Gram, i, model->actives[j]);
        			t += g * w;
        		}
        		VECTOR_SET (Cvec, i, VECTOR_GET (Cvec, i) - gamhat * t);
        	}
        }
        else
        {
        	for(i = 0; i < n; i++)
        	{
        		t = VECTOR_GET (residuals, i);
        		t -= gamhat * VECTOR_GET (U, i);
        		VECTOR_SET (residuals, i, t);
        	}
        	for(i = 0; i < m; i++)
			{
				Xc = gdl_matrix_column (X, i);
				dptr  = gdl_vector_ptr (Cvec, i);
				*dptr = 0;
				gdl_blas_ddot (&(Xc.vector), residuals, dptr);
			}
        }
        if (algo == gdl_lasso_algorithm_lasso && model->ndrop)
        {
        	anydrop=0;
            for(i = 0; i < model->ndrop; i++)
            {
            	if (logger)
            	{
            		fprintf(logger, "gdl_lasso_perform [ Lasso Step %d ]\t[ Variable %d ]\t[ DROPPED ]\n",  k+1, model->actives[model->drops[i]]);
            		fflush (logger);	
            	}
            	
            	R = gdl_lasso_R_downdate(R, model->drops[i]);
            	
            	VECTOR_SET (beta, model->actives[model->drops[i]], 0.0);
            	// remove them from Sign
    			for(ji = model->drops[i]; ji < model->nactive-1; ji++)
    			{
    				VECTOR_SET (Sign, ji, VECTOR_GET (Sign, ji+1));
    			}
    			(Sign->size)--;
            	anydrop++;
            }
            gdl_lasso_drop_apply_predictor (model);            	           
        }
        if (algo == gdl_lasso_algorithm_stpw)
        {
        	gdl_vector_set_zero (Sign);
        }
        /***********************************************
         * TERMINATION
         ***********************************************/
         // RSS
         gdl_vector_memcpy (U, y);
    	 gdl_blas_dgemv (CblasNoTrans, -1.0, X, beta_new, 1.0, U);
    	 gdl_blas_ddot (U, U, RSS+k);
    	 // DF
    	 df[k] = (k > 0 ? df[k-1] : 0) + kdf;
    	 //df[k] = model->nactive;
    	 // Swap beta and beta_new
    	 if (store_beta)
    	 {
    	 	 work->beta_pure_size[k] = model->nactive;
		 	 work->beta_pure[k]      = GDL_MALLOC (double, model->nactive);
		 	 work->beta_pure_idx[k]  = GDL_MALLOC (size_t, model->nactive);
		 	 for(i = 0; i < model->nactive; i++)
		 	 {
		 	 	work->beta_pure[k][i] = VECTOR_GET (beta_new, model->actives[i]) / (normx[model->actives[i]]);
		 	 	if (algo == gdl_lasso_algorithm_adaptive)
		    		work->beta_pure[k][i] *= VECTOR_GET (weights, i);
		    	work->beta_pure_idx[k][i] = model->actives[i];
		 	 }
    	 }
    	 beta_swap = beta;
    	 beta      = beta_new;
    	 beta_new  = beta_swap;
    	 /***********************************************
    	 * Stopping rules
    	 ***********************************************/
    	 if (k == max_steps-1)
    	 	break;
    	 if (model->nactive >= GDL_MIN(m - model->nignore, n - center))
    	 	break;
    	 k = k + 1;
    }
    if (logger)
    {
       fprintf(logger, "gdl_lasso_perform [ Lasso Termination ]\t[ Computing residuals, RSS etc...]\t[ wait... ]");
       fflush(logger);
    }
    work->max_steps = k+1;
    for(i = 0; i < m; i++)
    {
    	VECTOR_SET (beta, i, VECTOR_GET (beta, i) / normx[i]);
    	if (algo == gdl_lasso_algorithm_adaptive)
    	{
    		VECTOR_SET (beta, i, (VECTOR_GET (beta, i)) * (VECTOR_GET (weights, i)));
    	}
    }
    //printf("\n");
    for(i = 0; i <= k; i++) df[i] = df[i] + center;
    if (RSS[k] > work->ssy || RSS[k] < eps || df[k] == 0)
    	work->sigma2 = GDL_NAN;
    else
    	work->sigma2 = RSS[k]/(n-df[k]);
    //printf ("sigma2 = %g (%d)\n", work->sigma2, model->nactive);
//    for(i = 0; i <= k; i++)
//    {
//    	printf("%d rss = %g r2 = %g df = %d Cp = %g\n", i, RSS[i], 1.0-RSS[i]/work->ssy, df[i], (RSS[i]/work->sigma2) - n + 2.0*df[i]);
//    }
    /**
     * Clean workspace
     */
    gdl_lasso_R_free (R);
    if (free_Gram)
    {
    	gdl_matrix_free (Gram);	
    }
    if (logger)
    {
       fprintf(logger, "\b\b\b\b\b\b\b\b\bDONE ]\n");
       fflush(logger);
    }
    return GDL_SUCCESS;
}

#undef VECTOR_SET
#undef VECTOR_PTR
#undef VECTOR_GET
#undef MATRIX_SET
#undef MATRIX_GET
