/*  
 * 	linalg/svd_golub.c
 * 
 *  $Author: baptiste $, $Date: 2008-05-13 15:22:08 $, $Version$
 *
 *  Libgdl : a C library for statistical genetics
 *  
 *  Copyright (C) 1996, 1997, 1998, 1999, 2000, 2004 Gerard Jungman, Brian Gough
 * 
 *  Copyright (C) 2003-2006  Jean-Baptiste Veyrieras, INRA, France.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * 
 */


/* Factorise a general M x N matrix A into,
 *
 *   A = U D V^T
 *
 * where U is a column-orthogonal M x N matrix (U^T U = I), 
 * D is a diagonal N x N matrix, 
 * and V is an N x N orthogonal matrix (V^T V = V V^T = I)
 *
 * U is stored in the original matrix A, which has the same size
 *
 * V is stored as a separate matrix (not V^T). You must take the
 * transpose to form the product above.
 *
 * The diagonal matrix D is stored in the vector S,  D_ii = S_i
 */

#include <math.h> 

#include <gdl/gdl_common.h>
#include <gdl/gdl_math.h>
#include <gdl/gdl_vector.h>
#include <gdl/gdl_matrix.h>
#include <gdl/gdl_linalg.h>

#include "givens.c"
#include "svdstep.c"

typedef struct 
{
	size_t M;
	size_t N;
	gdl_vector * work;
} golub_reinsch_t;

typedef struct 
{
	size_t M;
	size_t N;
	gdl_vector * work;
	gdl_matrix * X;
} m_golub_reinsch_t;

static int
_golub_reinsch_decomp (gdl_matrix * A, gdl_matrix * V, gdl_vector * S, gdl_vector * work)
{
  size_t a, b, i, j;
  
  const size_t M = A->size1;
  const size_t N = A->size2;
  const size_t K = GDL_MIN (M, N);

  if (M < N)
    {
      GDL_ERROR ("svd of MxN matrix, M<N, is not implemented", GDL_EUNIMPL);
    }
  else if (V->size1 != N)
    {
      GDL_ERROR ("square matrix V must match second dimension of matrix A",
                 GDL_EBADLEN);
    }
  else if (V->size1 != V->size2)
    {
      GDL_ERROR ("matrix V must be square", GDL_ENOTSQR);
    }
  else if (S->size != N)
    {
      GDL_ERROR ("length of vector S must match second dimension of matrix A",
                 GDL_EBADLEN);
    }
  else if (work->size != N)
    {
      GDL_ERROR ("length of workspace must match second dimension of matrix A",
                 GDL_EBADLEN);
    }

  /* Handle the case of N = 1 (SVD of a column vector) */

  if (N == 1)
    {
      gdl_vector_view column = gdl_matrix_column (A, 0);
      double norm = gdl_blas_dnrm2 (&column.vector);

      gdl_vector_set (S, 0, norm); 
      gdl_matrix_set (V, 0, 0, 1.0);
      
      if (norm != 0.0)
        {
          gdl_blas_dscal (1.0/norm, &column.vector);
        }

      return GDL_SUCCESS;
    }
  
  {
    gdl_vector_view f = gdl_vector_subvector (work, 0, K - 1);
    
    /* bidiagonalize matrix A, unpack A into U S V */
    
    gdl_linalg_bidiag_decomp (A, S, &f.vector);
    gdl_linalg_bidiag_unpack2 (A, S, &f.vector, V);
    
    /* apply reduction steps to B=(S,Sd) */
    
    chop_small_elements (S, &f.vector);
    
    /* Progressively reduce the matrix until it is diagonal */
    
    b = N - 1;
    
    while (b > 0)
      {
        double fbm1 = gdl_vector_get (&f.vector, b - 1);

        if (fbm1 == 0.0 || gdl_isnan (fbm1))
          {
            b--;
            continue;
          }
        
        /* Find the largest unreduced block (a,b) starting from b
           and working backwards */
        
        a = b - 1;
        
        while (a > 0)
          {
            double fam1 = gdl_vector_get (&f.vector, a - 1);

            if (fam1 == 0.0 || gdl_isnan (fam1))
              {
                break;
              }
            
            a--;
          }
        
        {
          const size_t n_block = b - a + 1;
          gdl_vector_view S_block = gdl_vector_subvector (S, a, n_block);
          gdl_vector_view f_block = gdl_vector_subvector (&f.vector, a, n_block - 1);
          
          gdl_matrix_view U_block =
            gdl_matrix_submatrix (A, 0, a, A->size1, n_block);
          gdl_matrix_view V_block =
            gdl_matrix_submatrix (V, 0, a, V->size1, n_block);
          
          qrstep (&S_block.vector, &f_block.vector, &U_block.matrix, &V_block.matrix);
          
          /* remove any small off-diagonal elements */
          
          chop_small_elements (&S_block.vector, &f_block.vector);
        }
      }
  }
  /* Make singular values positive by reflections if necessary */
  
  for (j = 0; j < K; j++)
    {
      double Sj = gdl_vector_get (S, j);
      
      if (Sj < 0.0)
        {
          for (i = 0; i < N; i++)
            {
              double Vij = gdl_matrix_get (V, i, j);
              gdl_matrix_set (V, i, j, -Vij);
            }
          
          gdl_vector_set (S, j, -Sj);
        }
    }
  
  /* Sort singular values into decreasing order */
  
  for (i = 0; i < K; i++)
    {
      double S_max = gdl_vector_get (S, i);
      size_t i_max = i;
      
      for (j = i + 1; j < K; j++)
        {
          double Sj = gdl_vector_get (S, j);
          
          if (Sj > S_max)
            {
              S_max = Sj;
              i_max = j;
            }
        }
      
      if (i_max != i)
        {
          /* swap eigenvalues */
          gdl_vector_swap_elements (S, i, i_max);
          
          /* swap eigenvectors */
          gdl_matrix_swap_columns (A, i, i_max);
          gdl_matrix_swap_columns (V, i, i_max);
        }
    }
  
  return GDL_SUCCESS;
}

static int
_m_golub_reinsch_decomp (m_golub_reinsch_t * gr, gdl_matrix * A, gdl_matrix * V, gdl_vector * S)
{
  size_t i, j;

  const size_t M = A->size1;
  const size_t N = A->size2;
  
  gdl_matrix * X    = gr->X;
  gdl_vector * work = gr->work;
  
  if (M < N)
    {
      GDL_ERROR ("svd of MxN matrix, M<N, is not implemented", GDL_EUNIMPL);
    }
  else if (V->size1 != N)
    {
      GDL_ERROR ("square matrix V must match second dimension of matrix A",
                 GDL_EBADLEN);
    }
  else if (V->size1 != V->size2)
    {
      GDL_ERROR ("matrix V must be square", GDL_ENOTSQR);
    }
  else if (X->size1 != N)
    {
      GDL_ERROR ("square matrix X must match second dimension of matrix A",
                 GDL_EBADLEN);
    }
  else if (X->size1 != X->size2)
    {
      GDL_ERROR ("matrix X must be square", GDL_ENOTSQR);
    }
  else if (S->size != N)
    {
      GDL_ERROR ("length of vector S must match second dimension of matrix A",
                 GDL_EBADLEN);
    }
  else if (work->size != N)
    {
      GDL_ERROR ("length of workspace must match second dimension of matrix A",
                 GDL_EBADLEN);
    }

  if (N == 1)
    {
      gdl_vector_view column = gdl_matrix_column (A, 0);
      double norm = gdl_blas_dnrm2 (&column.vector);

      gdl_vector_set (S, 0, norm); 
      gdl_matrix_set (V, 0, 0, 1.0);
      
      if (norm != 0.0)
        {
          gdl_blas_dscal (1.0/norm, &column.vector);
        }

      return GDL_SUCCESS;
    }

  /* Convert A into an upper triangular matrix R */

  for (i = 0; i < N; i++)
    {
      gdl_vector_view c = gdl_matrix_column (A, i);
      gdl_vector_view v = gdl_vector_subvector (&c.vector, i, M - i);
      double tau_i = gdl_linalg_householder_transform (&v.vector);

      /* Apply the transformation to the remaining columns */

      if (i + 1 < N)
        {
          gdl_matrix_view m =
            gdl_matrix_submatrix (A, i, i + 1, M - i, N - (i + 1));
          gdl_linalg_householder_hm (tau_i, &v.vector, &m.matrix);
        }

      gdl_vector_set (S, i, tau_i);
    }

  /* Copy the upper triangular part of A into X */

  for (i = 0; i < N; i++)
    {
      for (j = 0; j < i; j++)
        {
          gdl_matrix_set (X, i, j, 0.0);
        }

      {
        double Aii = gdl_matrix_get (A, i, i);
        gdl_matrix_set (X, i, i, Aii);
      }

      for (j = i + 1; j < N; j++)
        {
          double Aij = gdl_matrix_get (A, i, j);
          gdl_matrix_set (X, i, j, Aij);
        }
    }

  /* Convert A into an orthogonal matrix L */

  for (j = N; j > 0 && j--;)
    {
      /* Householder column transformation to accumulate L */
      double tj = gdl_vector_get (S, j);
      gdl_matrix_view m = gdl_matrix_submatrix (A, j, j, M - j, N - j);
      gdl_linalg_householder_hm1 (tj, &m.matrix);
    }

  /* unpack R into X V S */

  _golub_reinsch_decomp (X, V, S, work);

  /* Multiply L by X, to obtain U = L X, stored in U */

  {
    gdl_vector_view sum = gdl_vector_subvector (work, 0, N);

    for (i = 0; i < M; i++)
      {
        gdl_vector_view L_i = gdl_matrix_row (A, i);
        gdl_vector_set_zero (&sum.vector);

        for (j = 0; j < N; j++)
          {
            double Lij = gdl_vector_get (&L_i.vector, j);
            gdl_vector_view X_j = gdl_matrix_row (X, j);
            gdl_blas_daxpy (Lij, &X_j.vector, &sum.vector);
          }

        gdl_vector_memcpy (&L_i.vector, &sum.vector);
      }
  }

  return GDL_SUCCESS;
}

static int
_alloc (void * vstate, size_t M, size_t N)
{
	if (vstate)
	{
		golub_reinsch_t * state = (golub_reinsch_t *) vstate;
		state->M = M;
		state->N = N;
		state->work = gdl_vector_alloc (N);
		return GDL_SUCCESS;
	}
	return GDL_EINVAL;
}

static int 
_free (void * vstate)
{
	if (vstate)
	{
		golub_reinsch_t * state = (golub_reinsch_t *) vstate;
		gdl_vector_free (state->work);	
		return GDL_SUCCESS;
	}
	return GDL_EINVAL;
}

static int
_perform (void * vstate, gdl_matrix * A, gdl_matrix * V, gdl_vector * S, size_t ns)
{
	if (vstate)
	{
		golub_reinsch_t * state = (golub_reinsch_t *) vstate;
		return _golub_reinsch_decomp (A, V, S, state->work);
	}
	return GDL_EINVAL;
}


static int
_malloc (void * vstate, size_t M, size_t N)
{
	if (vstate)
	{
		m_golub_reinsch_t * state = (m_golub_reinsch_t *) vstate;
		state->M = M;
		state->N = N;
		state->work = gdl_vector_alloc (N);
		state->X    = gdl_matrix_alloc (N, N);
		return GDL_SUCCESS;
	}
	return GDL_EINVAL;
}

static int 
_mfree (void * vstate)
{
	if (vstate)
	{
		m_golub_reinsch_t * state = (m_golub_reinsch_t *) vstate;
		gdl_vector_free (state->work);
		gdl_matrix_free (state->X);
		return GDL_SUCCESS;
	}
	return GDL_EINVAL;
}

static int
_mperform (void * vstate, gdl_matrix * A, gdl_matrix * V, gdl_vector * S, size_t ns)
{
	if (vstate)
	{
		m_golub_reinsch_t * state = (m_golub_reinsch_t *) vstate;
		return _m_golub_reinsch_decomp (state, A, V, S);
	}
	return GDL_EINVAL;
}

static const gdl_linalg_svd_workspace_type _golub_reinsch =
{
	"gdl_linalg_svd_golub_reinsch",
	sizeof (golub_reinsch_t),
	&_alloc,
	&_free,
	&_perform
};

const gdl_linalg_svd_workspace_type * gdl_linalg_svd_golub_reinsch = &_golub_reinsch;

static const gdl_linalg_svd_workspace_type _m_golub_reinsch =
{
	"gdl_linalg_svd_m_golub_reinsch",
	sizeof (m_golub_reinsch_t),
	&_malloc,
	&_mfree,
	&_mperform
};

const gdl_linalg_svd_workspace_type * gdl_linalg_svd_m_golub_reinsch = &_m_golub_reinsch;



