/*  
 * 	linalg/ritzit.c
 * 
 *  $Author: baptiste $, $Date: 2008-05-13 15:22:08 $, $Version$
 *
 *  Libgdl : a C library for statistical genetics
 *  
 *  Copyright (C) 2003-2006  Jean-Baptiste Veyrieras, INRA, France.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * 
 */

#include <gdl/gdl_math.h>
#include <gdl/gdl_nrec.h>

/**************************************************************
 * multiplication of matrix A by a vector x, where            *
 *                                                            *
 * A is nrow by ncol (nrow >> ncol)                           *
 * y stores product vector                                    *
 **************************************************************/
static void
sis2_opa(
				 gdl_matrix * A,
				 size_t M,
				 size_t N,
				 double *x,
				 double *y,
				 int t
				 )
{
   size_t i, j;

   if (!t)
   {
   	for (i = 0; i < M; i++)
	   {
	   	  y[i] = 0.0;
	   }
	   for (i = 0; i < M; i++)
	   {
	      for (j = 0; j < N; j++)
	      {
	      	y[i] += gdl_matrix_get (A, i, j)*x[j];
	      }
	   }
   }
   else
   {
	   for (i = 0; i < N; i++)
	   {
	   	  y[i] = 0.0;
	   }
	   for (i = 0; i < N; i++)
	   {
	      for (j = 0; j < M; j++)
	      {
	      	y[i] += gdl_matrix_get (A, j, i)*x[j];
	      }
	   }
   }
   
   return;
}
/************************************************************** 
 * multiplication of matrix B by a vector x, where            *
 *							                                  *
 * B =  A'A, where A is nrow by ncol (nrow >> ncol)           *
 * Hence, B is of order n:=ncol                               *
 * y stores product vector                                    *
 **************************************************************/ 
static void
sis2_opb(
				 gdl_matrix * A,
				 size_t M,
				 size_t N,
				 double *x,
				 double *y,
				 int t
				 )
{
   size_t i;
   double *ztemp;
 
   ztemp = GDL_CALLOC (double, M);
   sis2_opa(A, M, N, x, ztemp, t);
   sis2_opa(A, M, N, ztemp, y, (!t)?1:0);
   GDL_FREE (ztemp);
}			 


/***********************************************************************
 *                                                                     *
 *                              sis2_tred2()                                *
 *                                                                     *
 ***********************************************************************/
/***********************************************************************

  Description
  -----------
  
  sis2_tred2() is a translation of the algol procedure TRED2, Num. Math. 11, 
  181-195 (1968) by Martin, Reinsch, and Wikinson.  Handbook for Auto.
  Comp., Vol. II- Linear Algebra, 212-226 (1971)

  This subroutine reduces a real symmetric matrix to a symmetric
  tridiagonal matrix using and accumulating orthogonal similarity
  transformations.

  Arguments
  ---------

  (input)
  offset index of the leading element of the matrix to be
         tridiagonalized. The matrix tridiagonalized should be 
         stored in a[offset:n-1, offset:n-1]

  n	 order of the matrix

  a	 contains the real symmetric input matrix. Only the upper
	 triangle of the matrix need be supplied

  (output)
  d	 contains the diagonal elements of the tridiagonal matrix.
  
  e	 contains the subdiagonal elements of the tridiagonal matrix
	 in its first n-1 positions.

  z	 contains the orthogonal transformation matrix produced in the
	 reduction.

  a and z may coincide. If distinct, a is unaltered.

  Functions used:
  UTILITY: SIGN

***********************************************************************/

static void
sis2_tred2(long offset, long n, double **a, double *d, double *e, double **z)
{
 long jj,ii,i,j,k,l, jp1;
 double h, scale, f, g,  hh, tmp;

 

 for (i=offset;i<n;i++) 
  { 
   for (j=i;j<n;j++)
     {
      z[j][i]=a[i][j];   /*fix this later?.. the rest of the routine 
                           assumes that z has the lower triangular part
                           of the symmetric matrix */
     }
   d[i]=a[i][n-1];
  }


  if (n==1) 
   {
    for (i=offset;i<n;i++)
     {
       d[i]=z[n-1][i];
       z[n-1][i]=0.0;
     }
    z[n-1][n-1]=1.0;
    e[1]=0.0;
    return;
   }

  /*for i = n step -1 until 2 do*/

  for (ii=3;ii<n+2-offset;ii++)
   {
     i=n+2-ii;
     l=i-1;
     h=0.0; 
     scale=0.0;

    /*scale row (algol tol then not needed)*/
     if (l>=1)
       for (k=offset;k<=l;k++)
        {
         scale+= fabs(d[k]);
        }
	
    if ((scale==0.0)||(l<1))
     {
      e[i]=d[l];
      for (j=offset;j<=l;j++)
          {
            d[j]=z[l][j];
            z[i][j]=0.0;
            z[j][i]=0.0;
          }
     }
   else                   /*scale <> 0.0 */
     {
       for (k=offset;k<=l;k++)
        {
         d[k]=d[k]/scale;
         h+=d[k]*d[k];
        }


       f=d[l];
       g=-GDL_NREC_SIGN(sqrt(h), f);
       e[i]=scale * g;
       h-=f*g;
       d[l]=f-g;
   
       /* form A*u */
  
       for (j=offset; j<=l; j++)
          e[j]=0.0;
          
          for (j=offset;j<=l;j++)
            {
             f=d[j];
             z[j][i]=f;
             g= e[j] + z[j][j] * f;
             
             jp1= j + 1;
   
             if (l >= jp1) 
                 {
                  for (k=jp1; k<=l; k++)
                   {
                     g+= z[k][j] * d[k];
                     e[k] += z[k][j] * f;
                   }
                 };
             e[j]=g;
           }

       /* form P */
 
       f= 0.0;
 
       for (j=offset; j<=l; j++)
        {
          e[j]=e[j]/h;
          f+= e[j] * d[j];
        }

       hh= f/ (h+h);
  
       /* form Q */
  
      for (j=offset; j<=l; j++)
       e[j] -= hh * d[j];

      /* form reduced A */

      for (j=offset; j<=l; j++)
       {
         f= d[j];
         g = e[j];

         for (k=j; k<=l; k++)
          z[k][j]= z[k][j] - f * e[k] - g * d[k];

         d[j]=z[l][j];
         z[i][j]=0.0;
       }
    }  /* end scale <> zero */

    d[i]=h;
   }   /* end for ii */
   /*accumulation of transformation matrices */

   for (i=offset + 1;i<n;i++)
    {
     l=i-1;
     z[n-1][l] = z[l][l];
     z[l][l] = 1.0;
     h=d[i];

     if (h != 0.0) 
       {
        for (k=offset; k<=l; k++)
          d[k]= z[k][i]/h;

        for (j=offset; j<=l; j++)
         {
           g= 0.0;
           
           for (k=offset;k<=l; k++)
            g+= z[k][i]*z[k][j];

	   for (k=offset;k<=l;k++)
            z[k][j] -= g * d[k];
         }
       }
       for (k=offset;k<=l;k++) z[k][i]=0.0;
     }
  
     for (i=offset;i<n;i++)
       {
        d[i]=z[n-1][i];
        z[n-1][i]=0.0;
       }
     z[n-1][n-1]=1.0;
     e[0]=0.0;

/*preparation for sis2_tql2.c.. reorder e[]*/
for (i=1+offset;i<n;i++) e[i-1]=e[i]; 

/*preparation for sis2_tql2.c.. z has to be transposed for 
  sis2_tql2 to give correct eigenvectors */
for (ii=offset; ii<n; ii++)
 for (jj=ii; jj<n; jj++)
 {
   tmp=z[ii][jj];
  z[ii][jj]=z[jj][ii];
  z[jj][ii]=tmp;
 }

     return;
}
          
/***********************************************************************
 *                                                                     *
 *				sis2_tql2()   			       *
 *                                                                     *
 ***********************************************************************/
/***********************************************************************

   Description
   -----------

   sis2_tql2() is a translation of a Fortran version of the Algol
   procedure TQL2, Num. Math. 11, 293-306(1968) by Dowdler, Martin, 
   Reinsch and Wilkinson.
   Handbook for Auto. Comp., vol.II-Linear Algebra, 227-240(1971).  

   This function finds the eigenvalues and eigenvectors of a symmetric
   tridiagonal matrix by the QL method.


   Arguments
   ---------

   (input)                                                             
   offset the index of the leading element  of the input(full) matrix
          to be factored.
   n      order of the symmetric tridiagonal matrix           
   d      contains the diagonal elements of the input matrix        
   e      contains the subdiagonal elements of the input matrix in its
            first n-1 positions.
   z      contains the identity matrix				    
                                                                   
   (output)                                                       
   d      contains the eigenvalues in ascending order.  if an error
            exit is made, the eigenvalues are correct but unordered for
            for indices 0,1,...,ierr.				   
   e      has been destroyed.					  
   z      contains orthonormal eigenvectors of the symmetric   
            tridiagonal (or full) matrix.  if an error exit is made,
            z contains the eigenvectors associated with the stored 
          eigenvalues.					
   ierr   set to zero for normal return, j if the j-th eigenvalue has
            not been determined after 30 iterations.		    
	  (return value)


   Functions used
   --------------
   UTILITY	SIGN
   MISC		dpythag

 ***********************************************************************/

static long
sis2_tql2(long offset, long n, double *d, double *e, double **z)

{
   long j, last, l, l1, l2, m, i, k, iteration;
   double tst1, tst2, g, r, s, s2=0.0, c, c2, c3=0.0, p, f, h, el1, dl1;
   if (n == 1) return(0);
   f = 0.0;
   last = n - 1;
   tst1 = 0.0;
   e[last] = 0.0;

   for (l = offset; l < n; l++) {
      iteration = 0;
      h = fabs(d[l]) + fabs(e[l]);
      if (tst1 < h) tst1 = h;

      /* look for small sub-diagonal element */
      for (m = l; m < n; m++) {
	 tst2 = tst1 + fabs(e[m]);
	 if (tst2 == tst1) break;
      }
      if (m != l) {
	 while (iteration < 30) {
	    iteration += 1;

            /*  form shift */
	    l1 = l + 1;
	    l2 = l1 + 1;
	    g = d[l];
        p = (d[l1] - g) / (2.0 * e[l]);
	    r = dpythag(p, 1.0);
	    d[l] = e[l] / (p + GDL_NREC_SIGN(r, p));
	    d[l1] = e[l] * (p + GDL_NREC_SIGN(r, p));
	    dl1 = d[l1];
	    h = g - d[l];
	    if (l2 < n) 
	       for (i = l2; i < n; i++) d[i] -= h;
            f += h;

	    /* QL transformation */
	    p = d[m];
	    c = 1.0;
	    c2 = c;
	    el1 = e[l1];
	    s = 0.0;
	    i = m - 1;
	    while (i >= l) {
	       c3 = c2;
	       c2 = c;
	       s2 = s;
	       g = c * e[i];
	       h = c * p;
	       r = dpythag(p, e[i]);
	       e[i + 1] = s * r;
	       s = e[i] / r;
	       c = p / r;
	       p = c * d[i] - s * g;
	       d[i + 1]= h + s * (c * g + s * d[i]);

	       /*  form vector */
	       for (k = offset; k < n; k ++) {
	          h = z[i + 1][k];
	          z[i + 1][k] = s * z[i][k] + c * h;
	          z[i][k] = c * z[i][k] - s * h;
	       }
	       i--;
	    }
	    p = -s * s2 * c3 *el1 * e[l] / dl1;
	    e[l] = s * p;
	    d[l] = c * p;
	    tst2 = tst1 + fabs(e[l]);
	    if (tst2 <= tst1) break;
	    if (iteration == 30) 
	       return(l);
         }
      }
      d[l] += f;
   }

   /* order the eigenvalues */
   for (l = 1+offset; l < n; l++) {
      i = l - 1;
      k = i;
      p = d[i];
      for (j = l; j < n; j++) {
	 if (d[j] < p) {
	    k = j;
	    p = d[j];
	 }
      }
      /* ...and corresponding eigenvectors */
      if (k != i) {
	 d[k] = d[i];
	 d[i] = p;
	  for (j = offset; j < n; j ++) {
	     p = z[i][j];
	     z[i][j] = z[k][j];
	     z[k][j] = p;
	  }
      }   
   }
   return(0);
}

/************************************************************** 
 * Function forms the dot product of two vectors.      	      *
 * Based on Fortran-77 routine from Linpack by J. Dongarra    *
 **************************************************************/ 

static double
sis2_ddot( long n,double *dx,long incx,double *dy,long incy)

{
   long i;
   double dot_product;

   if (n <= 0 || incx == 0 || incy == 0) return(0.0);
   dot_product = 0.0;
   if (incx == 1 && incy == 1) 
      for (i=0; i < n; i++) 
      	dot_product += (*dx++) * (*dy++);
   else {
      if (incx < 0) dx += (-n+1) * incx;
      if (incy < 0) dy += (-n+1) * incy;
      for (i=0; i < n; i++) {
         dot_product += (*dx) * (*dy);
         dx += incx;
         dy += incy;
      }
   }
   return(dot_product);
}
/************************************************************** 
 * Constant times a vector plus a vector     		      *
 * Based on Fortran-77 routine from Linpack by J. Dongarra    *
 **************************************************************/ 

static void
sis2_daxpy (long n,double da,double *dx,long incx,double *dy,long incy)
{
   long i;

   if (n <= 0 || incx == 0 || incy == 0 || da == 0.0) return;
   if (incx == 1 && incy == 1) 
      for (i=0; i < n; i++) {
	 *dy += da * (*dx++);
	 dy++;
      }
   else {
      if (incx < 0) dx += (-n+1) * incx;
      if (incy < 0) dy += (-n+1) * incy;
      for (i=0; i < n; i++) {
         *dy += da * (*dx);
         dx += incx;
         dy += incy;
      }
   }
   return;
}
/************************************************************** 
 * Function scales a vector by a constant.     		      *
 * Based on Fortran-77 routine from Linpack by J. Dongarra    *
 **************************************************************/ 
static void
sis2_dscal(long n,double da,double *dx,long incx)

{
   long i;

   if (n <= 0 || incx == 0) return;
   if (incx < 0) dx += (-n+1) * incx;
   for (i=0; i < n; i++) {
      *dx *= da;
      dx += incx;
   }
   return;
}

/****************************************************************
 *                                                              *
 *                       sis2_ritzit()                      *
 *                                                              *
 ****************************************************************/
/****************************************************************

 Description:
 ------------

 This subroutine is a translation of the Fortran-77 procedure
 RITZIT from the SVDPACK library written by Michael W. Berry,
 University of Tennessee, Dept. of Computer Science, 107 Ayres
 Hall, Knoxville TN 37919-1301

 This subroutine determines the absolutely largest eigenvalues
 and corresponding eigenvectors of a real symmetric matrix by
 simultaneous iteration.

 External parameters (constants)
 -------------------------------

 local parameters: 
 -----------------
 
 (input)

 n      the order of the matrix  whose eigenpairs are sought
        (matrix B for the SVD problem)
 kp     the number of simultaneous iteration vectors
 km     the maximum number of iteration steps to be performed. If
        starting values for the iteration vectors are available,
        km should be prefixed with a minus sign.
 eps    the tolerance for accepting eigenvectors
 opb    the name of the subroutine that defines the matrix B. opb
        is called with the parameters (n, u, w) and must 
        compute w=Bu without altering u for the SVD problem
 inf    the name of the subroutine that may be used to obtain 
        information or exert control during execution. inf is
        called with parameters (ks, kg, kh, f, m), where ks
        is the number of the next iteration step, kg is the
        number of already accepted eigenvectors, kh is the number
        already accepted eigenvalues, and f is the array of error
        quantities for the vectors of x. An element of f has the
        value 4.0 until the corresponding eigenvalue of the matrix
        B  has been accepted.
 kem    the number of eigenvalues and corresponding eigenvectors
         of matrix B desired. kem must be less than kp.
 x      contains, if km is negative, the starting values for the
        iteration vectors.

 (output)
 
 km     is unchanged
 kem    is reset to the number of eigenvalues and eigenvectors
        of matrix B actually accepted within the limit of km steps
 imem   the number of bytes needed for this invocation
 x      contains in its first kem columns orthonormal
        eigenvectors of the matrix B,  corresponding to the 
        eigenvalues in array d.  The remaining columns contain 
        approximations to further eigenvectors.
 d      contains in its first kem positions the absolutely
        largest eigenvalues of the matrix B. The remaining positions
        contain approximations to smaller eigenvalues.

 u, w, b, f, cx, x, s and  rq are temporary storage arrays.

 Functions used
 --------------

 BLAS:  sis2_ddot, dscal, sis2_daxpy
 EISP:  sis2_tred2, sis2_tql2, dpythag
 MISC:  opb, inf, DMAX, IMAX
 
*****************************************************************/
static void 
sis2_ritzit (
              long n,
              long kp,
              long km,
              double eps,
              void (*opb) (gdl_matrix *, size_t M, size_t N, double *, double *, int ),
              long kem,
              double **x,
              double *d,
              double *f,
              double *cx,
              double *u,
              long *imem,
              gdl_matrix * a,
              long nrow,
              long ncol,
              int trans
            )
{
	
#define NSIG nrow
#define NMAX ncol+nrow
	
  long i, j, l, i1, l1, size, kg, kh, kz, kz1, kz2, ks, m, m1=0;
  long  jj, ii, ig, ip, ik, jp, k, flag1 ;  
  double  *w[NMAX], *rq, *b[NSIG], *s, *tptr, TmpRes;
  double xks, ee2, e, e2, e1=0.0, xkm, xk, xm1, t;

  /*******************************************************************
   * allocate memory for temporary storage arrays used in ritzit only*
   *                                                                 *
   *            rq - (kp)                                            *
   *             w - (NSIG * n)                                      *
   *             b - (NSIG * kp)                                     *
   *             s - (kp)                                            *
   *******************************************************************/

   size =    kp * (NSIG +2) + NSIG * n;
   
   rq = (double *) gdl_malloc(size * sizeof(double));
   
   tptr= rq + kp;
   for (i=0; i< NSIG; i++){
    w[i]= tptr;
    tptr+=n;
   }

   /*w=tptr; tptr+=(n * kp);*/
   for (i=0; i<NSIG; i++) {
    b[i]=tptr;
    tptr+=kp;
   };
   s=tptr;

   /*finished allocating memory*/


  *imem += size;
  *imem = sizeof(double) * (*imem);

  ee2 = 1.0 + 1.0e-1 * eps;
  e   = 0.0;
  kg  = -1;
  kh  = -1;
  kz  = 1367;
  kz1 = 0;
  kz2 = 0;
  ks  = 0;
  m   = 1;

  for (l=0; l<kp; l++){
    f[l] = 4.0e+0;
   cx[l] = 0.0;
   rq[l] = 0.0;
 } 
 
  if (km >= 0)    /*generate random initial iteration vectors*/
      for (j=0; j<kp; j++)
       for (l=0; l<n; l++){
         kz = (3125 * kz) % 65536;  
        x[j][l] = (double) (kz - 32768);
      }
  km =abs(km);
  l1 = 1;
  i1 = 1;
  jp=kp;
flag1=0;
#define NRow kp
#define NCol n

  /* extend orthonormalization to all kp rows of x 

     Variables used here:
 
     NCol                     : number of columns of x
     NRow                     : number of rows of x
     ii, ik, jp, k, l1        : indexing integers
     t, TmpRes                : double precision storage.


     at the end of this section of code, 

     x   : transpose(Q)
     b   : R

     where X = QR is the orthogonal factorization of the matrix X
     stored in array x

     invokes: sis2_ddot, sis2_daxpy, sis2_dscal (BLAS)
              sqrt               (math.h)
  */

  ik = l1 -1; 
  for (i=0; i< jp; i++){
    for (k=l1-1; k<jp; k++) b[i][k]= 0.0;
   
    if (i >= l1-1){
      ik=i+1;
      b[i][i]=sqrt(sis2_ddot(NCol, &x[i][0], 1 ,  &x[i][0], 1));
      t=0.0;
      if (b[i][i] != 0.0) t=1.0 / b[i][i];
      sis2_dscal(NCol, t, &(x[i][0]),  1);
     }

   for (ii=ik; ii< NRow; ii++){
     TmpRes=0.0;
     for (jj=0; jj<NCol; jj++)
       TmpRes+= x[ii][jj] * x[i][jj];
     b[i][ii]=TmpRes;
  }
  
    for (k=ik; k<jp;k++) 
        sis2_daxpy(NCol, -b[i][k], &x[i][0], 1, &x[k][0], 1);

  } /* end for i */

  ig = 0;
  ip = kp - 1;
  
 /*statement 70  from original RITZIT begins here*/
while (1){

  flag1=1;  /* so that jacobi step is done at least once */

  while (flag1){
        /* jacobi step modified */
        for(k=ig; k< kp; k++){
          opb(a, nrow, ncol, &x[k][0], &w[0][0], trans);
          for (j=0; j<n; j++) x[k][j]=w[0][j];
         }
        l1=ig + 1;
        jp=kp;
        flag1=0; /*flag1 is set only if re-orthog needs to be done*/

     /* extend orthonormalization to all kp rows of x 

     Variables used here:
 
     NCol                     : number of columns of x
     NRow                     : number of rows of x
     ii, ik, jp, k, l1        : indexing integers
     t, TmpRes                : double precision storage.


     at the end of this section of code, 

     x   : transpose(Q)
     b   : R

     where X = QR is the orthogonal factorization of the matrix X
     stored in array x

     invokes: sis2_ddot, sis2_daxpy, sis2_dscal (BLAS)
              sqrt               (math.h)
  */

    ik = l1 -1; 
  for (i=0; i< jp; i++){
    for (k=l1-1; k<jp; k++) b[i][k]= 0.0;
   
    if (i >= l1-1){
      ik=i+1;
      b[i][i]=sqrt(sis2_ddot(NCol, &x[i][0], 1 ,  &x[i][0], 1));
      t=0.0;
      if (b[i][i] != 0.0) t=1.0 / b[i][i];
      sis2_dscal(NCol, t, &(x[i][0]),  1);
     }

   for (ii=ik; ii< NRow; ii++){
     TmpRes=0.0;
     for (jj=0; jj<NCol; jj++)
       TmpRes+= x[ii][jj] * x[i][jj];
     b[i][ii]=TmpRes;
  }
  
    for (k=ik; k<jp;k++) 
        sis2_daxpy(NCol, -b[i][k], &x[i][0], 1, &x[k][0], 1);

  } /* end for i */

       if ( ks<=0 ) {
         /* measures against unhappy choice of initial vectors*/
         for (k=0;k<kp; k++)
           if (b[k][k] ==0.0)
              for (j=0;j<n;j++){
                 kz= (3125 * kz) % 65536;
                 x[k][j] = (double)(kz-32768);
                 flag1=1;
               }
       }
     if (flag1){
       l1=1;
       ks=1; /*we dont want to re-initialize x[][] again*/

   /* extend orthonormalization to all kp rows of x 

     Variables used here:
 
     NCol                     : number of columns of x
     NRow                     : number of rows of x
     ii, ik, jp, k, l1        : indexing integers
     t, TmpRes                : double precision storage.


     at the end of this section of code, 

     x   : transpose(Q)
     b   : R

     where X = QR is the orthogonal factorization of the matrix X
     stored in array x

     invokes: sis2_ddot, sis2_daxpy, sis2_dscal (BLAS)
              sqrt               (math.h)
  */

    ik = l1 -1; 
  for (i=0; i< jp; i++){
    for (k=l1-1; k<jp; k++) b[i][k]= 0.0;
   
    if (i >= l1-1){
      ik=i+1;
      b[i][i]=sqrt(sis2_ddot(NCol, &x[i][0], 1 ,  &x[i][0], 1));
      t=0.0;
      if (b[i][i] != 0.0) t=1.0 / b[i][i];
      sis2_dscal(NCol, t, &(x[i][0]),  1);
     }

   for (ii=ik; ii< NRow; ii++){
     TmpRes=0.0;
     for (jj=0; jj<NCol; jj++)
       TmpRes+= x[ii][jj] * x[i][jj];
     b[i][ii]=TmpRes;
  }
  
    for (k=ik; k<jp;k++) 
        sis2_daxpy(NCol, -b[i][k], &x[i][0], 1, &x[k][0], 1);

  } /* end for i */
      } 
    } /*end while flag1 */

  for (k= ig; k<kp; k++)
    for (l=k; l<kp; l++){
      t = 0.0;

      for (i=l; i<kp; i++)
        t+= b[k][i] * b[l][i];

      /* negate matrix to reverse eigenvalue ordering */
      b[k][l] = -t;
    }
  j=kp - kg - 1;

  sis2_tred2(ig, kp, b, d, u, b);
  ii=sis2_tql2(ig, kp, d, u, b);


  for (k=ig; k< kp; k++)
   d[k]=sqrt(GDL_MAX_DBL(-d[k], 0.0));
  

 for (j=ig; j<kp; j++)
  for (k=0; k<n; k++) 
   w[j][k]=0.0;

   for (j=ig; j<kp; j++)
     for (l=ig; l<kp; l++){
       TmpRes=b[j][l];
       for (k=0; k<n; k++)
         w[j][k] += TmpRes  * x[l][k];}   /* w is going to be transposed as 
                                         compared to the fortran version */
     

  for (j=ig; j<kp; j++)
   for (k=0; k<n; k++)
      x[j][k]=w[j][k]; 

  xks=(double)(++ks);
  if (d[kp-1] > e) e=d[kp-1 ];

  /* randomization */
  if (kz1<3) {
    for (j=0; j<n; j++) {
       kz= (3125 * kz) % 65536;
       x[kp-1][j] = (double) (kz -32768);
    }
    l1=kp;
    jp=kp;

  /* extend orthonormalization to all kp rows of x 

     Variables used here:
 
     NCol                     : number of columns of x
     NRow                     : number of rows of x
     ii, ik, jp, k, l1        : indexing integers
     t, TmpRes                : double precision storage.


     at the end of this section of code, 

     x   : transpose(Q)
     b   : R

     where X = QR is the orthogonal factorization of the matrix X
     stored in array x

     invokes: sis2_ddot, sis2_daxpy, sis2_dscal (BLAS)
              sqrt               (math.h)
  */

    ik = l1 -1; 
  for (i=0; i< jp; i++){
    for (k=l1-1; k<jp; k++) b[i][k]= 0.0;
   
    if (i >= l1-1){
      ik=i+1;
      b[i][i]=sqrt(sis2_ddot(NCol, &x[i][0], 1 ,  &x[i][0], 1));
      t=0.0;
      if (b[i][i] != 0.0) t=1.0 / b[i][i];
      sis2_dscal(NCol, t, &(x[i][0]),  1);
     }

   for (ii=ik; ii< NRow; ii++){
     TmpRes=0.0;
     for (jj=0; jj<NCol; jj++)
       TmpRes+= x[ii][jj] * x[i][jj];
     b[i][ii]=TmpRes;
  }
  
    for (k=ik; k<jp;k++) 
        sis2_daxpy(NCol, -b[i][k], &x[i][0], 1, &x[k][0], 1);

  } /* end for i */

  }
  
  /*compute control quantities cx */
  for (k=ig; k<ip;k++){
    t=(d[k] - e)*(d[k] + e);
    if (t <= 0.0) cx[k]=0.0; 
    else 
           if (e ==0.0) cx[k] = 1.0e+3 + log(d[k]);
           else  cx[k]= log( (d[k]+sqrt(t)) / e);
  }
    
    /*acceptance test for eigenvalues including adjustment of 
      kem and kh such that 
           d[kem] > e, 
           d[kh]  > e, and,
           d[kem] does not oscillate strongly */

    for (k=ig; k<kem; k++)
       if ((d[k] <= e) ||
           ((kz1>1) && (d[k] <= 9.99e-1 * rq[k]))){
          kem=k-1;
          break;
       }

   if (!kem) break; 

   k=kh+1;

   while ((d[k] != 0.0) && ( d[k] <= ee2 * rq[k])){
      kh=k++;
   }

   do {
    if (d[k] <= e) kh= k- 1;
    --k;
   }
   while (k > kem-1);

  /*acceptance test for eigenvectors */

  l= kg;
  e2= 0.0;
 
  for (k=ig; k<ip; k++){
     if (k == l+1){
       /*check for nested eigenvalues */
       l=k;
       l1=k;
       if (k != ip){
          ik= k + 1;
          s[0] = 5.0e-1 / xks;
          t= 1.0 / (double)(ks * m);
          for (j=ik; j<ip; j++){
            if ((cx[j] * (cx[j] + s[0]) + t) <= (cx[j-1] * cx[j-1]))
                break;
            l=j;
          }
       }
     }
     if (l > kh) {l = l1; break;}
  
         opb(a, nrow, ncol, &x[k][0], &w[0][0], trans);
         s[0]= 0.0;
      
         for (j=0; j<=l; j++)
            if (fabs(d[j] - d[k]) < 1.0e-2 * d[k]){
                t=0.0;
 
                for (i=0; i<n; i++) 
                    t+= w[0][i] * x[j][i];

                for (i=0; i<n; i++) {
                    w[0][i] =w[0][i] - t * x[j][i];
                }

                s[0]+= t*t;
            }
  
         t=0.0;

         for (i=0; i<n; i++) 
              t+= w[0][i] * w[0][i];

         if (s[0] != 0.0) t=sqrt(t/(s[0] + t)); 
         else t=1.0;
   
         if (t > e2) e2=t;

         if (k==l){
             /* test for acceptance of group of eigenvectors*/
             if ((l>=kem-1) &&
                 (d[kem] * f[kem] < eps * (d[kem] -e)))
               kg=kem;

             if (e2 < f[l])
               for (j=l1; j<=l; j++) f[j]=e2;

             if ((l <= kem) &&
                 (d[l] * f[l] < eps * (d[l]-e)))
               kg=l;

             ig=kg+1;
         }
}

  /* statements 570 to 660  from original RITZIT*/

   if (e<= 4.0e-2 * d[0]) {
     m=1;
     k=1;
   }
   else {
     e2=2.0e0/e;
     e1=5.1e-1 * e2;
     k= 2 * GDL_MAX_INT((long)(4.0e0/cx[0]), 1);
     if (m>k) m=k;
   }
   /*reduce kem if convergence would be too slow */
   xkm=(double)km;
   if ((f[kem-1] != 0.0) &&
       (xks < 9.0e-1 *xkm)){
         xk=(double)k;
         s[0]=xk * cx[kem-1];
         if (s[0] < 5.0e-2) t=5.0e-1 * s[0] *cx[kem-1];
         else t=cx[kem-1] + log(5.0e-1 * (1.0 + exp(-2.0e0 * s[0])))/xk;
         s[0]=log(d[kem-1] * f[kem-1]/(eps * (d[kem-1] -e)))/t;
         if ((xkm - xks) * xkm < s[0] * xks) kem--;
   }
   //inf(ks,kg,kh,f,m);

  if ((kg >= kem-1) ||
      (ks >= km))      break;
 
  for (k=ig; k<kp; k++)  rq[k] = d[k];

  do {
      /*statements 680-700 */
      if (ks + m > km) {
           kz2=-1;
           if (m >1) m = 2* ((km -ks +1)/2);
      }
      else
          m1=m;

      /*shortcut last intermediate block if all error quantities f are
        sufficiently small */
      if (l >= kem){
        s[0]= d[kem-1] * f[kem-1]/(eps *(d[kem-1] -e));
        t= s[0] * s[0] - 1.0;
        if (t <= 0.0) break;
        s[0] = log(s[0] + sqrt(t))/(cx[kem-1] -cx[kh+1]);
        m1=2 * (long)(5.0e-1 * s[0] + 1.01e0);
        if (m1<=m) kz2=-1;
        else m1=m;
     }
     xm1=(double) m1;
     
                                   /*chebyshev iteration */
     if (m==1)
       for (k=ig; k<kp; k++){
       opb(a, nrow, ncol, &x[k][0], &w[0][0], trans);
       for (i=0; i<n; i++) 
           x[k][i] = w[0][i];
       }
     else                                /*degree != 1.0 */
       for (k=ig; k<kp; k++){
          opb(a, nrow, ncol, &x[k][0], &w[0][0], trans);

          for (i=0; i<n; i++) u[i]=e1 * w[0][i];

          opb(a, nrow, ncol, u, &w[0][0], trans);

          for (i=0; i<n; i++) x[k][i]= e2 *w[0][i] - x[k][i];

          if (m1>=4)
               for (j=3; j<m1; j+=2){
                 opb(a, nrow, ncol, &x[k][0], &w[0][0], trans);
 
                 for (i=0; i<n; i++)
                    u[i]=e2 * w[0][i] - u[i];

                 opb(a, nrow, ncol, u, &w[0][0], trans);

                 for (i=0; i<n; i++)
                     x[k][i] = e2 * w[0][i] - x[k][i];
                }
       }

       
       l1=ig+1;

    /* extend orthonormalization to all kp rows of x

     Variables used here:
 
     NCol                     : number of columns of x
     NRow                     : number of rows of x
     ii, ik, jp, k, l1        : indexing integers
     t, TmpRes                : double precision storage.


     at the end of this section of code, 

     x   : transpose(Q)
     b   : R

     where X = QR is the orthogonal factorization of the matrix X
     stored in array x

     invokes: sis2_ddot, sis2_daxpy, sis2_dscal (BLAS)
              sqrt               (math.h)
  */

    ik = l1 -1; 
  for (i=0; i< jp; i++){
    for (k=l1-1; k<jp; k++) b[i][k]= 0.0;
   
    if (i >= l1-1){
      ik=i+1;
      b[i][i]=sqrt(sis2_ddot(NCol, &x[i][0], 1 ,  &x[i][0], 1));
      t=0.0;
      if (b[i][i] != 0.0) t=1.0 / b[i][i];
      sis2_dscal(NCol, t, &(x[i][0]),  1);
     }

   for (ii=ik; ii< NRow; ii++){
     TmpRes=0.0;
     for (jj=0; jj<NCol; jj++)
       TmpRes+= x[ii][jj] * x[i][jj];
     b[i][ii]=TmpRes;
  }
  
    for (k=ik; k<jp;k++) 
        sis2_daxpy(NCol, -b[i][k], &x[i][0], 1, &x[k][0], 1);

  } /* end for i */

      /*discounting the error quantities F */
      if (kg!=kh){
         if (m ==1) 
              for (k=ig; k<=kh; k++) f[k]= f[k] * (d[kh+1]/d[k]);
         else {
               t=exp(-xm1 * cx[kh+1]);
               for (k=ig; k<=kh; k++){
                   s[0]=exp(-xm1 * (cx[k]-cx[kh+1]));
                   f[k] = s[0] * f[k] * (1.0 + t*t)/(1.0 + (s[0] *t)*(s[0]*t));
               }
         } 
      }
      ks=ks+m1;
      kz2=kz2 - m1;
     
   } /*possible repetition of intermediate steps*/
   while (kz2>=0);

   kz1++;
   kz2 = 2 * kz1;
   m = 2 * m; 

}  /* end while kz2<=0 ... go to 70*/

/*statement 900 of original RITZIT program begins here*/

kem = kg;
l1=1; 
jp=kp-1;

  /* extend orthonormalization to all kp rows of x 

     Variables used here:
 
     NCol                     : number of columns of x
     NRow                     : number of rows of x
     ii, ik, jp, k, l1        : indexing integers
     t, TmpRes                : double precision storage.


     at the end of this section of code, 

     x   : transpose(Q)
     b   : R

     where X = QR is the orthogonal factorization of the matrix X
     stored in array x

     invokes: sis2_ddot, sis2_daxpy, sis2_dscal (BLAS)
              sqrt               (math.h)
  */

    ik = l1 -1; 
  for (i=0; i< jp; i++){
    for (k=l1-1; k<jp; k++) b[i][k]= 0.0;
   
    if (i >= l1-1){
      ik=i+1;
      b[i][i]=sqrt(sis2_ddot(NCol, &x[i][0], 1 ,  &x[i][0], 1));
      t=0.0;
      if (b[i][i] != 0.0) t=1.0 / b[i][i];
      sis2_dscal(NCol, t, &(x[i][0]),  1);
     }

   for (ii=ik; ii< NRow; ii++){
     TmpRes=0.0;
     for (jj=0; jj<NCol; jj++)
       TmpRes+= x[ii][jj] * x[i][jj];
     b[i][ii]=TmpRes;
  }
  
    for (k=ik; k<jp;k++) 
        sis2_daxpy(NCol, -b[i][k], &x[i][0], 1, &x[k][0], 1);

  } /* end for i */

/*statements 920 up to last card of the original RITZIT program */
 
for (k=0; k<ip; k++)
  for (i=k; i<ip; i++) b[k][i]=0.0;

for (k=0; k<ip; k++) {
   opb(a, nrow, ncol, &x[k][0], &w[0][0], trans);

   for (i=0; i<=k; i++)
        for (l=0; l<n; l++)
          b[i][k]=b[i][k] - x[i][l] * w[0][l];
   }

sis2_tred2(0, ip, b, d, u, b);
sis2_tql2(0, ip, d, u, b);

/*reordering of eigenvalues and eigenvectors according to the magnitudes 
  of the former */

for (i=0; i< ip; i++)
  if (i!=ip-1){ 
       k=i;
       t=d[i];
       ii=i+1;
       for (j=ii; j<ip; j++)
         if (fabs(d[j]) > fabs(t)){
             k=j; 
             t=d[j];
         }
       if (k!=i) {
          d[k] = d[i];
          d[i] = t;

          for (j=0; j<ip; j++){
            s[j]= b[i][j];
            b[i][j] = b[k][j];
            b[k][j] = s[j];
          }
      }
   d[i]=-d[i];
   }

  for (i=0; i<ip; i++)
   for(j=0; j<n; j++)
    w[i][j]=0.0;

  for (i=0; i<ip; i++)
   for (k=0; k<ip; k++){
    TmpRes=b[i][k];
    for (j=0; j<n; j++)
      w[i][j]+=TmpRes * x[k][j];
   }

     for (i=0; i<ip; i++)
        for (j=0; j<n; j++)
           x[i][j] = w[i][j];

     d[kp-1]=e;

#undef NSIG
#undef NMAX

  /* free memory at the end of ritzit*/
  free(rq);
  return;

}  /*end ritzit*/

