// File: s_las2.cc -- Implements the las2 class
// Author: Suvrit Sra
// Date: 14 nov, 2003

/*************************************************************************
 THE ORIGINAL SVDPAKC COPYRIGHT
                           (c) Copyright 1993
                        University of Tennessee
                          All Rights Reserved                          
 *************************************************************************/

#include <cstdio>
#include <cmath>
#include <cerrno>
#include <cstdlib>
#include <cstring>
#include <unistd.h>
#include <fcntl.h>

#include "s_las2.h"


#define STORQ 1
#define RETRQ 2
#define STORP 3
#define RETRP 4
#define MAXLL 2

using namespace ssvd;
/***********************************************************************
 *                                                                     *
 *				landr()				       *
 *        Lanczos algorithm with selective orthogonalization           *
 *                    Using Simon's Recurrence                         *
 *                       (double precision)                            *
 *                                                                     *
 ***********************************************************************/
/***********************************************************************

   Description
   -----------

   landr() is the LAS2 driver routine that, upon entry,
     (1)  checks for the validity of input parameters of the 
	  B-eigenproblem 
     (2)  determines several machine constants
     (3)  makes a Lanczos run
     (4)  calculates B-eigenvectors (singular vectors of A) if requested 
	  by user


   arguments
   ---------

   (input)
   n        dimension of the eigenproblem for A'A
   lanmax   upper limit of desired number of Lanczos steps
   maxprs   upper limit of desired number of eigenpairs
   nnzero   number of nonzeros in matrix A
   endl     left end of interval containing unwanted eigenvalues of B
   endr     right end of interval containing unwanted eigenvalues of B
   vectors  1 indicates both eigenvalues and eigenvectors are wanted
              and they can be found in output file lav2; 
	    0 indicates only eigenvalues are wanted
   kappa    relative accuracy of ritz values acceptable as eigenvalues
	      of B (singular values of A)
   r        work array

   (output)
   j        number of Lanczos steps actually taken                     
   neig     number of ritz values stabilized                           
   ritz     array to hold the ritz values                              
   bnd      array to hold the error bounds


   External parameters
   -------------------

   Defined and documented in las2.h


   local parameters
   -------------------

   ibeta    radix for the floating-point representation
   it       number of base ibeta digits in the floating-point significand
   irnd     floating-point addition rounded or chopped
   machep   machine relative precision or round-off error
   negeps   largest negative integer
   wptr	    array of pointers each pointing to a work space


   Functions used
   --------------

   MISC         dmax, machar, check_parameters
   LAS2         ritvec, lanso

 ***********************************************************************/

long s_las2::landr(long fpo2, FILE* fp, bool ascii, long n, 
		   long lanmax, long maxprs, long nnzero, double endl,  
		   double endr, bool vectors, double kappa, double *ritz, 
		   double *bnd, double *r)

{
   long i, size, ibeta, it, irnd, machep, negep;
   double *wptr[10], *tptr, *tptr2;

   /* data validation */
   if (check_parameters(maxprs, lanmax, n, endl, endr, vectors, nnzero))
      return(-1);

   /* Compute machine precision */ 
   machar(&ibeta, &it, &irnd, &machep, &negep);

   eps1 = eps * sqrt( (double) n );
   reps = sqrt(eps);
   eps34 = reps * sqrt(reps);

   /* allocate work area and initialize pointers         *
    * ptr             symbolic name         size         *
    * wptr[0]             r                  n           *
    * wptr[1]             q		     n           *
    * wptr[2]             q_previous         n           *
    * wptr[3]             p		     n           *
    * wptr[4]             p_previous         n           *
    * wptr[5]             wrk                n           *
    * wptr[6]             alf              lanmax        *
    * wptr[7]             eta              lanmax        *
    * wptr[8]             oldeta           lanmax        *
    * wptr[9]             bet              lanmax+1      */
   //fprintf(stderr, "s_las2::landr() about to start doing computation\n");
   size = 5 * n + (lanmax * 4 + 1);
   tptr = NULL;
   if (!(tptr = (double *) malloc(size * sizeof(double)))){
      perror("FIRST MALLOC FAILED in LANDR()");
      exit(errno);
   }
   tptr2 = tptr;
   wptr[0] = r;
   for (i = 1; i <= 5; i++) {
      wptr[i] = tptr;
      tptr += n;
   }
   for (i = 6; i <= 9; i++) {
      wptr[i] = tptr;
      tptr += lanmax;
   }

   lanso(n, lanmax, maxprs, endl, endr, ritz, bnd, wptr);

   /* compute eigenvectors */
   if (vectors) {
      if (!(xv1 = (double *) malloc((nrow+ncol)*(j+1)*sizeof(double))) ||
	  !(xv2 = (double *) malloc(ncol * sizeof(double)))) {
	  perror("SECOND MALLOC FAILED in LANDR()");
	  exit(errno);
	 }
      kappa = dmax(fabs(kappa), eps34);
      ritvec(fpo2, fp, ascii, n, kappa, ritz, bnd, wptr[6], wptr[9], wptr[4], wptr[5]);
   }

   free(tptr2);
   return(0);
}





/***********************************************************************
 *                                                                     *
 *                         stpone()                                    *
 *                                                                     *
 ***********************************************************************/
/***********************************************************************

   Description
   -----------

   Function performs the first step of the Lanczos algorithm.  It also
   does a step of extended local re-orthogonalization.

   Arguments 
   ---------

   (input)
   n      dimension of the eigenproblem for matrix B

   (output)
   ierr   error flag
   wptr   array of pointers that point to work space that contains
	    wptr[0]             r[j]
	    wptr[1]             q[j]
	    wptr[2]             q[j-1]
	    wptr[3]             p
	    wptr[4]             p[j-1]
	    wptr[6]             diagonal elements of matrix T 


   Functions used
   --------------

   BLAS		daxpy, datx, dcopy, ddot, dscal
   USER		store, opb
   LAS		startv

 ***********************************************************************/

void s_las2::stpone(long n, double *wrkptr[])

{
   double t, *alf;
   alf = wrkptr[6];

   /* get initial vector; default is random */
   rnm = startv(n, wrkptr);
   if (rnm == 0.0 || ierr != 0) return;

   /* normalize starting vector */
   t = 1.0 / rnm;
   datx(n, t, wrkptr[0], 1, wrkptr[1], 1);
   dscal(n, t, wrkptr[3], 1);

   /* take the first step */
   mopb(n, wrkptr[3], wrkptr[0]);
   alf[0] = ddot(n, wrkptr[0], 1, wrkptr[3], 1);
   daxpy(n, -alf[0], wrkptr[1], 1, wrkptr[0], 1);
   t = ddot(n, wrkptr[0], 1, wrkptr[3], 1);
   daxpy(n, -t, wrkptr[1], 1, wrkptr[0], 1);
   alf[0] += t;
   dcopy(n, wrkptr[0], 1, wrkptr[4], 1);
   rnm = sqrt(ddot(n, wrkptr[0], 1, wrkptr[4], 1));
   anorm = rnm + fabs(alf[0]);
   tol = reps * anorm;
   return;
}


/***********************************************************************
 *                                                                     *
 *                         startv()                                    *
 *                                                                     *
 ***********************************************************************/
/***********************************************************************

   Description
   -----------

   Function delivers a starting vector in r and returns |r|; it returns 
   zero if the range is spanned, and ierr is non-zero if no starting 
   vector within range of operator can be found.

   Parameters 
   ---------

   (input)
   n      dimension of the eigenproblem matrix B
   wptr   array of pointers that point to work space
   j      starting index for a Lanczos run
   eps    machine epsilon (relative precision)

   (output)
   wptr   array of pointers that point to work space that contains
	  r[j], q[j], q[j-1], p[j], p[j-1]
   ierr   error flag (nonzero if no starting vector can be found)

   Functions used
   --------------

   BLAS		ddot, dcopy, daxpy
   USER		opb, store
   MISC		random

 ***********************************************************************/

double s_las2::startv(long n, double *wptr[])

{
   double rnm2, *r, t;
   long irand;
   long id, i;

   /* get initial vector; default is random */
   rnm2 = ddot(n, wptr[0], 1, wptr[0], 1);
   irand = 918273 + j;
   r = wptr[0];
   for (id = 0; id < 3; id++) {
      if (id > 0 || j > 0 || rnm2 == 0) 
	 for (i = 0; i < n; i++) r[i] = mrandom(&irand);
      dcopy(n, wptr[0], 1, wptr[3], 1);

      /* apply operator to put r in range (essential if m singular) */
      mopb(n, wptr[3], wptr[0]);
      dcopy(n, wptr[0], 1, wptr[3], 1);
      rnm2 = ddot(n, wptr[0], 1, wptr[3], 1);
      if (rnm2 > 0.0) break;
   }

   /* fatal error */
   if (rnm2 <= 0.0) {
      ierr = 8192;
      return(-1);
   }
   if (j > 0) {
      for (i = 0; i < j; i++) {
         store(n, RETRQ, i, wptr[5]);
	 t = -ddot(n, wptr[3], 1, wptr[5], 1);
	 daxpy(n, t, wptr[5], 1, wptr[0], 1);
      }

      /* make sure q[j] is orthogonal to q[j-1] */
      t = ddot(n, wptr[4], 1, wptr[0], 1);
      daxpy(n, -t, wptr[2], 1, wptr[0], 1);
      dcopy(n, wptr[0], 1, wptr[3], 1);
      t = ddot(n, wptr[3], 1, wptr[0], 1);
      if (t <= eps * rnm2) t = 0.0;
      rnm2 = t;
   }
   return(sqrt(rnm2));
}



/***********************************************************************
 *                                                                     *
 *				imtqlb()			       *
 *                                                                     *
 ***********************************************************************/
/***********************************************************************

   Description
   -----------

   imtqlb() is a translation of a Fortran version of the Algol
   procedure IMTQL1, Num. Math. 12, 377-383(1968) by Martin and 
   Wilkinson, as modified in Num. Math. 15, 450(1970) by Dubrulle.  
   Handbook for Auto. Comp., vol.II-Linear Algebra, 241-248(1971).  
   See also B. T. Smith et al, Eispack Guide, Lecture Notes in 
   Computer Science, Springer-Verlag, (1976).

   The function finds the eigenvalues of a symmetric tridiagonal
   matrix by the implicit QL method.


   Arguments 
   ---------

   (input)
   n      order of the symmetric tridiagonal matrix                   
   d      contains the diagonal elements of the input matrix           
   e      contains the subdiagonal elements of the input matrix in its
          last n-1 positions.  e[0] is arbitrary	             

   (output)
   d      contains the eigenvalues in ascending order.  if an error
            exit is made, the eigenvalues are correct and ordered for
            indices 0,1,...ierr, but may not be the smallest eigenvalues.
   e      has been destroyed.					    
   ierr   set to zero for normal return, j if the j-th eigenvalue has
            not been determined after 30 iterations.		    

   Functions used
   --------------

   UTILITY	fsign
   MISC		pythag

 ***********************************************************************/

void s_las2::imtqlb(long n, double d[], double e[], double bnd[])

{
   long last, l, m, i, iteration;

   /* various flags */
   long exchange, convergence, underflow;	

   double b, test, g, r, s, c, p, f;

   if (n == 1) return;
   ierr = 0;
   bnd[0] = 1.0;
   last = n - 1;
   for (i = 1; i < n; i++) {
      bnd[i] = 0.0;
      e[i-1] = e[i];
   }
   e[last] = 0.0;
   for (l = 0; l < n; l++) {
      iteration = 0;
      while (iteration <= 30) {
	 for (m = l; m < n; m++) {
	    convergence = FALSE;
	    if (m == last) break;
	    else {
	       test = fabs(d[m]) + fabs(d[m+1]);
	       if (test + fabs(e[m]) == test) convergence = TRUE;
	    }
	    if (convergence) break;
	 }
	    p = d[l]; 
	    f = bnd[l]; 
	 if (m != l) {
	    if (iteration == 30) {
	       ierr = l;
	       return;
	    }
	    iteration += 1;
	    /*........ form shift ........*/
	    g = (d[l+1] - p) / (2.0 * e[l]);
	    r = pythag(g, 1.0);
	    g = d[m] - p + e[l] / (g + fsign(r, g));
	    s = 1.0;
	    c = 1.0;
	    p = 0.0;
	    underflow = FALSE;
	    i = m - 1;
	    while (underflow == FALSE && i >= l) {
	       f = s * e[i];
	       b = c * e[i];
	       r = pythag(f, g);
	       e[i+1] = r;
	       if (r == 0.0) underflow = TRUE;
	       else {
		  s = f / r;
		  c = g / r;
		  g = d[i+1] - p;
		  r = (d[i] - g) * s + 2.0 * c * b;
		  p = s * r;
		  d[i+1] = g + p;
		  g = c * r - b;
		  f = bnd[i+1];
		  bnd[i+1] = s * bnd[i] + c * f;
		  bnd[i] = c * bnd[i] - s * f;
		  i--;
	       }
	    }       /* end while (underflow != FALSE && i >= l) */
	    /*........ recover from underflow .........*/
	    if (underflow) {
	       d[i+1] -= p;
	       e[m] = 0.0;
	    }
	    else {
	       d[l] -= p;
	       e[l] = g;
	       e[m] = 0.0;
	    }
	 } 		       		   /* end if (m != l) */
	 else {

            /* order the eigenvalues */
	    exchange = TRUE;
	    if (l != 0) {
	       i = l;
	       while (i >= 1 && exchange == TRUE) {
	          if (p < d[i-1]) {
		     d[i] = d[i-1];
		     bnd[i] = bnd[i-1];
	             i--;
	          }
	          else exchange = FALSE;
	       }
	    }
	    if (exchange) i = 0;
	    d[i] = p;
	    bnd[i] = f; 
	    iteration = 31;
	 }
      }			       /* end while (iteration <= 30) */
   }				   /* end for (l=0; l<n; l++) */
   return;
}


/***********************************************************************
 *                                                                     *
 *			lanczos_step()                                 *
 *                                                                     *
 ***********************************************************************/
/***********************************************************************

   Description
   -----------

   Function embodies a single Lanczos step

   Arguments 
   ---------

   (input)
   n        dimension of the eigenproblem for matrix B
   first    start of index through loop				      
   last     end of index through loop				     
   wptr	    array of pointers pointing to work space		    
   alf	    array to hold diagonal of the tridiagonal matrix T
   eta      orthogonality estimate of Lanczos vectors at step j   
   oldeta   orthogonality estimate of Lanczos vectors at step j-1
   bet      array to hold off-diagonal of T                     
   ll       number of intitial Lanczos vectors in local orthog. 
              (has value of 0, 1 or 2)			
   enough   stop flag			

   Functions used
   --------------

   BLAS		ddot, dscal, daxpy, datx, dcopy
   USER		store
   LAS		purge, ortbnd, startv
   UTILITY	imin, imax

 ***********************************************************************/

void s_las2::lanczos_step(long n, long first, long last, double *wptr[],
		  double *alf, double *eta, double *oldeta,
		  double *bet, long *ll, long *enough)

{
   double t, *mid;
   long i;

   for (j=first; j<last; j++) {
      mid     = wptr[2];
      wptr[2] = wptr[1];
      wptr[1] = mid;
      mid     = wptr[3];
      wptr[3] = wptr[4];
      wptr[4] = mid;

      store(n, STORQ, j-1, wptr[2]);
      if (j-1 < MAXLL) store(n, STORP, j-1, wptr[4]);
      bet[j] = rnm;

      /* restart if invariant subspace is found */
      if (!bet[j]) {
	 rnm = startv(n, wptr);
	 if (ierr) return;
	 if (!rnm) *enough = TRUE;
      }
      if (*enough) break;

      /* take a lanczos step */
      t = 1.0 / rnm;
      datx(n, t, wptr[0], 1, wptr[1], 1);
      dscal(n, t, wptr[3], 1);
      mopb(n, wptr[3], wptr[0]);
      daxpy(n, -rnm, wptr[2], 1, wptr[0], 1);
      alf[j] = ddot(n, wptr[0], 1, wptr[3], 1);
      daxpy(n, -alf[j], wptr[1], 1, wptr[0], 1);

      /* orthogonalize against initial lanczos vectors */
      if (j <= MAXLL && (fabs(alf[j-1]) > 4.0 * fabs(alf[j])))
	 *ll = j;  
      for (i=0; i < imin(*ll, j-1); i++) {
	 store(n, RETRP, i, wptr[5]);
	 t = ddot(n, wptr[5], 1, wptr[0], 1);
	 store(n, RETRQ, i, wptr[5]);
         daxpy(n, -t, wptr[5], 1, wptr[0], 1);
	 eta[i] = eps1;
	 oldeta[i] = eps1;
      }

      /* extended local reorthogonalization */
      t = ddot(n, wptr[0], 1, wptr[4], 1);
      daxpy(n, -t, wptr[2], 1, wptr[0], 1);
      if (bet[j] > 0.0) bet[j] = bet[j] + t;
      t = ddot(n, wptr[0], 1, wptr[3], 1);
      daxpy(n, -t, wptr[1], 1, wptr[0], 1);
      alf[j] = alf[j] + t;
      dcopy(n, wptr[0], 1, wptr[4], 1);
      rnm = sqrt(ddot(n, wptr[0], 1, wptr[4], 1));
      anorm = bet[j] + fabs(alf[j]) + rnm;
      tol = reps * anorm;

      /* update the orthogonality bounds */
      ortbnd(alf, eta, oldeta, bet);

      /* restore the orthogonality state when needed */
      purge(n,*ll,wptr[0],wptr[1],wptr[4],wptr[3],wptr[5],eta,oldeta);
      if (rnm <= tol) rnm = 0.0;
   }
   return;
}



/***********************************************************************
 *                                                                     *
 *                          ortbnd()                                   *
 *                                                                     *
 ***********************************************************************/
/***********************************************************************

   Description
   -----------

   Funtion updates the eta recurrence

   Arguments 
   ---------

   (input)
   alf      array to hold diagonal of the tridiagonal matrix T         
   eta      orthogonality estimate of Lanczos vectors at step j        
   oldeta   orthogonality estimate of Lanczos vectors at step j-1     
   bet      array to hold off-diagonal of T                          
   n        dimension of the eigenproblem for matrix B		    
   j        dimension of T					  
   rnm	    norm of the next residual vector			 
   eps1	    roundoff estimate for dot product of two unit vectors

   (output)
   eta      orthogonality estimate of Lanczos vectors at step j+1     
   oldeta   orthogonality estimate of Lanczos vectors at step j        


   Functions used
   --------------

   BLAS		dswap

 ***********************************************************************/

void s_las2::ortbnd(double *alf, double *eta, double *oldeta, double *bet)

{
   long i;
   if (j < 1) return;
   if (rnm) {
      if (j > 1) {
	 oldeta[0] = (bet[1] * eta[1] + (alf[0]-alf[j]) * eta[0] -
		      bet[j] * oldeta[0]) / rnm + eps1;
      }
      for (i=1; i<=j-2; i++) 
	 oldeta[i] = (bet[i+1] * eta[i+1] + (alf[i]-alf[j]) * eta[i] +
		      bet[i] * eta[i-1] - bet[j] * oldeta[i])/rnm + eps1;
   }
   oldeta[j-1] = eps1;
   dswap(j, oldeta, 1, eta, 1);  
   eta[j] = eps1;
   return;
}


/***********************************************************************
 *                                                                     *
 *				purge()                                *
 *                                                                     *
 ***********************************************************************/
/***********************************************************************

   Description
   -----------

   Function examines the state of orthogonality between the new Lanczos
   vector and the previous ones to decide whether re-orthogonalization 
   should be performed


   Arguments 
   ---------

   (input)
   n        dimension of the eigenproblem for matrix B		       
   ll       number of intitial Lanczos vectors in local orthog.       
   r        residual vector to become next Lanczos vector            
   q        current Lanczos vector			           
   ra       previous Lanczos vector
   qa       previous Lanczos vector
   wrk      temporary vector to hold the previous Lanczos vector
   eta      state of orthogonality between r and prev. Lanczos vectors 
   oldeta   state of orthogonality between q and prev. Lanczos vectors
   j        current Lanczos step				     

   (output)
   r	    residual vector orthogonalized against previous Lanczos 
	      vectors
   q        current Lanczos vector orthogonalized against previous ones


   Functions used
   --------------

   BLAS		daxpy,  dcopy,  idamax,  ddot
   USER		store

 ***********************************************************************/

void s_las2::purge(long n, long ll, double *r, double *q, double *ra,  
	   double *qa, double *wrk, double *eta, double *oldeta)

{
   double t, tq, tr, reps1;
   long k, iteration, flag, i;

   if (j < ll+2) return; 

   k = idamax(j - (ll+1), &eta[ll], 1) + ll;
   if (fabs(eta[k]) > reps) {
      reps1 = eps1 / reps;
      iteration = 0;
      flag = TRUE;
      while (iteration < 2 && flag) {
	 if (rnm > tol) {

	    /* bring in a lanczos vector t and orthogonalize both 
	     * r and q against it */
	    tq = 0.0;
	    tr = 0.0;
            for (i = ll; i < j; i++) {
	       store(n,  RETRQ,  i,  wrk);
	       t   = -ddot(n, qa, 1, wrk, 1);
	       tq += fabs(t);
	       daxpy(n,  t,  wrk,  1,  q,  1);
	       t   = -ddot(n, ra, 1, wrk, 1);
	       tr += fabs(t);
	       daxpy(n, t, wrk, 1, r, 1);
	    }
	    dcopy(n, q, 1, qa, 1);
	    t   = -ddot(n, r, 1, qa, 1);
	    tr += fabs(t);
	    daxpy(n, t, q, 1, r, 1);
	    dcopy(n, r, 1, ra, 1);
	    rnm = sqrt(ddot(n, ra, 1, r, 1));
	    if (tq <= reps1 && tr <= reps1 * rnm) flag = FALSE;
	 }
	 iteration++;
      }
      for (i = ll; i <= j; i++) { 
	 eta[i] = eps1;
	 oldeta[i] = eps1;
      }
   }
   return;
}


/***************************************************************** 
 * Function finds the index of element having max. absolute value*
 * based on FORTRAN 77 routine from Linpack by J. Dongarra       *
 *****************************************************************/ 

long s_las2::idamax(long n,double *dx,long incx)

{
   long ix,i,imax;
   double dtemp, dmax;

   if (n < 1) return(-1);
   if (n == 1) return(0);
   if (incx == 0) return(-1);

   if (incx < 0) ix = (-n+1) * incx;
   else ix = 0;
   imax = ix;
   dx += ix;
   dmax = fabs(*dx);
   for (i=1; i < n; i++) {
      ix += incx;
      dx += incx;
      dtemp = fabs(*dx);
      if (dtemp > dmax) {
	 dmax = dtemp;
	 imax = ix;
      }
   }
   return(imax);
}


/***********************************************************************
 *                                                                     *
 *			error_bound()                                  *
 *                                                                     *
 ***********************************************************************/
/***********************************************************************

   Description
   -----------

   Function massages error bounds for very close ritz values by placing 
   a gap between them.  The error bounds are then refined to reflect 
   this.


   Arguments 
   ---------

   (input)
   endl     left end of interval containing unwanted eigenvalues
   endr     right end of interval containing unwanted eigenvalues
   ritz     array to store the ritz values
   bnd      array to store the error bounds
   enough   stop flag


   Functions used
   --------------

   BLAS		idamax
   UTILITY	dmin

 ***********************************************************************/

void s_las2::error_bound(long *enough, double endl, double endr, 
		 double *ritz, double *bnd)
{
   long mid, i;
   double gapl, gap;

   /* massage error bounds for very close ritz values */
   mid = idamax(j + 1, bnd, 1);

   for (i=((j+1) + (j-1)) / 2; i >= mid + 1; i -= 1)
      if (fabs(ritz[i-1] - ritz[i]) < eps34 * fabs(ritz[i])) 
         if (bnd[i] > tol && bnd[i-1] > tol) {
	    bnd[i-1] = sqrt(bnd[i] * bnd[i] + bnd[i-1] * bnd[i-1]);
	    bnd[i] = 0.0;
         }
	 

   for (i=((j+1) - (j-1)) / 2; i <= mid - 1; i +=1 ) 
      if (fabs(ritz[i+1] - ritz[i]) < eps34 * fabs(ritz[i])) 
	 if (bnd[i] > tol && bnd[i+1] > tol) {
	    bnd[i+1] = sqrt(bnd[i] * bnd[i] + bnd[i+1] * bnd[i+1]);
	    bnd[i] = 0.0;
         }

   /* refine the error bounds */
   neig = 0;
   gapl = ritz[j] - ritz[0];
   for (i = 0; i <= j; i++) {
      gap = gapl;
      if (i < j) gapl = ritz[i+1] - ritz[i];
      gap = dmin(gap, gapl);
      if (gap > bnd[i]) bnd[i] = bnd[i] * (bnd[i] / gap);
      if (bnd[i] <= 16.0 * eps * fabs(ritz[i])) {
	 neig += 1;
	 if (!*enough) *enough = endl < ritz[i] && ritz[i] < endr;
      }
   }   
   return;
}




/***********************************************************************
 *                                                                     *
 *                        ritvec()                                     *
 * 	    Function computes the singular vectors of matrix A	       *
 *                                                                     *
 ***********************************************************************/
/***********************************************************************

   Description
   -----------

   This function is invoked by landr() only if eigenvectors of the cyclic
   eigenproblem are desired.  When called, ritvec() computes the 
   singular vectors of A and writes the result to an unformatted file.


   Parameters
   ----------

   (input)
   n	      dimension of the eigenproblem for matrix B
   kappa      relative accuracy of ritz values acceptable as 
		eigenvalues of B (singular values of A)
   ritz       array of ritz values
   bnd        array of error bounds
   alf        array of diagonal elements of the tridiagonal matrix T
   bet        array of off-diagonal elements of T
   w1, w2     work space
   fp_out2    pointer to unformatted output file
   j	      number of Lanczos iterations performed

   (output)
   xv1        array of eigenvectors of B (singular vectors of A)
   ierr	      error code
	      0 for normal return from imtql2()
	      k if convergence did not occur for k-th eigenvalue in 
		imtql2()
   nsig       number of accepted ritz values based on kappa

   (local)
   s	      work array which is initialized to the identity matrix of
	      order (j + 1) upon calling imtql2().  After the call, 
	      s contains the orthonormal eigenvectors of the symmetric 
	      tridiagonal matrix T

   Functions used
   --------------

   BLAS		dscal, dcopy, daxpy
   USER		store
   		imtql2

 ***********************************************************************/

void s_las2::ritvec(long fpo2, FILE* fp, bool ascii, long n, 
		    double kappa, double *ritz, double *bnd,
		    double *alf, double *bet, double *w1, double *w2)

{
   long js, jsq, i, k, size, id, id2, tmp;
   double *s;

   js = j + 1;
   jsq = js * js;
   size = sizeof(double) * n;

   if(!(s = (double *) malloc (jsq * sizeof(double)))) {
      perror("MALLOC FAILED in RITVEC()");
      exit(errno);
   }

   /* initialize s as an identity matrix */
   for (i = 0; i < jsq; i++) s[i] = 0.0;
   for (i = 0; i < jsq; i+= (js + 1)) s[i] = 1.0;
   dcopy(js, alf, 1, w1, -1);
   dcopy(j, &bet[1], 1, &w2[1], -1);

   /* compute eigenvalues of T */
   imtql2(js, js, w1, w2, s);
   if (ierr) return;

   /* on return w1 contains eigenvalues in ascending order and s 
    * contains the corresponding eigenvectors */

   //   write(fpo2, (char *)&n, sizeof(n));
   //write(fpo2, (char *)&js, sizeof(js));
   //write(fpo2, (char *)&kappa, sizeof(kappa));

   nsig = 0;
   id = 0;
   // Count the number of valid singular values
   for (k = 0; k < js; k++) {
     if (bnd[k] <=  kappa*fabs(ritz[k]) )
       ++nsig;
   }

   write_header(fpo2, fp, ascii, nrow, ncol, nsig, "las2");       

   nsig = 0;
   id2 = jsq - js;
   for (k = 0; k < js; k++) {
      tmp = id2;
      if (bnd[k] <= kappa * fabs(ritz[k]) && k > js-neig-1) {
	 for (i = 0; i < n; i++) w1[i] = 0.0;
         for (i = 0; i < js; i++) {
	    store(n, RETRQ, i, w2);
	    daxpy(n, s[tmp], w2, 1, w1, 1);
	    tmp -= js;
         }
	 if (ascii) {
	   for (int zz = 0; zz < size/sizeof(double); zz++) {
	     fprintf(fp, "%f ", w1[zz]);
	   }
	   fprintf(fp, "\n");
	 }
	 else
	   write(fpo2, (char *)w1, size);

      /* store the w1 vector row-wise in array xv1;   
       * size of xv1 is (j+1) * (nrow+ncol) elements 
       * and each vector, even though only ncol long,
       * will have (nrow+ncol) elements in xv1.      
       * It is as if xv1 is a 2-d array (j+1) by     
       * (nrow+ncol) and each vector occupies a row  */

         for (i = 0; i < n; i++) xv1[id++] = w1[i];
         id += nrow;
         nsig += 1;
      }
      id2++;
   }
   free(s);
   return;
}


/***********************************************************************
 *                                                                     *
 *                     store()                                         *
 *                                                                     *
 ***********************************************************************/
/***********************************************************************

   Description
   -----------

   store() is a user-supplied function which, based on the input
   operation flag, stores to or retrieves from memory a vector.


   Arguments 
   ---------

   (input)
   n       length of vector to be stored or retrieved
   isw     operation flag:
	     isw = 1 request to store j-th Lanczos vector q(j)
	     isw = 2 request to retrieve j-th Lanczos vector q(j)
	     isw = 3 request to store q(j) for j = 0 or 1
	     isw = 4 request to retrieve q(j) for j = 0 or 1
   s	   contains the vector to be stored for a "store" request 

   (output)
   s	   contains the vector retrieved for a "retrieve" request 

   Functions used
   --------------

   BLAS		dcopy

 ***********************************************************************/

void s_las2::store(long n, long isw, long j, double *s)

{
   switch(isw) {
   case STORQ:	dcopy(n, s, 1, &a[(j+MAXLL) * n], 1);
		break;
   case RETRQ:	dcopy(n, &a[(j+MAXLL) * n], 1, s, 1);
		break;
   case STORP:	if (j >= MAXLL) {
		   fprintf(stderr,"store: (STORP) j >= MAXLL \n");
		   break;
		}
		dcopy(n, s, 1, &a[j*n], 1);
		break;
   case RETRP:	if (j >= MAXLL) {
		   fprintf(stderr,"store: (RETRP) j >= MAXLL \n");
		   break;
		}
   		dcopy(n, &a[j*n], 1, s, 1);
		break;
   }
   return;
}



/***********************************************************************
 *                                                                     *
 *                          lanso()                                    *
 *                                                                     *
 ***********************************************************************/
/***********************************************************************

   Description
   -----------

   Function determines when the restart of the Lanczos algorithm should 
   occur and when it should terminate.

   Arguments 
   ---------

   (input)
   n         dimension of the eigenproblem for matrix B
   lanmax    upper limit of desired number of lanczos steps           
   maxprs    upper limit of desired number of eigenpairs             
   endl      left end of interval containing unwanted eigenvalues
   endr      right end of interval containing unwanted eigenvalues
   ritz      array to hold the ritz values                       
   bnd       array to hold the error bounds                          
   wptr      array of pointers that point to work space:            
  	       wptr[0]-wptr[5]  six vectors of length n		
  	       wptr[6] array to hold diagonal of the tridiagonal matrix T
  	       wptr[9] array to hold off-diagonal of T	
  	       wptr[7] orthogonality estimate of Lanczos vectors at 
		 step j
 	       wptr[8] orthogonality estimate of Lanczos vectors at 
		 step j-1

   (output)
   j         number of Lanczos steps actually taken
   neig      number of ritz values stabilized
   ritz      array to hold the ritz values
   bnd       array to hold the error bounds
   ierr      (globally declared) error flag
	     ierr = 8192 if stpone() fails to find a starting vector
	     ierr = k if convergence did not occur for k-th eigenvalue
		    in imtqlb()
	     ierr = 0 otherwise


   Functions used
   --------------

   LAS		stpone, error_bound, lanczos_step
   MISC		dsort2
   UTILITY	imin, imax

 ***********************************************************************/

void s_las2::lanso(long n, long lanmax, long maxprs, double endl,
	   double endr, double *ritz, double *bnd, double *wptr[])

{
   double *r, *alf, *eta, *oldeta, *bet, *wrk;
   long ll, first, last, ENOUGH, id1, id2, id3, i, l;

   r = wptr[0];
   alf = wptr[6];
   eta = wptr[7];
   oldeta = wptr[8];
   bet = wptr[9];
   wrk = wptr[5];
   j = 0;

   /* take the first step */
   stpone(n, wptr);
   if (!rnm || ierr) return;
   eta[0] = eps1;
   oldeta[0] = eps1;
   ll = 0;
   first = 1;
   last = imin(maxprs + imax(8,maxprs), lanmax);
   ENOUGH = FALSE;
   id1 = 0;
   while (id1 < maxprs && !ENOUGH) {
      if (rnm <= tol) rnm = 0.0;

      /* the actual lanczos loop */
      lanczos_step(n, first, last, wptr, alf, eta, oldeta, bet, &ll,
		   &ENOUGH);
      if (ENOUGH) j = j - 1;
      else j = last - 1;
      first = j + 1;
      bet[j+1] = rnm;

      /* analyze T */
      l = 0;
      for (id2 = 0; id2 < j; id2++) {
	 if (l > j) break;
         for (i = l; i <= j; i++) if (!bet[i+1]) break;
	 if (i > j) i = j;

	 /* now i is at the end of an unreduced submatrix */
	 dcopy(i-l+1, &alf[l],   1, &ritz[l],  -1);
	 dcopy(i-l,   &bet[l+1], 1, &wrk[l+1], -1);

	 imtqlb(i-l+1, &ritz[l], &wrk[l], &bnd[l]);

	 if (ierr) {
	    printf("IMTQLB FAILED TO CONVERGE (IERR = %d)\n",
		    ierr);
	    printf("L = %d    I = %d\n", l, i);
            for (id3 = l; id3 <= i; id3++) 
	       printf("%d  %lg  %lg  %lg\n",
	       id3, ritz[id3], wrk[id3], bnd[id3]);
	 }
         for (id3 = l; id3 <= i; id3++) 
	    bnd[id3] = rnm * fabs(bnd[id3]);
	 l = i + 1;
      }

      /* sort eigenvalues into increasing order */
      dsort2((j+1) / 2, j + 1, ritz, bnd);

      /* massage error bounds for very close ritz values */
      error_bound(&ENOUGH, endl, endr, ritz, bnd);

      /* should we stop? */
      if (neig < maxprs) {
	 if (!neig) last = first + 9;
	 else last = first + imax(3, 1 + ((j-5) * (maxprs-neig)) / neig);
	 last = imin(last, lanmax);
      }
      else ENOUGH = TRUE;
      ENOUGH = ENOUGH || first >= lanmax;
      id1++;
   }
   store(n, STORQ, j, wptr[1]);
   return;
}



/**************************************************************
 * multiplication of matrix B by vector x, where B = A'A,     *
 * and A is nrow by ncol (nrow >> ncol). Hence, B is of order *
 * n = ncol (y stores product vector).		              *
 **************************************************************/

void s_las2::mopb(long n, double *x, double *y)

{
   long i, j, end;
   
   mxvcount += 2;
   for (i = 0; i < n; i++) y[i] = 0.0;
   for (i = 0; i < nrow; i++) ztemp[i] = 0.0;

   for (i = 0; i < ncol; i++) {
      end = pointr[i+1];
      for (j = pointr[i]; j < end; j++) 
	 ztemp[rowind[j]] += value[j] * (*x); 
      x++;
   }
   for (i = 0; i < ncol; i++) {
      end = pointr[i+1];
      for (j = pointr[i]; j < end; j++) 
	 *y += value[j] * ztemp[rowind[j]];
      y++;
   }
   return;
}

/***********************************************************
 * multiplication of matrix A by vector x, where A is 	   *
 * nrow by ncol (nrow >> ncol).  y stores product vector.  *
 ***********************************************************/

void s_las2::mopa(double *x, double *y)

{
   long end, i, j;
   
   mxvcount += 1;
   for (i = 0; i < nrow; i++) y[i] = 0.0;

   for (i = 0; i < ncol; i++) {
      end = pointr[i+1];
      for (j = pointr[i]; j < end; j++)
	 y[rowind[j]] += value[j] * x[i]; 
   }
   return;
}
