*  Extracted from: d3/db4/slaed2_8f_source.html

*> \brief \b SLAED2 used by sstedc. Merges eigenvalues and deflates secular equation. Used when the original matrix is tridiagonal.

*

*  =========== DOCUMENTATION ===========

*

* Online html documentation available at

*            http://www.netlib.org/lapack/explore-html/

*

*> \htmlonly

*> Download SLAED2 + dependencies

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slaed2.f">

*> [TGZ]</a>

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slaed2.f">

*> [ZIP]</a>

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slaed2.f">

*> [TXT]</a>

*> \endhtmlonly

*

*  Definition:

*  ===========

*

*       SUBROUTINE SLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,

*                          Q2, INDX, INDXC, INDXP, COLTYP, INFO )

*

*       .. Scalar Arguments ..

*       INTEGER            INFO, K, LDQ, N, N1

*       REAL               RHO

*       ..

*       .. Array Arguments ..

*       INTEGER            COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ),

*      $                   INDXQ( * )

*       REAL               D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),

*      $                   W( * ), Z( * )

*       ..

*

*

*> \par Purpose:

*  =============

*>

*> \verbatim

*>

*> SLAED2 merges the two sets of eigenvalues together into a single

*> sorted set.  Then it tries to deflate the size of the problem.

*> There are two ways in which deflation can occur:  when two or more

*> eigenvalues are close together or if there is a tiny entry in the

*> Z vector.  For each such occurrence the order of the related secular

*> equation problem is reduced by one.

*> \endverbatim

*

*  Arguments:

*  ==========

*

*> \param[out] K

*> \verbatim

*>          K is INTEGER

*>         The number of non-deflated eigenvalues, and the order of the

*>         related secular equation. 0 <= K <=N.

*> \endverbatim

*>

*> \param[in] N

*> \verbatim

*>          N is INTEGER

*>         The dimension of the symmetric tridiagonal matrix.  N >= 0.

*> \endverbatim

*>

*> \param[in] N1

*> \verbatim

*>          N1 is INTEGER

*>         The location of the last eigenvalue in the leading sub-matrix.

*>         min(1,N/2) <= N1 <= N/2.

*> \endverbatim

*>

*> \param[in,out] D

*> \verbatim

*>          D is REAL array, dimension (N)

*>         On entry, D contains the eigenvalues of the two submatrices to

*>         be combined.

*>         On exit, D contains the trailing (N-K) updated eigenvalues

*>         (those which were deflated) sorted into increasing order.

*> \endverbatim

*>

*> \param[in,out] Q

*> \verbatim

*>          Q is REAL array, dimension (LDQ, N)

*>         On entry, Q contains the eigenvectors of two submatrices in

*>         the two square blocks with corners at (1,1), (N1,N1)

*>         and (N1+1, N1+1), (N,N).

*>         On exit, Q contains the trailing (N-K) updated eigenvectors

*>         (those which were deflated) in its last N-K columns.

*> \endverbatim

*>

*> \param[in] LDQ

*> \verbatim

*>          LDQ is INTEGER

*>         The leading dimension of the array Q.  LDQ >= max(1,N).

*> \endverbatim

*>

*> \param[in,out] INDXQ

*> \verbatim

*>          INDXQ is INTEGER array, dimension (N)

*>         The permutation which separately sorts the two sub-problems

*>         in D into ascending order.  Note that elements in the second

*>         half of this permutation must first have N1 added to their

*>         values. Destroyed on exit.

*> \endverbatim

*>

*> \param[in,out] RHO

*> \verbatim

*>          RHO is REAL

*>         On entry, the off-diagonal element associated with the rank-1

*>         cut which originally split the two submatrices which are now

*>         being recombined.

*>         On exit, RHO has been modified to the value required by

*>         SLAED3.

*> \endverbatim

*>

*> \param[in,out] Z

*> \verbatim

*>          Z is REAL array, dimension (N)

*>         On entry, Z contains the updating vector (the last

*>         row of the first sub-eigenvector matrix and the first row of

*>         the second sub-eigenvector matrix).

*>         On exit, the contents of Z have been destroyed by the updating

*>         process.

*> \endverbatim

*>

*> \param[out] DLAMDA

*> \verbatim

*>          DLAMDA is REAL array, dimension (N)

*>         A copy of the first K eigenvalues which will be used by

*>         SLAED3 to form the secular equation.

*> \endverbatim

*>

*> \param[out] W

*> \verbatim

*>          W is REAL array, dimension (N)

*>         The first k values of the final deflation-altered z-vector

*>         which will be passed to SLAED3.

*> \endverbatim

*>

*> \param[out] Q2

*> \verbatim

*>          Q2 is REAL array, dimension (N1**2+(N-N1)**2)

*>         A copy of the first K eigenvectors which will be used by

*>         SLAED3 in a matrix multiply (SGEMM) to solve for the new

*>         eigenvectors.

*> \endverbatim

*>

*> \param[out] INDX

*> \verbatim

*>          INDX is INTEGER array, dimension (N)

*>         The permutation used to sort the contents of DLAMDA into

*>         ascending order.

*> \endverbatim

*>

*> \param[out] INDXC

*> \verbatim

*>          INDXC is INTEGER array, dimension (N)

*>         The permutation used to arrange the columns of the deflated

*>         Q matrix into three groups:  the first group contains non-zero

*>         elements only at and above N1, the second is dense, and the

*>         third contains non-zero elements only below N1.

*> \endverbatim

*>

*> \param[out] INDXP

*> \verbatim

*>          INDXP is INTEGER array, dimension (N)

*>         The permutation used to place deflated values of D at the end

*>         of the array.  INDXP(1:K) points to the nondeflated D-values

*>         and INDXP(K+1:N) points to the deflated eigenvalues.

*> \endverbatim

*>

*> \param[out] COLTYP

*> \verbatim

*>          COLTYP is INTEGER array, dimension (N)

*>         During execution, a label which will indicate which of the

*>         following types a column in the Q2 matrix is:

*>         1 : non-zero in the upper half only;

*>         2 : dense;

*>         3 : non-zero in the lower half only;

*>         4 : deflated.

*>         On exit, COLTYP(i) is the number of columns of type i,

*>         for i=1 to 4 only.

*> \endverbatim

*>

*> \param[out] INFO

*> \verbatim

*>          INFO is INTEGER

*>          = 0:  successful exit.

*>          < 0:  if INFO = -i, the i-th argument had an illegal value.

*> \endverbatim

*

*  Authors:

*  ========

*

*> \author Univ. of Tennessee

*> \author Univ. of California Berkeley

*> \author Univ. of Colorado Denver

*> \author NAG Ltd.

*

*> \date September 2012

*

*> \ingroup auxOTHERcomputational

*

*> \par Contributors:

*  ==================

*>

*> Jeff Rutter, Computer Science Division, University of California

*> at Berkeley, USA \n

*>  Modified by Francoise Tisseur, University of Tennessee

*>

*  =====================================================================

      SUBROUTINE slaed2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
     $                   q2, indx, indxc, indxp, coltyp, info )
*
*  -- LAPACK computational routine (version 3.4.2) --
*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*     September 2012
*
*     Summary of the executable part:
*       - validates N, LDQ and N1; on failure INFO is set to minus
*         the argument position and XERBLA is called;
*       - negates Z(N1+1:N) when RHO < 0, scales Z by 1/sqrt(2) and
*         replaces RHO by ABS(2*RHO);
*       - merges the two eigenvalue lists with SLAMRG to build INDX;
*       - if RHO*ABS(Z(IMAX)) <= TOL, only reorders D and the columns
*         of Q, sets K = 0 and returns (COLTYP and INDXP are not set
*         on that path);
*       - otherwise deflates entries with a small Z component or
*         with close eigenvalues, groups the columns by type, copies
*         the grouped columns into Q2 and returns the per-type column
*         counts in COLTYP(1:4).
*
      IMPLICIT NONE
*
*     .. Scalar Arguments ..
      INTEGER            info, k, ldq, n, n1
      REAL               rho
*     ..
*     .. Array Arguments ..
      INTEGER            coltyp( * ), indx( * ), indxc( * ), indxp( * ),
     $                   indxq( * )
      REAL               d( * ), dlamda( * ), q( ldq, * ), q2( * ),
     $                   w( * ), z( * )
*     ..
*
*  =====================================================================
*
*     .. Parameters ..
      REAL               mone, zero, one, two, eight
      parameter( mone = -1.0e0, zero = 0.0e0, one = 1.0e0,
     $                   two = 2.0e0, eight = 8.0e0 )
*     ..
*     .. Local Arrays ..
*     CTOT(i) counts the columns of type i; PSM(i) is the running
*     insert position for type i in the grouped ordering.
      INTEGER            ctot( 4 ), psm( 4 )
*     ..
*     .. Local Scalars ..
      INTEGER            ct, i, imax, iq1, iq2, j, jmax, js, k2, n1p1,
     $                   n2, nj, pj
      REAL               c, eps, s, t, tau, tol
*     ..
*     .. External Functions ..
      INTEGER            isamax
      REAL               slamch, slapy2
      EXTERNAL           isamax, slamch, slapy2
*     ..
*     .. External Subroutines ..
      EXTERNAL           scopy, slacpy, slamrg, srot, sscal, xerbla
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          abs, max, min, sqrt
*     ..
*     .. Executable Statements ..
*
*     Test the input parameters.  The checks run in the order
*     N, LDQ, N1, so only the first failing one is reported.
*
      info = 0
*
      IF( n.LT.0 ) THEN
         info = -2
      ELSE IF( ldq.LT.max( 1, n ) ) THEN
         info = -6
      ELSE IF( min( 1, ( n / 2 ) ).GT.n1 .OR. ( n / 2 ).LT.n1 ) THEN
         info = -3
      END IF
      IF( info.NE.0 ) THEN
         CALL xerbla( 'SLAED2', -info )
         return
      END IF
*
*     Quick return if possible
*
      IF( n.EQ.0 )
     $   return
*
      n2 = n - n1
      n1p1 = n1 + 1
*
*     For negative RHO flip the sign of the second part of Z; RHO
*     itself is made non-negative below.
*
      IF( rho.LT.zero ) THEN
         CALL sscal( n2, mone, z( n1p1 ), 1 )
      END IF
*
*     Normalize z so that norm(z) = 1.  Since z is the concatenation of
*     two normalized vectors, norm2(z) = sqrt(2).
*
      t = one / sqrt( two )
      CALL sscal( n, t, z, 1 )
*
*     RHO = ABS( norm(z)**2 * RHO )
*
      rho = abs( two*rho )
*
*     Sort the eigenvalues into increasing order.  First shift the
*     second-half entries of INDXQ by N1 so that they index into D.
*
      DO 10 i = n1p1, n
         indxq( i ) = indxq( i ) + n1
   10 continue
*
*     re-integrate the deflated parts from the last pass
*
      DO 20 i = 1, n
         dlamda( i ) = d( indxq( i ) )
   20 continue
      CALL slamrg( n1, n2, dlamda, 1, 1, indxc )
      DO 30 i = 1, n
         indx( i ) = indxq( indxc( i ) )
   30 continue
*
*     Calculate the allowable deflation tolerance
*
      imax = isamax( n, z, 1 )
      jmax = isamax( n, d, 1 )
      eps = slamch( 'Epsilon' )
      tol = eight*eps*max( abs( d( jmax ) ), abs( z( imax ) ) )
*
*     If the rank-1 modifier is small enough, no more needs to be done
*     except to reorganize Q so that its columns correspond with the
*     elements in D.
*
*     Column J of the scratch copy in Q2 receives column INDX(J) of
*     Q, then the whole N-by-N copy is written back to Q.
*     NOTE(review): this path writes N*N elements of Q2 - confirm
*     that callers size Q2 accordingly.
*
      IF( rho*abs( z( imax ) ).LE.tol ) THEN
         k = 0
         iq2 = 1
         DO 40 j = 1, n
            i = indx( j )
            CALL scopy( n, q( 1, i ), 1, q2( iq2 ), 1 )
            dlamda( j ) = d( i )
            iq2 = iq2 + n
   40    continue
         CALL slacpy( 'A', n, n, q2, n, q, ldq )
         CALL scopy( n, dlamda, 1, d, 1 )
         go to 190
      END IF
*
*     If there are multiple eigenvalues then the problem deflates.  Here
*     the number of equal eigenvalues are found.  As each equal
*     eigenvalue is found, a plane rotation (applied with SROT) is
*     computed to rotate the corresponding eigensubspace so that the
*     corresponding components of Z are zero in this new basis.
*
*     Initial column types: 1 for the first N1 columns, 3 for the
*     remaining ones.
*
      DO 50 i = 1, n1
         coltyp( i ) = 1
   50 continue
      DO 60 i = n1p1, n
         coltyp( i ) = 3
   60 continue
*
*     K counts the non-deflated entries, filled from the front of
*     INDXP; K2 is the lowest used slot of the deflated entries,
*     filled from the back of INDXP.
*
      k = 0
      k2 = n + 1
*
*     Skip leading entries with a negligible Z component; the first
*     entry that is kept becomes the initial PJ.
*
      DO 70 j = 1, n
         nj = indx( j )
         IF( rho*abs( z( nj ) ).LE.tol ) THEN
*
*           Deflate due to small z component.
*
            k2 = k2 - 1
            coltyp( nj ) = 4
            indxp( k2 ) = nj
*
*           NOTE(review): jumping to 100 from here would leave PJ
*           unset.  It looks unreachable because Z(IMAX) already
*           failed this same test above - confirm.
*
            IF( j.EQ.n )
     $         go to 100
         ELSE
            pj = nj
            go to 80
         END IF
   70 continue
   80 continue
      j = j + 1
*
*     Test the bound before touching INDX( J ): INDX holds only N
*     entries, so INDX( N+1 ) must not be read.
*
      IF( j.GT.n )
     $   go to 100
      nj = indx( j )
      IF( rho*abs( z( nj ) ).LE.tol ) THEN
*
*        Deflate due to small z component.
*
         k2 = k2 - 1
         coltyp( nj ) = 4
         indxp( k2 ) = nj
      ELSE
*
*        Check if eigenvalues are close enough to allow deflation.
*
         s = z( pj )
         c = z( nj )
*
*        Find sqrt(a**2+b**2) without overflow or
*        destructive underflow.
*
         tau = slapy2( c, s )
         t = d( nj ) - d( pj )
         c = c / tau
         s = -s / tau
         IF( abs( t*c*s ).LE.tol ) THEN
*
*           Deflation is possible.  The rotation moves the whole Z
*           weight onto NJ and zeroes it at PJ; PJ becomes deflated.
*
            z( nj ) = tau
            z( pj ) = zero
*
*           Mixing columns of different types yields type 2.
*
            IF( coltyp( nj ).NE.coltyp( pj ) )
     $         coltyp( nj ) = 2
            coltyp( pj ) = 4
            CALL srot( n, q( 1, pj ), 1, q( 1, nj ), 1, c, s )
            t = d( pj )*c**2 + d( nj )*s**2
            d( nj ) = d( pj )*s**2 + d( nj )*c**2
            d( pj ) = t
*
*           Insert PJ into the deflated tail INDXP( K2:N ): PJ is
*           moved past every entry whose D value is larger than
*           D( PJ ).
*
            k2 = k2 - 1
            i = 1
   90       continue
            IF( k2+i.LE.n ) THEN
               IF( d( pj ).LT.d( indxp( k2+i ) ) ) THEN
                  indxp( k2+i-1 ) = indxp( k2+i )
                  indxp( k2+i ) = pj
                  i = i + 1
                  go to 90
               ELSE
                  indxp( k2+i-1 ) = pj
               END IF
            ELSE
               indxp( k2+i-1 ) = pj
            END IF
            pj = nj
         ELSE
*
*           No deflation: record PJ as the next non-deflated entry.
*
            k = k + 1
            dlamda( k ) = d( pj )
            w( k ) = z( pj )
            indxp( k ) = pj
            pj = nj
         END IF
      END IF
      go to 80
  100 continue
*
*     Record the last eigenvalue.
*
      k = k + 1
      dlamda( k ) = d( pj )
      w( k ) = z( pj )
      indxp( k ) = pj
*
*     Count up the total number of the various types of columns, then
*     form a permutation which positions the four column types into
*     four uniform groups (although one or more of these groups may be
*     empty).
*
      DO 110 j = 1, 4
         ctot( j ) = 0
  110 continue
      DO 120 j = 1, n
         ct = coltyp( j )
         ctot( ct ) = ctot( ct ) + 1
  120 continue
*
*     PSM(*) = Position in SubMatrix (of types 1 through 4)
*
      psm( 1 ) = 1
      psm( 2 ) = 1 + ctot( 1 )
      psm( 3 ) = psm( 2 ) + ctot( 2 )
      psm( 4 ) = psm( 3 ) + ctot( 3 )
      k = n - ctot( 4 )
*
*     Fill out the INDXC array so that the permutation which it induces
*     will place all type-1 columns first, all type-2 columns next,
*     then all type-3's, and finally all type-4's.
*
      DO 130 j = 1, n
         js = indxp( j )
         ct = coltyp( js )
         indx( psm( ct ) ) = js
         indxc( psm( ct ) ) = j
         psm( ct ) = psm( ct ) + 1
  130 continue
*
*     Sort the eigenvalues and corresponding eigenvectors into DLAMDA
*     and Q2 respectively.  The eigenvalues/vectors which were not
*     deflated go into the first K slots of DLAMDA and Q2 respectively,
*     while those which were deflated go into the last N - K slots.
*
*     IQ1 walks the block of Q2 that holds rows 1:N1 of the type-1
*     and type-2 columns; IQ2 walks the block that holds rows N1+1:N
*     of the type-2 and type-3 columns.  Z is reused to hold the
*     reordered D values.
*
      i = 1
      iq1 = 1
      iq2 = 1 + ( ctot( 1 )+ctot( 2 ) )*n1
      DO 140 j = 1, ctot( 1 )
         js = indx( i )
         CALL scopy( n1, q( 1, js ), 1, q2( iq1 ), 1 )
         z( i ) = d( js )
         i = i + 1
         iq1 = iq1 + n1
  140 continue
*
      DO 150 j = 1, ctot( 2 )
         js = indx( i )
         CALL scopy( n1, q( 1, js ), 1, q2( iq1 ), 1 )
         CALL scopy( n2, q( n1+1, js ), 1, q2( iq2 ), 1 )
         z( i ) = d( js )
         i = i + 1
         iq1 = iq1 + n1
         iq2 = iq2 + n2
  150 continue
*
      DO 160 j = 1, ctot( 3 )
         js = indx( i )
         CALL scopy( n2, q( n1+1, js ), 1, q2( iq2 ), 1 )
         z( i ) = d( js )
         i = i + 1
         iq2 = iq2 + n2
  160 continue
*
*     Type-4 (deflated) columns are stored with full length N,
*     starting at position IQ1 of Q2.
*
      iq1 = iq2
      DO 170 j = 1, ctot( 4 )
         js = indx( i )
         CALL scopy( n, q( 1, js ), 1, q2( iq2 ), 1 )
         iq2 = iq2 + n
         z( i ) = d( js )
         i = i + 1
  170 continue
*
*     The deflated eigenvalues and their corresponding vectors go back
*     into the last N - K slots of D and Q respectively.
*
      IF( k.LT.n ) THEN
         CALL slacpy( 'A', n, ctot( 4 ), q2( iq1 ), n,
     $                q( 1, k+1 ), ldq )
         CALL scopy( n-k, z( k+1 ), 1, d( k+1 ), 1 )
      END IF
*
*     Copy CTOT into COLTYP for referencing in SLAED3.
*
      DO 180 j = 1, 4
         coltyp( j ) = ctot( j )
  180 continue
*
  190 continue
      return
*
*     End of SLAED2
*
      END