◆ dlarrv2()

subroutine dlarrv2	(	integer	n,
		double precision	vl,
		double precision	vu,
		double precision, dimension( * )	d,
		double precision, dimension( * )	l,
		double precision	pivmin,
		integer, dimension( * )	isplit,
		integer	m,
		integer	dol,
		integer	dou,
		integer	needil,
		integer	neediu,
		double precision	minrgp,
		double precision	rtol1,
		double precision	rtol2,
		double precision, dimension( * )	w,
		double precision, dimension( * )	werr,
		double precision, dimension( * )	wgap,
		integer, dimension( * )	iblock,
		integer, dimension( * )	indexw,
		double precision, dimension( * )	gers,
		double precision, dimension( * )	sdiam,
		double precision, dimension( ldz, * )	z,
		integer	ldz,
		integer, dimension( * )	isuppz,
		double precision, dimension( * )	work,
		integer, dimension( * )	iwork,
		logical	vstart,
		logical	finish,
		integer	maxcls,
		integer	ndepth,
		integer	parity,
		integer	zoffset,
		integer	info
	)
Definition at line 1 of file dlarrv2.f.
 
*  -- ScaLAPACK auxiliary routine (version 2.0) --
*     Univ. of Tennessee, Univ. of California Berkeley, Univ of Colorado Denver
*     July 4, 2010
*
      IMPLICIT NONE
*
*     .. Scalar Arguments ..
      INTEGER            DOL, DOU, INFO, LDZ, M, N, MAXCLS,
     $                   NDEPTH, NEEDIL, NEEDIU, PARITY, ZOFFSET
      DOUBLE PRECISION   MINRGP, PIVMIN, RTOL1, RTOL2, VL, VU
      LOGICAL VSTART, FINISH 
*     ..
*     .. Array Arguments ..
      INTEGER            IBLOCK( * ), INDEXW( * ), ISPLIT( * ),
     $                   ISUPPZ( * ), IWORK( * )
      DOUBLE PRECISION   D( * ), GERS( * ), L( * ), SDIAM( * ), 
     $                   W( * ), WERR( * ),
     $                   WGAP( * ), WORK( * )
      DOUBLE PRECISION  Z( LDZ, * )
*
*  Purpose
*  =======
*
*  DLARRV2 computes the eigenvectors of the tridiagonal matrix
*  T = L D L^T given L, D and APPROXIMATIONS to the eigenvalues of L D L^T.
*  The input eigenvalues should have been computed by DLARRE2A
*  or by previous calls to DLARRV2.
*
*  The major difference between the parallel and the sequential construction
*  of the representation tree is that in the parallel case, not all eigenvalues
*  of a given cluster might be computed locally. Other processors might "own"
*  and refine part of an eigenvalue cluster. This is crucial for scalability. 
*  Thus there might be communication necessary before the current level of the 
*  representation tree can be parsed. 
*
*  Please note:
*  1. The calling sequence has two additional INTEGER parameters, 
*     DOL and DOU, that should satisfy M>=DOU>=DOL>=1. 
*     These parameters are only relevant for the case JOBZ = 'V'.
*     DLARRV2  ONLY computes the eigenVECTORS 
*     corresponding to eigenvalues DOL through DOU in W. (That is,
*     instead of computing the eigenvectors belonging to W(1) 
*     through W(M), only the eigenvectors belonging to eigenvalues
*     W(DOL) through W(DOU) are computed.) In this case, only the
*     eigenvalues DOL:DOU are guaranteed to be accurately refined
*     to all figures by Rayleigh-Quotient iteration.
*
*  2. The additional arguments VSTART, FINISH, NDEPTH, PARITY, ZOFFSET 
*     are included as a thread-safe implementation equivalent to SAVE variables.
*     These variables store details about the local representation tree which is
*     computed layerwise. For scalability reasons, eigenvalues belonging to the 
*     locally relevant representation tree might be computed on other processors.
*     These need to be communicated before the inspection of the RRRs can proceed
*     on any given layer.           
*     Note that the computation has ended only when the variable FINISH is
*     true; at that point all eigenpairs between DOL and DOU have been
*     computed. M is set = DOU - DOL + 1.
*
*  3. DLARRV2 needs more workspace in Z than the sequential DLARRV. 
*     It is used to store the conformal embedding of the local representation tree.  
* 
*  Arguments
*  =========
*
*  N       (input) INTEGER
*          The order of the matrix.  N >= 0.
*
*  VL      (input) DOUBLE PRECISION
*  VU      (input) DOUBLE PRECISION
*          Lower and upper bounds of the interval that contains the desired
*          eigenvalues. VL < VU. Needed to compute gaps on the left or right
*          end of the extremal eigenvalues in the desired RANGE.
*          VU is currently not used but kept as parameter in case needed.
*
*  D       (input/output) DOUBLE PRECISION array, dimension (N)
*          On entry, the N diagonal elements of the diagonal matrix D.
*          On exit, D is overwritten.
*
*  L       (input/output) DOUBLE PRECISION array, dimension (N)
*          On entry, the (N-1) subdiagonal elements of the unit
*          bidiagonal matrix L are in elements 1 to N-1 of L 
*          (if the matrix is not split.) At the end of each block
*          is stored the corresponding shift as given by DLARRE.
*          On exit, L is overwritten.
*
*  PIVMIN  (input) DOUBLE PRECISION
*          The minimum pivot allowed in the Sturm sequence.
*
*  ISPLIT  (input) INTEGER array, dimension (N)
*          The splitting points, at which T breaks up into blocks.
*          The first block consists of rows/columns 1 to
*          ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1
*          through ISPLIT( 2 ), etc.
*
*  M       (input) INTEGER
*          The total number of input eigenvalues.  0 <= M <= N.
*
*  DOL     (input) INTEGER
*  DOU     (input) INTEGER
*          If the user wants to compute only selected eigenvectors from all
*          the eigenvalues supplied, he can specify an index range DOL:DOU.
*          Or else the setting DOL=1, DOU=M should be applied. 
*          Note that DOL and DOU refer to the order in which the eigenvalues 
*          are stored in W. 
*          If the user wants to compute only selected eigenpairs, then
*          the columns DOL-1 to DOU+1 of the eigenvector space Z contain the
*          computed eigenvectors. All other columns of Z are set to zero.
*          If DOL > 1, then Z(:,DOL-1-ZOFFSET) is used.
*          If DOU < M, then Z(:,DOU+1-ZOFFSET) is used.
*
*
*  NEEDIL  (input/output) INTEGER
*  NEEDIU  (input/output) INTEGER
*          Describe which are the left and right outermost eigenvalues 
*          that still need to be included in the computation. These indices
*          indicate whether eigenvalues from other processors are needed to
*          correctly compute the conformally embedded representation tree.
*          When DOL<=NEEDIL<=NEEDIU<=DOU, all required eigenvalues are local
*          to the processor and no communication is required to compute its
*          part of the representation tree.
*
*  MINRGP  (input) DOUBLE PRECISION
*          The minimum relative gap threshold to decide whether an eigenvalue
*          or a cluster boundary is reached.
*
*  RTOL1   (input) DOUBLE PRECISION
*  RTOL2   (input) DOUBLE PRECISION
*           Parameters for bisection.
*           An interval [LEFT,RIGHT] has converged if
*           RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) )
*
*  W       (input/output) DOUBLE PRECISION array, dimension (N)
*          The first M elements of W contain the APPROXIMATE eigenvalues for
*          which eigenvectors are to be computed. The eigenvalues
*          should be grouped by split-off block and ordered from
*          smallest to largest within the block. (The output array
*          W from DSTEGR2A is expected here.) Furthermore, they are with
*          respect to the shift of the corresponding root representation
*          for their block. On exit, 
*          W holds those UNshifted eigenvalues
*          for which eigenvectors have already been computed.
*
*  WERR    (input/output) DOUBLE PRECISION array, dimension (N)
*          The first M elements contain the semiwidth of the uncertainty
*          interval of the corresponding eigenvalue in W
*
*  WGAP    (input/output) DOUBLE PRECISION array, dimension (N)
*          The separation from the right neighbor eigenvalue in W.
*
*  IBLOCK  (input) INTEGER array, dimension (N)
*          The indices of the blocks (submatrices) associated with the
*          corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue
*          W(i) belongs to the first block from the top, =2 if W(i)
*          belongs to the second block, etc.
*
*  INDEXW  (input) INTEGER array, dimension (N)
*          The indices of the eigenvalues within each block (submatrix);
*          for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the
*          i-th eigenvalue W(i) is the 10-th eigenvalue in the second block.
*
*  GERS    (input) DOUBLE PRECISION array, dimension (2*N)
*          The N Gerschgorin intervals (the i-th Gerschgorin interval
*          is (GERS(2*i-1), GERS(2*i)). The Gerschgorin intervals should
*          be computed from the original UNshifted matrix.
*          Currently NOT used but kept as parameter in case it becomes
*          needed in the future.
*
*  SDIAM   (input) DOUBLE PRECISION array, dimension (N)
*          The spectral diameters for all unreduced blocks.
*
*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) )
*          If INFO = 0, the first M columns of Z contain the
*          orthonormal eigenvectors of the matrix T
*          corresponding to the input eigenvalues, with the i-th
*          column of Z holding the eigenvector associated with W(i).
*          In the distributed version, only a subset of columns
*          is accessed, see DOL,DOU and ZOFFSET.
*
*  LDZ     (input) INTEGER
*          The leading dimension of the array Z.  LDZ >= 1, and if
*          JOBZ = 'V', LDZ >= max(1,N).
*
*  ISUPPZ  (output) INTEGER array, dimension ( 2*max(1,M) )
*          The support of the eigenvectors in Z, i.e., the indices
*          indicating the nonzero elements in Z. The I-th eigenvector
*          is nonzero only in elements ISUPPZ( 2*I-1 ) through
*          ISUPPZ( 2*I ).
*
*  WORK    (workspace) DOUBLE PRECISION array, dimension (12*N)
*
*  IWORK   (workspace) INTEGER array, dimension (7*N)
*
*  VSTART  (input/output) LOGICAL 
*          .TRUE. on initialization, set to .FALSE. afterwards.
*
*  FINISH  (input/output) LOGICAL 
*          A flag that indicates whether all eigenpairs have been computed.
*
*  MAXCLS  (input/output) INTEGER
*          The largest cluster worked on by this processor in the 
*          representation tree.
*
*  NDEPTH  (input/output) INTEGER
*          The current depth of the representation tree. Set to
*          zero on initial pass, changed when the deeper levels of
*          the representation tree are generated. 
*
*  PARITY  (input/output) INTEGER
*          An internal parameter needed for the storage of the
*          clusters on the current level of the representation tree.
*
*  ZOFFSET (input) INTEGER
*          Offset for storing the eigenpairs when Z is distributed
*          in 1D-cyclic fashion.
*
*  INFO    (output) INTEGER
*          = 0:  successful exit
*
*          > 0:  A problem occurred in DLARRV2.
*          < 0:  One of the called subroutines signaled an internal problem. 
*                Needs inspection of the corresponding parameter IINFO
*                for further information.
*
*          =-1:  Problem in DLARRB2 when refining a child's eigenvalues.
*          =-2:  Problem in DLARRF2 when computing the RRR of a child.
*                When a child is inside a tight cluster, it can be difficult
*                to find an RRR. A partial remedy from the user's point of
*                view is to make the parameter MINRGP smaller and recompile.
*                However, as the orthogonality of the computed vectors is 
*                proportional to 1/MINRGP, the user should be aware that 
*                he might be trading in precision when he decreases MINRGP.
*          =-3:  Problem in DLARRB2 when refining a single eigenvalue
*                after the Rayleigh correction was rejected.
*          = 5:  The Rayleigh Quotient Iteration failed to converge to 
*                full accuracy in MAXITR steps.
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            MAXITR, USE30, USE31, USE32A, USE32B
      parameter( maxitr = 10, use30=30, use31=31, 
     $                     use32a=3210, use32b = 3211 )
      DOUBLE PRECISION   ZERO, ONE, TWO, THREE, FOUR, HALF
      parameter( zero = 0.0d0, one = 1.0d0, 
     $                     two = 2.0d0, three = 3.0d0,
     $                     four = 4.0d0, half = 0.5d0)
*     ..
*     .. Local Arrays ..
      INTEGER            SPLACE( 4 )
*     ..
*     .. Local Scalars ..
      LOGICAL            DELREF, ESKIP, NEEDBS, ONLYLC, STP2II, TRYMID,
     $                   TRYRQC, USEDBS, USEDRQ
      INTEGER            I, IBEGIN, IEND, II, IINCLS, IINDC1, IINDC2,
     $                   IINDWK, IINFO, IM, IN, INDEIG, INDLD, INDLLD,
     $                   INDWRK, ISUPMN, ISUPMX, ITER, ITMP1, ITWIST, J,
     $                   JBLK, K, KK, MINIWSIZE, MINWSIZE, MYWFST,
     $                   MYWLST, NCLUS, NEGCNT, NEWCLS, NEWFST, NEWFTT,
     $                   NEWLST, NEWSIZ, OFFSET, OLDCLS, OLDFST, OLDIEN,
     $                   OLDLST, OLDNCL, P, Q, VRTREE, WBEGIN, WEND,
     $                   WINDEX, WINDMN, WINDPL, ZFROM, ZINDEX, ZTO,
     $                   ZUSEDL, ZUSEDU, ZUSEDW
      DOUBLE PRECISION   AVGAP, BSTRES, BSTW, ENUFGP, EPS, FUDGE, GAP,
     $                   GAPTOL, LAMBDA, LEFT, LGAP, LGPVMN, LGSPDM,
     $                   LOG_IN, MGAP, MINGMA, MYERR, NRMINV, NXTERR,
     $                   ORTOL, RESID, RGAP, RIGHT, RLTL30, RQCORR,
     $                   RQTOL, SAVEGP, SGNDEF, SIGMA, SPDIAM, SSIGMA,
     $                   TAU, TMP, TOL, ZTZ
*     ..
*     .. External Functions ..
      DOUBLE PRECISION  DLAMCH
      DOUBLE PRECISION   DDOT, DNRM2
      EXTERNAL           ddot, dlamch, dnrm2
*     ..
*     .. External Subroutines ..
      EXTERNAL           daxpy, dcopy, dlar1va, dlarrb2,
     $                   dlarrf2, dlaset, dscal
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC abs, dble, max, min, sqrt
*     ..
*     .. Executable Statements ..
*     ..
 
 
      info = 0
*     The first N entries of WORK are reserved for the eigenvalues
      indld = n+1
      indlld= 2*n+1
      indwrk= 3*n+1
      minwsize = 12 * n
 
*     IWORK(IINCLS+JBLK) holds the number of clusters on the current level 
*     of the reptree for block JBLK  
      iincls = 0
*     IWORK(IINDC1+1:IINDC2+N) are used to store the clusters of the current
*     layer and the one above.
      iindc1 = n
      iindc2 = 2*n
      iindwk = 3*n + 1
      miniwsize = 7 * n
 
      eps = dlamch( 'Precision' )
      rqtol = two * eps
 
      tryrqc = .true.
*     Decide which representation tree criterion to use
*     USE30 = Lapack 3.0 criterion
*     USE31 = LAPACK 3.1 criterion
*     USE32A = two criteria, determines singletons with USE31, and groups with avgap.
*     USE32B = two criteria, determines singletons with USE31, and groups with USE30.
      vrtree = use32a
*
      lgpvmn = log( pivmin )
 
 
      IF(vstart) THEN
*      
*        PREPROCESSING, DONE ONLY IN THE FIRST CALL
*
         vstart = .false.   
*
         maxcls = 1
 
*        Set delayed eigenvalue refinement
*        In order to enable more parallelism, refinement
*        must be done immediately and cannot be delayed until
*        the next representation tree level.
         delref = .false.
 
         DO 1 i= 1,minwsize
            work( i ) = zero 
 1       CONTINUE
 
         DO 2 i= 1,miniwsize
            iwork( i ) = 0
 2       CONTINUE
 
         zusedl = 1
         IF(dol.GT.1) THEN
*           Set lower bound for use of Z
            zusedl = dol-1
         ENDIF
         zusedu = m
         IF(dou.LT.m) THEN
*           Set upper bound for use of Z
            zusedu = dou+1
         ENDIF
*        The width of the part of Z that is used
         zusedw = zusedu - zusedl + 1
*
         CALL dlaset( 'Full', n, zusedw, zero, zero, 
     $                    z(1,(zusedl-zoffset)), ldz )
 
*        Initialize NDEPTH, the current depth of the representation tree
         ndepth = 0
*        Initialize parity 
         parity = 1
 
*        Go through blocks, initialize data structures
         ibegin = 1
         wbegin = 1
         DO 10 jblk = 1, iblock( m )
            iend = isplit( jblk )
            sigma = l( iend )
            wend = wbegin - 1
 3          CONTINUE
            IF( wend.LT.m ) THEN
               IF( iblock( wend+1 ).EQ.jblk ) THEN
                  wend = wend + 1
                  GO TO 3
               END IF
            END IF
            IF( wend.LT.wbegin ) THEN
               iwork( iincls + jblk ) = 0
               ibegin = iend + 1
               GO TO 10
            ELSEIF( (wend.LT.dol).OR.(wbegin.GT.dou) ) THEN
               iwork( iincls + jblk ) = 0
               ibegin = iend + 1
               wbegin = wend + 1
               GO TO 10
            END IF
*           The number of eigenvalues in the current block
            im = wend - wbegin + 1
*           This is for a 1x1 block
            IF( ibegin.EQ.iend ) THEN
               iwork( iincls + jblk ) = 0
               z( ibegin, (wbegin-zoffset) ) = one
               isuppz( 2*wbegin-1 ) = ibegin
               isuppz( 2*wbegin ) = ibegin
               w( wbegin ) = w( wbegin ) + sigma
               work( wbegin ) = w( wbegin )
               ibegin = iend + 1
               wbegin = wbegin + 1
               GO TO 10
            END IF
            CALL dcopy( im, w( wbegin ), 1, 
     &                work( wbegin ), 1 )        
*           We store in W the eigenvalue approximations w.r.t. the original
*           matrix T.
            DO 5 i=1,im
               w(wbegin+i-1) = w(wbegin+i-1)+sigma
 5          CONTINUE
 
*           Initialize cluster counter for this block
            iwork( iincls + jblk ) = 1
            iwork( iindc1+ibegin ) = 1
            iwork( iindc1+ibegin+1 ) = im
*
            ibegin = iend + 1
            wbegin = wend + 1
10       CONTINUE
*
      ENDIF 
 
*     Init NEEDIL and NEEDIU
      needil = dou
      neediu = dol      
 
*     Here starts the main loop
*     Only one pass through the loop is done until no collaboration
*     with other processors is needed. 
 40   CONTINUE
 
      parity = 1 - parity
 
*     For each block, build next level of representation tree
*     if there are still remaining clusters 
      ibegin = 1
      wbegin = 1
      DO 170 jblk = 1, iblock( m )
         iend = isplit( jblk )
         sigma = l( iend )
*        Find the eigenvectors of the submatrix indexed IBEGIN
*        through IEND.
         IF(m.EQ.n) THEN
*           all eigenpairs are computed
            wend = iend
         ELSE
*           count how many wanted eigenpairs are in this block
            wend = wbegin - 1
 15         CONTINUE
            IF( wend.LT.m ) THEN
               IF( iblock( wend+1 ).EQ.jblk ) THEN
                  wend = wend + 1
                  GO TO 15
               END IF
            END IF
         ENDIF
 
         oldncl = iwork( iincls + jblk )
         IF( oldncl.EQ.0 ) THEN
            ibegin = iend + 1
            wbegin = wend + 1
            GO TO 170
         END IF
*        OLDIEN is the last index of the previous block
         oldien = ibegin - 1
*        Calculate the size of the current block
         in = iend - ibegin + 1
*        The number of eigenvalues in the current block
         im = wend - wbegin + 1
 
*        Find local spectral diameter of the block
         spdiam = sdiam(jblk)
         lgspdm = log( spdiam + pivmin )
*        Compute ORTOL parameter, similar to DSTEIN
         ortol = spdiam*1.0d-3
*        Compute average gap
         avgap = spdiam/dble(in-1)
*        Compute the minimum of average gap and ORTOL parameter 
*        This can be used as a lower bound for acceptable separation 
*        between eigenvalues 
         enufgp = min(ortol,avgap)
 
*        Any 1x1 block has been treated before
 
*        loop while( OLDNCL.GT.0 )
*        generate the next representation tree level for the current block
         IF( oldncl.GT.0 ) THEN
*           This is a crude protection against infinitely deep trees
            IF( ndepth.GT.m ) THEN
               info = -2
               RETURN
            ENDIF
*           breadth first processing of the current level of the representation
*           tree: OLDNCL = number of clusters on current level
*           NCLUS is the number of clusters for the next level of the reptree
*           reset NCLUS to count the number of child clusters 
            nclus = 0
*
            log_in = log(dble(in))
*
            rltl30 = min( 1.0d-2, one / dble( in ) )
*
            IF( parity.EQ.0 ) THEN
               oldcls = iindc1+ibegin-1
               newcls = iindc2+ibegin-1
            ELSE
               oldcls = iindc2+ibegin-1
               newcls = iindc1+ibegin-1
            END IF
*           Process the clusters on the current level
            DO 150 i = 1, oldncl
               j = oldcls + 2*i
*              OLDFST, OLDLST = first, last index of current cluster.
*                               cluster indices start with 1 and are relative
*                               to WBEGIN when accessing W, WGAP, WERR, Z
               oldfst = iwork( j-1 )
               oldlst = iwork( j )
               IF( ndepth.GT.0 ) THEN
*                 Retrieve relatively robust representation (RRR) of cluster
*                 that has been computed at the previous level
*                 The RRR is stored in Z and overwritten once the eigenvectors
*                 have been computed or when the cluster is refined 
 
                  IF((dol.EQ.1).AND.(dou.EQ.m)) THEN
*                    Get representation from location of the leftmost evalue
*                    of the cluster
                     j = wbegin + oldfst - 1
                  ELSE
                     IF(wbegin+oldfst-1.LT.dol) THEN
*                       Get representation from the left end of Z array 
                        j = dol - 1
                     ELSEIF(wbegin+oldfst-1.GT.dou) THEN
*                       Get representation from the right end of Z array 
                        j = dou
                     ELSE
                        j = wbegin + oldfst - 1
                     ENDIF
                  ENDIF
                  CALL dcopy( in, z( ibegin, (j-zoffset) ), 
     $               1, d( ibegin ), 1 )
                  CALL dcopy( in-1, z( ibegin, (j+1-zoffset) ), 
     $               1, l( ibegin ),1 )
                  sigma = z( iend, (j+1-zoffset) )
*                 Set the corresponding entries in Z to zero
                  CALL dlaset( 'Full', in, 2, zero, zero,
     $                         z( ibegin, (j-zoffset) ), ldz )
               END IF
 
*              Compute DL and DLL of current RRR
               DO 50 j = ibegin, iend-1
                  tmp = d( j )*l( j )
                  work( indld-1+j ) = tmp
                  work( indlld-1+j ) = tmp*l( j )
   50          CONTINUE
 
               IF( ndepth.GT.0 .AND. delref ) THEN
*                 P and Q are the indices of the first and last eigenvalue to compute
*                 within the current block
                  p = indexw( wbegin-1+oldfst )
                  q = indexw( wbegin-1+oldlst )
*                 Offset for the arrays WORK, WGAP and WERR, i.e., the P-OFFSET
*                 thru' Q-OFFSET elements of these arrays are to be used.
C                  OFFSET = P-OLDFST
                  offset = indexw( wbegin ) - 1
*                 perform limited bisection (if necessary) to get approximate 
*                 eigenvalues to the precision needed.
                  CALL dlarrb2( in, d( ibegin ), 
     $                         work(indlld+ibegin-1),
     $                         p, q, rtol1, rtol2, offset, 
     $                         work(wbegin),wgap(wbegin),werr(wbegin),
     $                         work( indwrk ), iwork( iindwk ),
     $                         pivmin, lgpvmn, lgspdm, in, iinfo )
                  IF( iinfo.NE.0 ) THEN
                     info = -1
                     RETURN
                  ENDIF       
*                 We also recompute the extremal gaps. W holds all eigenvalues
*                 of the unshifted matrix and must be used for computation
*                 of WGAP, the entries of WORK might stem from RRRs with 
*                 different shifts. The gaps from WBEGIN-1+OLDFST to
*                 WBEGIN-1+OLDLST are correctly computed in DLARRB2.
*                 However, we only allow the gaps to become greater since 
*                 this is what should happen when we decrease WERR
                  IF( oldfst.GT.1) THEN
                     wgap( wbegin+oldfst-2 ) = 
     $             max(wgap(wbegin+oldfst-2),
     $                 w(wbegin+oldfst-1)-werr(wbegin+oldfst-1) 
     $                 - w(wbegin+oldfst-2)-werr(wbegin+oldfst-2) )
                  ENDIF
                  IF( wbegin + oldlst -1 .LT. wend ) THEN
                     wgap( wbegin+oldlst-1 ) = 
     $               max(wgap(wbegin+oldlst-1), 
     $                   w(wbegin+oldlst)-werr(wbegin+oldlst) 
     $                   - w(wbegin+oldlst-1)-werr(wbegin+oldlst-1) )
                  ENDIF
*                 Each time the eigenvalues in WORK get refined, we store
*                 the newly found approximation with all shifts applied in W
                  DO 53 j=oldfst,oldlst
                     w(wbegin+j-1) = work(wbegin+j-1)+sigma
 53               CONTINUE
               ELSEIF( (ndepth.EQ.0) .OR. (.NOT.delref) ) THEN 
*                 Some of the eigenvalues might have been computed on
*                 other processors                  
*                 Recompute gaps for this cluster 
*                 (all eigenvalues have the same
*                 representation, i.e. the same shift, so this is easy)
                  DO 54 j = oldfst, oldlst-1
                     myerr = werr(wbegin + j - 1) 
                     nxterr = werr(wbegin + j )
                     wgap(wbegin+j-1) = max(wgap(wbegin+j-1),
     $                    (   work(wbegin+j) - nxterr ) 
     $                  - ( work(wbegin+j-1) + myerr )
     $                                     )
 54               CONTINUE
               END IF
*
*              Process the current node.
*
               newfst = oldfst
               DO 140 j = oldfst, oldlst
                  IF( j.EQ.oldlst ) THEN
*                    we are at the right end of the cluster, this is also the
*                    boundary of the child cluster                    
                     newlst = j
                  ELSE 
                     IF (vrtree.EQ.use30) THEN
                        IF(wgap( wbegin + j -1).GE.
     $                     rltl30 * abs(work(wbegin + j -1)) ) THEN
*                          the right relgap is big enough by the Lapack 3.0 criterion
                           newlst = j
                        ELSE
*                          inside a child cluster, the relative gap is not
*                          big enough.
                           GOTO 140
                        ENDIF
                     ELSE IF (vrtree.EQ.use31) THEN
                        IF ( wgap( wbegin + j -1).GE.
     $                      minrgp* abs( work(wbegin + j -1) ) ) THEN
*                          the right relgap is big enough by the Lapack 3.1 criterion
*                          (NEWFST,..,NEWLST) is well separated from the following 
                           newlst = j
                        ELSE
*                          inside a child cluster, the relative gap is not
*                          big enough.
                           GOTO 140
                        ENDIF
                     ELSE IF (vrtree.EQ.use32a) THEN
                        IF( (j.EQ.oldfst).AND.( wgap(wbegin+j-1).GE.
     $                      minrgp* abs(work(wbegin+j-1)) ) ) THEN
*                          the right relgap is big enough by the Lapack 3.1 criterion
*                          Found a singleton
                           newlst = j
                        ELSE IF( (j.GT.oldfst).AND.(j.EQ.newfst).AND.
     $                           ( wgap(wbegin+j-2).GE.
     $                             minrgp* abs(work(wbegin+j-1)) ).AND. 
     $                           ( wgap(wbegin+j-1).GE.
     $                             minrgp* abs(work(wbegin+j-1)) ) 
     $                     ) THEN
*                          Found a singleton
                           newlst = j
                        ELSE IF( (j.GT.newfst).AND.wgap(wbegin+j-1).GE.
     $                     (minrgp*abs(work(wbegin+j-1)) ) ) 
     $                     THEN
*                          the right relgap is big enough by the Lapack 3.1 criterion
                           newlst = j
                        ELSE IF((j.GT.newfst).AND.(j+1.LT.oldlst).AND.
     $                     (wgap(wbegin+j-1).GE.enufgp))
     $                     THEN
*                          the right gap is bigger than ENUFGP
*                          Care needs to be taken with this criterion to make
*                          sure it does not create a remaining `false' singleton
                           newlst = j
                        ELSE
*                          inside a child cluster, the relative gap is not
*                          big enough.
                           GOTO 140
                        ENDIF
                     ELSE IF (vrtree.EQ.use32b) THEN
                        IF( (j.EQ.oldfst).AND.( wgap(wbegin+j-1).GE.
     $                      minrgp* abs(work(wbegin+j-1)) ) ) THEN
*                          the right relgap is big enough by the Lapack 3.1 criterion
*                          Found a singleton
                           newlst = j
                        ELSE IF( (j.GT.oldfst).AND.(j.EQ.newfst).AND.
     $                           ( wgap(wbegin+j-2).GE.
     $                             minrgp* abs(work(wbegin+j-1)) ).AND. 
     $                           ( wgap(wbegin+j-1).GE.
     $                             minrgp* abs(work(wbegin+j-1)) ) 
     $                     ) THEN
*                          Found a singleton
                           newlst = j
                        ELSE IF( (j.GT.newfst).AND.wgap(wbegin+j-1).GE.
     $                     (minrgp*abs(work(wbegin+j-1)) ) ) 
     $                     THEN
*                          the right relgap is big enough by the Lapack 3.1 criterion
                           newlst = j
                        ELSE IF((j.GT.newfst).AND.(j+1.LT.oldlst).AND.
     $                     (wgap( wbegin + j -1).GE.
     $                     rltl30 * abs(work(wbegin + j -1)) ))
     $                     THEN
*                          the right relgap is big enough by the Lapack 3.0 criterion
*                          Care needs to be taken with this criterion to make
*                          sure it does not create a remaining `false' singleton
                           newlst = j
                        ELSE
*                          inside a child cluster, the relative gap is not
*                          big enough.
                           GOTO 140
                        ENDIF
                     END IF
                  END IF
 
*                 Compute size of child cluster found
                  newsiz = newlst - newfst + 1
                  maxcls = max( newsiz, maxcls )
 
*                 NEWFTT is the place in Z where the new RRR or the computed
*                 eigenvector is to be stored
                  IF((dol.EQ.1).AND.(dou.EQ.m)) THEN
*                    Store representation at location of the leftmost evalue
*                    of the cluster
                     newftt = wbegin + newfst - 1
                  ELSE
                     IF(wbegin+newfst-1.LT.dol) THEN
*                       Store representation at the left end of Z array 
                        newftt = dol - 1
                     ELSEIF(wbegin+newfst-1.GT.dou) THEN
*                       Store representation at the right end of Z array 
                        newftt = dou
                     ELSE
                        newftt = wbegin + newfst - 1
                     ENDIF
                  ENDIF
*                 FOR 1D-DISTRIBUTED Z, COMPUTE NEWFTT SHIFTED BY ZOFFSET
                  newftt = newftt - zoffset
 
                  IF( newsiz.GT.1) THEN
*
*                    Current child is not a singleton but a cluster.
*
*
                     IF((wbegin+newlst-1.LT.dol).OR.
     $                  (wbegin+newfst-1.GT.dou)) THEN
*                       if the cluster contains no desired eigenvalues
*                       skip the computation of that branch of the rep. tree
                        GOTO 139
                     ENDIF
 
*                    Compute left and right cluster gap.
*
                     IF( newfst.EQ.1 ) THEN
                        lgap = max( zero, 
     $                       w(wbegin)-werr(wbegin) - vl )
                     ELSE
                        lgap = wgap( wbegin+newfst-2 )
                     ENDIF
                     rgap = wgap( wbegin+newlst-1 )
*
*                    For larger clusters, record the largest gap observed 
*                    somewhere near the middle of the cluster as a possible 
*                    alternative position for a shift when TRYMID is TRUE
*                    
                     mgap = zero
                     IF(newsiz.GE.50) THEN
                        kk = newfst
                        DO 545 k =newfst+newsiz/3,newlst-newsiz/3
                           IF(mgap.LT.wgap( wbegin+k-1 )) THEN
                              kk = k
                              mgap = wgap( wbegin+k-1 )
                           ENDIF
 545                    CONTINUE
                     ENDIF
                     
*
*                    Record the left- and right-most eigenvalues needed
*                    for the next level of the representation tree
                     needil = min(needil,wbegin+newfst-1)
                     neediu = max(neediu,wbegin+newlst-1)
 
*
*                    Check if middle gap is large enough to shift there
*
                     gap = min(lgap,rgap)
                     trymid = (mgap.GT.gap)
 
                     splace(1) = newfst
                     splace(2) = newlst
                     IF(trymid) THEN
                        splace(3) = kk
                        splace(4) = kk+1
                     ELSE
                        splace(3) = newfst
                        splace(4) = newlst
                     ENDIF
*
*                    Compute left- and rightmost eigenvalue of child
*                    to high precision in order to shift as close
*                    as possible and obtain as large relative gaps
*                    as possible
*
 
                     DO 55 k =1,4
                        p = indexw( wbegin-1+splace(k) )
                        offset = indexw( wbegin ) - 1
                        CALL dlarrb2( in, d(ibegin), 
     $                       work( indlld+ibegin-1 ),p,p,
     $                       rqtol, rqtol, offset, 
     $                       work(wbegin),wgap(wbegin),
     $                       werr(wbegin),work( indwrk ), 
     $                       iwork( iindwk ), 
     $                       pivmin, lgpvmn, lgspdm, in, iinfo )
 55                  CONTINUE
*
*                    Compute RRR of child cluster.
*                    Note that the new RRR is stored in Z                     
*
C                    DLARRF2 needs LWORK = 2*N
                     CALL dlarrf2( in, d( ibegin ), l( ibegin ),
     $                         work(indld+ibegin-1), 
     $                         splace(1), splace(2), 
     $                         splace(3), splace(4), work(wbegin),
     $                         wgap(wbegin), werr(wbegin), trymid,
     $                         spdiam, lgap, rgap, pivmin, tau, 
     $                         z( ibegin, newftt ),
     $                         z( ibegin, newftt+1 ),
     $                         work( indwrk ), iinfo )
                     IF( iinfo.EQ.0 ) THEN
*                       a new RRR for the cluster was found by DLARRF2
*                       update shift and store it         
                        ssigma = sigma + tau
                        z( iend, newftt+1 ) = ssigma
*                       WORK() are the midpoints and WERR() the semi-width
*                       Note that the entries in W are unchanged.
                        DO 116 k = newfst, newlst
                           fudge = 
     $                          three*eps*abs(work(wbegin+k-1))
                           work( wbegin + k - 1 ) = 
     $                          work( wbegin + k - 1) - tau
                           fudge = fudge + 
     $                          four*eps*abs(work(wbegin+k-1))
*                          Fudge errors
                           werr( wbegin + k - 1 ) =
     $                          werr( wbegin + k - 1 ) + fudge
 116                    CONTINUE
 
                        nclus = nclus + 1
                        k = newcls + 2*nclus
                        iwork( k-1 ) = newfst
                        iwork( k ) = newlst
*
                        IF(.NOT.delref) THEN
                           onlylc = .true.
*
                           IF(onlylc) THEN
                              mywfst = max(wbegin-1+newfst,dol-1)
                              mywlst = min(wbegin-1+newlst,dou+1)
                           ELSE
                              mywfst = wbegin-1+newfst
                              mywlst = wbegin-1+newlst 
                           ENDIF
 
*                          Compute LLD of new RRR
                           DO 5000 k = ibegin, iend-1
                              work( indwrk-1+k ) = 
     $                        z(k,newftt)*
     $                       (z(k,newftt+1)**2)
 5000                      CONTINUE
*                          P and Q are the indices of the first and last
*                          eigenvalue to compute within the new cluster
                           p = indexw( mywfst )
                           q = indexw( mywlst )
*                          Offset for the arrays WORK, WGAP and WERR
                           offset = indexw( wbegin ) - 1
*                          perform limited bisection (if necessary) to get approximate 
*                          eigenvalues to the precision needed.
                           CALL dlarrb2( in, 
     $                         z(ibegin, newftt ),
     $                         work(indwrk+ibegin-1),
     $                         p, q, rtol1, rtol2, offset, 
     $                         work(wbegin),wgap(wbegin),werr(wbegin),
     $                         work( indwrk+n ), iwork( iindwk ),
     $                         pivmin, lgpvmn, lgspdm, in, iinfo )
                           IF( iinfo.NE.0 ) THEN
                              info = -1
                              RETURN
                           ENDIF       
*                          Each time the eigenvalues in WORK get refined, we store
*                          the newly found approximation with all shifts applied in W
                           DO 5003 k=newfst,newlst
                              w(wbegin+k-1) = work(wbegin+k-1)+ssigma
 5003                      CONTINUE
                        ENDIF
*
                     ELSE    
                        info = -2
                        RETURN
                     ENDIF      
                  ELSE
*
*                    Compute eigenvector of singleton
*
                     iter = 0
*                    
                     tol = four * log_in * eps
*
                     k = newfst
                     windex = wbegin + k - 1
                     zindex = windex - zoffset
                     windmn = max(windex - 1,1)
                     windpl = min(windex + 1,m)
                     lambda = work( windex )
*                    Check if eigenvector computation is to be skipped
                     IF((windex.LT.dol).OR.
     $                  (windex.GT.dou)) THEN
                        eskip = .true.
                        GOTO 125
                     ELSE
                        eskip = .false.
                     ENDIF
                     left = work( windex ) - werr( windex )
                     right = work( windex ) + werr( windex )
                     indeig = indexw( windex )
                     IF( k .EQ. 1) THEN
                        lgap = eps*max(abs(left),abs(right))
                     ELSE
                        lgap = wgap(windmn)
                     ENDIF
                     IF( k .EQ. im) THEN
                        rgap = eps*max(abs(left),abs(right))
                     ELSE
                        rgap = wgap(windex)
                     ENDIF
                     gap = min( lgap, rgap )
                     IF(( k .EQ. 1).OR.(k .EQ. im)) THEN
                        gaptol = zero
                     ELSE
                        gaptol = gap * eps
                     ENDIF
                     isupmn = in
                     isupmx = 1
*                    Update WGAP so that it holds the minimum gap 
*                    to the left or the right. This is crucial in the
*                    case where bisection is used to ensure that the
*                    eigenvalue is refined up to the required precision.
*                    The correct value is restored afterwards.
                     savegp = wgap(windex)
                     wgap(windex) = gap
*                    We want to use the Rayleigh Quotient Correction
*                    as often as possible since it converges quadratically
*                    when we are close enough to the desired eigenvalue.
*                    However, the Rayleigh Quotient can have the wrong sign
*                    and lead us away from the desired eigenvalue. In this
*                    case, the best we can do is to use bisection.
                     usedbs = .false.
                     usedrq = .false.
*                    Bisection is initially turned off unless it is forced
                     needbs =  .NOT.tryrqc 
*                    Reset ITWIST
                     itwist = 0
 120                 CONTINUE
*                    Check if bisection should be used to refine eigenvalue
                     IF(needbs) THEN
*                       Take the bisection as new iterate
                        usedbs = .true.
*                       Temporary copy of twist index needed
                        itmp1 = itwist
                        offset = indexw( wbegin ) - 1
                        CALL dlarrb2( in, d(ibegin), 
     $                       work(indlld+ibegin-1),indeig,indeig,
     $                       zero, two*eps, offset, 
     $                       work(wbegin),wgap(wbegin),
     $                       werr(wbegin),work( indwrk ), 
     $                       iwork( iindwk ), 
     $                       pivmin, lgpvmn, lgspdm, itmp1, iinfo )
                        IF( iinfo.NE.0 ) THEN
                           info = -3
                           RETURN
                        ENDIF       
                        lambda = work( windex )
*                       Reset twist index from inaccurate LAMBDA to
*                       force computation of true MINGMA  
                        itwist = 0
                     ENDIF
*                    Given LAMBDA, compute the eigenvector.
                     CALL dlar1va( in, 1, in, lambda, d(ibegin),
     $                    l( ibegin ), work(indld+ibegin-1), 
     $                    work(indlld+ibegin-1),
     $                    pivmin, gaptol, z( ibegin, zindex),
     $                    .NOT.usedbs, negcnt, ztz, mingma,
     $                    itwist, isuppz( 2*windex-1 ),
     $                    nrminv, resid, rqcorr, work( indwrk ) )
                     IF(iter .EQ. 0) THEN
                        bstres = resid
                        bstw = lambda
                     ELSEIF(resid.LT.bstres) THEN
                        bstres = resid
                        bstw = lambda
                     ENDIF
                     isupmn = min(isupmn,isuppz( 2*windex-1 ))
                     isupmx = max(isupmx,isuppz( 2*windex ))
                     iter = iter + 1
*                    
*                    Convergence test for Rayleigh-Quotient iteration
*                    (omitted when Bisection has been used)
*
                     IF( resid.GT.tol*gap .AND. abs( rqcorr ).GT.
     $                    rqtol*abs( lambda ) .AND. .NOT. usedbs) 
     $                    THEN
*                       We need to check that the RQCORR update doesn't
*                       move the eigenvalue away from the desired one and
*                       towards a neighbor. -> protection with bisection
                        IF(indeig.LE.negcnt) THEN
*                          The wanted eigenvalue lies to the left
                           sgndef = -one
                        ELSE
*                          The wanted eigenvalue lies to the right
                           sgndef = one
                        ENDIF
*                       We only use the RQCORR if it improves
*                       the iterate reasonably.
                        IF( ( rqcorr*sgndef.GE.zero )
     $                       .AND.( lambda + rqcorr.LE. right)
     $                       .AND.( lambda + rqcorr.GE. left)
     $                       ) THEN
                           usedrq = .true.
*                          Store new midpoint of bisection interval in WORK
                           IF(sgndef.EQ.one) THEN
*                             The current LAMBDA is on the left of the true
*                             eigenvalue
                              left = lambda
                           ELSE   
*                             The current LAMBDA is on the right of the true
*                             eigenvalue
                              right = lambda
                           ENDIF  
                           work( windex ) = 
     $                       half * (right + left)
*                          Take RQCORR since it has the correct sign and
*                          improves the iterate reasonably
                           lambda = lambda + rqcorr
*                          Update width of error interval
                           werr( windex ) =                
     $                             half * (right-left)
                        ELSE
                           needbs = .true.
                        ENDIF
                        IF(right-left.LT.rqtol*abs(lambda)) THEN
*                             The eigenvalue is computed to bisection accuracy
*                             compute eigenvector and stop
                           usedbs = .true.
                           GOTO 120
                        ELSEIF( iter.LT.maxitr ) THEN
                           GOTO 120
                        ELSEIF( iter.EQ.maxitr ) THEN
                           needbs = .true.
                           GOTO 120
                        ELSE
                           info = 5
                           RETURN
                        END IF
                     ELSE 
                        stp2ii = .false.
                        IF(usedrq .AND. usedbs .AND. 
     $                     bstres.LE.resid) THEN
                           lambda = bstw
                           stp2ii = .true.
                        ENDIF
                        IF (stp2ii) THEN
                           CALL dlar1va( in, 1, in, lambda,
     $                          d( ibegin ), l( ibegin ), 
     $                          work(indld+ibegin-1), 
     $                          work(indlld+ibegin-1),
     $                          pivmin, gaptol, 
     $                          z( ibegin, zindex ),
     $                          .NOT.usedbs, negcnt, ztz, mingma,
     $                          itwist, 
     $                          isuppz( 2*windex-1 ),
     $                          nrminv, resid, rqcorr, work( indwrk ) )
                        ENDIF
                        work( windex ) = lambda
                     END IF
*
*                    Compute FP-vector support w.r.t. whole matrix
*
                     isuppz( 2*windex-1 ) = isuppz( 2*windex-1 )+oldien
                     isuppz( 2*windex ) = isuppz( 2*windex )+oldien
                     zfrom = isuppz( 2*windex-1 )
                     zto = isuppz( 2*windex )
                     isupmn = isupmn + oldien
                     isupmx = isupmx + oldien
*                    Ensure vector is ok if support in the RQI has changed
                     IF(isupmn.LT.zfrom) THEN
                        DO 122 ii = isupmn,zfrom-1
                           z( ii, zindex ) = zero
 122                    CONTINUE
                     ENDIF   
                     IF(isupmx.GT.zto) THEN
                        DO 123 ii = zto+1,isupmx
                           z( ii, zindex ) = zero
 123                    CONTINUE
                     ENDIF   
                     CALL dscal( zto-zfrom+1, nrminv,
     $                       z( zfrom, zindex ), 1 )
 125                 CONTINUE
*                    Update W 
                     w( windex ) = lambda+sigma
*                    Recompute the gaps on the left and right
*                    But only allow them to become larger and not
*                    smaller (which can only happen through "bad"
*                    cancellation and doesn't reflect the theory
*                    where the initial gaps are underestimated due
*                    to WERR being too crude.)
                     IF(.NOT.eskip) THEN
                        IF( k.GT.1) THEN
                           wgap( windmn ) = max( wgap(windmn),
     $                          w(windex)-werr(windex) 
     $                          - w(windmn)-werr(windmn) )
                        ENDIF
                        IF( windex.LT.wend ) THEN
                           wgap( windex ) = max( savegp, 
     $                          w( windpl )-werr( windpl ) 
     $                          - w( windex )-werr( windex) )
                        ENDIF
                     ENDIF
                  ENDIF
*                 here ends the code for the current child
*
 139              CONTINUE
*                 Proceed to any remaining child nodes
                  newfst = j + 1
 140           CONTINUE
 150        CONTINUE
*           Store number of clusters             
            iwork( iincls + jblk ) = nclus
*
         END IF
         ibegin = iend + 1
         wbegin = wend + 1
 170  CONTINUE
*
*     Check if everything is done: no clusters left for 
*     this processor in any block
*
      finish = .true.
      DO 180 jblk = 1, iblock( m )      
         finish = finish .AND. (iwork(iincls + jblk).EQ.0)
 180  CONTINUE
 
      IF(.NOT.finish) THEN
         ndepth = ndepth + 1
         IF((needil.GE.dol).AND.(neediu.LE.dou)) THEN
*           Once this processor's part of the 
*           representation tree consists exclusively of eigenvalues
*           between DOL and DOU, it can work independently from all 
*           others.
            GOTO 40
         ENDIF
      ENDIF
*
 
      RETURN
*
*     End of DLARRV2
*
Here is the call graph for this function:
Here is the caller graph for this function: