◆ pdlaqr3()

recursive subroutine pdlaqr3	(	logical	wantt,
		logical	wantz,
		integer	n,
		integer	ktop,
		integer	kbot,
		integer	nw,
		double precision, dimension( * )	h,
		integer, dimension( * )	desch,
		integer	iloz,
		integer	ihiz,
		double precision, dimension( * )	z,
		integer, dimension( * )	descz,
		integer	ns,
		integer	nd,
		double precision, dimension( kbot )	sr,
		double precision, dimension( kbot )	si,
		double precision, dimension( * )	v,
		integer, dimension( * )	descv,
		integer	nh,
		double precision, dimension( * )	t,
		integer, dimension( * )	desct,
		integer	nv,
		double precision, dimension( * )	wv,
		integer, dimension( * )	descw,
		double precision, dimension( * )	work,
		integer	lwork,
		integer, dimension( * )	iwork,
		integer	liwork,
		integer	reclevel
	)
Definition at line 1 of file pdlaqr3.f.
*
*     Contribution from the Department of Computing Science and HPC2N,
*     Umea University, Sweden
*
*  -- ScaLAPACK auxiliary routine (version 2.0.1) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     Univ. of Colorado Denver and University of California, Berkeley.
*     January, 2012
*
      IMPLICIT NONE
*
*     .. Scalar Arguments ..
      INTEGER            IHIZ, ILOZ, KBOT, KTOP, LWORK, N, ND, NH, NS,
     $                   NV, NW, LIWORK, RECLEVEL
      LOGICAL            WANTT, WANTZ
*     ..
*     .. Array Arguments ..
      INTEGER            DESCH( * ), DESCZ( * ), DESCT( * ), DESCV( * ),
     $                   DESCW( * ), IWORK( * )
      DOUBLE PRECISION   H( * ), SI( KBOT ), SR( KBOT ), T( * ),
     $                   V( * ), WORK( * ), WV( * ),
     $                   Z( * )
*     ..
*
*  Purpose
*  =======
*
*  Aggressive early deflation:
*
*  This subroutine accepts as input an upper Hessenberg matrix H and
*  performs an orthogonal similarity transformation designed to detect
*  and deflate fully converged eigenvalues from a trailing principal
*  submatrix.  On output H has been overwritten by a new Hessenberg
*  matrix that is a perturbation of an orthogonal similarity
*  transformation of H.  It is to be hoped that the final version of H
*  has many zero subdiagonal entries.
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclically distributed
*  array.  Such a global array has an associated description vector
*  DESCA.  In the following comments, the character _ should be read
*  as "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
*                                 DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) DESCA( M_ )    The number of rows in the global
*                                 array A.
*  N_A    (global) DESCA( N_ )    The number of columns in the global
*                                 array A.
*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
*                                 the rows of the array.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
*                                 the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Arguments
*  =========
*
*  WANTT   (global input) LOGICAL
*          If .TRUE., then the Hessenberg matrix H is fully updated
*          so that the quasi-triangular Schur factor may be
*          computed (in cooperation with the calling subroutine).
*          If .FALSE., then only enough of H is updated to preserve
*          the eigenvalues.
*
*  WANTZ   (global input) LOGICAL
*          If .TRUE., then the orthogonal matrix Z is updated so
*          that the orthogonal Schur factor may be computed
*          (in cooperation with the calling subroutine).
*          If .FALSE., then Z is not referenced.
*
*  N       (global input) INTEGER
*          The order of the matrix H and (if WANTZ is .TRUE.) the
*          order of the orthogonal matrix Z.
*
*  KTOP    (global input) INTEGER
*          It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
*          KBOT and KTOP together determine an isolated block
*          along the diagonal of the Hessenberg matrix.
*
*  KBOT    (global input) INTEGER
*          It is assumed without a check that either
*          KBOT = N or H(KBOT+1,KBOT)=0.  KBOT and KTOP together
*          determine an isolated block along the diagonal of the
*          Hessenberg matrix.
*
*  NW      (global input) INTEGER
*          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).
*
*  H       (local input/output) DOUBLE PRECISION array, dimension
*             (DESCH(LLD_),*)
*          On input the initial N-by-N section of H stores the
*          Hessenberg matrix undergoing aggressive early deflation.
*          On output H has been transformed by an orthogonal
*          similarity transformation, perturbed, and then returned
*          to Hessenberg form that (it is to be hoped) has some
*          zero subdiagonal entries.
*
*  DESCH   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix H.
*
*  ILOZ    (global input) INTEGER
*  IHIZ    (global input) INTEGER
*          Specify the rows of Z to which transformations must be
*          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
*
*  Z       (input/output) DOUBLE PRECISION array, dimension
*             (DESCZ(LLD_),*)
*          If WANTZ is .TRUE., then on output, the orthogonal
*          similarity transformation mentioned above has been
*          accumulated from the right into rows ILOZ:IHIZ of Z.
*          If WANTZ is .FALSE., then Z is unreferenced.
*
*  DESCZ   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix Z.
*
*  NS      (global output) INTEGER
*          The number of unconverged (i.e., approximate) eigenvalues
*          returned in SR and SI that may be used as shifts by the
*          calling subroutine.
*
*  ND      (global output) INTEGER
*          The number of converged eigenvalues uncovered by this
*          subroutine.
*
*  SR      (global output) DOUBLE PRECISION array, dimension KBOT
*  SI      (global output) DOUBLE PRECISION array, dimension KBOT
*          On output, the real and imaginary parts of approximate
*          eigenvalues that may be used for shifts are stored in
*          SR(KBOT-ND-NS+1) through SR(KBOT-ND) and
*          SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively.
*          The real and imaginary parts of converged eigenvalues
*          are stored in SR(KBOT-ND+1) through SR(KBOT) and
*          SI(KBOT-ND+1) through SI(KBOT), respectively.
*
*  V       (global workspace) DOUBLE PRECISION array, dimension 
*             (DESCV(LLD_),*)
*          An NW-by-NW distributed work array.
*
*  DESCV   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix V.
*
*  NH      (input) INTEGER scalar
*          The number of columns of T.  NH.GE.NW.
*
*  T       (global workspace) DOUBLE PRECISION array, dimension
*             (DESCT(LLD_),*)
*
*  DESCT   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix T.
*
*  NV      (global input) INTEGER
*          The number of rows of work array WV available for
*          workspace.  NV.GE.NW.
*
*  WV      (global workspace) DOUBLE PRECISION array, dimension 
*             (DESCW(LLD_),*)
*
*  DESCW   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix WV.
*
*  WORK    (local workspace) DOUBLE PRECISION array, dimension LWORK.
*          On exit, WORK(1) is set to an estimate of the optimal value
*          of LWORK for the given values of N, NW, KTOP and KBOT.
*
*  LWORK   (local input) INTEGER
*          The dimension of the work array WORK.  LWORK = 2*NW
*          suffices, but greater efficiency may result from larger
*          values of LWORK.
*
*          If LWORK = -1, then a workspace query is assumed; PDLAQR3
*          only estimates the optimal workspace size for the given
*          values of N, NW, KTOP and KBOT.  The estimate is returned
*          in WORK(1).  No error message related to LWORK is issued
*          by XERBLA.  Neither H nor Z are accessed.
*
*  IWORK   (local workspace) INTEGER array, dimension (LIWORK)
*
*  LIWORK  (local input) INTEGER
*          The length of the workspace array IWORK
*
*  ================================================================
*  Based on contributions by
*        Robert Granat and Meiyue Shao,
*        Department of Computing Science and HPC2N,
*        Umea University, Sweden
*
*  ================================================================
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
     $                   LLD_, MB_, M_, NB_, N_, RSRC_
      INTEGER            RECMAX
      LOGICAL            SORTGRAD
      parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
     $                     ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
     $                     rsrc_ = 7, csrc_ = 8, lld_ = 9, recmax = 3,
     $                     sortgrad = .false. )
      DOUBLE PRECISION   ZERO, ONE
      parameter( zero = 0.0d0, one = 1.0d0 )
*     ..
*     .. Local Scalars ..
      DOUBLE PRECISION   AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S,
     $                   SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP,
     $                   ELEM, ELEM1, ELEM2, ELEM3, R1, ANORM, RNORM,
     $                   RESAED
      INTEGER            I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL,
     $                   KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2, LWK3,
     $                   LWKOPT, NMIN, LLDH, LLDZ, LLDT, LLDV, LLDWV,
     $                   ICTXT, NPROW, NMAX, NPCOL, MYROW, MYCOL, NB,
     $                   IROFFH, M, RCOLS, TAUROWS, RROWS, TAUCOLS,
     $                   ITAU, IR, IPW, NPROCS, MLOC, IROFFHH,
     $                   ICOFFHH, HHRSRC, HHCSRC, HHROWS, HHCOLS,
     $                   IROFFZZ, ICOFFZZ, ZZRSRC, ZZCSRC, ZZROWS,
     $                   ZZCOLS, IERR, TZROWS0, TZCOLS0, IERR0, IPT0,
     $                   IPZ0, IPW0, NB2, ROUND, LILST, KK, LILST0,
     $                   IWRK1, RSRC, CSRC, LWK4, LWK5, IWRK2, LWK6,
     $                   LWK7, LWK8, ILWKOPT, TZROWS, TZCOLS, NSEL,
     $                   NPMIN, ICTXT_NEW, MYROW_NEW, MYCOL_NEW
      LOGICAL            BULGE, SORTED, LQUERY
*     ..
*     .. Local Arrays ..
      INTEGER            PAR( 6 ), DESCR( DLEN_ ),
     $                   DESCTAU( DLEN_ ), DESCHH( DLEN_ ),
     $                   DESCZZ( DLEN_ ), DESCTZ0( DLEN_ ),
     $                   PMAP( 64*64 )
      DOUBLE PRECISION   DDUM( 1 )
*     ..
*     .. External Functions ..
      DOUBLE PRECISION   DLAMCH, PDLANGE
      INTEGER            PILAENVX, NUMROC, INDXG2P, ICEIL, BLACS_PNUM
      EXTERNAL           dlamch, pilaenvx, numroc, indxg2p, pdlange,
     $                   mpi_wtime, iceil, blacs_pnum
*     ..
*     .. External Subroutines ..
      EXTERNAL           pdcopy, pdgehrd, pdgemm, dlabad, pdlacpy,
     $                   pdlaqr1, dlanv2, pdlaqr0, pdlarf, pdlarfg,
     $                   pdlaset, pdtrord, pdelget, pdelset,
     $                   pdlamve, blacs_gridinfo, blacs_gridmap,
     $                   blacs_gridexit, pdgemr2d
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          abs, dble, int, max, min, sqrt
*     ..
*     .. Executable Statements ..
      ictxt = desch( ctxt_ )
      CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
      nprocs = nprow*npcol
*
*     Extract local leading dimensions, blockfactors, offset for
*     keeping the alignment requirements and size of deflation window.
*
      lldh  = desch( lld_ )
      lldz  = descz( lld_ )
      lldt  = desct( lld_ )
      lldv  = descv( lld_ )
      lldwv = descw( lld_ )
      nb = desch( mb_ )
      iroffh = mod( ktop - 1, nb )
      jw = min( nw, kbot-ktop+1 )
      nsel = nb+jw
*
*     Extract environment variables for parallel eigenvalue reordering.
*
      par(1) = pilaenvx(ictxt, 17, 'PDLAQR3', 'SV', jw, nb, -1, -1)
      par(2) = pilaenvx(ictxt, 18, 'PDLAQR3', 'SV', jw, nb, -1, -1)
      par(3) = pilaenvx(ictxt, 19, 'PDLAQR3', 'SV', jw, nb, -1, -1)
      par(4) = pilaenvx(ictxt, 20, 'PDLAQR3', 'SV', jw, nb, -1, -1)
      par(5) = pilaenvx(ictxt, 21, 'PDLAQR3', 'SV', jw, nb, -1, -1)
      par(6) = pilaenvx(ictxt, 22, 'PDLAQR3', 'SV', jw, nb, -1, -1)
*
*     Check if workspace query.
*
      lquery = lwork.EQ.-1 .OR. liwork.EQ.-1
*
*     Estimate optimal workspace.
*
      IF( jw.LE.2 ) THEN
         lwkopt = 1
      ELSE
*
*        Workspace query call to PDGEHRD.
*
         taurows = numroc( 1, 1, mycol, descv(rsrc_), nprow )
         taucols = numroc( jw+iroffh, nb, mycol, descv(csrc_),
     $        npcol )
         CALL pdgehrd( jw, 1, jw, t, 1, 1, desct, work, work, -1,
     $        info )
         lwk1 = int( work( 1 ) ) + taurows*taucols
*
*        Workspace query call to PDORMHR.
*
         CALL pdormhr( 'Right', 'No', jw, jw, 1, jw, t, 1, 1, desct,
     $        work, v, 1, 1, descv, work, -1, info )
         lwk2 = int( work( 1 ) )
*
*        Workspace query call to PDLAQR0.
*
         nmin = pilaenvx( ictxt, 12, 'PDLAQR3', 'SV', jw, 1, jw, lwork )
         nmax = ( n-1 ) / 3
         IF( jw+iroffh.GT.nmin .AND. jw+iroffh.LE.nmax
     $        .AND. reclevel.LT.recmax ) THEN
            CALL pdlaqr0( .true., .true., jw+iroffh, 1+iroffh,
     $           jw+iroffh, t, desct, sr, si, 1, jw, v, descv,
     $           work, -1, iwork, liwork-nsel, infqr, 
     $           reclevel+1 )
            lwk3 = int( work( 1 ) )
            iwrk1 = iwork( 1 )
         ELSE
            rsrc = desct( rsrc_ )
            csrc = desct( csrc_ )
            desct( rsrc_ ) = 0
            desct( csrc_ ) = 0
            CALL pdlaqr1( .true., .true., jw+iroffh, 1, jw+iroffh, t,
     $           desct, sr, si, 1, jw+iroffh, v, descv, work, -1,
     $           iwork, liwork-nsel, infqr )
            desct( rsrc_ ) = rsrc
            desct( csrc_ ) = csrc
            lwk3 = int( work( 1 ) )
            iwrk1 = iwork( 1 )
         END IF
*
*        Workspace in case of alignment problems.
*
         tzrows0 = numroc( jw+iroffh, nb, myrow, 0, nprow )
         tzcols0 = numroc( jw+iroffh, nb, mycol, 0, npcol )
         lwk4 = 2 * tzrows0*tzcols0
*
*        Workspace check for reordering.
*
         CALL pdtrord( 'Vectors', iwork, par, jw+iroffh, t, 1, 1,
     $        desct, v, 1, 1, descv, ddum, ddum, mloc, work, -1,
     $        iwork, liwork-nsel, info )
         lwk5 = int( work( 1 ) )
         iwrk2 = iwork( 1 )
*
*        Extra workspace for reflecting back spike
*        (workspace for PDLARF approximated for simplicity).
*
         rrows =  numroc( n+iroffh, nb, myrow, descv(rsrc_), nprow )
         rcols =  numroc( 1, 1, mycol, descv(csrc_), npcol )
         lwk6 = rrows*rcols + taurows*taucols +
     $        2*iceil(iceil(jw+iroffh,nb),nprow)*nb
     $         *iceil(iceil(jw+iroffh,nb),npcol)*nb
*
*        Extra workspace needed by PBLAS update calls
*        (also estimated for simplicity).
*
         lwk7 = max( iceil(iceil(jw,nb),nprow)*nb *
     $               iceil(iceil(n-kbot,nb),npcol)*nb,
     $               iceil(iceil(ihiz-iloz+1,nb),nprow)*nb *
     $               iceil(iceil(jw,nb),npcol)*nb,
     $               iceil(iceil(kbot-jw,nb),nprow)*nb *
     $               iceil(iceil(jw,nb),npcol)*nb )
*
*        Residual check workspace.
*
         lwk8 = 0
*
*        Optimal workspace.
*
         lwkopt = max( lwk1, lwk2, lwk3+lwk4, lwk5, lwk6, lwk7, lwk8 )
         ilwkopt = max( iwrk1, iwrk2 )
      END IF
*
*     Quick return in case of workspace query.
*
      work( 1 ) = dble( lwkopt )
*
*     IWORK(1:NSEL) is used as the array SELECT for PDTRORD.
*
      iwork( 1 ) = ilwkopt + nsel
      IF( lquery )
     $   RETURN
*
*     Nothing to do for an empty active block ...
      ns = 0
      nd = 0
      IF( ktop.GT.kbot )
     $   RETURN
*     ... nor for an empty deflation window.
*
      IF( nw.LT.1 )
     $   RETURN
*
*     Machine constants.
*
      safmin = dlamch( 'SAFE MINIMUM' )
      safmax = one / safmin
      CALL dlabad( safmin, safmax )
      ulp = dlamch( 'PRECISION' )
      smlnum = safmin*( dble( n ) / ulp )
*
*     Setup deflation window.
*
      jw = min( nw, kbot-ktop+1 )
      kwtop = kbot - jw + 1
      IF( kwtop.EQ.ktop ) THEN
         s = zero
      ELSE
         CALL pdelget( 'All', '1-Tree', s, h, kwtop, kwtop-1, desch )
      END IF
*
      IF( kbot.EQ.kwtop ) THEN
*
*        1-by-1 deflation window: not much to do.
*
         CALL pdelget( 'All', '1-Tree', sr( kwtop ), h, kwtop, kwtop,
     $        desch )
         si( kwtop ) = zero
         ns = 1
         nd = 0
         IF( abs( s ).LE.max( smlnum, ulp*abs( sr( kwtop ) ) ) )
     $        THEN
            ns = 0
            nd = 1
            IF( kwtop.GT.ktop )
     $         CALL pdelset( h, kwtop, kwtop-1 , desch, zero )
         END IF
         RETURN
      END IF
*
      IF( kwtop.EQ.ktop .AND. kbot-kwtop.EQ.1 ) THEN
*
*        2-by-2 deflation window: a little more to do.
*
         CALL pdelget( 'All', '1-Tree', aa, h, kwtop, kwtop, desch )
         CALL pdelget( 'All', '1-Tree', bb, h, kwtop, kwtop+1, desch )
         CALL pdelget( 'All', '1-Tree', cc, h, kwtop+1, kwtop, desch )
         CALL pdelget( 'All', '1-Tree', dd, h, kwtop+1, kwtop+1, desch )
         CALL dlanv2( aa, bb, cc, dd, sr(kwtop), si(kwtop),
     $        sr(kwtop+1), si(kwtop+1), cs, sn )
         ns = 0
         nd = 2
         IF( cc.EQ.zero ) THEN
            i = kwtop
            IF( i+2.LE.n .AND. wantt )
     $         CALL pdrot( n-i-1, h, i, i+2, desch, desch(m_), h, i+1,
     $              i+2, desch, desch(m_), cs, sn, work, lwork, info )
            IF( i.GT.1 )
     $         CALL pdrot( i-1, h, 1, i, desch, 1, h, 1, i+1, desch, 1,
     $              cs, sn, work, lwork, info )
            IF( wantz )
     $         CALL pdrot( ihiz-iloz+1, z, iloz, i, descz, 1, z, iloz,
     $              i+1, descz, 1, cs, sn, work, lwork, info )
            CALL pdelset( h, i, i, desch, aa )
            CALL pdelset( h, i, i+1, desch, bb )
            CALL pdelset( h, i+1, i, desch, cc )
            CALL pdelset( h, i+1, i+1, desch, dd )
         END IF
         work( 1 ) = dble( lwkopt )
         RETURN
      END IF
*
*     Calculate new value for IROFFH in case deflation window
*     was adjusted.
*
      iroffh = mod( kwtop - 1, nb )
*
*     Adjust number of rows and columns of T matrix descriptor
*     to prepare for call to PDTRORD.
*
      desct( m_ ) = jw+iroffh
      desct( n_ ) = jw+iroffh
*
*     Convert to spike-triangular form.  (In case of a rare QR failure,
*     this routine continues to do aggressive early deflation using that
*     part of the deflation window that converged using INFQR here and
*     there to keep track.)
*
*     Copy the trailing submatrix to the working space.
*
      CALL pdlaset( 'All', iroffh, jw+iroffh, zero, one, t, 1, 1,
     $     desct )
      CALL pdlaset( 'All', jw, iroffh, zero, zero, t, 1+iroffh, 1,
     $     desct )
      CALL pdlacpy( 'All', 1, jw, h, kwtop, kwtop, desch, t, 1+iroffh,
     $     1+iroffh, desct )
      CALL pdlacpy( 'Upper', jw-1, jw-1, h, kwtop+1, kwtop, desch, t,
     $     1+iroffh+1, 1+iroffh, desct )
      IF( jw.GT.2 )
     $   CALL pdlaset( 'Lower', jw-2, jw-2, zero, zero, t, 1+iroffh+2,
     $        1+iroffh, desct )
      CALL pdlacpy( 'All', jw-1, 1, h, kwtop+1, kwtop+jw-1, desch, t,
     $     1+iroffh+1, 1+iroffh+jw-1, desct )
*
*     Initialize the working orthogonal matrix.
*
      CALL pdlaset( 'All', jw+iroffh, jw+iroffh, zero, one, v, 1, 1,
     $     descv )
*
*     Compute the Schur form of T.
*
      npmin = pilaenvx( ictxt, 23, 'PDLAQR3', 'SV', jw, nb, nprow,
     $     npcol )
      nmin = pilaenvx( ictxt, 12, 'PDLAQR3', 'SV', jw, 1, jw, lwork )
      nmax = ( n-1 ) / 3
      IF( min(nprow, npcol).LE.npmin+1 .OR. reclevel.GE.1 ) THEN
*
*        The AED window is large enough.
*        Compute the Schur decomposition with all processors.
*
         IF( jw+iroffh.GT.nmin .AND. jw+iroffh.LE.nmax
     $        .AND. reclevel.LT.recmax ) THEN
            CALL pdlaqr0( .true., .true., jw+iroffh, 1+iroffh,
     $           jw+iroffh, t, desct, sr( kwtop-iroffh ),
     $           si( kwtop-iroffh ), 1+iroffh, jw+iroffh, v, descv,
     $           work, lwork, iwork(nsel+1), liwork-nsel, infqr,
     $           reclevel+1 )
         ELSE
            IF( desct(rsrc_).EQ.0 .AND. desct(csrc_).EQ.0 ) THEN
               IF( jw+iroffh.GT.desct( mb_ ) ) THEN
                  CALL pdlaqr1( .true., .true., jw+iroffh, 1,
     $                 jw+iroffh, t, desct, sr( kwtop-iroffh ),
     $                 si( kwtop-iroffh ), 1, jw+iroffh, v,
     $                 descv, work, lwork, iwork(nsel+1), liwork-nsel,
     $                 infqr )
               ELSE
                  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
                     CALL dlahqr( .true., .true., jw+iroffh, 1+iroffh,
     $                    jw+iroffh, t, desct(lld_),
     $                    sr( kwtop-iroffh ), si( kwtop-iroffh ),
     $                    1+iroffh, jw+iroffh, v, descv(lld_), infqr )
                  ELSE
                     infqr = 0
                  END IF
                  IF( nprocs.GT.1 )
     $               CALL igamn2d( ictxt, 'All', '1-Tree', 1, 1, infqr,
     $                    1, -1, -1, -1, -1, -1 )
               END IF
            ELSEIF( jw+iroffh.LE.desct( mb_ ) ) THEN
               IF( myrow.EQ.desct(rsrc_) .AND. mycol.EQ.desct(csrc_) )
     $              THEN
                  CALL dlahqr( .true., .true., jw+iroffh, 1+iroffh,
     $                 jw+iroffh, t, desct(lld_),
     $                 sr( kwtop-iroffh ), si( kwtop-iroffh ),
     $                 1+iroffh, jw+iroffh, v, descv(lld_), infqr )
               ELSE
                  infqr = 0
               END IF
               IF( nprocs.GT.1 )
     $         CALL igamn2d( ictxt, 'All', '1-Tree', 1, 1, infqr,
     $              1, -1, -1, -1, -1, -1 )
            ELSE
               tzrows0 = numroc( jw+iroffh, nb, myrow, 0, nprow )
               tzcols0 = numroc( jw+iroffh, nb, mycol, 0, npcol )
               CALL descinit( desctz0, jw+iroffh, jw+iroffh, nb, nb, 0,
     $              0, ictxt, max(1,tzrows0), ierr0 )
               ipt0 = 1
               ipz0 = ipt0 + max(1,tzrows0)*tzcols0
               ipw0 = ipz0 + max(1,tzrows0)*tzcols0
               CALL pdlamve( 'All', jw+iroffh, jw+iroffh, t, 1, 1,
     $              desct, work(ipt0), 1, 1, desctz0, work(ipw0) )
               CALL pdlaset( 'All', jw+iroffh, jw+iroffh, zero, one,
     $              work(ipz0), 1, 1, desctz0 )
               CALL pdlaqr1( .true., .true., jw+iroffh, 1,
     $              jw+iroffh, work(ipt0), desctz0,
     $              sr( kwtop-iroffh ), si( kwtop-iroffh ),
     $              1, jw+iroffh, work(ipz0),
     $              desctz0, work(ipw0), lwork-ipw0+1, iwork(nsel+1),
     $              liwork-nsel, infqr )
               CALL pdlamve( 'All', jw+iroffh, jw+iroffh, work(ipt0), 1,
     $              1, desctz0, t, 1, 1, desct, work(ipw0) )
               CALL pdlamve( 'All', jw+iroffh, jw+iroffh, work(ipz0), 1,
     $              1, desctz0, v, 1, 1, descv, work(ipw0) )
            END IF
         END IF
      ELSE
*
*        The AED window is too small.
*        Redistribute the AED window to a subgrid
*        and do the computation on the subgrid.
*
         ictxt_new = ictxt
         DO 20 i = 0, npmin-1
            DO 10 j = 0, npmin-1
               pmap( j+1+i*npmin ) = blacs_pnum( ictxt, i, j )
 10         CONTINUE
 20      CONTINUE
         CALL blacs_gridmap( ictxt_new, pmap, npmin, npmin, npmin )
         CALL blacs_gridinfo( ictxt_new, npmin, npmin, myrow_new,
     $        mycol_new )
         IF( myrow.GE.npmin .OR. mycol.GE.npmin ) ictxt_new = -1
         IF( ictxt_new.GE.0 ) THEN
            tzrows0 = numroc( jw, nb, myrow_new, 0, npmin )
            tzcols0 = numroc( jw, nb, mycol_new, 0, npmin )
            CALL descinit( desctz0, jw, jw, nb, nb, 0,
     $           0, ictxt_new, max(1,tzrows0), ierr0 )
            ipt0 = 1
            ipz0 = ipt0 + max(1,tzrows0)*max(1,tzcols0)
            ipw0 = ipz0 + max(1,tzrows0)*max(1,tzcols0)
         ELSE
            ipt0 = 1
            ipz0 = 2
            ipw0 = 3
            desctz0( ctxt_ ) = -1
            infqr = 0
         END IF
         CALL pdgemr2d( jw, jw, t, 1+iroffh, 1+iroffh, desct,
     $        work(ipt0), 1, 1, desctz0, ictxt )
         IF( ictxt_new.GE.0 ) THEN
            CALL pdlaset( 'All', jw, jw, zero, one, work(ipz0), 1, 1,
     $           desctz0 )
            nmin = pilaenvx( ictxt_new, 12, 'PDLAQR3', 'SV', jw, 1, jw,
     $           lwork )
            IF( jw.GT.nmin .AND. jw.LE.nmax .AND. reclevel.LT.1 ) THEN
               CALL pdlaqr0( .true., .true., jw, 1, jw, work(ipt0),
     $              desctz0, sr( kwtop ), si( kwtop ), 1, jw,
     $              work(ipz0), desctz0, work(ipw0), lwork-ipw0+1,
     $              iwork(nsel+1), liwork-nsel, infqr,
     $              reclevel+1 )
            ELSE
               CALL pdlaqr1( .true., .true., jw, 1, jw, work(ipt0),
     $              desctz0, sr( kwtop ), si( kwtop ), 1, jw,
     $              work(ipz0), desctz0, work(ipw0), lwork-ipw0+1,
     $              iwork(nsel+1), liwork-nsel, infqr )
            END IF
         END IF
         CALL pdgemr2d( jw, jw, work(ipt0), 1, 1, desctz0, t, 1+iroffh,
     $        1+iroffh, desct, ictxt )
         CALL pdgemr2d( jw, jw, work(ipz0), 1, 1, desctz0, v, 1+iroffh,
     $        1+iroffh, descv, ictxt )
         IF( ictxt_new.GE.0 )
     $      CALL blacs_gridexit( ictxt_new )
         IF( myrow+mycol.GT.0 ) THEN
            DO 40 j = 0, jw-1
               sr( kwtop+j ) = zero
               si( kwtop+j ) = zero
 40         CONTINUE
         END IF
         CALL igamn2d( ictxt, 'All', '1-Tree', 1, 1, infqr, 1, -1, -1,
     $        -1, -1, -1 )
         CALL dgsum2d( ictxt, 'All', ' ', jw, 1, sr(kwtop), jw, -1, -1 )
         CALL dgsum2d( ictxt, 'All', ' ', jw, 1, si(kwtop), jw, -1, -1 )
      END IF
*
*     Adjust INFQR for offset from block border in submatrices.
*
      IF( infqr.NE.0 )
     $   infqr = infqr - iroffh
*
*     PDTRORD needs a clean margin near the diagonal.
*
      DO 50 j = 1, jw - 3
         CALL pdelset( t, j+2, j, desct, zero )
         CALL pdelset( t, j+3, j, desct, zero )
 50   CONTINUE
      IF( jw.GT.2 )
     $   CALL pdelset( t, jw, jw-2, desct, zero )
*
*     Check local residual for AED Schur decomposition.
*
      resaed = 0.0d+00
*
*     Clean up the array SELECT for PDTRORD.
*
      DO 60 j = 1, nsel
         iwork( j ) = 0
 60   CONTINUE
*
*     Set local M counter to zero.
*
      mloc = 0
*
*     Outer deflation detection loop (label 80).
*     In this loop a bunch of undeflatable eigenvalues
*     are moved simultaneously.
*
      DO 70 j = 1, iroffh + infqr
         iwork( j ) = 1
 70   CONTINUE
*
      ns = jw
      ilst = infqr + 1 + iroffh
      IF( ilst.GT.1 ) THEN
         CALL pdelget( 'All', '1-Tree', elem, t, ilst, ilst-1, desct )
         bulge = elem.NE.zero
         IF( bulge ) ilst = ilst+1
      END IF
*
 80   CONTINUE
      IF( ilst.LE.ns+iroffh ) THEN
*
*        Find the top-left corner of the local window.
*
         lilst = max(ilst,ns+iroffh-nb+1)
         IF( lilst.GT.1 ) THEN
            CALL pdelget( 'All', '1-Tree', elem, t, lilst, lilst-1,
     $           desct )
            bulge = elem.NE.zero
            IF( bulge ) lilst = lilst+1
         END IF
*
*        Lock all eigenvalues outside the local window.
*
         DO 90 j = iroffh+1, lilst-1
            iwork( j ) = 1
 90      CONTINUE
         lilst0 = lilst
*
*        Inner deflation detection loop (label 100).
*        In this loop, the undeflatable eigenvalues are moved to the
*        top-left corner of the local window.
*
 100     CONTINUE
         IF( lilst.LE.ns+iroffh ) THEN
            IF( ns.EQ.1 ) THEN
               bulge = .false.
            ELSE
               CALL pdelget( 'All', '1-Tree', elem, t, ns+iroffh,
     $              ns+iroffh-1, desct )
               bulge = elem.NE.zero
            END IF
*
*           Small spike tip test for deflation.
*
            IF( .NOT.bulge ) THEN
*
*              Real eigenvalue.
*
               CALL pdelget( 'All', '1-Tree', elem, t, ns+iroffh,
     $              ns+iroffh, desct )
               foo = abs( elem )
               IF( foo.EQ.zero )
     $            foo = abs( s )
               CALL pdelget( 'All', '1-Tree', elem, v, 1+iroffh,
     $              ns+iroffh, descv )
               IF( abs( s*elem ).LE.max( smlnum, ulp*foo ) ) THEN
*
*                 Deflatable.
*
                  ns = ns - 1
               ELSE
*
*                 Undeflatable: move it up out of the way.
*
                  ifst = ns
                  DO 110 j = lilst, jw+iroffh
                     iwork( j ) = 0
 110              CONTINUE
                  iwork( ifst+iroffh ) = 1
                  CALL pdtrord( 'Vectors', iwork, par, jw+iroffh, t, 1,
     $                 1, desct, v, 1, 1, descv, work,
     $                 work(jw+iroffh+1), mloc,
     $                 work(2*(jw+iroffh)+1), lwork-2*(jw+iroffh),
     $                 iwork(nsel+1), liwork-nsel, info )
*
*                 Adjust the array SELECT explicitly so that it does not
*                 rely on the output of PDTRORD.
*
                  iwork( ifst+iroffh ) = 0
                  iwork( lilst ) = 1
                  lilst = lilst + 1
*
*                 In case of a rare exchange failure, adjust the
*                 pointers ILST and LILST to the current place to avoid
*                 unexpected behaviors.
*
                  IF( info.NE.0 ) THEN
                     lilst = max(info, lilst)
                     ilst = max(info, ilst)
                  END IF
               END IF
            ELSE
*
*              Complex conjugate pair.
*
               CALL pdelget( 'All', '1-Tree', elem1, t, ns+iroffh,
     $              ns+iroffh, desct )
               CALL pdelget( 'All', '1-Tree', elem2, t, ns+iroffh,
     $              ns+iroffh-1, desct )
               CALL pdelget( 'All', '1-Tree', elem3, t, ns+iroffh-1,
     $              ns+iroffh, desct )
               foo = abs( elem1 ) + sqrt( abs( elem2 ) )*
     $              sqrt( abs( elem3 ) )
               IF( foo.EQ.zero )
     $            foo = abs( s )
               CALL pdelget( 'All', '1-Tree', elem1, v, 1+iroffh,
     $              ns+iroffh, descv )
               CALL pdelget( 'All', '1-Tree', elem2, v, 1+iroffh,
     $              ns+iroffh-1, descv )
               IF( max( abs( s*elem1 ), abs( s*elem2 ) ).LE.
     $              max( smlnum, ulp*foo ) ) THEN
*
*                 Deflatable.
*
                  ns = ns - 2
               ELSE
*
*                 Undeflatable: move them up out of the way.
*
                  ifst = ns
                  DO 120 j = lilst, jw+iroffh
                     iwork( j ) = 0
 120              CONTINUE
                  iwork( ifst+iroffh ) = 1
                  iwork( ifst+iroffh-1 ) = 1
                  CALL pdtrord( 'Vectors', iwork, par, jw+iroffh, t, 1,
     $                 1, desct, v, 1, 1, descv, work,
     $                 work(jw+iroffh+1), mloc,
     $                 work(2*(jw+iroffh)+1), lwork-2*(jw+iroffh),
     $                 iwork(nsel+1), liwork-nsel, info )
*
*                 Adjust the array SELECT explicitly so that it does not
*                 rely on the output of PDTRORD.
*
                  iwork( ifst+iroffh ) = 0
                  iwork( ifst+iroffh-1 ) = 0
                  iwork( lilst ) = 1
                  iwork( lilst+1 ) = 1
                  lilst = lilst + 2
*
*                 In case of a rare exchange failure, adjust the
*                 pointers ILST and LILST to the current place to avoid
*                 unexpected behaviors.
*
                  IF( info.NE.0 ) THEN
                     lilst = max(info, lilst)
                     ilst = max(info, ilst)
                  END IF
               END IF
            END IF
*
*           End of inner deflation detection loop.
*
            GO TO 100
         END IF
*
*        Unlock the eigenvalues outside the local window.
*        Then undeflatable eigenvalues are moved to the proper position.
*
         DO 130 j = ilst, lilst0-1
            iwork( j ) = 0
 130     CONTINUE
         CALL pdtrord( 'Vectors', iwork, par, jw+iroffh, t, 1, 1,
     $        desct, v, 1, 1, descv, work, work(jw+iroffh+1),
     $        m, work(2*(jw+iroffh)+1), lwork-2*(jw+iroffh),
     $        iwork(nsel+1), liwork-nsel, info )
         ilst = m + 1
*
*        In case of a rare exchange failure, adjust the pointer ILST to
*        the current place to avoid unexpected behaviors.
*
         IF( info.NE.0 )
     $      ilst = max(info, ilst)
*
*        End of outer deflation detection loop.
*
         GO TO 80
      END IF
 
*
*     Post-reordering step: copy output eigenvalues to output.
*
      CALL dcopy( jw, work(1+iroffh), 1, sr( kwtop ), 1 )
      CALL dcopy( jw, work(jw+2*iroffh+1), 1, si( kwtop ), 1 )
*
*     Check local residual for reordered AED Schur decomposition.
*
      resaed = 0.0d+00
*
*     Return to Hessenberg form.
*
      IF( ns.EQ.0 )
     $   s = zero
*
      IF( ns.LT.jw .AND. sortgrad ) THEN
*
*        Sorting diagonal blocks of T improves accuracy for
*        graded matrices.  Bubble sort deals well with exchange
*        failures. Eigenvalues/shifts from T are also restored.
*
         round = 0
         sorted = .false.
         i = ns + 1 + iroffh
 140     CONTINUE
         IF( sorted )
     $      GO TO 180
         sorted = .true.
         round = round + 1
*
         kend = i - 1
         i = infqr + 1 + iroffh
         IF( i.EQ.ns+iroffh ) THEN
            k = i + 1
         ELSE IF( si( kwtop-iroffh + i-1 ).EQ.zero ) THEN
            k = i + 1
         ELSE
            k = i + 2
         END IF
 150     CONTINUE
         IF( k.LE.kend ) THEN
            IF( k.EQ.i+1 ) THEN
               evi = abs( sr( kwtop-iroffh+i-1 ) )
            ELSE
               evi = abs( sr( kwtop-iroffh+i-1 ) ) +
     $              abs( si( kwtop-iroffh+i-1 ) )
            END IF
*
            IF( k.EQ.kend ) THEN
               evk = abs( sr( kwtop-iroffh+k-1 ) )
            ELSEIF( si( kwtop-iroffh+k-1 ).EQ.zero ) THEN
               evk = abs( sr( kwtop-iroffh+k-1 ) )
            ELSE
               evk = abs( sr( kwtop-iroffh+k-1 ) ) +
     $              abs( si( kwtop-iroffh+k-1 ) )
            END IF
*
            IF( evi.GE.evk ) THEN
               i = k
            ELSE
               mloc = 0
               sorted = .false.
               ifst = i
               ilst = k
               DO 160 j = 1, i-1
                  iwork( j ) = 1
                  mloc = mloc + 1
 160           CONTINUE
               IF( k.EQ.i+2 ) THEN
                  iwork( i ) = 0
                  iwork(i+1) = 0
               ELSE
                  iwork( i ) = 0
               END IF
               IF( k.NE.kend .AND. si( kwtop-iroffh+k-1 ).NE.zero ) THEN
                  iwork( k ) = 1
                  iwork(k+1) = 1
                  mloc = mloc + 2
               ELSE
                  iwork( k ) = 1
                  IF( k.LT.kend ) iwork(k+1) = 0
                  mloc = mloc + 1
               END IF
               DO 170 j = k+2, jw+iroffh
                  iwork( j ) = 0
 170           CONTINUE
               CALL pdtrord( 'Vectors', iwork, par, jw+iroffh, t, 1, 1,
     $              desct, v, 1, 1, descv, work, work(jw+iroffh+1), m,
     $              work(2*(jw+iroffh)+1), lwork-2*(jw+iroffh),
     $              iwork(nsel+1), liwork-nsel, ierr )
               CALL dcopy( jw, work(1+iroffh), 1, sr( kwtop ), 1 )
               CALL dcopy( jw, work(jw+2*iroffh+1), 1, si( kwtop ), 1 )
               IF( ierr.EQ.0 ) THEN
                  i = ilst
               ELSE
                  i = k
               END IF
            END IF
            IF( i.EQ.kend ) THEN
               k = i + 1
            ELSE IF( si( kwtop-iroffh+i-1 ).EQ.zero ) THEN
               k = i + 1
            ELSE
               k = i + 2
            END IF
            GO TO 150
         END IF
         GO TO 140
 180     CONTINUE
      END IF
*
*     Restore number of rows and columns of T matrix descriptor.
*
      desct( m_ ) = nw+iroffh
      desct( n_ ) = nh+iroffh
*
      IF( ns.LT.jw .OR. s.EQ.zero ) THEN
         IF( ns.GT.1 .AND. s.NE.zero ) THEN
*
*           Reflect spike back into lower triangle.
*
            rrows = numroc( ns+iroffh, nb, myrow, descv(rsrc_), nprow )
            rcols = numroc( 1, 1, mycol, descv(csrc_), npcol )
            CALL descinit( descr, ns+iroffh, 1, nb, 1, descv(rsrc_),
     $           descv(csrc_), ictxt, max(1, rrows), info )
            taurows = numroc( 1, 1, mycol, descv(rsrc_), nprow )
            taucols = numroc( jw+iroffh, nb, mycol, descv(csrc_),
     $           npcol )
            CALL descinit( desctau, 1, jw+iroffh, 1, nb, descv(rsrc_),
     $           descv(csrc_), ictxt, max(1, taurows), info )
*
            ir = 1
            itau = ir + descr( lld_ ) * rcols
            ipw  = itau + desctau( lld_ ) * taucols
*
            CALL pdlaset( 'All', ns+iroffh, 1, zero, zero, work(itau),
     $           1, 1, desctau )
*
            CALL pdcopy( ns, v, 1+iroffh, 1+iroffh, descv, descv(m_),
     $           work(ir), 1+iroffh, 1, descr, 1 )
            CALL pdlarfg( ns, beta, 1+iroffh, 1, work(ir), 2+iroffh, 1,
     $           descr, 1, work(itau+iroffh) )
            CALL pdelset( work(ir), 1+iroffh, 1, descr, one )
*
            CALL pdlaset( 'Lower', jw-2, jw-2, zero, zero, t, 3+iroffh,
     $           1+iroffh, desct )
*
            CALL pdlarf( 'Left', ns, jw, work(ir), 1+iroffh, 1, descr,
     $           1, work(itau+iroffh), t, 1+iroffh, 1+iroffh,
     $           desct, work( ipw ) )
            CALL pdlarf( 'Right', ns, ns, work(ir), 1+iroffh, 1, descr,
     $           1, work(itau+iroffh), t, 1+iroffh, 1+iroffh,
     $           desct, work( ipw ) )
            CALL pdlarf( 'Right', jw, ns, work(ir), 1+iroffh, 1, descr,
     $           1, work(itau+iroffh), v, 1+iroffh, 1+iroffh,
     $           descv, work( ipw ) )
*
            itau = 1
            ipw = itau + desctau( lld_ ) * taucols
            CALL pdgehrd( jw+iroffh, 1+iroffh, ns+iroffh, t, 1, 1,
     $           desct, work(itau), work( ipw ), lwork-ipw+1, info )
         END IF
*
*        Copy updated reduced window into place.
*
         IF( kwtop.GT.1 ) THEN
            CALL pdelget( 'All', '1-Tree', elem, v, 1+iroffh,
     $           1+iroffh, descv )
            CALL pdelset( h, kwtop, kwtop-1, desch, s*elem )
         END IF
         CALL pdlacpy( 'Upper', jw-1, jw-1, t, 1+iroffh+1, 1+iroffh,
     $        desct, h, kwtop+1, kwtop, desch )
         CALL pdlacpy( 'All', 1, jw, t, 1+iroffh, 1+iroffh, desct, h,
     $        kwtop, kwtop, desch )
         CALL pdlacpy( 'All', jw-1, 1, t, 1+iroffh+1, 1+iroffh+jw-1,
     $        desct, h, kwtop+1, kwtop+jw-1, desch )
*
*        Accumulate orthogonal matrix in order to update
*        H and Z, if requested.
*
         IF( ns.GT.1 .AND. s.NE.zero ) THEN
            CALL pdormhr( 'Right', 'No', jw+iroffh, ns+iroffh, 1+iroffh,
     $           ns+iroffh, t, 1, 1, desct, work(itau), v, 1,
     $           1, descv, work( ipw ), lwork-ipw+1, info )
         END IF
*
*        Update vertical slab in H.
*
         IF( wantt ) THEN
            ltop = 1
         ELSE
            ltop = ktop
         END IF
         kln = max( 0, kwtop-ltop )
         iroffhh = mod( ltop-1, nb )
         icoffhh = mod( kwtop-1, nb )
         hhrsrc = indxg2p( ltop, nb, myrow, desch(rsrc_), nprow )
         hhcsrc = indxg2p( kwtop, nb, mycol, desch(csrc_), npcol )
         hhrows = numroc( kln+iroffhh, nb, myrow, hhrsrc, nprow )
         hhcols = numroc( jw+icoffhh, nb, mycol, hhcsrc, npcol )
         CALL descinit( deschh, kln+iroffhh, jw+icoffhh, nb, nb,
     $        hhrsrc, hhcsrc, ictxt, max(1, hhrows), ierr )
         CALL pdgemm( 'No', 'No', kln, jw, jw, one, h, ltop,
     $        kwtop, desch, v, 1+iroffh, 1+iroffh, descv, zero,
     $        work, 1+iroffhh, 1+icoffhh, deschh )
         CALL pdlacpy( 'All', kln, jw, work, 1+iroffhh, 1+icoffhh,
     $        deschh, h, ltop, kwtop, desch )
*
*        Update horizontal slab in H.
*
         IF( wantt ) THEN
            kln = n-kbot
            iroffhh = mod( kwtop-1, nb )
            icoffhh = mod( kbot, nb )
            hhrsrc = indxg2p( kwtop, nb, myrow, desch(rsrc_), nprow )
            hhcsrc = indxg2p( kbot+1, nb, mycol, desch(csrc_), npcol )
            hhrows = numroc( jw+iroffhh, nb, myrow, hhrsrc, nprow )
            hhcols = numroc( kln+icoffhh, nb, mycol, hhcsrc, npcol )
            CALL descinit( deschh, jw+iroffhh, kln+icoffhh, nb, nb,
     $           hhrsrc, hhcsrc, ictxt, max(1, hhrows), ierr )
            CALL pdgemm( 'Tr', 'No', jw, kln, jw, one, v,
     $           1+iroffh, 1+iroffh, descv, h, kwtop, kbot+1,
     $           desch, zero, work, 1+iroffhh, 1+icoffhh, deschh )
            CALL pdlacpy( 'All', jw, kln, work, 1+iroffhh, 1+icoffhh,
     $           deschh, h, kwtop, kbot+1, desch )
         END IF
*
*        Update vertical slab in Z.
*
         IF( wantz ) THEN
            kln = ihiz-iloz+1
            iroffzz = mod( iloz-1, nb )
            icoffzz = mod( kwtop-1, nb )
            zzrsrc = indxg2p( iloz, nb, myrow, descz(rsrc_), nprow )
            zzcsrc = indxg2p( kwtop, nb, mycol, descz(csrc_), npcol )
            zzrows = numroc( kln+iroffzz, nb, myrow, zzrsrc, nprow )
            zzcols = numroc( jw+icoffzz, nb, mycol, zzcsrc, npcol )
            CALL descinit( desczz, kln+iroffzz, jw+icoffzz, nb, nb,
     $           zzrsrc, zzcsrc, ictxt, max(1, zzrows), ierr )
            CALL pdgemm( 'No', 'No', kln, jw, jw, one, z, iloz,
     $           kwtop, descz, v, 1+iroffh, 1+iroffh, descv,
     $           zero, work, 1+iroffzz, 1+icoffzz, desczz )
            CALL pdlacpy( 'All', kln, jw, work, 1+iroffzz, 1+icoffzz,
     $           desczz, z, iloz, kwtop, descz )
         END IF
      END IF
*
*     Return the number of deflations (ND) and the number of shifts (NS).
*     (Subtracting INFQR from the spike length takes care of the case of
*     a rare QR failure while calculating eigenvalues of the deflation
*     window.)
*
      nd = jw - ns
      ns = ns - infqr
*
*     Return optimal workspace.
*
      work( 1 ) = dble( lwkopt )
      iwork( 1 ) = ilwkopt + nsel
*
*     End of PDLAQR3
*
Here is the call graph for this function:
Here is the caller graph for this function: