SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pdlaqr3.f
Go to the documentation of this file.
1 RECURSIVE SUBROUTINE pdlaqr3( WANTT, WANTZ, N, KTOP, KBOT, NW, H,
2 $ DESCH, ILOZ, IHIZ, Z, DESCZ, NS, ND,
3 $ SR, SI, V, DESCV, NH, T, DESCT, NV,
4 $ WV, DESCW, WORK, LWORK, IWORK,
5 $ LIWORK, RECLEVEL )
6*
7* Contribution from the Department of Computing Science and HPC2N,
8* Umea University, Sweden
9*
10* -- ScaLAPACK auxiliary routine (version 2.0.1) --
11* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
12* Univ. of Colorado Denver and University of California, Berkeley.
13* January, 2012
14*
15 IMPLICIT NONE
16*
17* .. Scalar Arguments ..
18 INTEGER ihiz, iloz, kbot, ktop, lwork, n, nd, nh, ns,
19 $ nv, nw, liwork, reclevel
20 LOGICAL wantt, wantz
21* ..
22* .. Array Arguments ..
23 INTEGER desch( * ), descz( * ), desct( * ), descv( * ),
24 $ descw( * ), iwork( * )
25 DOUBLE PRECISION h( * ), si( kbot ), sr( kbot ), t( * ),
26 $ v( * ), work( * ), wv( * ),
27 $ z( * )
28* ..
29*
30* Purpose
31* =======
32*
33* Aggressive early deflation:
34*
35* This subroutine accepts as input an upper Hessenberg matrix H and
36* performs an orthogonal similarity transformation designed to detect
37* and deflate fully converged eigenvalues from a trailing principal
38* submatrix. On output H has been overwritten by a new Hessenberg
39* matrix that is a perturbation of an orthogonal similarity
40* transformation of H. It is to be hoped that the final version of H
41* has many zero subdiagonal entries.
42*
43* Notes
44* =====
45*
46* Each global data object is described by an associated description
47* vector. This vector stores the information required to establish
48* the mapping between an object element and its corresponding process
49* and memory location.
50*
51* Let A be a generic term for any 2D block cyclicly distributed array.
52* Such a global array has an associated description vector DESCA.
53* In the following comments, the character _ should be read as
54* "of the global array".
55*
56* NOTATION STORED IN EXPLANATION
57* --------------- -------------- --------------------------------------
58* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
59* DTYPE_A = 1.
60* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
61* the BLACS process grid A is distribu-
62* ted over. The context itself is glo-
63* bal, but the handle (the integer
64* value) may vary.
65* M_A (global) DESCA( M_ ) The number of rows in the global
66* array A.
67* N_A (global) DESCA( N_ ) The number of columns in the global
68* array A.
69* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
70* the rows of the array.
71* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
72* the columns of the array.
73* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
74* row of the array A is distributed.
75* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
76* first column of the array A is
77* distributed.
78* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
79* array. LLD_A >= MAX(1,LOCr(M_A)).
80*
81* Let K be the number of rows or columns of a distributed matrix,
82* and assume that its process grid has dimension p x q.
83* LOCr( K ) denotes the number of elements of K that a process
84* would receive if K were distributed over the p processes of its
85* process column.
86* Similarly, LOCc( K ) denotes the number of elements of K that a
87* process would receive if K were distributed over the q processes of
88* its process row.
89* The values of LOCr() and LOCc() may be determined via a call to the
90* ScaLAPACK tool function, NUMROC:
91* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
92* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
93* An upper bound for these quantities may be computed by:
94* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
95* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
96*
97* Arguments
98* =========
99*
100* WANTT (global input) LOGICAL
101* If .TRUE., then the Hessenberg matrix H is fully updated
102* so that the quasi-triangular Schur factor may be
103* computed (in cooperation with the calling subroutine).
104* If .FALSE., then only enough of H is updated to preserve
105* the eigenvalues.
106*
107* WANTZ (global input) LOGICAL
108* If .TRUE., then the orthogonal matrix Z is updated so
109* so that the orthogonal Schur factor may be computed
110* (in cooperation with the calling subroutine).
111* If .FALSE., then Z is not referenced.
112*
113* N (global input) INTEGER
114* The order of the matrix H and (if WANTZ is .TRUE.) the
115* order of the orthogonal matrix Z.
116*
117* KTOP (global input) INTEGER
118* It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
119* KBOT and KTOP together determine an isolated block
120* along the diagonal of the Hessenberg matrix.
121*
122* KBOT (global input) INTEGER
123* It is assumed without a check that either
124* KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together
125* determine an isolated block along the diagonal of the
126* Hessenberg matrix.
127*
128* NW (global input) INTEGER
129* Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1).
130*
131* H (local input/output) DOUBLE PRECISION array, dimension
132* (DESCH(LLD_),*)
133* On input the initial N-by-N section of H stores the
134* Hessenberg matrix undergoing aggressive early deflation.
135* On output H has been transformed by an orthogonal
136* similarity transformation, perturbed, and the returned
137* to Hessenberg form that (it is to be hoped) has some
138* zero subdiagonal entries.
139*
140* DESCH (global and local input) INTEGER array of dimension DLEN_.
141* The array descriptor for the distributed matrix H.
142*
143* ILOZ (global input) INTEGER
144* IHIZ (global input) INTEGER
145* Specify the rows of Z to which transformations must be
146* applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
147*
148* Z (input/output) DOUBLE PRECISION array, dimension
149* (DESCH(LLD_),*)
150* IF WANTZ is .TRUE., then on output, the orthogonal
151* similarity transformation mentioned above has been
152* accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
153* If WANTZ is .FALSE., then Z is unreferenced.
154*
155* DESCZ (global and local input) INTEGER array of dimension DLEN_.
156* The array descriptor for the distributed matrix Z.
157*
158* NS (global output) INTEGER
159* The number of unconverged (ie approximate) eigenvalues
160* returned in SR and SI that may be used as shifts by the
161* calling subroutine.
162*
163* ND (global output) INTEGER
164* The number of converged eigenvalues uncovered by this
165* subroutine.
166*
167* SR (global output) DOUBLE PRECISION array, dimension KBOT
168* SI (global output) DOUBLE PRECISION array, dimension KBOT
169* On output, the real and imaginary parts of approximate
170* eigenvalues that may be used for shifts are stored in
171* SR(KBOT-ND-NS+1) through SR(KBOT-ND) and
172* SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively.
173* The real and imaginary parts of converged eigenvalues
174* are stored in SR(KBOT-ND+1) through SR(KBOT) and
175* SI(KBOT-ND+1) through SI(KBOT), respectively.
176*
177* V (global workspace) DOUBLE PRECISION array, dimension
178* (DESCV(LLD_),*)
179* An NW-by-NW distributed work array.
180*
181* DESCV (global and local input) INTEGER array of dimension DLEN_.
182* The array descriptor for the distributed matrix V.
183*
184* NH (input) INTEGER scalar
185* The number of columns of T. NH.GE.NW.
186*
187* T (global workspace) DOUBLE PRECISION array, dimension
188* (DESCV(LLD_),*)
189*
190* DESCT (global and local input) INTEGER array of dimension DLEN_.
191* The array descriptor for the distributed matrix T.
192*
193* NV (global input) INTEGER
194* The number of rows of work array WV available for
195* workspace. NV.GE.NW.
196*
197* WV (global workspace) DOUBLE PRECISION array, dimension
198* (DESCW(LLD_),*)
199*
200* DESCW (global and local input) INTEGER array of dimension DLEN_.
201* The array descriptor for the distributed matrix WV.
202*
203* WORK (local workspace) DOUBLE PRECISION array, dimension LWORK.
204* On exit, WORK(1) is set to an estimate of the optimal value
205* of LWORK for the given values of N, NW, KTOP and KBOT.
206*
207* LWORK (local input) INTEGER
208* The dimension of the work array WORK. LWORK = 2*NW
209* suffices, but greater efficiency may result from larger
210* values of LWORK.
211*
212* If LWORK = -1, then a workspace query is assumed; PDLAQR3
213* only estimates the optimal workspace size for the given
214* values of N, NW, KTOP and KBOT. The estimate is returned
215* in WORK(1). No error message related to LWORK is issued
216* by XERBLA. Neither H nor Z are accessed.
217*
218* IWORK (local workspace) INTEGER array, dimension (LIWORK)
219*
220* LIWORK (local input) INTEGER
221* The length of the workspace array IWORK
222*
223* ================================================================
224* Based on contributions by
225* Robert Granat and Meiyue Shao,
226* Department of Computing Science and HPC2N,
227* Umea University, Sweden
228*
229* ================================================================
230* .. Parameters ..
231 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
232 $ lld_, mb_, m_, nb_, n_, rsrc_
233 INTEGER recmax
234 LOGICAL sortgrad
235 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
236 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
237 $ rsrc_ = 7, csrc_ = 8, lld_ = 9, recmax = 3,
238 $ sortgrad = .false. )
239 DOUBLE PRECISION zero, one
240 PARAMETER ( zero = 0.0d0, one = 1.0d0 )
241* ..
242* .. Local Scalars ..
243 DOUBLE PRECISION aa, bb, beta, cc, cs, dd, evi, evk, foo, s,
244 $ safmax, safmin, smlnum, sn, tau, ulp,
245 $ elem, elem1, elem2, elem3, r1, anorm, rnorm,
246 $ resaed
247 INTEGER i, ifst, ilst, info, infqr, j, jw, k, kcol,
248 $ kend, kln, krow, kwtop, ltop, lwk1, lwk2, lwk3,
249 $ lwkopt, nmin, lldh, lldz, lldt, lldv, lldwv,
250 $ ictxt, nprow, nmax, npcol, myrow, mycol, nb,
251 $ iroffh, m, rcols, taurows, rrows, taucols,
252 $ itau, ir, ipw, nprocs, mloc, iroffhh,
253 $ icoffhh, hhrsrc, hhcsrc, hhrows, hhcols,
254 $ iroffzz, icoffzz, zzrsrc, zzcsrc, zzrows,
255 $ zzcols, ierr, tzrows0, tzcols0, ierr0, ipt0,
256 $ ipz0, ipw0, nb2, round, lilst, kk, lilst0,
257 $ iwrk1, rsrc, csrc, lwk4, lwk5, iwrk2, lwk6,
258 $ lwk7, lwk8, ilwkopt, tzrows, tzcols, nsel,
259 $ npmin, ictxt_new, myrow_new, mycol_new
260 LOGICAL bulge, sorted, lquery
261* ..
262* .. Local Arrays ..
263 INTEGER par( 6 ), descr( dlen_ ),
264 $ desctau( dlen_ ), deschh( dlen_ ),
265 $ desczz( dlen_ ), desctz0( dlen_ ),
266 $ pmap( 64*64 )
267 DOUBLE PRECISION ddum( 1 )
268* ..
269* .. External Functions ..
270 DOUBLE PRECISION dlamch, pdlange
271 INTEGER pilaenvx, numroc, indxg2p, iceil, blacs_pnum
272 EXTERNAL dlamch, pilaenvx, numroc, indxg2p, pdlange,
273 $ mpi_wtime, iceil, blacs_pnum
274* ..
275* .. External Subroutines ..
276 EXTERNAL pdcopy, pdgehrd, pdgemm, dlabad, pdlacpy,
277 $ pdlaqr1, dlanv2, pdlaqr0, pdlarf, pdlarfg,
279 $ pdlamve, blacs_gridinfo, blacs_gridmap,
280 $ blacs_gridexit, pdgemr2d
281* ..
282* .. Intrinsic Functions ..
283 INTRINSIC abs, dble, int, max, min, sqrt
284* ..
285* .. Executable Statements ..
286 ictxt = desch( ctxt_ )
287 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
288 nprocs = nprow*npcol
289*
290* Extract local leading dimensions, blockfactors, offset for
291* keeping the alignment requirements and size of deflation window.
292*
293 lldh = desch( lld_ )
294 lldz = descz( lld_ )
295 lldt = desct( lld_ )
296 lldv = descv( lld_ )
297 lldwv = descw( lld_ )
298 nb = desch( mb_ )
299 iroffh = mod( ktop - 1, nb )
300 jw = min( nw, kbot-ktop+1 )
301 nsel = nb+jw
302*
303* Extract environment variables for parallel eigenvalue reordering.
304*
305 par(1) = pilaenvx(ictxt, 17, 'PDLAQR3', 'SV', jw, nb, -1, -1)
306 par(2) = pilaenvx(ictxt, 18, 'PDLAQR3', 'SV', jw, nb, -1, -1)
307 par(3) = pilaenvx(ictxt, 19, 'PDLAQR3', 'SV', jw, nb, -1, -1)
308 par(4) = pilaenvx(ictxt, 20, 'PDLAQR3', 'SV', jw, nb, -1, -1)
309 par(5) = pilaenvx(ictxt, 21, 'PDLAQR3', 'SV', jw, nb, -1, -1)
310 par(6) = pilaenvx(ictxt, 22, 'PDLAQR3', 'SV', jw, nb, -1, -1)
311*
312* Check if workspace query.
313*
314 lquery = lwork.EQ.-1 .OR. liwork.EQ.-1
315*
316* Estimate optimal workspace.
317*
318 IF( jw.LE.2 ) THEN
319 lwkopt = 1
320 ELSE
321*
322* Workspace query calls to PDGEHRD and PDORMHR.
323*
324 taurows = numroc( 1, 1, mycol, descv(rsrc_), nprow )
325 taucols = numroc( jw+iroffh, nb, mycol, descv(csrc_),
326 $ npcol )
327 CALL pdgehrd( jw, 1, jw, t, 1, 1, desct, work, work, -1,
328 $ info )
329 lwk1 = int( work( 1 ) ) + taurows*taucols
330*
331* Workspace query call to PDORMHR.
332*
333 CALL pdormhr( 'Right', 'No', jw, jw, 1, jw, t, 1, 1, desct,
334 $ work, v, 1, 1, descv, work, -1, info )
335 lwk2 = int( work( 1 ) )
336*
337* Workspace query call to PDLAQR0.
338*
339 nmin = pilaenvx( ictxt, 12, 'PDLAQR3', 'SV', jw, 1, jw, lwork )
340 nmax = ( n-1 ) / 3
341 IF( jw+iroffh.GT.nmin .AND. jw+iroffh.LE.nmax
342 $ .AND. reclevel.LT.recmax ) THEN
343 CALL pdlaqr0( .true., .true., jw+iroffh, 1+iroffh,
344 $ jw+iroffh, t, desct, sr, si, 1, jw, v, descv,
345 $ work, -1, iwork, liwork-nsel, infqr,
346 $ reclevel+1 )
347 lwk3 = int( work( 1 ) )
348 iwrk1 = iwork( 1 )
349 ELSE
350 rsrc = desct( rsrc_ )
351 csrc = desct( csrc_ )
352 desct( rsrc_ ) = 0
353 desct( csrc_ ) = 0
354 CALL pdlaqr1( .true., .true., jw+iroffh, 1, jw+iroffh, t,
355 $ desct, sr, si, 1, jw+iroffh, v, descv, work, -1,
356 $ iwork, liwork-nsel, infqr )
357 desct( rsrc_ ) = rsrc
358 desct( csrc_ ) = csrc
359 lwk3 = int( work( 1 ) )
360 iwrk1 = iwork( 1 )
361 END IF
362*
363* Workspace in case of alignment problems.
364*
365 tzrows0 = numroc( jw+iroffh, nb, myrow, 0, nprow )
366 tzcols0 = numroc( jw+iroffh, nb, mycol, 0, npcol )
367 lwk4 = 2 * tzrows0*tzcols0
368*
369* Workspace check for reordering.
370*
371 CALL pdtrord( 'Vectors', iwork, par, jw+iroffh, t, 1, 1,
372 $ desct, v, 1, 1, descv, ddum, ddum, mloc, work, -1,
373 $ iwork, liwork-nsel, info )
374 lwk5 = int( work( 1 ) )
375 iwrk2 = iwork( 1 )
376*
377* Extra workspace for reflecting back spike
378* (workspace for PDLARF approximated for simplicity).
379*
380 rrows = numroc( n+iroffh, nb, myrow, descv(rsrc_), nprow )
381 rcols = numroc( 1, 1, mycol, descv(csrc_), npcol )
382 lwk6 = rrows*rcols + taurows*taucols +
383 $ 2*iceil(iceil(jw+iroffh,nb),nprow)*nb
384 $ *iceil(iceil(jw+iroffh,nb),npcol)*nb
385*
386* Extra workspace needed by PBLAS update calls
387* (also estimated for simplicity).
388*
389 lwk7 = max( iceil(iceil(jw,nb),nprow)*nb *
390 $ iceil(iceil(n-kbot,nb),npcol)*nb,
391 $ iceil(iceil(ihiz-iloz+1,nb),nprow)*nb *
392 $ iceil(iceil(jw,nb),npcol)*nb,
393 $ iceil(iceil(kbot-jw,nb),nprow)*nb *
394 $ iceil(iceil(jw,nb),npcol)*nb )
395*
396* Residual check workspace.
397*
398 lwk8 = 0
399*
400* Optimal workspace.
401*
402 lwkopt = max( lwk1, lwk2, lwk3+lwk4, lwk5, lwk6, lwk7, lwk8 )
403 ilwkopt = max( iwrk1, iwrk2 )
404 END IF
405*
406* Quick return in case of workspace query.
407*
408 work( 1 ) = dble( lwkopt )
409*
410* IWORK(1:NSEL) is used as the array SELECT for PDTRORD.
411*
412 iwork( 1 ) = ilwkopt + nsel
413 IF( lquery )
414 $ RETURN
415*
416* Nothing to do for an empty active block ...
417 ns = 0
418 nd = 0
419 IF( ktop.GT.kbot )
420 $ RETURN
421* ... nor for an empty deflation window.
422*
423 IF( nw.LT.1 )
424 $ RETURN
425*
426* Machine constants.
427*
428 safmin = dlamch( 'SAFE MINIMUM' )
429 safmax = one / safmin
430 CALL dlabad( safmin, safmax )
431 ulp = dlamch( 'PRECISION' )
432 smlnum = safmin*( dble( n ) / ulp )
433*
434* Setup deflation window.
435*
436 jw = min( nw, kbot-ktop+1 )
437 kwtop = kbot - jw + 1
438 IF( kwtop.EQ.ktop ) THEN
439 s = zero
440 ELSE
441 CALL pdelget( 'All', '1-Tree', s, h, kwtop, kwtop-1, desch )
442 END IF
443*
444 IF( kbot.EQ.kwtop ) THEN
445*
446* 1-by-1 deflation window: not much to do.
447*
448 CALL pdelget( 'All', '1-Tree', sr( kwtop ), h, kwtop, kwtop,
449 $ desch )
450 si( kwtop ) = zero
451 ns = 1
452 nd = 0
453 IF( abs( s ).LE.max( smlnum, ulp*abs( sr( kwtop ) ) ) )
454 $ THEN
455 ns = 0
456 nd = 1
457 IF( kwtop.GT.ktop )
458 $ CALL pdelset( h, kwtop, kwtop-1 , desch, zero )
459 END IF
460 RETURN
461 END IF
462*
463 IF( kwtop.EQ.ktop .AND. kbot-kwtop.EQ.1 ) THEN
464*
465* 2-by-2 deflation window: a little more to do.
466*
467 CALL pdelget( 'All', '1-Tree', aa, h, kwtop, kwtop, desch )
468 CALL pdelget( 'All', '1-Tree', bb, h, kwtop, kwtop+1, desch )
469 CALL pdelget( 'All', '1-Tree', cc, h, kwtop+1, kwtop, desch )
470 CALL pdelget( 'All', '1-Tree', dd, h, kwtop+1, kwtop+1, desch )
471 CALL dlanv2( aa, bb, cc, dd, sr(kwtop), si(kwtop),
472 $ sr(kwtop+1), si(kwtop+1), cs, sn )
473 ns = 0
474 nd = 2
475 IF( cc.EQ.zero ) THEN
476 i = kwtop
477 IF( i+2.LE.n .AND. wantt )
478 $ CALL pdrot( n-i-1, h, i, i+2, desch, desch(m_), h, i+1,
479 $ i+2, desch, desch(m_), cs, sn, work, lwork, info )
480 IF( i.GT.1 )
481 $ CALL pdrot( i-1, h, 1, i, desch, 1, h, 1, i+1, desch, 1,
482 $ cs, sn, work, lwork, info )
483 IF( wantz )
484 $ CALL pdrot( ihiz-iloz+1, z, iloz, i, descz, 1, z, iloz,
485 $ i+1, descz, 1, cs, sn, work, lwork, info )
486 CALL pdelset( h, i, i, desch, aa )
487 CALL pdelset( h, i, i+1, desch, bb )
488 CALL pdelset( h, i+1, i, desch, cc )
489 CALL pdelset( h, i+1, i+1, desch, dd )
490 END IF
491 work( 1 ) = dble( lwkopt )
492 RETURN
493 END IF
494*
495* Calculate new value for IROFFH in case deflation window
496* was adjusted.
497*
498 iroffh = mod( kwtop - 1, nb )
499*
500* Adjust number of rows and columns of T matrix descriptor
501* to prepare for call to PDBTRORD.
502*
503 desct( m_ ) = jw+iroffh
504 desct( n_ ) = jw+iroffh
505*
506* Convert to spike-triangular form. (In case of a rare QR failure,
507* this routine continues to do aggressive early deflation using that
508* part of the deflation window that converged using INFQR here and
509* there to keep track.)
510*
511* Copy the trailing submatrix to the working space.
512*
513 CALL pdlaset( 'All', iroffh, jw+iroffh, zero, one, t, 1, 1,
514 $ desct )
515 CALL pdlaset( 'All', jw, iroffh, zero, zero, t, 1+iroffh, 1,
516 $ desct )
517 CALL pdlacpy( 'All', 1, jw, h, kwtop, kwtop, desch, t, 1+iroffh,
518 $ 1+iroffh, desct )
519 CALL pdlacpy( 'Upper', jw-1, jw-1, h, kwtop+1, kwtop, desch, t,
520 $ 1+iroffh+1, 1+iroffh, desct )
521 IF( jw.GT.2 )
522 $ CALL pdlaset( 'Lower', jw-2, jw-2, zero, zero, t, 1+iroffh+2,
523 $ 1+iroffh, desct )
524 CALL pdlacpy( 'All', jw-1, 1, h, kwtop+1, kwtop+jw-1, desch, t,
525 $ 1+iroffh+1, 1+iroffh+jw-1, desct )
526*
527* Initialize the working orthogonal matrix.
528*
529 CALL pdlaset( 'All', jw+iroffh, jw+iroffh, zero, one, v, 1, 1,
530 $ descv )
531*
532* Compute the Schur form of T.
533*
534 npmin = pilaenvx( ictxt, 23, 'PDLAQR3', 'SV', jw, nb, nprow,
535 $ npcol )
536 nmin = pilaenvx( ictxt, 12, 'PDLAQR3', 'SV', jw, 1, jw, lwork )
537 nmax = ( n-1 ) / 3
538 IF( min(nprow, npcol).LE.npmin+1 .OR. reclevel.GE.1 ) THEN
539*
540* The AED window is large enough.
541* Compute the Schur decomposition with all processors.
542*
543 IF( jw+iroffh.GT.nmin .AND. jw+iroffh.LE.nmax
544 $ .AND. reclevel.LT.recmax ) THEN
545 CALL pdlaqr0( .true., .true., jw+iroffh, 1+iroffh,
546 $ jw+iroffh, t, desct, sr( kwtop-iroffh ),
547 $ si( kwtop-iroffh ), 1+iroffh, jw+iroffh, v, descv,
548 $ work, lwork, iwork(nsel+1), liwork-nsel, infqr,
549 $ reclevel+1 )
550 ELSE
551 IF( desct(rsrc_).EQ.0 .AND. desct(csrc_).EQ.0 ) THEN
552 IF( jw+iroffh.GT.desct( mb_ ) ) THEN
553 CALL pdlaqr1( .true., .true., jw+iroffh, 1,
554 $ jw+iroffh, t, desct, sr( kwtop-iroffh ),
555 $ si( kwtop-iroffh ), 1, jw+iroffh, v,
556 $ descv, work, lwork, iwork(nsel+1), liwork-nsel,
557 $ infqr )
558 ELSE
559 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
560 CALL dlahqr( .true., .true., jw+iroffh, 1+iroffh,
561 $ jw+iroffh, t, desct(lld_),
562 $ sr( kwtop-iroffh ), si( kwtop-iroffh ),
563 $ 1+iroffh, jw+iroffh, v, descv(lld_), infqr )
564 ELSE
565 infqr = 0
566 END IF
567 IF( nprocs.GT.1 )
568 $ CALL igamn2d( ictxt, 'All', '1-Tree', 1, 1, infqr,
569 $ 1, -1, -1, -1, -1, -1 )
570 END IF
571 ELSEIF( jw+iroffh.LE.desct( mb_ ) ) THEN
572 IF( myrow.EQ.desct(rsrc_) .AND. mycol.EQ.desct(csrc_) )
573 $ THEN
574 CALL dlahqr( .true., .true., jw+iroffh, 1+iroffh,
575 $ jw+iroffh, t, desct(lld_),
576 $ sr( kwtop-iroffh ), si( kwtop-iroffh ),
577 $ 1+iroffh, jw+iroffh, v, descv(lld_), infqr )
578 ELSE
579 infqr = 0
580 END IF
581 IF( nprocs.GT.1 )
582 $ CALL igamn2d( ictxt, 'All', '1-Tree', 1, 1, infqr,
583 $ 1, -1, -1, -1, -1, -1 )
584 ELSE
585 tzrows0 = numroc( jw+iroffh, nb, myrow, 0, nprow )
586 tzcols0 = numroc( jw+iroffh, nb, mycol, 0, npcol )
587 CALL descinit( desctz0, jw+iroffh, jw+iroffh, nb, nb, 0,
588 $ 0, ictxt, max(1,tzrows0), ierr0 )
589 ipt0 = 1
590 ipz0 = ipt0 + max(1,tzrows0)*tzcols0
591 ipw0 = ipz0 + max(1,tzrows0)*tzcols0
592 CALL pdlamve( 'All', jw+iroffh, jw+iroffh, t, 1, 1,
593 $ desct, work(ipt0), 1, 1, desctz0, work(ipw0) )
594 CALL pdlaset( 'All', jw+iroffh, jw+iroffh, zero, one,
595 $ work(ipz0), 1, 1, desctz0 )
596 CALL pdlaqr1( .true., .true., jw+iroffh, 1,
597 $ jw+iroffh, work(ipt0), desctz0,
598 $ sr( kwtop-iroffh ), si( kwtop-iroffh ),
599 $ 1, jw+iroffh, work(ipz0),
600 $ desctz0, work(ipw0), lwork-ipw0+1, iwork(nsel+1),
601 $ liwork-nsel, infqr )
602 CALL pdlamve( 'All', jw+iroffh, jw+iroffh, work(ipt0), 1,
603 $ 1, desctz0, t, 1, 1, desct, work(ipw0) )
604 CALL pdlamve( 'All', jw+iroffh, jw+iroffh, work(ipz0), 1,
605 $ 1, desctz0, v, 1, 1, descv, work(ipw0) )
606 END IF
607 END IF
608 ELSE
609*
610* The AED window is too small.
611* Redistribute the AED window to a subgrid
612* and do the computation on the subgrid.
613*
614 ictxt_new = ictxt
615 DO 20 i = 0, npmin-1
616 DO 10 j = 0, npmin-1
617 pmap( j+1+i*npmin ) = blacs_pnum( ictxt, i, j )
618 10 CONTINUE
619 20 CONTINUE
620 CALL blacs_gridmap( ictxt_new, pmap, npmin, npmin, npmin )
621 CALL blacs_gridinfo( ictxt_new, npmin, npmin, myrow_new,
622 $ mycol_new )
623 IF( myrow.GE.npmin .OR. mycol.GE.npmin ) ictxt_new = -1
624 IF( ictxt_new.GE.0 ) THEN
625 tzrows0 = numroc( jw, nb, myrow_new, 0, npmin )
626 tzcols0 = numroc( jw, nb, mycol_new, 0, npmin )
627 CALL descinit( desctz0, jw, jw, nb, nb, 0,
628 $ 0, ictxt_new, max(1,tzrows0), ierr0 )
629 ipt0 = 1
630 ipz0 = ipt0 + max(1,tzrows0)*max(1,tzcols0)
631 ipw0 = ipz0 + max(1,tzrows0)*max(1,tzcols0)
632 ELSE
633 ipt0 = 1
634 ipz0 = 2
635 ipw0 = 3
636 desctz0( ctxt_ ) = -1
637 infqr = 0
638 END IF
639 CALL pdgemr2d( jw, jw, t, 1+iroffh, 1+iroffh, desct,
640 $ work(ipt0), 1, 1, desctz0, ictxt )
641 IF( ictxt_new.GE.0 ) THEN
642 CALL pdlaset( 'All', jw, jw, zero, one, work(ipz0), 1, 1,
643 $ desctz0 )
644 nmin = pilaenvx( ictxt_new, 12, 'PDLAQR3', 'SV', jw, 1, jw,
645 $ lwork )
646 IF( jw.GT.nmin .AND. jw.LE.nmax .AND. reclevel.LT.1 ) THEN
647 CALL pdlaqr0( .true., .true., jw, 1, jw, work(ipt0),
648 $ desctz0, sr( kwtop ), si( kwtop ), 1, jw,
649 $ work(ipz0), desctz0, work(ipw0), lwork-ipw0+1,
650 $ iwork(nsel+1), liwork-nsel, infqr,
651 $ reclevel+1 )
652 ELSE
653 CALL pdlaqr1( .true., .true., jw, 1, jw, work(ipt0),
654 $ desctz0, sr( kwtop ), si( kwtop ), 1, jw,
655 $ work(ipz0), desctz0, work(ipw0), lwork-ipw0+1,
656 $ iwork(nsel+1), liwork-nsel, infqr )
657 END IF
658 END IF
659 CALL pdgemr2d( jw, jw, work(ipt0), 1, 1, desctz0, t, 1+iroffh,
660 $ 1+iroffh, desct, ictxt )
661 CALL pdgemr2d( jw, jw, work(ipz0), 1, 1, desctz0, v, 1+iroffh,
662 $ 1+iroffh, descv, ictxt )
663 IF( ictxt_new.GE.0 )
664 $ CALL blacs_gridexit( ictxt_new )
665 IF( myrow+mycol.GT.0 ) THEN
666 DO 40 j = 0, jw-1
667 sr( kwtop+j ) = zero
668 si( kwtop+j ) = zero
669 40 CONTINUE
670 END IF
671 CALL igamn2d( ictxt, 'All', '1-Tree', 1, 1, infqr, 1, -1, -1,
672 $ -1, -1, -1 )
673 CALL dgsum2d( ictxt, 'All', ' ', jw, 1, sr(kwtop), jw, -1, -1 )
674 CALL dgsum2d( ictxt, 'All', ' ', jw, 1, si(kwtop), jw, -1, -1 )
675 END IF
676*
677* Adjust INFQR for offset from block border in submatrices.
678*
679 IF( infqr.NE.0 )
680 $ infqr = infqr - iroffh
681*
682* PDTRORD needs a clean margin near the diagonal.
683*
684 DO 50 j = 1, jw - 3
685 CALL pdelset( t, j+2, j, desct, zero )
686 CALL pdelset( t, j+3, j, desct, zero )
687 50 CONTINUE
688 IF( jw.GT.2 )
689 $ CALL pdelset( t, jw, jw-2, desct, zero )
690*
691* Check local residual for AED Schur decomposition.
692*
693 resaed = 0.0d+00
694*
695* Clean up the array SELECT for PDTRORD.
696*
697 DO 60 j = 1, nsel
698 iwork( j ) = 0
699 60 CONTINUE
700*
701* Set local M counter to zero.
702*
703 mloc = 0
704*
705* Outer deflation detection loop (label 80).
706* In this loop a bunch of undeflatable eigenvalues
707* are moved simultaneously.
708*
709 DO 70 j = 1, iroffh + infqr
710 iwork( j ) = 1
711 70 CONTINUE
712*
713 ns = jw
714 ilst = infqr + 1 + iroffh
715 IF( ilst.GT.1 ) THEN
716 CALL pdelget( 'All', '1-Tree', elem, t, ilst, ilst-1, desct )
717 bulge = elem.NE.zero
718 IF( bulge ) ilst = ilst+1
719 END IF
720*
721 80 CONTINUE
722 IF( ilst.LE.ns+iroffh ) THEN
723*
724* Find the top-left corner of the local window.
725*
726 lilst = max(ilst,ns+iroffh-nb+1)
727 IF( lilst.GT.1 ) THEN
728 CALL pdelget( 'All', '1-Tree', elem, t, lilst, lilst-1,
729 $ desct )
730 bulge = elem.NE.zero
731 IF( bulge ) lilst = lilst+1
732 END IF
733*
734* Lock all eigenvalues outside the local window.
735*
736 DO 90 j = iroffh+1, lilst-1
737 iwork( j ) = 1
738 90 CONTINUE
739 lilst0 = lilst
740*
741* Inner deflation detection loop (label 100).
742* In this loop, the undeflatable eigenvalues are moved to the
743* top-left corner of the local window.
744*
745 100 CONTINUE
746 IF( lilst.LE.ns+iroffh ) THEN
747 IF( ns.EQ.1 ) THEN
748 bulge = .false.
749 ELSE
750 CALL pdelget( 'All', '1-Tree', elem, t, ns+iroffh,
751 $ ns+iroffh-1, desct )
752 bulge = elem.NE.zero
753 END IF
754*
755* Small spike tip test for deflation.
756*
757 IF( .NOT.bulge ) THEN
758*
759* Real eigenvalue.
760*
761 CALL pdelget( 'All', '1-Tree', elem, t, ns+iroffh,
762 $ ns+iroffh, desct )
763 foo = abs( elem )
764 IF( foo.EQ.zero )
765 $ foo = abs( s )
766 CALL pdelget( 'All', '1-Tree', elem, v, 1+iroffh,
767 $ ns+iroffh, descv )
768 IF( abs( s*elem ).LE.max( smlnum, ulp*foo ) ) THEN
769*
770* Deflatable.
771*
772 ns = ns - 1
773 ELSE
774*
775* Undeflatable: move it up out of the way.
776*
777 ifst = ns
778 DO 110 j = lilst, jw+iroffh
779 iwork( j ) = 0
780 110 CONTINUE
781 iwork( ifst+iroffh ) = 1
782 CALL pdtrord( 'Vectors', iwork, par, jw+iroffh, t, 1,
783 $ 1, desct, v, 1, 1, descv, work,
784 $ work(jw+iroffh+1), mloc,
785 $ work(2*(jw+iroffh)+1), lwork-2*(jw+iroffh),
786 $ iwork(nsel+1), liwork-nsel, info )
787*
788* Adjust the array SELECT explicitly so that it does not
789* rely on the output of PDTRORD.
790*
791 iwork( ifst+iroffh ) = 0
792 iwork( lilst ) = 1
793 lilst = lilst + 1
794*
795* In case of a rare exchange failure, adjust the
796* pointers ILST and LILST to the current place to avoid
797* unexpected behaviors.
798*
799 IF( info.NE.0 ) THEN
800 lilst = max(info, lilst)
801 ilst = max(info, ilst)
802 END IF
803 END IF
804 ELSE
805*
806* Complex conjugate pair.
807*
808 CALL pdelget( 'All', '1-Tree', elem1, t, ns+iroffh,
809 $ ns+iroffh, desct )
810 CALL pdelget( 'All', '1-Tree', elem2, t, ns+iroffh,
811 $ ns+iroffh-1, desct )
812 CALL pdelget( 'All', '1-Tree', elem3, t, ns+iroffh-1,
813 $ ns+iroffh, desct )
814 foo = abs( elem1 ) + sqrt( abs( elem2 ) )*
815 $ sqrt( abs( elem3 ) )
816 IF( foo.EQ.zero )
817 $ foo = abs( s )
818 CALL pdelget( 'All', '1-Tree', elem1, v, 1+iroffh,
819 $ ns+iroffh, descv )
820 CALL pdelget( 'All', '1-Tree', elem2, v, 1+iroffh,
821 $ ns+iroffh-1, descv )
822 IF( max( abs( s*elem1 ), abs( s*elem2 ) ).LE.
823 $ max( smlnum, ulp*foo ) ) THEN
824*
825* Deflatable.
826*
827 ns = ns - 2
828 ELSE
829*
830* Undeflatable: move them up out of the way.
831*
832 ifst = ns
833 DO 120 j = lilst, jw+iroffh
834 iwork( j ) = 0
835 120 CONTINUE
836 iwork( ifst+iroffh ) = 1
837 iwork( ifst+iroffh-1 ) = 1
838 CALL pdtrord( 'Vectors', iwork, par, jw+iroffh, t, 1,
839 $ 1, desct, v, 1, 1, descv, work,
840 $ work(jw+iroffh+1), mloc,
841 $ work(2*(jw+iroffh)+1), lwork-2*(jw+iroffh),
842 $ iwork(nsel+1), liwork-nsel, info )
843*
844* Adjust the array SELECT explicitly so that it does not
845* rely on the output of PDTRORD.
846*
847 iwork( ifst+iroffh ) = 0
848 iwork( ifst+iroffh-1 ) = 0
849 iwork( lilst ) = 1
850 iwork( lilst+1 ) = 1
851 lilst = lilst + 2
852*
853* In case of a rare exchange failure, adjust the
854* pointers ILST and LILST to the current place to avoid
855* unexpected behaviors.
856*
857 IF( info.NE.0 ) THEN
858 lilst = max(info, lilst)
859 ilst = max(info, ilst)
860 END IF
861 END IF
862 END IF
863*
864* End of inner deflation detection loop.
865*
866 GO TO 100
867 END IF
868*
869* Unlock the eigenvalues outside the local window.
870* Then undeflatable eigenvalues are moved to the proper position.
871*
872 DO 130 j = ilst, lilst0-1
873 iwork( j ) = 0
874 130 CONTINUE
875 CALL pdtrord( 'Vectors', iwork, par, jw+iroffh, t, 1, 1,
876 $ desct, v, 1, 1, descv, work, work(jw+iroffh+1),
877 $ m, work(2*(jw+iroffh)+1), lwork-2*(jw+iroffh),
878 $ iwork(nsel+1), liwork-nsel, info )
879 ilst = m + 1
880*
881* In case of a rare exchange failure, adjust the pointer ILST to
882* the current place to avoid unexpected behaviors.
883*
884 IF( info.NE.0 )
885 $ ilst = max(info, ilst)
886*
887* End of outer deflation detection loop.
888*
889 GO TO 80
890 END IF
891
892*
893* Post-reordering step: copy output eigenvalues to output.
894*
895 CALL dcopy( jw, work(1+iroffh), 1, sr( kwtop ), 1 )
896 CALL dcopy( jw, work(jw+2*iroffh+1), 1, si( kwtop ), 1 )
897*
898* Check local residual for reordered AED Schur decomposition.
899*
900 resaed = 0.0d+00
901*
902* Return to Hessenberg form.
903*
904 IF( ns.EQ.0 )
905 $ s = zero
906*
907 IF( ns.LT.jw .AND. sortgrad ) THEN
908*
909* Sorting diagonal blocks of T improves accuracy for
910* graded matrices. Bubble sort deals well with exchange
911* failures. Eigenvalues/shifts from T are also restored.
912*
913 round = 0
914 sorted = .false.
915 i = ns + 1 + iroffh
916 140 CONTINUE
917 IF( sorted )
918 $ GO TO 180
919 sorted = .true.
920 round = round + 1
921*
922 kend = i - 1
923 i = infqr + 1 + iroffh
924 IF( i.EQ.ns+iroffh ) THEN
925 k = i + 1
926 ELSE IF( si( kwtop-iroffh + i-1 ).EQ.zero ) THEN
927 k = i + 1
928 ELSE
929 k = i + 2
930 END IF
931 150 CONTINUE
932 IF( k.LE.kend ) THEN
933 IF( k.EQ.i+1 ) THEN
934 evi = abs( sr( kwtop-iroffh+i-1 ) )
935 ELSE
936 evi = abs( sr( kwtop-iroffh+i-1 ) ) +
937 $ abs( si( kwtop-iroffh+i-1 ) )
938 END IF
939*
940 IF( k.EQ.kend ) THEN
941 evk = abs( sr( kwtop-iroffh+k-1 ) )
942 ELSEIF( si( kwtop-iroffh+k-1 ).EQ.zero ) THEN
943 evk = abs( sr( kwtop-iroffh+k-1 ) )
944 ELSE
945 evk = abs( sr( kwtop-iroffh+k-1 ) ) +
946 $ abs( si( kwtop-iroffh+k-1 ) )
947 END IF
948*
949 IF( evi.GE.evk ) THEN
950 i = k
951 ELSE
952 mloc = 0
953 sorted = .false.
954 ifst = i
955 ilst = k
956 DO 160 j = 1, i-1
957 iwork( j ) = 1
958 mloc = mloc + 1
959 160 CONTINUE
960 IF( k.EQ.i+2 ) THEN
961 iwork( i ) = 0
962 iwork(i+1) = 0
963 ELSE
964 iwork( i ) = 0
965 END IF
966 IF( k.NE.kend .AND. si( kwtop-iroffh+k-1 ).NE.zero ) THEN
967 iwork( k ) = 1
968 iwork(k+1) = 1
969 mloc = mloc + 2
970 ELSE
971 iwork( k ) = 1
972 IF( k.LT.kend ) iwork(k+1) = 0
973 mloc = mloc + 1
974 END IF
975 DO 170 j = k+2, jw+iroffh
976 iwork( j ) = 0
977 170 CONTINUE
978 CALL pdtrord( 'Vectors', iwork, par, jw+iroffh, t, 1, 1,
979 $ desct, v, 1, 1, descv, work, work(jw+iroffh+1), m,
980 $ work(2*(jw+iroffh)+1), lwork-2*(jw+iroffh),
981 $ iwork(nsel+1), liwork-nsel, ierr )
982 CALL dcopy( jw, work(1+iroffh), 1, sr( kwtop ), 1 )
983 CALL dcopy( jw, work(jw+2*iroffh+1), 1, si( kwtop ), 1 )
984 IF( ierr.EQ.0 ) THEN
985 i = ilst
986 ELSE
987 i = k
988 END IF
989 END IF
990 IF( i.EQ.kend ) THEN
991 k = i + 1
992 ELSE IF( si( kwtop-iroffh+i-1 ).EQ.zero ) THEN
993 k = i + 1
994 ELSE
995 k = i + 2
996 END IF
997 GO TO 150
998 END IF
999 GO TO 140
1000 180 CONTINUE
1001 END IF
1002*
1003* Restore number of rows and columns of T matrix descriptor.
1004*
1005 desct( m_ ) = nw+iroffh
1006 desct( n_ ) = nh+iroffh
1007*
1008 IF( ns.LT.jw .OR. s.EQ.zero ) THEN
1009 IF( ns.GT.1 .AND. s.NE.zero ) THEN
1010*
1011* Reflect spike back into lower triangle.
1012*
1013 rrows = numroc( ns+iroffh, nb, myrow, descv(rsrc_), nprow )
1014 rcols = numroc( 1, 1, mycol, descv(csrc_), npcol )
1015 CALL descinit( descr, ns+iroffh, 1, nb, 1, descv(rsrc_),
1016 $ descv(csrc_), ictxt, max(1, rrows), info )
1017 taurows = numroc( 1, 1, mycol, descv(rsrc_), nprow )
1018 taucols = numroc( jw+iroffh, nb, mycol, descv(csrc_),
1019 $ npcol )
1020 CALL descinit( desctau, 1, jw+iroffh, 1, nb, descv(rsrc_),
1021 $ descv(csrc_), ictxt, max(1, taurows), info )
1022*
1023 ir = 1
1024 itau = ir + descr( lld_ ) * rcols
1025 ipw = itau + desctau( lld_ ) * taucols
1026*
1027 CALL pdlaset( 'All', ns+iroffh, 1, zero, zero, work(itau),
1028 $ 1, 1, desctau )
1029*
1030 CALL pdcopy( ns, v, 1+iroffh, 1+iroffh, descv, descv(m_),
1031 $ work(ir), 1+iroffh, 1, descr, 1 )
1032 CALL pdlarfg( ns, beta, 1+iroffh, 1, work(ir), 2+iroffh, 1,
1033 $ descr, 1, work(itau+iroffh) )
1034 CALL pdelset( work(ir), 1+iroffh, 1, descr, one )
1035*
1036 CALL pdlaset( 'Lower', jw-2, jw-2, zero, zero, t, 3+iroffh,
1037 $ 1+iroffh, desct )
1038*
1039 CALL pdlarf( 'Left', ns, jw, work(ir), 1+iroffh, 1, descr,
1040 $ 1, work(itau+iroffh), t, 1+iroffh, 1+iroffh,
1041 $ desct, work( ipw ) )
1042 CALL pdlarf( 'Right', ns, ns, work(ir), 1+iroffh, 1, descr,
1043 $ 1, work(itau+iroffh), t, 1+iroffh, 1+iroffh,
1044 $ desct, work( ipw ) )
1045 CALL pdlarf( 'Right', jw, ns, work(ir), 1+iroffh, 1, descr,
1046 $ 1, work(itau+iroffh), v, 1+iroffh, 1+iroffh,
1047 $ descv, work( ipw ) )
1048*
1049 itau = 1
1050 ipw = itau + desctau( lld_ ) * taucols
1051 CALL pdgehrd( jw+iroffh, 1+iroffh, ns+iroffh, t, 1, 1,
1052 $ desct, work(itau), work( ipw ), lwork-ipw+1, info )
1053 END IF
1054*
1055* Copy updated reduced window into place.
1056*
1057 IF( kwtop.GT.1 ) THEN
1058 CALL pdelget( 'All', '1-Tree', elem, v, 1+iroffh,
1059 $ 1+iroffh, descv )
1060 CALL pdelset( h, kwtop, kwtop-1, desch, s*elem )
1061 END IF
1062 CALL pdlacpy( 'Upper', jw-1, jw-1, t, 1+iroffh+1, 1+iroffh,
1063 $ desct, h, kwtop+1, kwtop, desch )
1064 CALL pdlacpy( 'All', 1, jw, t, 1+iroffh, 1+iroffh, desct, h,
1065 $ kwtop, kwtop, desch )
1066 CALL pdlacpy( 'All', jw-1, 1, t, 1+iroffh+1, 1+iroffh+jw-1,
1067 $ desct, h, kwtop+1, kwtop+jw-1, desch )
1068*
1069* Accumulate orthogonal matrix in order to update
1070* H and Z, if requested.
1071*
1072 IF( ns.GT.1 .AND. s.NE.zero ) THEN
1073 CALL pdormhr( 'Right', 'No', jw+iroffh, ns+iroffh, 1+iroffh,
1074 $ ns+iroffh, t, 1, 1, desct, work(itau), v, 1,
1075 $ 1, descv, work( ipw ), lwork-ipw+1, info )
1076 END IF
1077*
1078* Update vertical slab in H.
1079*
1080 IF( wantt ) THEN
1081 ltop = 1
1082 ELSE
1083 ltop = ktop
1084 END IF
1085 kln = max( 0, kwtop-ltop )
1086 iroffhh = mod( ltop-1, nb )
1087 icoffhh = mod( kwtop-1, nb )
1088 hhrsrc = indxg2p( ltop, nb, myrow, desch(rsrc_), nprow )
1089 hhcsrc = indxg2p( kwtop, nb, mycol, desch(csrc_), npcol )
1090 hhrows = numroc( kln+iroffhh, nb, myrow, hhrsrc, nprow )
1091 hhcols = numroc( jw+icoffhh, nb, mycol, hhcsrc, npcol )
1092 CALL descinit( deschh, kln+iroffhh, jw+icoffhh, nb, nb,
1093 $ hhrsrc, hhcsrc, ictxt, max(1, hhrows), ierr )
1094 CALL pdgemm( 'No', 'No', kln, jw, jw, one, h, ltop,
1095 $ kwtop, desch, v, 1+iroffh, 1+iroffh, descv, zero,
1096 $ work, 1+iroffhh, 1+icoffhh, deschh )
1097 CALL pdlacpy( 'All', kln, jw, work, 1+iroffhh, 1+icoffhh,
1098 $ deschh, h, ltop, kwtop, desch )
1099*
1100* Update horizontal slab in H.
1101*
1102 IF( wantt ) THEN
1103 kln = n-kbot
1104 iroffhh = mod( kwtop-1, nb )
1105 icoffhh = mod( kbot, nb )
1106 hhrsrc = indxg2p( kwtop, nb, myrow, desch(rsrc_), nprow )
1107 hhcsrc = indxg2p( kbot+1, nb, mycol, desch(csrc_), npcol )
1108 hhrows = numroc( jw+iroffhh, nb, myrow, hhrsrc, nprow )
1109 hhcols = numroc( kln+icoffhh, nb, mycol, hhcsrc, npcol )
1110 CALL descinit( deschh, jw+iroffhh, kln+icoffhh, nb, nb,
1111 $ hhrsrc, hhcsrc, ictxt, max(1, hhrows), ierr )
1112 CALL pdgemm( 'Tr', 'No', jw, kln, jw, one, v,
1113 $ 1+iroffh, 1+iroffh, descv, h, kwtop, kbot+1,
1114 $ desch, zero, work, 1+iroffhh, 1+icoffhh, deschh )
1115 CALL pdlacpy( 'All', jw, kln, work, 1+iroffhh, 1+icoffhh,
1116 $ deschh, h, kwtop, kbot+1, desch )
1117 END IF
1118*
1119* Update vertical slab in Z.
1120*
1121 IF( wantz ) THEN
1122 kln = ihiz-iloz+1
1123 iroffzz = mod( iloz-1, nb )
1124 icoffzz = mod( kwtop-1, nb )
1125 zzrsrc = indxg2p( iloz, nb, myrow, descz(rsrc_), nprow )
1126 zzcsrc = indxg2p( kwtop, nb, mycol, descz(csrc_), npcol )
1127 zzrows = numroc( kln+iroffzz, nb, myrow, zzrsrc, nprow )
1128 zzcols = numroc( jw+icoffzz, nb, mycol, zzcsrc, npcol )
1129 CALL descinit( desczz, kln+iroffzz, jw+icoffzz, nb, nb,
1130 $ zzrsrc, zzcsrc, ictxt, max(1, zzrows), ierr )
1131 CALL pdgemm( 'No', 'No', kln, jw, jw, one, z, iloz,
1132 $ kwtop, descz, v, 1+iroffh, 1+iroffh, descv,
1133 $ zero, work, 1+iroffzz, 1+icoffzz, desczz )
1134 CALL pdlacpy( 'All', kln, jw, work, 1+iroffzz, 1+icoffzz,
1135 $ desczz, z, iloz, kwtop, descz )
1136 END IF
1137 END IF
1138*
1139* Return the number of deflations (ND) and the number of shifts (NS).
1140* (Subtracting INFQR from the spike length takes care of the case of
1141* a rare QR failure while calculating eigenvalues of the deflation
1142* window.)
1143*
1144 nd = jw - ns
1145 ns = ns - infqr
1146*
1147* Return optimal workspace.
1148*
1149 work( 1 ) = dble( lwkopt )
1150 iwork( 1 ) = ilwkopt + nsel
1151*
1152* End of PDLAQR3
1153*
1154 END
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3
integer function iceil(inum, idenom)
Definition iceil.f:2
integer function indxg2p(indxglob, nb, iproc, isrcproc, nprocs)
Definition indxg2p.f:2
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pdlaset(uplo, m, n, alpha, beta, a, ia, ja, desca)
Definition pdblastst.f:6862
subroutine pdelget(scope, top, alpha, a, ia, ja, desca)
Definition pdelget.f:2
subroutine pdelset(a, ia, ja, desca, alpha)
Definition pdelset.f:2
subroutine pdgehrd(n, ilo, ihi, a, ia, ja, desca, tau, work, lwork, info)
Definition pdgehrd.f:3
subroutine pdlacpy(uplo, m, n, a, ia, ja, desca, b, ib, jb, descb)
Definition pdlacpy.f:3
subroutine pdlamve(uplo, m, n, a, ia, ja, desca, b, ib, jb, descb, dwork)
Definition pdlamve.f:3
double precision function pdlange(norm, m, n, a, ia, ja, desca, work)
Definition pdlange.f:3
recursive subroutine pdlaqr0(wantt, wantz, n, ilo, ihi, h, desch, wr, wi, iloz, ihiz, z, descz, work, lwork, iwork, liwork, info, reclevel)
Definition pdlaqr0.f:4
recursive subroutine pdlaqr1(wantt, wantz, n, ilo, ihi, a, desca, wr, wi, iloz, ihiz, z, descz, work, lwork, iwork, ilwork, info)
Definition pdlaqr1.f:5
recursive subroutine pdlaqr3(wantt, wantz, n, ktop, kbot, nw, h, desch, iloz, ihiz, z, descz, ns, nd, sr, si, v, descv, nh, t, desct, nv, wv, descw, work, lwork, iwork, liwork, reclevel)
Definition pdlaqr3.f:6
subroutine pdlarf(side, m, n, v, iv, jv, descv, incv, tau, c, ic, jc, descc, work)
Definition pdlarf.f:3
subroutine pdlarfg(n, alpha, iax, jax, x, ix, jx, descx, incx, tau)
Definition pdlarfg.f:3
subroutine pdormhr(side, trans, m, n, ilo, ihi, a, ia, ja, desca, tau, c, ic, jc, descc, work, lwork, info)
Definition pdormhr.f:3
subroutine pdrot(n, x, ix, jx, descx, incx, y, iy, jy, descy, incy, cs, sn, work, lwork, info)
Definition pdrot.f:3
subroutine pdtrord(compq, select, para, n, t, it, jt, desct, q, iq, jq, descq, wr, wi, m, work, lwork, iwork, liwork, info)
Definition pdtrord.f:4
integer function pilaenvx(ictxt, ispec, name, opts, n1, n2, n3, n4)
Definition pilaenvx.f:3
double precision function dlamch(cmach)
Definition tools.f:10