SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ pdlaqr0()

recursive subroutine pdlaqr0 ( logical  wantt,
logical  wantz,
integer  n,
integer  ilo,
integer  ihi,
double precision, dimension( * )  h,
integer, dimension( * )  desch,
double precision, dimension( n )  wr,
double precision, dimension( n )  wi,
integer  iloz,
integer  ihiz,
double precision, dimension( * )  z,
integer, dimension( * )  descz,
double precision, dimension( * )  work,
integer  lwork,
integer, dimension( * )  iwork,
integer  liwork,
integer  info,
integer  reclevel 
)

Definition at line 1 of file pdlaqr0.f.

4*
5* Contribution from the Department of Computing Science and HPC2N,
6* Umea University, Sweden
7*
8* -- ScaLAPACK auxiliary routine (version 2.0.1) --
9* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
10* Univ. of Colorado Denver and University of California, Berkeley.
11* January, 2012
12*
13 IMPLICIT NONE
14*
15* .. Scalar Arguments ..
16 INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LIWORK, LWORK, N,
17 $ RECLEVEL
18 LOGICAL WANTT, WANTZ
19* ..
20* .. Array Arguments ..
21 INTEGER DESCH( * ), DESCZ( * ), IWORK( * )
22 DOUBLE PRECISION H( * ), WI( N ), WORK( * ), WR( N ),
23 $ Z( * )
24* ..
25*
26* Purpose
27* =======
28*
29* PDLAQR0 computes the eigenvalues of a Hessenberg matrix H
30* and, optionally, the matrices T and Z from the Schur decomposition
31* H = Z*T*Z**T, where T is an upper quasi-triangular matrix (the
32* Schur form), and Z is the orthogonal matrix of Schur vectors.
33*
34* Optionally Z may be postmultiplied into an input orthogonal
35* matrix Q so that this routine can give the Schur factorization
36* of a matrix A which has been reduced to the Hessenberg form H
37* by the orthogonal matrix Q:
38* A = Q * H * Q**T = (QZ) * T * (QZ)**T.
39*
40* Notes
41* =====
42*
43* Each global data object is described by an associated description
44* vector. This vector stores the information required to establish
45* the mapping between an object element and its corresponding process
46* and memory location.
47*
48* Let A be a generic term for any 2D block cyclicly distributed array.
49* Such a global array has an associated description vector DESCA.
50* In the following comments, the character _ should be read as
51* "of the global array".
52*
53* NOTATION STORED IN EXPLANATION
54* --------------- -------------- --------------------------------------
55* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
56* DTYPE_A = 1.
57* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
58* the BLACS process grid A is distribu-
59* ted over. The context itself is glo-
60* bal, but the handle (the integer
61* value) may vary.
62* M_A (global) DESCA( M_ ) The number of rows in the global
63* array A.
64* N_A (global) DESCA( N_ ) The number of columns in the global
65* array A.
66* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
67* the rows of the array.
68* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
69* the columns of the array.
70* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
71* row of the array A is distributed.
72* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
73* first column of the array A is
74* distributed.
75* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
76* array. LLD_A >= MAX(1,LOCr(M_A)).
77*
78* Let K be the number of rows or columns of a distributed matrix,
79* and assume that its process grid has dimension p x q.
80* LOCr( K ) denotes the number of elements of K that a process
81* would receive if K were distributed over the p processes of its
82* process column.
83* Similarly, LOCc( K ) denotes the number of elements of K that a
84* process would receive if K were distributed over the q processes of
85* its process row.
86* The values of LOCr() and LOCc() may be determined via a call to the
87* ScaLAPACK tool function, NUMROC:
88* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
89* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
90* An upper bound for these quantities may be computed by:
91* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
92* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
93*
94* Arguments
95* =========
96*
97* WANTT (global input) LOGICAL
98* = .TRUE. : the full Schur form T is required;
99* = .FALSE.: only eigenvalues are required.
100*
101* WANTZ (global input) LOGICAL
102* = .TRUE. : the matrix of Schur vectors Z is required;
103* = .FALSE.: Schur vectors are not required.
104*
105* N (global input) INTEGER
106* The order of the Hessenberg matrix H (and Z if WANTZ).
107* N >= 0.
108*
109* ILO (global input) INTEGER
110* IHI (global input) INTEGER
111* It is assumed that H is already upper triangular in rows
112* and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
113* set by a previous call to PDGEBAL, and then passed to PDGEHRD
114* when the matrix output by PDGEBAL is reduced to Hessenberg
115* form. Otherwise ILO and IHI should be set to 1 and N
116* respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N.
117* If N = 0, then ILO = 1 and IHI = 0.
118*
119* H (global input/output) DOUBLE PRECISION array, dimension
120* (DESCH(LLD_),*)
121* On entry, the upper Hessenberg matrix H.
122* On exit, if JOB = 'S', H is upper quasi-triangular in
123* rows and columns ILO:IHI, with 1-by-1 and 2-by-2 blocks on
124* the main diagonal. The 2-by-2 diagonal blocks (corresponding
125* to complex conjugate pairs of eigenvalues) are returned in
126* standard form, with H(i,i) = H(i+1,i+1) and
127* H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and JOB = 'E', the
128* contents of H are unspecified on exit.
129*
130* DESCH (global and local input) INTEGER array of dimension DLEN_.
131* The array descriptor for the distributed matrix H.
132*
133* WR (global output) DOUBLE PRECISION array, dimension (N)
134* WI (global output) DOUBLE PRECISION array, dimension (N)
135* The real and imaginary parts, respectively, of the computed
136* eigenvalues ILO to IHI are stored in the corresponding
137* elements of WR and WI. If two eigenvalues are computed as a
138* complex conjugate pair, they are stored in consecutive
139* elements of WR and WI, say the i-th and (i+1)th, with
140* WI(i) > 0 and WI(i+1) < 0. If JOB = 'S', the
141* eigenvalues are stored in the same order as on the diagonal
142* of the Schur form returned in H.
143*
144* Z (global input/output) DOUBLE PRECISION array.
145* If COMPZ = 'V', on entry Z must contain the current
146* matrix Z of accumulated transformations from, e.g., PDGEHRD,
147* and on exit Z has been updated; transformations are applied
148* only to the submatrix Z(ILO:IHI,ILO:IHI).
149* If COMPZ = 'N', Z is not referenced.
150* If COMPZ = 'I', on entry Z need not be set and on exit,
151* if INFO = 0, Z contains the orthogonal matrix Z of the Schur
152* vectors of H.
153*
154* DESCZ (global and local input) INTEGER array of dimension DLEN_.
155* The array descriptor for the distributed matrix Z.
156*
157* WORK (local workspace) DOUBLE PRECISION array, dimension(DWORK)
158*
159* LWORK (local input) INTEGER
160* The length of the workspace array WORK.
161*
162* IWORK (local workspace) INTEGER array, dimension (LIWORK)
163*
164* LIWORK (local input) INTEGER
165* The length of the workspace array IWORK.
166*
167* INFO (output) INTEGER
168* = 0: successful exit
169* .LT. 0: if INFO = -i, the i-th argument had an illegal
170* value
171* .GT. 0: if INFO = i, PDLAQR0 failed to compute all of
172* the eigenvalues. Elements 1:ilo-1 and i+1:n of WR
173* and WI contain those eigenvalues which have been
174* successfully computed. (Failures are rare.)
175*
176* If INFO .GT. 0 and JOB = 'E', then on exit, the
177* remaining unconverged eigenvalues are the eigen-
178* values of the upper Hessenberg matrix rows and
179* columns ILO through INFO of the final, output
180* value of H.
181*
182* If INFO .GT. 0 and JOB = 'S', then on exit
183*
184* (*) (initial value of H)*U = U*(final value of H)
185*
186* where U is an orthogonal matrix. The final
187* value of H is upper Hessenberg and quasi-triangular
188* in rows and columns INFO+1 through IHI.
189*
190* If INFO .GT. 0 and COMPZ = 'V', then on exit
191*
192* (final value of Z) = (initial value of Z)*U
193*
194* where U is the orthogonal matrix in (*) (regard-
195* less of the value of JOB.)
196*
197* If INFO .GT. 0 and COMPZ = 'I', then on exit
198* (final value of Z) = U
199* where U is the orthogonal matrix in (*) (regard-
200* less of the value of JOB.)
201*
202* If INFO .GT. 0 and COMPZ = 'N', then Z is not
203* accessed.
204*
205* ================================================================
206* Based on contributions by
207* Robert Granat, Department of Computing Science and HPC2N,
208* Umea University, Sweden.
209* ================================================================
210*
211* Restrictions: The block size in H and Z must be square and larger
212* than or equal to six (6) due to restrictions in PDLAQR1, PDLAQR5
213* and DLAQR6. Moreover, H and Z need to be distributed identically
214* with the same context.
215*
216* ================================================================
217* References:
218* K. Braman, R. Byers, and R. Mathias,
219* The Multi-Shift QR Algorithm Part I: Maintaining Well Focused
220* Shifts, and Level 3 Performance.
221* SIAM J. Matrix Anal. Appl., 23(4):929--947, 2002.
222*
223* K. Braman, R. Byers, and R. Mathias,
224* The Multi-Shift QR Algorithm Part II: Aggressive Early
225* Deflation.
226* SIAM J. Matrix Anal. Appl., 23(4):948--973, 2002.
227*
228* R. Granat, B. Kagstrom, and D. Kressner,
229* A Novel Parallel QR Algorithm for Hybrid Distributed Momory HPC
230* Systems.
231* SIAM J. Sci. Comput., 32(4):2345--2378, 2010.
232*
233* ================================================================
234*
235* .. Parameters ..
236*
237* ==== Exceptional deflation windows: try to cure rare
238* . slow convergence by increasing the size of the
239* . deflation window after KEXNW iterations. =====
240*
241* ==== Exceptional shifts: try to cure rare slow convergence
242* . with ad-hoc exceptional shifts every KEXSH iterations.
243* . The constants WILK1 and WILK2 are used to form the
244* . exceptional shifts. ====
245*
246 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
247 $ LLD_, MB_, M_, NB_, N_, RSRC_
248 INTEGER RECMAX
249 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
250 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
251 $ rsrc_ = 7, csrc_ = 8, lld_ = 9, recmax = 3 )
252 INTEGER NTINY
253 parameter( ntiny = 11 )
254 INTEGER KEXNW, KEXSH
255 parameter( kexnw = 5, kexsh = 6 )
256 DOUBLE PRECISION WILK1, WILK2
257 parameter( wilk1 = 0.75d0, wilk2 = -0.4375d0 )
258 DOUBLE PRECISION ZERO, ONE
259 parameter( zero = 0.0d0, one = 1.0d0 )
260* ..
261* .. Local Scalars ..
262 DOUBLE PRECISION AA, BB, CC, CS, DD, SN, SS, SWAP, ELEM, T0,
263 $ ELEM1, ELEM2, ELEM3, ALPHA, SDSUM, STAMP
264 INTEGER I, J, INF, IT, ITMAX, K, KACC22, KBOT, KDU, KS,
265 $ KT, KTOP, KU, KV, KWH, KWTOP, KWV, LD, LS,
266 $ LWKOPT, NDFL, NH, NHO, NIBBLE, NMIN, NS, NSMAX,
267 $ NSR, NVE, NW, NWMAX, NWR, LLDH, LLDZ, II, JJ,
268 $ ICTXT, NPROW, NPCOL, MYROW, MYCOL, IPV, IPT,
269 $ IPW, IPWRK, VROWS, VCOLS, TROWS, TCOLS, WROWS,
270 $ WCOLS, HRSRC, HCSRC, NB, IS, IE, NPROCS, KK,
271 $ IROFFH, ICOFFH, HRSRC3, HCSRC3, NWIN, TOTIT,
272 $ SWEEP, JW, TOTNS, LIWKOPT, NPMIN, ICTXT_NEW,
273 $ MYROW_NEW, MYCOL_NEW
274 LOGICAL NWINC, SORTED, LQUERY, RECURSION
275 CHARACTER JBCMPZ*2
276* ..
277* .. External Functions ..
278 INTEGER PILAENVX, NUMROC, INDXG2P, ICEIL, BLACS_PNUM
279 EXTERNAL pilaenvx, numroc, indxg2p, iceil, blacs_pnum
280* ..
281* .. Local Arrays ..
282 INTEGER DESCV( DLEN_ ), DESCT( DLEN_ ), DESCW( DLEN_ ),
283 $ PMAP( 64*64 )
284 DOUBLE PRECISION ZDUM( 1, 1 )
285* ..
286* .. External Subroutines ..
287 EXTERNAL pdlacpy, pdlaqr1, dlanv2, pdlaqr3, pdlaqr5,
288 $ pdelget, dlaqr0, dlaset, pdgemr2d
289* ..
290* .. Intrinsic Functions ..
291 INTRINSIC abs, dble, int, max, min, mod
292* ..
293* .. Executable Statements ..
294 info = 0
295 ictxt = desch( ctxt_ )
296 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
297 nprocs = nprow*npcol
298 recursion = reclevel .LT. recmax
299*
300* Quick return for N = 0: nothing to do.
301*
302 IF( n.EQ.0 ) THEN
303 work( 1 ) = one
304 iwork( 1 ) = 1
305 RETURN
306 END IF
307*
308* Set up job flags for PILAENV.
309*
310 IF( wantt ) THEN
311 jbcmpz( 1: 1 ) = 'S'
312 ELSE
313 jbcmpz( 1: 1 ) = 'E'
314 END IF
315 IF( wantz ) THEN
316 jbcmpz( 2: 2 ) = 'V'
317 ELSE
318 jbcmpz( 2: 2 ) = 'N'
319 END IF
320*
321* Check if workspace query
322*
323 lquery = lwork.EQ.-1 .OR. liwork.EQ.-1
324*
325* Extract local leading dimensions and block factors of matrices
326* H and Z
327*
328 lldh = desch( lld_ )
329 lldz = descz( lld_ )
330 nb = desch( mb_ )
331*
332* Tiny (sub-) matrices must use PDLAQR1. (Stops recursion)
333*
334 IF( n.LE.ntiny ) THEN
335*
336* Estimate optimal workspace.
337*
338 CALL pdlaqr1( wantt, wantz, n, ilo, ihi, h, desch, wr, wi,
339 $ iloz, ihiz, z, descz, work, lwork, iwork, liwork, info )
340 lwkopt = int( work(1) )
341 liwkopt = iwork(1)
342*
343* Completely local matrices uses LAPACK. (Stops recursion)
344*
345 ELSEIF( n.LE.nb ) THEN
346 IF( myrow.EQ.desch(rsrc_) .AND. mycol.EQ.desch(csrc_) ) THEN
347 CALL dlaqr0( wantt, wantz, n, ilo, ihi, h, desch(lld_),
348 $ wr, wi, iloz, ihiz, z, descz(lld_), work, lwork, info )
349 IF( n.GT.2 )
350 $ CALL dlaset( 'L', n-2, n-2, zero, zero, h(3),
351 $ desch(lld_) )
352 lwkopt = int( work(1) )
353 liwkopt = 1
354 ELSE
355 lwkopt = 1
356 liwkopt = 1
357 END IF
358*
359* Do one more step of recursion
360*
361 ELSE
362*
363* Zero out iteration and sweep counters for debugging purposes
364*
365 totit = 0
366 sweep = 0
367 totns = 0
368*
369* Use small bulge multi-shift QR with aggressive early
370* deflation on larger-than-tiny matrices.
371*
372* Hope for the best.
373*
374 info = 0
375*
376* NWR = recommended deflation window size. At this
377* point, N .GT. NTINY = 11, so there is enough
378* subdiagonal workspace for NWR.GE.2 as required.
379* (In fact, there is enough subdiagonal space for
380* NWR.GE.3.)
381*
382 nwr = pilaenvx( ictxt, 13, 'PDLAQR0', jbcmpz, n, ilo, ihi,
383 $ lwork )
384 nwr = max( 2, nwr )
385 nwr = min( ihi-ilo+1, nwr )
386 nw = nwr
387*
388* NSR = recommended number of simultaneous shifts.
389* At this point N .GT. NTINY = 11, so there is at
390* enough subdiagonal workspace for NSR to be even
391* and greater than or equal to two as required.
392*
393 nwin = pilaenvx( ictxt, 19, 'PDLAQR0', jbcmpz, n, nb, nb, nb )
394 nsr = pilaenvx( ictxt, 15, 'PDLAQR0', jbcmpz, n, ilo, ihi,
395 $ max(nwin,nb) )
396 nsr = min( nsr, ihi-ilo )
397 nsr = max( 2, nsr-mod( nsr, 2 ) )
398*
399* Estimate optimal workspace
400*
401 lwkopt = 3*iceil(nwr,nprow)*iceil(nwr,npcol)
402*
403* Workspace query call to PDLAQR3
404*
405 CALL pdlaqr3( wantt, wantz, n, ilo, ihi, nwr+1, h,
406 $ desch, iloz, ihiz, z, descz, ls, ld, wr, wi, h,
407 $ desch, n, h, desch, n, h, desch, work, -1, iwork,
408 $ liwork, reclevel )
409 lwkopt = lwkopt + int( work( 1 ) )
410 liwkopt = iwork( 1 )
411*
412* Workspace query call to PDLAQR5
413*
414 CALL pdlaqr5( wantt, wantz, 2, n, 1, n, n, wr, wi, h,
415 $ desch, iloz, ihiz, z, descz, work, -1, iwork,
416 $ liwork )
417*
418* Optimal workspace = MAX(PDLAQR3, PDLAQR5)
419*
420 lwkopt = max( lwkopt, int( work( 1 ) ) )
421 liwkopt = max( liwkopt, iwork( 1 ) )
422*
423* Quick return in case of workspace query.
424*
425 IF( lquery ) THEN
426 work( 1 ) = dble( lwkopt )
427 iwork( 1 ) = liwkopt
428 RETURN
429 END IF
430*
431* PDLAQR1/PDLAQR0 crossover point.
432*
433 nmin = pilaenvx( ictxt, 12, 'PDLAQR0', jbcmpz, n, ilo, ihi,
434 $ lwork )
435 nmin = max( ntiny, nmin )
436*
437* Nibble crossover point.
438*
439 nibble = pilaenvx( ictxt, 14, 'PDLAQR0', jbcmpz, n, ilo, ihi,
440 $ lwork )
441 nibble = max( 0, nibble )
442*
443* Accumulate reflections during ttswp? Use block
444* 2-by-2 structure during matrix-matrix multiply?
445*
446 kacc22 = pilaenvx( ictxt, 16, 'PDLAQR0', jbcmpz, n, ilo, ihi,
447 $ lwork )
448 kacc22 = max( 1, kacc22 )
449 kacc22 = min( 2, kacc22 )
450*
451* NWMAX = the largest possible deflation window for
452* which there is sufficient workspace.
453*
454 nwmax = min( ( n-1 ) / 3, lwork / 2 )
455*
456* NSMAX = the Largest number of simultaneous shifts
457* for which there is sufficient workspace.
458*
459 nsmax = min( ( n+6 ) / 9, lwork - lwork/3 )
460 nsmax = nsmax - mod( nsmax, 2 )
461*
462* NDFL: an iteration count restarted at deflation.
463*
464 ndfl = 1
465*
466* ITMAX = iteration limit
467*
468 itmax = max( 30, 2*kexsh )*max( 10, ( ihi-ilo+1 ) )
469*
470* Last row and column in the active block.
471*
472 kbot = ihi
473*
474* Main Loop.
475*
476 DO 110 it = 1, itmax
477 totit = totit + 1
478*
479* Done when KBOT falls below ILO.
480*
481 IF( kbot.LT.ilo )
482 $ GO TO 120
483*
484* Locate active block.
485*
486 DO 10 k = kbot, ilo + 1, -1
487 CALL infog2l( k, k-1, desch, nprow, npcol, myrow, mycol,
488 $ ii, jj, hrsrc, hcsrc )
489 IF( myrow.EQ.hrsrc .AND. mycol.EQ.hcsrc ) THEN
490 IF( h( ii + (jj-1)*lldh ).EQ.zero )
491 $ GO TO 20
492 END IF
493 10 CONTINUE
494 k = ilo
495 20 CONTINUE
496 ktop = k
497 IF( nprocs.GT.1 )
498 $ CALL igamx2d( ictxt, 'All', '1-Tree', 1, 1, ktop, 1,
499 $ -1, -1, -1, -1, -1 )
500*
501* Select deflation window size.
502*
503 nh = kbot - ktop + 1
504 IF( nh.LE.ntiny ) THEN
505 nw = nh
506 ELSEIF( ndfl.LT.kexnw .OR. nh.LT.nw ) THEN
507*
508* Typical deflation window. If possible and
509* advisable, nibble the entire active block.
510* If not, use size NWR or NWR+1 depending upon
511* which has the smaller corresponding subdiagonal
512* entry (a heuristic).
513*
514 nwinc = .true.
515 IF( nh.LE.min( nmin, nwmax ) ) THEN
516 nw = nh
517 ELSE
518 nw = min( nwr, nh, nwmax )
519 IF( nw.LT.nwmax ) THEN
520 IF( nw.GE.nh-1 ) THEN
521 nw = nh
522 ELSE
523 kwtop = kbot - nw + 1
524 CALL pdelget( 'All', '1-Tree', elem1, h, kwtop,
525 $ kwtop-1, desch )
526 CALL pdelget( 'All', '1-Tree', elem2, h,
527 $ kwtop-1, kwtop-2, desch )
528 IF( abs( elem1 ).GT.abs( elem2 ) ) nw = nw + 1
529 END IF
530 END IF
531 END IF
532 ELSE
533*
534* Exceptional deflation window. If there have
535* been no deflations in KEXNW or more iterations,
536* then vary the deflation window size. At first,
537* because, larger windows are, in general, more
538* powerful than smaller ones, rapidly increase the
539* window up to the maximum reasonable and possible.
540* Then maybe try a slightly smaller window.
541*
542 IF( nwinc .AND. nw.LT.min( nwmax, nh ) ) THEN
543 nw = min( nwmax, nh, 2*nw )
544 ELSE
545 nwinc = .false.
546 IF( nw.EQ.nh .AND. nh.GT.2 )
547 $ nw = nh - 1
548 END IF
549 END IF
550*
551* Aggressive early deflation:
552* split workspace into
553* - an NW-by-NW work array V for orthogonal matrix
554* - an NW-by-at-least-NW-but-more-is-better
555* (NW-by-NHO) horizontal work array for Schur factor
556* - an at-least-NW-but-more-is-better (NVE-by-NW)
557* vertical work array for matrix multiplications
558* - align T, V and W with the deflation window
559*
560 kv = n - nw + 1
561 kt = nw + 1
562 nho = ( n-nw-1 ) - kt + 1
563 kwv = nw + 2
564 nve = ( n-nw ) - kwv + 1
565*
566 jw = min( nw, kbot-ktop+1 )
567 kwtop = kbot - jw + 1
568 iroffh = mod( kwtop - 1, nb )
569 icoffh = iroffh
570 hrsrc = indxg2p( kwtop, nb, myrow, desch(rsrc_), nprow )
571 hcsrc = indxg2p( kwtop, nb, mycol, desch(csrc_), npcol )
572 vrows = numroc( jw+iroffh, nb, myrow, hrsrc, nprow )
573 vcols = numroc( jw+icoffh, nb, mycol, hcsrc, npcol )
574 CALL descinit( descv, jw+iroffh, jw+icoffh, nb, nb,
575 $ hrsrc, hcsrc, ictxt, max(1, vrows), info )
576*
577 trows = numroc( jw+iroffh, nb, myrow, hrsrc, nprow )
578 tcols = numroc( jw+icoffh, nb, mycol, hcsrc, npcol )
579 CALL descinit( desct, jw+iroffh, jw+icoffh, nb, nb,
580 $ hrsrc, hcsrc, ictxt, max(1, trows), info )
581 wrows = numroc( jw+iroffh, nb, myrow, hrsrc, nprow )
582 wcols = numroc( jw+icoffh, nb, mycol, hcsrc, npcol )
583 CALL descinit( descw, jw+iroffh, jw+icoffh, nb, nb,
584 $ hrsrc, hcsrc, ictxt, max(1, wrows), info )
585*
586 ipv = 1
587 ipt = ipv + descv( lld_ ) * vcols
588 ipw = ipt + desct( lld_ ) * tcols
589 ipwrk = ipw + descw( lld_ ) * wcols
590*
591* Aggressive early deflation
592*
593 iwork(1) = it
594 CALL pdlaqr3( wantt, wantz, n, ktop, kbot, nw, h,
595 $ desch, iloz, ihiz, z, descz, ls, ld, wr, wi,
596 $ work(ipv), descv, nho, work(ipt), desct, nve,
597 $ work(ipw), descw, work(ipwrk), lwork-ipwrk+1,
598 $ iwork, liwork, reclevel )
599*
600* Adjust KBOT accounting for new deflations.
601*
602 kbot = kbot - ld
603*
604* KS points to the shifts.
605*
606 ks = kbot - ls + 1
607*
608* Skip an expensive QR sweep if there is a (partly
609* heuristic) reason to expect that many eigenvalues
610* will deflate without it. Here, the QR sweep is
611* skipped if many eigenvalues have just been deflated
612* or if the remaining active block is small.
613*
614 IF( ( ld.EQ.0 ) .OR. ( ( 100*ld.LE.nw*nibble ) .AND. ( kbot-
615 $ ktop+1.GT.min( nmin, nwmax ) ) ) ) THEN
616*
617* NS = nominal number of simultaneous shifts.
618* This may be lowered (slightly) if PDLAQR3
619* did not provide that many shifts.
620*
621 ns = min( nsmax, nsr, max( 2, kbot-ktop ) )
622 ns = ns - mod( ns, 2 )
623*
624* If there have been no deflations
625* in a multiple of KEXSH iterations,
626* then try exceptional shifts.
627* Otherwise use shifts provided by
628* PDLAQR3 above or from the eigenvalues
629* of a trailing principal submatrix.
630*
631 IF( mod( ndfl, kexsh ).EQ.0 ) THEN
632 ks = kbot - ns + 1
633 DO 30 i = kbot, max( ks+1, ktop+2 ), -2
634 CALL pdelget( 'All', '1-Tree', elem1, h, i, i-1,
635 $ desch )
636 CALL pdelget( 'All', '1-Tree', elem2, h, i-1, i-2,
637 $ desch )
638 CALL pdelget( 'All', '1-Tree', elem3, h, i, i,
639 $ desch )
640 ss = abs( elem1 ) + abs( elem2 )
641 aa = wilk1*ss + elem3
642 bb = ss
643 cc = wilk2*ss
644 dd = aa
645 CALL dlanv2( aa, bb, cc, dd, wr( i-1 ), wi( i-1 ),
646 $ wr( i ), wi( i ), cs, sn )
647 30 CONTINUE
648 IF( ks.EQ.ktop ) THEN
649 CALL pdelget( 'All', '1-Tree', elem1, h, ks+1,
650 $ ks+1, desch )
651 wr( ks+1 ) = elem1
652 wi( ks+1 ) = zero
653 wr( ks ) = wr( ks+1 )
654 wi( ks ) = wi( ks+1 )
655 END IF
656 ELSE
657*
658* Got NS/2 or fewer shifts? Use PDLAQR0 or
659* PDLAQR1 on a trailing principal submatrix to
660* get more.
661*
662 IF( kbot-ks+1.LE.ns / 2 ) THEN
663 ks = kbot - ns + 1
664 kt = n - ns + 1
665 npmin = pilaenvx( ictxt, 23, 'PDLAQR0', 'EN', ns,
666 $ nb, nprow, npcol )
667c
668c Temporarily force NPMIN <= 8 since only PDLAQR1 is used.
669c
670 npmin = min(npmin, 8)
671 IF( min(nprow, npcol).LE.npmin+1 .OR.
672 $ reclevel.GE.1 ) THEN
673*
674* The window is large enough. Compute the Schur
675* decomposition with all processors.
676*
677 iroffh = mod( ks - 1, nb )
678 icoffh = iroffh
679 IF( ns.GT.nmin ) THEN
680 hrsrc = indxg2p( ks, nb, myrow, desch(rsrc_),
681 $ nprow )
682 hcsrc = indxg2p( ks, nb, myrow, desch(csrc_),
683 $ npcol )
684 ELSE
685 hrsrc = 0
686 hcsrc = 0
687 END IF
688 trows = numroc( ns+iroffh, nb, myrow, hrsrc,
689 $ nprow )
690 tcols = numroc( ns+icoffh, nb, mycol, hcsrc,
691 $ npcol )
692 CALL descinit( desct, ns+iroffh, ns+icoffh, nb,
693 $ nb, hrsrc, hcsrc, ictxt, max(1, trows),
694 $ info )
695 ipt = 1
696 ipwrk = ipt + desct(lld_) * tcols
697*
698 IF( ns.GT.nmin .AND. recursion ) THEN
699 CALL pdlacpy( 'All', ns, ns, h, ks, ks,
700 $ desch, work(ipt), 1+iroffh, 1+icoffh,
701 $ desct )
702 CALL pdlaqr0( .false., .false., iroffh+ns,
703 $ 1+iroffh, iroffh+ns, work(ipt),
704 $ desct, wr( ks-iroffh ),
705 $ wi( ks-iroffh ), 1, 1, zdum,
706 $ descz, work( ipwrk ),
707 $ lwork-ipwrk+1, iwork, liwork,
708 $ inf, reclevel+1 )
709 ELSE
710 CALL pdlamve( 'All', ns, ns, h, ks, ks,
711 $ desch, work(ipt), 1+iroffh, 1+icoffh,
712 $ desct, work(ipwrk) )
713 CALL pdlaqr1( .false., .false., iroffh+ns,
714 $ 1+iroffh, iroffh+ns, work(ipt),
715 $ desct, wr( ks-iroffh ),
716 $ wi( ks-iroffh ), 1+iroffh, iroffh+ns,
717 $ zdum, descz, work( ipwrk ),
718 $ lwork-ipwrk+1, iwork, liwork, inf )
719 END IF
720 ELSE
721*
722* The window is too small. Redistribute the AED
723* window to a subgrid and do the computation on
724* the subgrid.
725*
726 ictxt_new = ictxt
727 DO 50 i = 0, npmin-1
728 DO 40 j = 0, npmin-1
729 pmap( j+1+i*npmin ) =
730 $ blacs_pnum( ictxt, i, j )
731 40 CONTINUE
732 50 CONTINUE
733 CALL blacs_gridmap( ictxt_new, pmap, npmin,
734 $ npmin, npmin )
735 CALL blacs_gridinfo( ictxt_new, npmin, npmin,
736 $ myrow_new, mycol_new )
737 IF( myrow.GE.npmin .OR. mycol.GE.npmin )
738 $ ictxt_new = -1
739*
740 IF( ictxt_new.GE.0 ) THEN
741 trows = numroc( ns, nb, myrow_new, 0, npmin )
742 tcols = numroc( ns, nb, mycol_new, 0, npmin )
743 CALL descinit( desct, ns, ns, nb, nb, 0, 0,
744 $ ictxt_new, max(1,trows), info )
745 ipt = 1
746 ipwrk = ipt + desct(lld_) * tcols
747 ELSE
748 ipt = 1
749 ipwrk = 2
750 desct( ctxt_ ) = -1
751 inf = 0
752 END IF
753 CALL pdgemr2d( ns, ns, h, ks, ks, desch,
754 $ work(ipt), 1, 1, desct, ictxt )
755*
756c
757c This part is still not perfect.
758c Either PDLAQR0 or PDLAQR1 can work, but not both.
759c
760c NMIN = PILAENVX( ICTXT_NEW, 12, 'PDLAQR0',
761c $ 'EN', NS, 1, NS, LWORK )
762 IF( ictxt_new.GE.0 ) THEN
763c IF( NS.GT.NMIN .AND. RECLEVEL.LT.1 ) THEN
764c CALL PDLAQR0( .FALSE., .FALSE., NS, 1,
765c $ NS, WORK(IPT), DESCT, WR( KS ),
766c $ WI( KS ), 1, 1, ZDUM, DESCT,
767c $ WORK( IPWRK ), LWORK-IPWRK+1, IWORK,
768c $ LIWORK, INF, RECLEVEL+1 )
769c ELSE
770 CALL pdlaqr1( .false., .false., ns, 1,
771 $ ns, work(ipt), desct, wr( ks ),
772 $ wi( ks ), 1, ns, zdum, desct,
773 $ work( ipwrk ), lwork-ipwrk+1, iwork,
774 $ liwork, inf )
775c END IF
776 CALL blacs_gridexit( ictxt_new )
777 END IF
778 IF( myrow+mycol.GT.0 ) THEN
779 DO 60 j = 0, ns-1
780 wr( ks+j ) = zero
781 wi( ks+j ) = zero
782 60 CONTINUE
783 END IF
784 CALL igamn2d( ictxt, 'All', '1-Tree', 1, 1, inf,
785 $ 1, -1, -1, -1, -1, -1 )
786 CALL dgsum2d( ictxt, 'All', ' ', ns, 1, wr(ks),
787 $ ns, -1, -1 )
788 CALL dgsum2d( ictxt, 'All', ' ', ns, 1, wi(ks),
789 $ ns, -1, -1 )
790 END IF
791 ks = ks + inf
792*
793* In case of a rare QR failure use
794* eigenvalues of the trailing 2-by-2
795* principal submatrix.
796*
797 IF( ks.GE.kbot ) THEN
798 CALL pdelget( 'All', '1-Tree', aa, h, kbot-1,
799 $ kbot-1, desch )
800 CALL pdelget( 'All', '1-Tree', cc, h, kbot,
801 $ kbot-1, desch )
802 CALL pdelget( 'All', '1-Tree', bb, h, kbot-1,
803 $ kbot, desch )
804 CALL pdelget( 'All', '1-Tree', dd, h, kbot,
805 $ kbot, desch )
806 CALL dlanv2( aa, bb, cc, dd, wr( kbot-1 ),
807 $ wi( kbot-1 ), wr( kbot ),
808 $ wi( kbot ), cs, sn )
809 ks = kbot - 1
810 END IF
811 END IF
812*
813 IF( kbot-ks+1.GT.ns ) THEN
814*
815* Sort the shifts (helps a little)
816* Bubble sort keeps complex conjugate
817* pairs together.
818*
819 sorted = .false.
820 DO 80 k = kbot, ks + 1, -1
821 IF( sorted )
822 $ GO TO 90
823 sorted = .true.
824 DO 70 i = ks, k - 1
825 IF( abs( wr( i ) )+abs( wi( i ) ).LT.
826 $ abs( wr( i+1 ) )+abs( wi( i+1 ) ) ) THEN
827 sorted = .false.
828*
829 swap = wr( i )
830 wr( i ) = wr( i+1 )
831 wr( i+1 ) = swap
832*
833 swap = wi( i )
834 wi( i ) = wi( i+1 )
835 wi( i+1 ) = swap
836 END IF
837 70 CONTINUE
838 80 CONTINUE
839 90 CONTINUE
840 END IF
841*
842* Shuffle shifts into pairs of real shifts
843* and pairs of complex conjugate shifts
844* assuming complex conjugate shifts are
845* already adjacent to one another. (Yes,
846* they are.)
847*
848 DO 100 i = kbot, ks + 2, -2
849 IF( wi( i ).NE.-wi( i-1 ) ) THEN
850*
851 swap = wr( i )
852 wr( i ) = wr( i-1 )
853 wr( i-1 ) = wr( i-2 )
854 wr( i-2 ) = swap
855*
856 swap = wi( i )
857 wi( i ) = wi( i-1 )
858 wi( i-1 ) = wi( i-2 )
859 wi( i-2 ) = swap
860 END IF
861 100 CONTINUE
862 END IF
863*
864* If there are only two shifts and both are
865* real, then use only one.
866*
867 IF( kbot-ks+1.EQ.2 ) THEN
868 IF( wi( kbot ).EQ.zero ) THEN
869 CALL pdelget( 'All', '1-Tree', elem, h, kbot,
870 $ kbot, desch )
871 IF( abs( wr( kbot )-elem ).LT.
872 $ abs( wr( kbot-1 )-elem ) ) THEN
873 wr( kbot-1 ) = wr( kbot )
874 ELSE
875 wr( kbot ) = wr( kbot-1 )
876 END IF
877 END IF
878 END IF
879*
880* Use up to NS of the the smallest magnatiude
881* shifts. If there aren't NS shifts available,
882* then use them all, possibly dropping one to
883* make the number of shifts even.
884*
885 ns = min( ns, kbot-ks+1 )
886 ns = ns - mod( ns, 2 )
887 ks = kbot - ns + 1
888*
889* Small-bulge multi-shift QR sweep.
890*
891 totns = totns + ns
892 sweep = sweep + 1
893 CALL pdlaqr5( wantt, wantz, kacc22, n, ktop, kbot,
894 $ ns, wr( ks ), wi( ks ), h, desch, iloz, ihiz, z,
895 $ descz, work, lwork, iwork, liwork )
896 END IF
897*
898* Note progress (or the lack of it).
899*
900 IF( ld.GT.0 ) THEN
901 ndfl = 1
902 ELSE
903 ndfl = ndfl + 1
904 END IF
905*
906* End of main loop.
907 110 CONTINUE
908*
909* Iteration limit exceeded. Set INFO to show where
910* the problem occurred and exit.
911*
912 info = kbot
913 120 CONTINUE
914 END IF
915*
916* Return the optimal value of LWORK.
917*
918 work( 1 ) = dble( lwkopt )
919 iwork( 1 ) = liwkopt
920 IF( .NOT. lquery ) THEN
921 iwork( 1 ) = totit
922 iwork( 2 ) = sweep
923 iwork( 3 ) = totns
924 END IF
925 RETURN
926*
927* End of PDLAQR0
928*
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3
integer function iceil(inum, idenom)
Definition iceil.f:2
integer function indxg2p(indxglob, nb, iproc, isrcproc, nprocs)
Definition indxg2p.f:2
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pdelget(scope, top, alpha, a, ia, ja, desca)
Definition pdelget.f:2
subroutine pdlacpy(uplo, m, n, a, ia, ja, desca, b, ib, jb, descb)
Definition pdlacpy.f:3
subroutine pdlamve(uplo, m, n, a, ia, ja, desca, b, ib, jb, descb, dwork)
Definition pdlamve.f:3
recursive subroutine pdlaqr0(wantt, wantz, n, ilo, ihi, h, desch, wr, wi, iloz, ihiz, z, descz, work, lwork, iwork, liwork, info, reclevel)
Definition pdlaqr0.f:4
recursive subroutine pdlaqr1(wantt, wantz, n, ilo, ihi, a, desca, wr, wi, iloz, ihiz, z, descz, work, lwork, iwork, ilwork, info)
Definition pdlaqr1.f:5
recursive subroutine pdlaqr3(wantt, wantz, n, ktop, kbot, nw, h, desch, iloz, ihiz, z, descz, ns, nd, sr, si, v, descv, nh, t, desct, nv, wv, descw, work, lwork, iwork, liwork, reclevel)
Definition pdlaqr3.f:6
subroutine pdlaqr5(wantt, wantz, kacc22, n, ktop, kbot, nshfts, sr, si, h, desch, iloz, ihiz, z, descz, work, lwork, iwork, liwork)
Definition pdlaqr5.f:4
integer function pilaenvx(ictxt, ispec, name, opts, n1, n2, n3, n4)
Definition pilaenvx.f:3
Here is the call graph for this function:
Here is the caller graph for this function: