SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ pdlaqr3()

recursive subroutine pdlaqr3 ( logical  wantt,
logical  wantz,
integer  n,
integer  ktop,
integer  kbot,
integer  nw,
double precision, dimension( * )  h,
integer, dimension( * )  desch,
integer  iloz,
integer  ihiz,
double precision, dimension( * )  z,
integer, dimension( * )  descz,
integer  ns,
integer  nd,
double precision, dimension( kbot )  sr,
double precision, dimension( kbot )  si,
double precision, dimension( * )  v,
integer, dimension( * )  descv,
integer  nh,
double precision, dimension( * )  t,
integer, dimension( * )  desct,
integer  nv,
double precision, dimension( * )  wv,
integer, dimension( * )  descw,
double precision, dimension( * )  work,
integer  lwork,
integer, dimension( * )  iwork,
integer  liwork,
integer  reclevel 
)

Definition at line 1 of file pdlaqr3.f.

6*
7* Contribution from the Department of Computing Science and HPC2N,
8* Umea University, Sweden
9*
10* -- ScaLAPACK auxiliary routine (version 2.0.1) --
11* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
12* Univ. of Colorado Denver and University of California, Berkeley.
13* January, 2012
14*
15 IMPLICIT NONE
16*
17* .. Scalar Arguments ..
18 INTEGER IHIZ, ILOZ, KBOT, KTOP, LWORK, N, ND, NH, NS,
19 $ NV, NW, LIWORK, RECLEVEL
20 LOGICAL WANTT, WANTZ
21* ..
22* .. Array Arguments ..
23 INTEGER DESCH( * ), DESCZ( * ), DESCT( * ), DESCV( * ),
24 $ DESCW( * ), IWORK( * )
25 DOUBLE PRECISION H( * ), SI( KBOT ), SR( KBOT ), T( * ),
26 $ V( * ), WORK( * ), WV( * ),
27 $ Z( * )
28* ..
29*
30* Purpose
31* =======
32*
33* Aggressive early deflation:
34*
35* This subroutine accepts as input an upper Hessenberg matrix H and
36* performs an orthogonal similarity transformation designed to detect
37* and deflate fully converged eigenvalues from a trailing principal
38* submatrix. On output H has been overwritten by a new Hessenberg
39* matrix that is a perturbation of an orthogonal similarity
40* transformation of H. It is to be hoped that the final version of H
41* has many zero subdiagonal entries.
42*
43* Notes
44* =====
45*
46* Each global data object is described by an associated description
47* vector. This vector stores the information required to establish
48* the mapping between an object element and its corresponding process
49* and memory location.
50*
51* Let A be a generic term for any 2D block cyclicly distributed array.
52* Such a global array has an associated description vector DESCA.
53* In the following comments, the character _ should be read as
54* "of the global array".
55*
56* NOTATION STORED IN EXPLANATION
57* --------------- -------------- --------------------------------------
58* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
59* DTYPE_A = 1.
60* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
61* the BLACS process grid A is distribu-
62* ted over. The context itself is glo-
63* bal, but the handle (the integer
64* value) may vary.
65* M_A (global) DESCA( M_ ) The number of rows in the global
66* array A.
67* N_A (global) DESCA( N_ ) The number of columns in the global
68* array A.
69* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
70* the rows of the array.
71* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
72* the columns of the array.
73* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
74* row of the array A is distributed.
75* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
76* first column of the array A is
77* distributed.
78* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
79* array. LLD_A >= MAX(1,LOCr(M_A)).
80*
81* Let K be the number of rows or columns of a distributed matrix,
82* and assume that its process grid has dimension p x q.
83* LOCr( K ) denotes the number of elements of K that a process
84* would receive if K were distributed over the p processes of its
85* process column.
86* Similarly, LOCc( K ) denotes the number of elements of K that a
87* process would receive if K were distributed over the q processes of
88* its process row.
89* The values of LOCr() and LOCc() may be determined via a call to the
90* ScaLAPACK tool function, NUMROC:
91* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
92* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
93* An upper bound for these quantities may be computed by:
94* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
95* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
96*
97* Arguments
98* =========
99*
100* WANTT (global input) LOGICAL
101* If .TRUE., then the Hessenberg matrix H is fully updated
102* so that the quasi-triangular Schur factor may be
103* computed (in cooperation with the calling subroutine).
104* If .FALSE., then only enough of H is updated to preserve
105* the eigenvalues.
106*
107* WANTZ (global input) LOGICAL
108* If .TRUE., then the orthogonal matrix Z is updated so
109* so that the orthogonal Schur factor may be computed
110* (in cooperation with the calling subroutine).
111* If .FALSE., then Z is not referenced.
112*
113* N (global input) INTEGER
114* The order of the matrix H and (if WANTZ is .TRUE.) the
115* order of the orthogonal matrix Z.
116*
117* KTOP (global input) INTEGER
118* It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
119* KBOT and KTOP together determine an isolated block
120* along the diagonal of the Hessenberg matrix.
121*
122* KBOT (global input) INTEGER
123* It is assumed without a check that either
124* KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together
125* determine an isolated block along the diagonal of the
126* Hessenberg matrix.
127*
128* NW (global input) INTEGER
129* Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1).
130*
131* H (local input/output) DOUBLE PRECISION array, dimension
132* (DESCH(LLD_),*)
133* On input the initial N-by-N section of H stores the
134* Hessenberg matrix undergoing aggressive early deflation.
135* On output H has been transformed by an orthogonal
136* similarity transformation, perturbed, and the returned
137* to Hessenberg form that (it is to be hoped) has some
138* zero subdiagonal entries.
139*
140* DESCH (global and local input) INTEGER array of dimension DLEN_.
141* The array descriptor for the distributed matrix H.
142*
143* ILOZ (global input) INTEGER
144* IHIZ (global input) INTEGER
145* Specify the rows of Z to which transformations must be
146* applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
147*
148* Z (input/output) DOUBLE PRECISION array, dimension
149* (DESCH(LLD_),*)
150* IF WANTZ is .TRUE., then on output, the orthogonal
151* similarity transformation mentioned above has been
152* accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
153* If WANTZ is .FALSE., then Z is unreferenced.
154*
155* DESCZ (global and local input) INTEGER array of dimension DLEN_.
156* The array descriptor for the distributed matrix Z.
157*
158* NS (global output) INTEGER
159* The number of unconverged (ie approximate) eigenvalues
160* returned in SR and SI that may be used as shifts by the
161* calling subroutine.
162*
163* ND (global output) INTEGER
164* The number of converged eigenvalues uncovered by this
165* subroutine.
166*
167* SR (global output) DOUBLE PRECISION array, dimension KBOT
168* SI (global output) DOUBLE PRECISION array, dimension KBOT
169* On output, the real and imaginary parts of approximate
170* eigenvalues that may be used for shifts are stored in
171* SR(KBOT-ND-NS+1) through SR(KBOT-ND) and
172* SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively.
173* The real and imaginary parts of converged eigenvalues
174* are stored in SR(KBOT-ND+1) through SR(KBOT) and
175* SI(KBOT-ND+1) through SI(KBOT), respectively.
176*
177* V (global workspace) DOUBLE PRECISION array, dimension
178* (DESCV(LLD_),*)
179* An NW-by-NW distributed work array.
180*
181* DESCV (global and local input) INTEGER array of dimension DLEN_.
182* The array descriptor for the distributed matrix V.
183*
184* NH (input) INTEGER scalar
185* The number of columns of T. NH.GE.NW.
186*
187* T (global workspace) DOUBLE PRECISION array, dimension
188* (DESCV(LLD_),*)
189*
190* DESCT (global and local input) INTEGER array of dimension DLEN_.
191* The array descriptor for the distributed matrix T.
192*
193* NV (global input) INTEGER
194* The number of rows of work array WV available for
195* workspace. NV.GE.NW.
196*
197* WV (global workspace) DOUBLE PRECISION array, dimension
198* (DESCW(LLD_),*)
199*
200* DESCW (global and local input) INTEGER array of dimension DLEN_.
201* The array descriptor for the distributed matrix WV.
202*
203* WORK (local workspace) DOUBLE PRECISION array, dimension LWORK.
204* On exit, WORK(1) is set to an estimate of the optimal value
205* of LWORK for the given values of N, NW, KTOP and KBOT.
206*
207* LWORK (local input) INTEGER
208* The dimension of the work array WORK. LWORK = 2*NW
209* suffices, but greater efficiency may result from larger
210* values of LWORK.
211*
212* If LWORK = -1, then a workspace query is assumed; PDLAQR3
213* only estimates the optimal workspace size for the given
214* values of N, NW, KTOP and KBOT. The estimate is returned
215* in WORK(1). No error message related to LWORK is issued
216* by XERBLA. Neither H nor Z are accessed.
217*
218* IWORK (local workspace) INTEGER array, dimension (LIWORK)
219*
220* LIWORK (local input) INTEGER
221* The length of the workspace array IWORK
222*
223* ================================================================
224* Based on contributions by
225* Robert Granat and Meiyue Shao,
226* Department of Computing Science and HPC2N,
227* Umea University, Sweden
228*
229* ================================================================
230* .. Parameters ..
231 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
232 $ LLD_, MB_, M_, NB_, N_, RSRC_
233 INTEGER RECMAX
234 LOGICAL SORTGRAD
235 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
236 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
237 $ rsrc_ = 7, csrc_ = 8, lld_ = 9, recmax = 3,
238 $ sortgrad = .false. )
239 DOUBLE PRECISION ZERO, ONE
240 parameter( zero = 0.0d0, one = 1.0d0 )
241* ..
242* .. Local Scalars ..
243 DOUBLE PRECISION AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S,
244 $ SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP,
245 $ ELEM, ELEM1, ELEM2, ELEM3, R1, ANORM, RNORM,
246 $ RESAED
247 INTEGER I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL,
248 $ KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2, LWK3,
249 $ LWKOPT, NMIN, LLDH, LLDZ, LLDT, LLDV, LLDWV,
250 $ ICTXT, NPROW, NMAX, NPCOL, MYROW, MYCOL, NB,
251 $ IROFFH, M, RCOLS, TAUROWS, RROWS, TAUCOLS,
252 $ ITAU, IR, IPW, NPROCS, MLOC, IROFFHH,
253 $ ICOFFHH, HHRSRC, HHCSRC, HHROWS, HHCOLS,
254 $ IROFFZZ, ICOFFZZ, ZZRSRC, ZZCSRC, ZZROWS,
255 $ ZZCOLS, IERR, TZROWS0, TZCOLS0, IERR0, IPT0,
256 $ IPZ0, IPW0, NB2, ROUND, LILST, KK, LILST0,
257 $ IWRK1, RSRC, CSRC, LWK4, LWK5, IWRK2, LWK6,
258 $ LWK7, LWK8, ILWKOPT, TZROWS, TZCOLS, NSEL,
259 $ NPMIN, ICTXT_NEW, MYROW_NEW, MYCOL_NEW
260 LOGICAL BULGE, SORTED, LQUERY
261* ..
262* .. Local Arrays ..
263 INTEGER PAR( 6 ), DESCR( DLEN_ ),
264 $ DESCTAU( DLEN_ ), DESCHH( DLEN_ ),
265 $ DESCZZ( DLEN_ ), DESCTZ0( DLEN_ ),
266 $ PMAP( 64*64 )
267 DOUBLE PRECISION DDUM( 1 )
268* ..
269* .. External Functions ..
270 DOUBLE PRECISION DLAMCH, PDLANGE
271 INTEGER PILAENVX, NUMROC, INDXG2P, ICEIL, BLACS_PNUM
272 EXTERNAL dlamch, pilaenvx, numroc, indxg2p, pdlange,
273 $ mpi_wtime, iceil, blacs_pnum
274* ..
275* .. External Subroutines ..
276 EXTERNAL pdcopy, pdgehrd, pdgemm, dlabad, pdlacpy,
277 $ pdlaqr1, dlanv2, pdlaqr0, pdlarf, pdlarfg,
279 $ pdlamve, blacs_gridinfo, blacs_gridmap,
280 $ blacs_gridexit, pdgemr2d
281* ..
282* .. Intrinsic Functions ..
283 INTRINSIC abs, dble, int, max, min, sqrt
284* ..
285* .. Executable Statements ..
286 ictxt = desch( ctxt_ )
287 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
288 nprocs = nprow*npcol
289*
290* Extract local leading dimensions, blockfactors, offset for
291* keeping the alignment requirements and size of deflation window.
292*
293 lldh = desch( lld_ )
294 lldz = descz( lld_ )
295 lldt = desct( lld_ )
296 lldv = descv( lld_ )
297 lldwv = descw( lld_ )
298 nb = desch( mb_ )
299 iroffh = mod( ktop - 1, nb )
300 jw = min( nw, kbot-ktop+1 )
301 nsel = nb+jw
302*
303* Extract environment variables for parallel eigenvalue reordering.
304*
305 par(1) = pilaenvx(ictxt, 17, 'PDLAQR3', 'SV', jw, nb, -1, -1)
306 par(2) = pilaenvx(ictxt, 18, 'PDLAQR3', 'SV', jw, nb, -1, -1)
307 par(3) = pilaenvx(ictxt, 19, 'PDLAQR3', 'SV', jw, nb, -1, -1)
308 par(4) = pilaenvx(ictxt, 20, 'PDLAQR3', 'SV', jw, nb, -1, -1)
309 par(5) = pilaenvx(ictxt, 21, 'PDLAQR3', 'SV', jw, nb, -1, -1)
310 par(6) = pilaenvx(ictxt, 22, 'PDLAQR3', 'SV', jw, nb, -1, -1)
311*
312* Check if workspace query.
313*
314 lquery = lwork.EQ.-1 .OR. liwork.EQ.-1
315*
316* Estimate optimal workspace.
317*
318 IF( jw.LE.2 ) THEN
319 lwkopt = 1
320 ELSE
321*
322* Workspace query calls to PDGEHRD and PDORMHR.
323*
324 taurows = numroc( 1, 1, mycol, descv(rsrc_), nprow )
325 taucols = numroc( jw+iroffh, nb, mycol, descv(csrc_),
326 $ npcol )
327 CALL pdgehrd( jw, 1, jw, t, 1, 1, desct, work, work, -1,
328 $ info )
329 lwk1 = int( work( 1 ) ) + taurows*taucols
330*
331* Workspace query call to PDORMHR.
332*
333 CALL pdormhr( 'Right', 'No', jw, jw, 1, jw, t, 1, 1, desct,
334 $ work, v, 1, 1, descv, work, -1, info )
335 lwk2 = int( work( 1 ) )
336*
337* Workspace query call to PDLAQR0.
338*
339 nmin = pilaenvx( ictxt, 12, 'PDLAQR3', 'SV', jw, 1, jw, lwork )
340 nmax = ( n-1 ) / 3
341 IF( jw+iroffh.GT.nmin .AND. jw+iroffh.LE.nmax
342 $ .AND. reclevel.LT.recmax ) THEN
343 CALL pdlaqr0( .true., .true., jw+iroffh, 1+iroffh,
344 $ jw+iroffh, t, desct, sr, si, 1, jw, v, descv,
345 $ work, -1, iwork, liwork-nsel, infqr,
346 $ reclevel+1 )
347 lwk3 = int( work( 1 ) )
348 iwrk1 = iwork( 1 )
349 ELSE
350 rsrc = desct( rsrc_ )
351 csrc = desct( csrc_ )
352 desct( rsrc_ ) = 0
353 desct( csrc_ ) = 0
354 CALL pdlaqr1( .true., .true., jw+iroffh, 1, jw+iroffh, t,
355 $ desct, sr, si, 1, jw+iroffh, v, descv, work, -1,
356 $ iwork, liwork-nsel, infqr )
357 desct( rsrc_ ) = rsrc
358 desct( csrc_ ) = csrc
359 lwk3 = int( work( 1 ) )
360 iwrk1 = iwork( 1 )
361 END IF
362*
363* Workspace in case of alignment problems.
364*
365 tzrows0 = numroc( jw+iroffh, nb, myrow, 0, nprow )
366 tzcols0 = numroc( jw+iroffh, nb, mycol, 0, npcol )
367 lwk4 = 2 * tzrows0*tzcols0
368*
369* Workspace check for reordering.
370*
371 CALL pdtrord( 'Vectors', iwork, par, jw+iroffh, t, 1, 1,
372 $ desct, v, 1, 1, descv, ddum, ddum, mloc, work, -1,
373 $ iwork, liwork-nsel, info )
374 lwk5 = int( work( 1 ) )
375 iwrk2 = iwork( 1 )
376*
377* Extra workspace for reflecting back spike
378* (workspace for PDLARF approximated for simplicity).
379*
380 rrows = numroc( n+iroffh, nb, myrow, descv(rsrc_), nprow )
381 rcols = numroc( 1, 1, mycol, descv(csrc_), npcol )
382 lwk6 = rrows*rcols + taurows*taucols +
383 $ 2*iceil(iceil(jw+iroffh,nb),nprow)*nb
384 $ *iceil(iceil(jw+iroffh,nb),npcol)*nb
385*
386* Extra workspace needed by PBLAS update calls
387* (also estimated for simplicity).
388*
389 lwk7 = max( iceil(iceil(jw,nb),nprow)*nb *
390 $ iceil(iceil(n-kbot,nb),npcol)*nb,
391 $ iceil(iceil(ihiz-iloz+1,nb),nprow)*nb *
392 $ iceil(iceil(jw,nb),npcol)*nb,
393 $ iceil(iceil(kbot-jw,nb),nprow)*nb *
394 $ iceil(iceil(jw,nb),npcol)*nb )
395*
396* Residual check workspace.
397*
398 lwk8 = 0
399*
400* Optimal workspace.
401*
402 lwkopt = max( lwk1, lwk2, lwk3+lwk4, lwk5, lwk6, lwk7, lwk8 )
403 ilwkopt = max( iwrk1, iwrk2 )
404 END IF
405*
406* Quick return in case of workspace query.
407*
408 work( 1 ) = dble( lwkopt )
409*
410* IWORK(1:NSEL) is used as the array SELECT for PDTRORD.
411*
412 iwork( 1 ) = ilwkopt + nsel
413 IF( lquery )
414 $ RETURN
415*
416* Nothing to do for an empty active block ...
417 ns = 0
418 nd = 0
419 IF( ktop.GT.kbot )
420 $ RETURN
421* ... nor for an empty deflation window.
422*
423 IF( nw.LT.1 )
424 $ RETURN
425*
426* Machine constants.
427*
428 safmin = dlamch( 'SAFE MINIMUM' )
429 safmax = one / safmin
430 CALL dlabad( safmin, safmax )
431 ulp = dlamch( 'PRECISION' )
432 smlnum = safmin*( dble( n ) / ulp )
433*
434* Setup deflation window.
435*
436 jw = min( nw, kbot-ktop+1 )
437 kwtop = kbot - jw + 1
438 IF( kwtop.EQ.ktop ) THEN
439 s = zero
440 ELSE
441 CALL pdelget( 'All', '1-Tree', s, h, kwtop, kwtop-1, desch )
442 END IF
443*
444 IF( kbot.EQ.kwtop ) THEN
445*
446* 1-by-1 deflation window: not much to do.
447*
448 CALL pdelget( 'All', '1-Tree', sr( kwtop ), h, kwtop, kwtop,
449 $ desch )
450 si( kwtop ) = zero
451 ns = 1
452 nd = 0
453 IF( abs( s ).LE.max( smlnum, ulp*abs( sr( kwtop ) ) ) )
454 $ THEN
455 ns = 0
456 nd = 1
457 IF( kwtop.GT.ktop )
458 $ CALL pdelset( h, kwtop, kwtop-1 , desch, zero )
459 END IF
460 RETURN
461 END IF
462*
463 IF( kwtop.EQ.ktop .AND. kbot-kwtop.EQ.1 ) THEN
464*
465* 2-by-2 deflation window: a little more to do.
466*
467 CALL pdelget( 'All', '1-Tree', aa, h, kwtop, kwtop, desch )
468 CALL pdelget( 'All', '1-Tree', bb, h, kwtop, kwtop+1, desch )
469 CALL pdelget( 'All', '1-Tree', cc, h, kwtop+1, kwtop, desch )
470 CALL pdelget( 'All', '1-Tree', dd, h, kwtop+1, kwtop+1, desch )
471 CALL dlanv2( aa, bb, cc, dd, sr(kwtop), si(kwtop),
472 $ sr(kwtop+1), si(kwtop+1), cs, sn )
473 ns = 0
474 nd = 2
475 IF( cc.EQ.zero ) THEN
476 i = kwtop
477 IF( i+2.LE.n .AND. wantt )
478 $ CALL pdrot( n-i-1, h, i, i+2, desch, desch(m_), h, i+1,
479 $ i+2, desch, desch(m_), cs, sn, work, lwork, info )
480 IF( i.GT.1 )
481 $ CALL pdrot( i-1, h, 1, i, desch, 1, h, 1, i+1, desch, 1,
482 $ cs, sn, work, lwork, info )
483 IF( wantz )
484 $ CALL pdrot( ihiz-iloz+1, z, iloz, i, descz, 1, z, iloz,
485 $ i+1, descz, 1, cs, sn, work, lwork, info )
486 CALL pdelset( h, i, i, desch, aa )
487 CALL pdelset( h, i, i+1, desch, bb )
488 CALL pdelset( h, i+1, i, desch, cc )
489 CALL pdelset( h, i+1, i+1, desch, dd )
490 END IF
491 work( 1 ) = dble( lwkopt )
492 RETURN
493 END IF
494*
495* Calculate new value for IROFFH in case deflation window
496* was adjusted.
497*
498 iroffh = mod( kwtop - 1, nb )
499*
500* Adjust number of rows and columns of T matrix descriptor
501* to prepare for call to PDBTRORD.
502*
503 desct( m_ ) = jw+iroffh
504 desct( n_ ) = jw+iroffh
505*
506* Convert to spike-triangular form. (In case of a rare QR failure,
507* this routine continues to do aggressive early deflation using that
508* part of the deflation window that converged using INFQR here and
509* there to keep track.)
510*
511* Copy the trailing submatrix to the working space.
512*
513 CALL pdlaset( 'All', iroffh, jw+iroffh, zero, one, t, 1, 1,
514 $ desct )
515 CALL pdlaset( 'All', jw, iroffh, zero, zero, t, 1+iroffh, 1,
516 $ desct )
517 CALL pdlacpy( 'All', 1, jw, h, kwtop, kwtop, desch, t, 1+iroffh,
518 $ 1+iroffh, desct )
519 CALL pdlacpy( 'Upper', jw-1, jw-1, h, kwtop+1, kwtop, desch, t,
520 $ 1+iroffh+1, 1+iroffh, desct )
521 IF( jw.GT.2 )
522 $ CALL pdlaset( 'Lower', jw-2, jw-2, zero, zero, t, 1+iroffh+2,
523 $ 1+iroffh, desct )
524 CALL pdlacpy( 'All', jw-1, 1, h, kwtop+1, kwtop+jw-1, desch, t,
525 $ 1+iroffh+1, 1+iroffh+jw-1, desct )
526*
527* Initialize the working orthogonal matrix.
528*
529 CALL pdlaset( 'All', jw+iroffh, jw+iroffh, zero, one, v, 1, 1,
530 $ descv )
531*
532* Compute the Schur form of T.
533*
534 npmin = pilaenvx( ictxt, 23, 'PDLAQR3', 'SV', jw, nb, nprow,
535 $ npcol )
536 nmin = pilaenvx( ictxt, 12, 'PDLAQR3', 'SV', jw, 1, jw, lwork )
537 nmax = ( n-1 ) / 3
538 IF( min(nprow, npcol).LE.npmin+1 .OR. reclevel.GE.1 ) THEN
539*
540* The AED window is large enough.
541* Compute the Schur decomposition with all processors.
542*
543 IF( jw+iroffh.GT.nmin .AND. jw+iroffh.LE.nmax
544 $ .AND. reclevel.LT.recmax ) THEN
545 CALL pdlaqr0( .true., .true., jw+iroffh, 1+iroffh,
546 $ jw+iroffh, t, desct, sr( kwtop-iroffh ),
547 $ si( kwtop-iroffh ), 1+iroffh, jw+iroffh, v, descv,
548 $ work, lwork, iwork(nsel+1), liwork-nsel, infqr,
549 $ reclevel+1 )
550 ELSE
551 IF( desct(rsrc_).EQ.0 .AND. desct(csrc_).EQ.0 ) THEN
552 IF( jw+iroffh.GT.desct( mb_ ) ) THEN
553 CALL pdlaqr1( .true., .true., jw+iroffh, 1,
554 $ jw+iroffh, t, desct, sr( kwtop-iroffh ),
555 $ si( kwtop-iroffh ), 1, jw+iroffh, v,
556 $ descv, work, lwork, iwork(nsel+1), liwork-nsel,
557 $ infqr )
558 ELSE
559 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
560 CALL dlahqr( .true., .true., jw+iroffh, 1+iroffh,
561 $ jw+iroffh, t, desct(lld_),
562 $ sr( kwtop-iroffh ), si( kwtop-iroffh ),
563 $ 1+iroffh, jw+iroffh, v, descv(lld_), infqr )
564 ELSE
565 infqr = 0
566 END IF
567 IF( nprocs.GT.1 )
568 $ CALL igamn2d( ictxt, 'All', '1-Tree', 1, 1, infqr,
569 $ 1, -1, -1, -1, -1, -1 )
570 END IF
571 ELSEIF( jw+iroffh.LE.desct( mb_ ) ) THEN
572 IF( myrow.EQ.desct(rsrc_) .AND. mycol.EQ.desct(csrc_) )
573 $ THEN
574 CALL dlahqr( .true., .true., jw+iroffh, 1+iroffh,
575 $ jw+iroffh, t, desct(lld_),
576 $ sr( kwtop-iroffh ), si( kwtop-iroffh ),
577 $ 1+iroffh, jw+iroffh, v, descv(lld_), infqr )
578 ELSE
579 infqr = 0
580 END IF
581 IF( nprocs.GT.1 )
582 $ CALL igamn2d( ictxt, 'All', '1-Tree', 1, 1, infqr,
583 $ 1, -1, -1, -1, -1, -1 )
584 ELSE
585 tzrows0 = numroc( jw+iroffh, nb, myrow, 0, nprow )
586 tzcols0 = numroc( jw+iroffh, nb, mycol, 0, npcol )
587 CALL descinit( desctz0, jw+iroffh, jw+iroffh, nb, nb, 0,
588 $ 0, ictxt, max(1,tzrows0), ierr0 )
589 ipt0 = 1
590 ipz0 = ipt0 + max(1,tzrows0)*tzcols0
591 ipw0 = ipz0 + max(1,tzrows0)*tzcols0
592 CALL pdlamve( 'All', jw+iroffh, jw+iroffh, t, 1, 1,
593 $ desct, work(ipt0), 1, 1, desctz0, work(ipw0) )
594 CALL pdlaset( 'All', jw+iroffh, jw+iroffh, zero, one,
595 $ work(ipz0), 1, 1, desctz0 )
596 CALL pdlaqr1( .true., .true., jw+iroffh, 1,
597 $ jw+iroffh, work(ipt0), desctz0,
598 $ sr( kwtop-iroffh ), si( kwtop-iroffh ),
599 $ 1, jw+iroffh, work(ipz0),
600 $ desctz0, work(ipw0), lwork-ipw0+1, iwork(nsel+1),
601 $ liwork-nsel, infqr )
602 CALL pdlamve( 'All', jw+iroffh, jw+iroffh, work(ipt0), 1,
603 $ 1, desctz0, t, 1, 1, desct, work(ipw0) )
604 CALL pdlamve( 'All', jw+iroffh, jw+iroffh, work(ipz0), 1,
605 $ 1, desctz0, v, 1, 1, descv, work(ipw0) )
606 END IF
607 END IF
608 ELSE
609*
610* The AED window is too small.
611* Redistribute the AED window to a subgrid
612* and do the computation on the subgrid.
613*
614 ictxt_new = ictxt
615 DO 20 i = 0, npmin-1
616 DO 10 j = 0, npmin-1
617 pmap( j+1+i*npmin ) = blacs_pnum( ictxt, i, j )
618 10 CONTINUE
619 20 CONTINUE
620 CALL blacs_gridmap( ictxt_new, pmap, npmin, npmin, npmin )
621 CALL blacs_gridinfo( ictxt_new, npmin, npmin, myrow_new,
622 $ mycol_new )
623 IF( myrow.GE.npmin .OR. mycol.GE.npmin ) ictxt_new = -1
624 IF( ictxt_new.GE.0 ) THEN
625 tzrows0 = numroc( jw, nb, myrow_new, 0, npmin )
626 tzcols0 = numroc( jw, nb, mycol_new, 0, npmin )
627 CALL descinit( desctz0, jw, jw, nb, nb, 0,
628 $ 0, ictxt_new, max(1,tzrows0), ierr0 )
629 ipt0 = 1
630 ipz0 = ipt0 + max(1,tzrows0)*max(1,tzcols0)
631 ipw0 = ipz0 + max(1,tzrows0)*max(1,tzcols0)
632 ELSE
633 ipt0 = 1
634 ipz0 = 2
635 ipw0 = 3
636 desctz0( ctxt_ ) = -1
637 infqr = 0
638 END IF
639 CALL pdgemr2d( jw, jw, t, 1+iroffh, 1+iroffh, desct,
640 $ work(ipt0), 1, 1, desctz0, ictxt )
641 IF( ictxt_new.GE.0 ) THEN
642 CALL pdlaset( 'All', jw, jw, zero, one, work(ipz0), 1, 1,
643 $ desctz0 )
644 nmin = pilaenvx( ictxt_new, 12, 'PDLAQR3', 'SV', jw, 1, jw,
645 $ lwork )
646 IF( jw.GT.nmin .AND. jw.LE.nmax .AND. reclevel.LT.1 ) THEN
647 CALL pdlaqr0( .true., .true., jw, 1, jw, work(ipt0),
648 $ desctz0, sr( kwtop ), si( kwtop ), 1, jw,
649 $ work(ipz0), desctz0, work(ipw0), lwork-ipw0+1,
650 $ iwork(nsel+1), liwork-nsel, infqr,
651 $ reclevel+1 )
652 ELSE
653 CALL pdlaqr1( .true., .true., jw, 1, jw, work(ipt0),
654 $ desctz0, sr( kwtop ), si( kwtop ), 1, jw,
655 $ work(ipz0), desctz0, work(ipw0), lwork-ipw0+1,
656 $ iwork(nsel+1), liwork-nsel, infqr )
657 END IF
658 END IF
659 CALL pdgemr2d( jw, jw, work(ipt0), 1, 1, desctz0, t, 1+iroffh,
660 $ 1+iroffh, desct, ictxt )
661 CALL pdgemr2d( jw, jw, work(ipz0), 1, 1, desctz0, v, 1+iroffh,
662 $ 1+iroffh, descv, ictxt )
663 IF( ictxt_new.GE.0 )
664 $ CALL blacs_gridexit( ictxt_new )
665 IF( myrow+mycol.GT.0 ) THEN
666 DO 40 j = 0, jw-1
667 sr( kwtop+j ) = zero
668 si( kwtop+j ) = zero
669 40 CONTINUE
670 END IF
671 CALL igamn2d( ictxt, 'All', '1-Tree', 1, 1, infqr, 1, -1, -1,
672 $ -1, -1, -1 )
673 CALL dgsum2d( ictxt, 'All', ' ', jw, 1, sr(kwtop), jw, -1, -1 )
674 CALL dgsum2d( ictxt, 'All', ' ', jw, 1, si(kwtop), jw, -1, -1 )
675 END IF
676*
677* Adjust INFQR for offset from block border in submatrices.
678*
679 IF( infqr.NE.0 )
680 $ infqr = infqr - iroffh
681*
682* PDTRORD needs a clean margin near the diagonal.
683*
684 DO 50 j = 1, jw - 3
685 CALL pdelset( t, j+2, j, desct, zero )
686 CALL pdelset( t, j+3, j, desct, zero )
687 50 CONTINUE
688 IF( jw.GT.2 )
689 $ CALL pdelset( t, jw, jw-2, desct, zero )
690*
691* Check local residual for AED Schur decomposition.
692*
693 resaed = 0.0d+00
694*
695* Clean up the array SELECT for PDTRORD.
696*
697 DO 60 j = 1, nsel
698 iwork( j ) = 0
699 60 CONTINUE
700*
701* Set local M counter to zero.
702*
703 mloc = 0
704*
705* Outer deflation detection loop (label 80).
706* In this loop a bunch of undeflatable eigenvalues
707* are moved simultaneously.
708*
709 DO 70 j = 1, iroffh + infqr
710 iwork( j ) = 1
711 70 CONTINUE
712*
713 ns = jw
714 ilst = infqr + 1 + iroffh
715 IF( ilst.GT.1 ) THEN
716 CALL pdelget( 'All', '1-Tree', elem, t, ilst, ilst-1, desct )
717 bulge = elem.NE.zero
718 IF( bulge ) ilst = ilst+1
719 END IF
720*
721 80 CONTINUE
722 IF( ilst.LE.ns+iroffh ) THEN
723*
724* Find the top-left corner of the local window.
725*
726 lilst = max(ilst,ns+iroffh-nb+1)
727 IF( lilst.GT.1 ) THEN
728 CALL pdelget( 'All', '1-Tree', elem, t, lilst, lilst-1,
729 $ desct )
730 bulge = elem.NE.zero
731 IF( bulge ) lilst = lilst+1
732 END IF
733*
734* Lock all eigenvalues outside the local window.
735*
736 DO 90 j = iroffh+1, lilst-1
737 iwork( j ) = 1
738 90 CONTINUE
739 lilst0 = lilst
740*
741* Inner deflation detection loop (label 100).
742* In this loop, the undeflatable eigenvalues are moved to the
743* top-left corner of the local window.
744*
745 100 CONTINUE
746 IF( lilst.LE.ns+iroffh ) THEN
747 IF( ns.EQ.1 ) THEN
748 bulge = .false.
749 ELSE
750 CALL pdelget( 'All', '1-Tree', elem, t, ns+iroffh,
751 $ ns+iroffh-1, desct )
752 bulge = elem.NE.zero
753 END IF
754*
755* Small spike tip test for deflation.
756*
757 IF( .NOT.bulge ) THEN
758*
759* Real eigenvalue.
760*
761 CALL pdelget( 'All', '1-Tree', elem, t, ns+iroffh,
762 $ ns+iroffh, desct )
763 foo = abs( elem )
764 IF( foo.EQ.zero )
765 $ foo = abs( s )
766 CALL pdelget( 'All', '1-Tree', elem, v, 1+iroffh,
767 $ ns+iroffh, descv )
768 IF( abs( s*elem ).LE.max( smlnum, ulp*foo ) ) THEN
769*
770* Deflatable.
771*
772 ns = ns - 1
773 ELSE
774*
775* Undeflatable: move it up out of the way.
776*
777 ifst = ns
778 DO 110 j = lilst, jw+iroffh
779 iwork( j ) = 0
780 110 CONTINUE
781 iwork( ifst+iroffh ) = 1
782 CALL pdtrord( 'Vectors', iwork, par, jw+iroffh, t, 1,
783 $ 1, desct, v, 1, 1, descv, work,
784 $ work(jw+iroffh+1), mloc,
785 $ work(2*(jw+iroffh)+1), lwork-2*(jw+iroffh),
786 $ iwork(nsel+1), liwork-nsel, info )
787*
788* Adjust the array SELECT explicitly so that it does not
789* rely on the output of PDTRORD.
790*
791 iwork( ifst+iroffh ) = 0
792 iwork( lilst ) = 1
793 lilst = lilst + 1
794*
795* In case of a rare exchange failure, adjust the
796* pointers ILST and LILST to the current place to avoid
797* unexpected behaviors.
798*
799 IF( info.NE.0 ) THEN
800 lilst = max(info, lilst)
801 ilst = max(info, ilst)
802 END IF
803 END IF
804 ELSE
805*
806* Complex conjugate pair.
807*
808 CALL pdelget( 'All', '1-Tree', elem1, t, ns+iroffh,
809 $ ns+iroffh, desct )
810 CALL pdelget( 'All', '1-Tree', elem2, t, ns+iroffh,
811 $ ns+iroffh-1, desct )
812 CALL pdelget( 'All', '1-Tree', elem3, t, ns+iroffh-1,
813 $ ns+iroffh, desct )
814 foo = abs( elem1 ) + sqrt( abs( elem2 ) )*
815 $ sqrt( abs( elem3 ) )
816 IF( foo.EQ.zero )
817 $ foo = abs( s )
818 CALL pdelget( 'All', '1-Tree', elem1, v, 1+iroffh,
819 $ ns+iroffh, descv )
820 CALL pdelget( 'All', '1-Tree', elem2, v, 1+iroffh,
821 $ ns+iroffh-1, descv )
822 IF( max( abs( s*elem1 ), abs( s*elem2 ) ).LE.
823 $ max( smlnum, ulp*foo ) ) THEN
824*
825* Deflatable.
826*
827 ns = ns - 2
828 ELSE
829*
830* Undeflatable: move them up out of the way.
831*
832 ifst = ns
833 DO 120 j = lilst, jw+iroffh
834 iwork( j ) = 0
835 120 CONTINUE
836 iwork( ifst+iroffh ) = 1
837 iwork( ifst+iroffh-1 ) = 1
838 CALL pdtrord( 'Vectors', iwork, par, jw+iroffh, t, 1,
839 $ 1, desct, v, 1, 1, descv, work,
840 $ work(jw+iroffh+1), mloc,
841 $ work(2*(jw+iroffh)+1), lwork-2*(jw+iroffh),
842 $ iwork(nsel+1), liwork-nsel, info )
843*
844* Adjust the array SELECT explicitly so that it does not
845* rely on the output of PDTRORD.
846*
847 iwork( ifst+iroffh ) = 0
848 iwork( ifst+iroffh-1 ) = 0
849 iwork( lilst ) = 1
850 iwork( lilst+1 ) = 1
851 lilst = lilst + 2
852*
853* In case of a rare exchange failure, adjust the
854* pointers ILST and LILST to the current place to avoid
855* unexpected behaviors.
856*
857 IF( info.NE.0 ) THEN
858 lilst = max(info, lilst)
859 ilst = max(info, ilst)
860 END IF
861 END IF
862 END IF
863*
864* End of inner deflation detection loop.
865*
866 GO TO 100
867 END IF
868*
869* Unlock the eigenvalues outside the local window.
870* Then undeflatable eigenvalues are moved to the proper position.
871*
872 DO 130 j = ilst, lilst0-1
873 iwork( j ) = 0
874 130 CONTINUE
875 CALL pdtrord( 'Vectors', iwork, par, jw+iroffh, t, 1, 1,
876 $ desct, v, 1, 1, descv, work, work(jw+iroffh+1),
877 $ m, work(2*(jw+iroffh)+1), lwork-2*(jw+iroffh),
878 $ iwork(nsel+1), liwork-nsel, info )
879 ilst = m + 1
880*
881* In case of a rare exchange failure, adjust the pointer ILST to
882* the current place to avoid unexpected behaviors.
883*
884 IF( info.NE.0 )
885 $ ilst = max(info, ilst)
886*
887* End of outer deflation detection loop.
888*
889 GO TO 80
890 END IF
891
892*
893* Post-reordering step: copy output eigenvalues to output.
894*
895 CALL dcopy( jw, work(1+iroffh), 1, sr( kwtop ), 1 )
896 CALL dcopy( jw, work(jw+2*iroffh+1), 1, si( kwtop ), 1 )
897*
898* Check local residual for reordered AED Schur decomposition.
899*
900 resaed = 0.0d+00
901*
902* Return to Hessenberg form.
903*
904 IF( ns.EQ.0 )
905 $ s = zero
906*
907 IF( ns.LT.jw .AND. sortgrad ) THEN
908*
909* Sorting diagonal blocks of T improves accuracy for
910* graded matrices. Bubble sort deals well with exchange
911* failures. Eigenvalues/shifts from T are also restored.
912*
913 round = 0
914 sorted = .false.
915 i = ns + 1 + iroffh
916 140 CONTINUE
917 IF( sorted )
918 $ GO TO 180
919 sorted = .true.
920 round = round + 1
921*
922 kend = i - 1
923 i = infqr + 1 + iroffh
924 IF( i.EQ.ns+iroffh ) THEN
925 k = i + 1
926 ELSE IF( si( kwtop-iroffh + i-1 ).EQ.zero ) THEN
927 k = i + 1
928 ELSE
929 k = i + 2
930 END IF
931 150 CONTINUE
932 IF( k.LE.kend ) THEN
933 IF( k.EQ.i+1 ) THEN
934 evi = abs( sr( kwtop-iroffh+i-1 ) )
935 ELSE
936 evi = abs( sr( kwtop-iroffh+i-1 ) ) +
937 $ abs( si( kwtop-iroffh+i-1 ) )
938 END IF
939*
940 IF( k.EQ.kend ) THEN
941 evk = abs( sr( kwtop-iroffh+k-1 ) )
942 ELSEIF( si( kwtop-iroffh+k-1 ).EQ.zero ) THEN
943 evk = abs( sr( kwtop-iroffh+k-1 ) )
944 ELSE
945 evk = abs( sr( kwtop-iroffh+k-1 ) ) +
946 $ abs( si( kwtop-iroffh+k-1 ) )
947 END IF
948*
949 IF( evi.GE.evk ) THEN
950 i = k
951 ELSE
952 mloc = 0
953 sorted = .false.
954 ifst = i
955 ilst = k
956 DO 160 j = 1, i-1
957 iwork( j ) = 1
958 mloc = mloc + 1
959 160 CONTINUE
960 IF( k.EQ.i+2 ) THEN
961 iwork( i ) = 0
962 iwork(i+1) = 0
963 ELSE
964 iwork( i ) = 0
965 END IF
966 IF( k.NE.kend .AND. si( kwtop-iroffh+k-1 ).NE.zero ) THEN
967 iwork( k ) = 1
968 iwork(k+1) = 1
969 mloc = mloc + 2
970 ELSE
971 iwork( k ) = 1
972 IF( k.LT.kend ) iwork(k+1) = 0
973 mloc = mloc + 1
974 END IF
975 DO 170 j = k+2, jw+iroffh
976 iwork( j ) = 0
977 170 CONTINUE
978 CALL pdtrord( 'Vectors', iwork, par, jw+iroffh, t, 1, 1,
979 $ desct, v, 1, 1, descv, work, work(jw+iroffh+1), m,
980 $ work(2*(jw+iroffh)+1), lwork-2*(jw+iroffh),
981 $ iwork(nsel+1), liwork-nsel, ierr )
982 CALL dcopy( jw, work(1+iroffh), 1, sr( kwtop ), 1 )
983 CALL dcopy( jw, work(jw+2*iroffh+1), 1, si( kwtop ), 1 )
984 IF( ierr.EQ.0 ) THEN
985 i = ilst
986 ELSE
987 i = k
988 END IF
989 END IF
990 IF( i.EQ.kend ) THEN
991 k = i + 1
992 ELSE IF( si( kwtop-iroffh+i-1 ).EQ.zero ) THEN
993 k = i + 1
994 ELSE
995 k = i + 2
996 END IF
997 GO TO 150
998 END IF
999 GO TO 140
1000 180 CONTINUE
1001 END IF
1002*
1003* Restore number of rows and columns of T matrix descriptor.
1004*
1005 desct( m_ ) = nw+iroffh
1006 desct( n_ ) = nh+iroffh
1007*
1008 IF( ns.LT.jw .OR. s.EQ.zero ) THEN
1009 IF( ns.GT.1 .AND. s.NE.zero ) THEN
1010*
1011* Reflect spike back into lower triangle.
1012*
1013 rrows = numroc( ns+iroffh, nb, myrow, descv(rsrc_), nprow )
1014 rcols = numroc( 1, 1, mycol, descv(csrc_), npcol )
1015 CALL descinit( descr, ns+iroffh, 1, nb, 1, descv(rsrc_),
1016 $ descv(csrc_), ictxt, max(1, rrows), info )
1017 taurows = numroc( 1, 1, mycol, descv(rsrc_), nprow )
1018 taucols = numroc( jw+iroffh, nb, mycol, descv(csrc_),
1019 $ npcol )
1020 CALL descinit( desctau, 1, jw+iroffh, 1, nb, descv(rsrc_),
1021 $ descv(csrc_), ictxt, max(1, taurows), info )
1022*
1023 ir = 1
1024 itau = ir + descr( lld_ ) * rcols
1025 ipw = itau + desctau( lld_ ) * taucols
1026*
1027 CALL pdlaset( 'All', ns+iroffh, 1, zero, zero, work(itau),
1028 $ 1, 1, desctau )
1029*
1030 CALL pdcopy( ns, v, 1+iroffh, 1+iroffh, descv, descv(m_),
1031 $ work(ir), 1+iroffh, 1, descr, 1 )
1032 CALL pdlarfg( ns, beta, 1+iroffh, 1, work(ir), 2+iroffh, 1,
1033 $ descr, 1, work(itau+iroffh) )
1034 CALL pdelset( work(ir), 1+iroffh, 1, descr, one )
1035*
1036 CALL pdlaset( 'Lower', jw-2, jw-2, zero, zero, t, 3+iroffh,
1037 $ 1+iroffh, desct )
1038*
1039 CALL pdlarf( 'Left', ns, jw, work(ir), 1+iroffh, 1, descr,
1040 $ 1, work(itau+iroffh), t, 1+iroffh, 1+iroffh,
1041 $ desct, work( ipw ) )
1042 CALL pdlarf( 'Right', ns, ns, work(ir), 1+iroffh, 1, descr,
1043 $ 1, work(itau+iroffh), t, 1+iroffh, 1+iroffh,
1044 $ desct, work( ipw ) )
1045 CALL pdlarf( 'Right', jw, ns, work(ir), 1+iroffh, 1, descr,
1046 $ 1, work(itau+iroffh), v, 1+iroffh, 1+iroffh,
1047 $ descv, work( ipw ) )
1048*
1049 itau = 1
1050 ipw = itau + desctau( lld_ ) * taucols
1051 CALL pdgehrd( jw+iroffh, 1+iroffh, ns+iroffh, t, 1, 1,
1052 $ desct, work(itau), work( ipw ), lwork-ipw+1, info )
1053 END IF
1054*
1055* Copy updated reduced window into place.
1056*
1057 IF( kwtop.GT.1 ) THEN
1058 CALL pdelget( 'All', '1-Tree', elem, v, 1+iroffh,
1059 $ 1+iroffh, descv )
1060 CALL pdelset( h, kwtop, kwtop-1, desch, s*elem )
1061 END IF
1062 CALL pdlacpy( 'Upper', jw-1, jw-1, t, 1+iroffh+1, 1+iroffh,
1063 $ desct, h, kwtop+1, kwtop, desch )
1064 CALL pdlacpy( 'All', 1, jw, t, 1+iroffh, 1+iroffh, desct, h,
1065 $ kwtop, kwtop, desch )
1066 CALL pdlacpy( 'All', jw-1, 1, t, 1+iroffh+1, 1+iroffh+jw-1,
1067 $ desct, h, kwtop+1, kwtop+jw-1, desch )
1068*
1069* Accumulate orthogonal matrix in order to update
1070* H and Z, if requested.
1071*
1072 IF( ns.GT.1 .AND. s.NE.zero ) THEN
1073 CALL pdormhr( 'Right', 'No', jw+iroffh, ns+iroffh, 1+iroffh,
1074 $ ns+iroffh, t, 1, 1, desct, work(itau), v, 1,
1075 $ 1, descv, work( ipw ), lwork-ipw+1, info )
1076 END IF
1077*
1078* Update vertical slab in H.
1079*
1080 IF( wantt ) THEN
1081 ltop = 1
1082 ELSE
1083 ltop = ktop
1084 END IF
1085 kln = max( 0, kwtop-ltop )
1086 iroffhh = mod( ltop-1, nb )
1087 icoffhh = mod( kwtop-1, nb )
1088 hhrsrc = indxg2p( ltop, nb, myrow, desch(rsrc_), nprow )
1089 hhcsrc = indxg2p( kwtop, nb, mycol, desch(csrc_), npcol )
1090 hhrows = numroc( kln+iroffhh, nb, myrow, hhrsrc, nprow )
1091 hhcols = numroc( jw+icoffhh, nb, mycol, hhcsrc, npcol )
1092 CALL descinit( deschh, kln+iroffhh, jw+icoffhh, nb, nb,
1093 $ hhrsrc, hhcsrc, ictxt, max(1, hhrows), ierr )
1094 CALL pdgemm( 'No', 'No', kln, jw, jw, one, h, ltop,
1095 $ kwtop, desch, v, 1+iroffh, 1+iroffh, descv, zero,
1096 $ work, 1+iroffhh, 1+icoffhh, deschh )
1097 CALL pdlacpy( 'All', kln, jw, work, 1+iroffhh, 1+icoffhh,
1098 $ deschh, h, ltop, kwtop, desch )
1099*
1100* Update horizontal slab in H.
1101*
1102 IF( wantt ) THEN
1103 kln = n-kbot
1104 iroffhh = mod( kwtop-1, nb )
1105 icoffhh = mod( kbot, nb )
1106 hhrsrc = indxg2p( kwtop, nb, myrow, desch(rsrc_), nprow )
1107 hhcsrc = indxg2p( kbot+1, nb, mycol, desch(csrc_), npcol )
1108 hhrows = numroc( jw+iroffhh, nb, myrow, hhrsrc, nprow )
1109 hhcols = numroc( kln+icoffhh, nb, mycol, hhcsrc, npcol )
1110 CALL descinit( deschh, jw+iroffhh, kln+icoffhh, nb, nb,
1111 $ hhrsrc, hhcsrc, ictxt, max(1, hhrows), ierr )
1112 CALL pdgemm( 'Tr', 'No', jw, kln, jw, one, v,
1113 $ 1+iroffh, 1+iroffh, descv, h, kwtop, kbot+1,
1114 $ desch, zero, work, 1+iroffhh, 1+icoffhh, deschh )
1115 CALL pdlacpy( 'All', jw, kln, work, 1+iroffhh, 1+icoffhh,
1116 $ deschh, h, kwtop, kbot+1, desch )
1117 END IF
1118*
1119* Update vertical slab in Z.
1120*
1121 IF( wantz ) THEN
1122 kln = ihiz-iloz+1
1123 iroffzz = mod( iloz-1, nb )
1124 icoffzz = mod( kwtop-1, nb )
1125 zzrsrc = indxg2p( iloz, nb, myrow, descz(rsrc_), nprow )
1126 zzcsrc = indxg2p( kwtop, nb, mycol, descz(csrc_), npcol )
1127 zzrows = numroc( kln+iroffzz, nb, myrow, zzrsrc, nprow )
1128 zzcols = numroc( jw+icoffzz, nb, mycol, zzcsrc, npcol )
1129 CALL descinit( desczz, kln+iroffzz, jw+icoffzz, nb, nb,
1130 $ zzrsrc, zzcsrc, ictxt, max(1, zzrows), ierr )
1131 CALL pdgemm( 'No', 'No', kln, jw, jw, one, z, iloz,
1132 $ kwtop, descz, v, 1+iroffh, 1+iroffh, descv,
1133 $ zero, work, 1+iroffzz, 1+icoffzz, desczz )
1134 CALL pdlacpy( 'All', kln, jw, work, 1+iroffzz, 1+icoffzz,
1135 $ desczz, z, iloz, kwtop, descz )
1136 END IF
1137 END IF
1138*
1139* Return the number of deflations (ND) and the number of shifts (NS).
1140* (Subtracting INFQR from the spike length takes care of the case of
1141* a rare QR failure while calculating eigenvalues of the deflation
1142* window.)
1143*
1144 nd = jw - ns
1145 ns = ns - infqr
1146*
1147* Return optimal workspace.
1148*
1149 work( 1 ) = dble( lwkopt )
1150 iwork( 1 ) = ilwkopt + nsel
1151*
1152* End of PDLAQR3
1153*
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3
integer function iceil(inum, idenom)
Definition iceil.f:2
integer function indxg2p(indxglob, nb, iproc, isrcproc, nprocs)
Definition indxg2p.f:2
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pdlaset(uplo, m, n, alpha, beta, a, ia, ja, desca)
Definition pdblastst.f:6862
subroutine pdelget(scope, top, alpha, a, ia, ja, desca)
Definition pdelget.f:2
subroutine pdelset(a, ia, ja, desca, alpha)
Definition pdelset.f:2
subroutine pdgehrd(n, ilo, ihi, a, ia, ja, desca, tau, work, lwork, info)
Definition pdgehrd.f:3
subroutine pdlacpy(uplo, m, n, a, ia, ja, desca, b, ib, jb, descb)
Definition pdlacpy.f:3
subroutine pdlamve(uplo, m, n, a, ia, ja, desca, b, ib, jb, descb, dwork)
Definition pdlamve.f:3
double precision function pdlange(norm, m, n, a, ia, ja, desca, work)
Definition pdlange.f:3
recursive subroutine pdlaqr0(wantt, wantz, n, ilo, ihi, h, desch, wr, wi, iloz, ihiz, z, descz, work, lwork, iwork, liwork, info, reclevel)
Definition pdlaqr0.f:4
recursive subroutine pdlaqr1(wantt, wantz, n, ilo, ihi, a, desca, wr, wi, iloz, ihiz, z, descz, work, lwork, iwork, ilwork, info)
Definition pdlaqr1.f:5
subroutine pdlarf(side, m, n, v, iv, jv, descv, incv, tau, c, ic, jc, descc, work)
Definition pdlarf.f:3
subroutine pdlarfg(n, alpha, iax, jax, x, ix, jx, descx, incx, tau)
Definition pdlarfg.f:3
subroutine pdormhr(side, trans, m, n, ilo, ihi, a, ia, ja, desca, tau, c, ic, jc, descc, work, lwork, info)
Definition pdormhr.f:3
subroutine pdrot(n, x, ix, jx, descx, incx, y, iy, jy, descy, incy, cs, sn, work, lwork, info)
Definition pdrot.f:3
subroutine pdtrord(compq, select, para, n, t, it, jt, desct, q, iq, jq, descq, wr, wi, m, work, lwork, iwork, liwork, info)
Definition pdtrord.f:4
integer function pilaenvx(ictxt, ispec, name, opts, n1, n2, n3, n4)
Definition pilaenvx.f:3
double precision function dlamch(cmach)
Definition tools.f:10
Here is the call graph for this function:
Here is the caller graph for this function: