SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ pslaqr3()

recursive subroutine pslaqr3 ( logical  wantt,
logical  wantz,
integer  n,
integer  ktop,
integer  kbot,
integer  nw,
real, dimension( * )  h,
integer, dimension( * )  desch,
integer  iloz,
integer  ihiz,
real, dimension( * )  z,
integer, dimension( * )  descz,
integer  ns,
integer  nd,
real, dimension( kbot )  sr,
real, dimension( kbot )  si,
real, dimension( * )  v,
integer, dimension( * )  descv,
integer  nh,
real, dimension( * )  t,
integer, dimension( * )  desct,
integer  nv,
real, dimension( * )  wv,
integer, dimension( * )  descw,
real, dimension( * )  work,
integer  lwork,
integer, dimension( * )  iwork,
integer  liwork,
integer  reclevel 
)

Definition at line 1 of file pslaqr3.f.

6*
7* Contribution from the Department of Computing Science and HPC2N,
8* Umea University, Sweden
9*
10* -- ScaLAPACK auxiliary routine (version 2.0.1) --
11* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
12* Univ. of Colorado Denver and University of California, Berkeley.
13* January, 2012
14*
15 IMPLICIT NONE
16*
17* .. Scalar Arguments ..
18 INTEGER IHIZ, ILOZ, KBOT, KTOP, LWORK, N, ND, NH, NS,
19 $ NV, NW, LIWORK, RECLEVEL
20 LOGICAL WANTT, WANTZ
21* ..
22* .. Array Arguments ..
23 INTEGER DESCH( * ), DESCZ( * ), DESCT( * ), DESCV( * ),
24 $ DESCW( * ), IWORK( * )
25 REAL H( * ), SI( KBOT ), SR( KBOT ), T( * ),
26 $ V( * ), WORK( * ), WV( * ),
27 $ Z( * )
28* ..
29*
30* Purpose
31* =======
32*
33* Aggressive early deflation:
34*
35* This subroutine accepts as input an upper Hessenberg matrix H and
36* performs an orthogonal similarity transformation designed to detect
37* and deflate fully converged eigenvalues from a trailing principal
38* submatrix. On output H has been overwritten by a new Hessenberg
39* matrix that is a perturbation of an orthogonal similarity
40* transformation of H. It is to be hoped that the final version of H
41* has many zero subdiagonal entries.
42*
43* Notes
44* =====
45*
46* Each global data object is described by an associated description
47* vector. This vector stores the information required to establish
48* the mapping between an object element and its corresponding process
49* and memory location.
50*
51* Let A be a generic term for any 2D block cyclicly distributed array.
52* Such a global array has an associated description vector DESCA.
53* In the following comments, the character _ should be read as
54* "of the global array".
55*
56* NOTATION STORED IN EXPLANATION
57* --------------- -------------- --------------------------------------
58* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
59* DTYPE_A = 1.
60* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
61* the BLACS process grid A is distribu-
62* ted over. The context itself is glo-
63* bal, but the handle (the integer
64* value) may vary.
65* M_A (global) DESCA( M_ ) The number of rows in the global
66* array A.
67* N_A (global) DESCA( N_ ) The number of columns in the global
68* array A.
69* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
70* the rows of the array.
71* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
72* the columns of the array.
73* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
74* row of the array A is distributed.
75* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
76* first column of the array A is
77* distributed.
78* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
79* array. LLD_A >= MAX(1,LOCr(M_A)).
80*
81* Let K be the number of rows or columns of a distributed matrix,
82* and assume that its process grid has dimension p x q.
83* LOCr( K ) denotes the number of elements of K that a process
84* would receive if K were distributed over the p processes of its
85* process column.
86* Similarly, LOCc( K ) denotes the number of elements of K that a
87* process would receive if K were distributed over the q processes of
88* its process row.
89* The values of LOCr() and LOCc() may be determined via a call to the
90* ScaLAPACK tool function, NUMROC:
91* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
92* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
93* An upper bound for these quantities may be computed by:
94* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
95* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
96*
97* Arguments
98* =========
99*
100* WANTT (global input) LOGICAL
101* If .TRUE., then the Hessenberg matrix H is fully updated
102* so that the quasi-triangular Schur factor may be
103* computed (in cooperation with the calling subroutine).
104* If .FALSE., then only enough of H is updated to preserve
105* the eigenvalues.
106*
107* WANTZ (global input) LOGICAL
108* If .TRUE., then the orthogonal matrix Z is updated so
109* so that the orthogonal Schur factor may be computed
110* (in cooperation with the calling subroutine).
111* If .FALSE., then Z is not referenced.
112*
113* N (global input) INTEGER
114* The order of the matrix H and (if WANTZ is .TRUE.) the
115* order of the orthogonal matrix Z.
116*
117* KTOP (global input) INTEGER
118* It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
119* KBOT and KTOP together determine an isolated block
120* along the diagonal of the Hessenberg matrix.
121*
122* KBOT (global input) INTEGER
123* It is assumed without a check that either
124* KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together
125* determine an isolated block along the diagonal of the
126* Hessenberg matrix.
127*
128* NW (global input) INTEGER
129* Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1).
130*
131* H (local input/output) REAL array, dimension
132* (DESCH(LLD_),*)
133* On input the initial N-by-N section of H stores the
134* Hessenberg matrix undergoing aggressive early deflation.
135* On output H has been transformed by an orthogonal
136* similarity transformation, perturbed, and the returned
137* to Hessenberg form that (it is to be hoped) has some
138* zero subdiagonal entries.
139*
140* DESCH (global and local input) INTEGER array of dimension DLEN_.
141* The array descriptor for the distributed matrix H.
142*
143* ILOZ (global input) INTEGER
144* IHIZ (global input) INTEGER
145* Specify the rows of Z to which transformations must be
146* applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
147*
148* Z (input/output) REAL array, dimension
149* (DESCH(LLD_),*)
150* IF WANTZ is .TRUE., then on output, the orthogonal
151* similarity transformation mentioned above has been
152* accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
153* If WANTZ is .FALSE., then Z is unreferenced.
154*
155* DESCZ (global and local input) INTEGER array of dimension DLEN_.
156* The array descriptor for the distributed matrix Z.
157*
158* NS (global output) INTEGER
159* The number of unconverged (ie approximate) eigenvalues
160* returned in SR and SI that may be used as shifts by the
161* calling subroutine.
162*
163* ND (global output) INTEGER
164* The number of converged eigenvalues uncovered by this
165* subroutine.
166*
167* SR (global output) REAL array, dimension KBOT
168* SI (global output) REAL array, dimension KBOT
169* On output, the real and imaginary parts of approximate
170* eigenvalues that may be used for shifts are stored in
171* SR(KBOT-ND-NS+1) through SR(KBOT-ND) and
172* SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively.
173* The real and imaginary parts of converged eigenvalues
174* are stored in SR(KBOT-ND+1) through SR(KBOT) and
175* SI(KBOT-ND+1) through SI(KBOT), respectively.
176*
177* V (global workspace) REAL array, dimension
178* (DESCV(LLD_),*)
179* An NW-by-NW distributed work array.
180*
181* DESCV (global and local input) INTEGER array of dimension DLEN_.
182* The array descriptor for the distributed matrix V.
183*
184* NH (input) INTEGER scalar
185* The number of columns of T. NH.GE.NW.
186*
187* T (global workspace) REAL array, dimension
188* (DESCV(LLD_),*)
189*
190* DESCT (global and local input) INTEGER array of dimension DLEN_.
191* The array descriptor for the distributed matrix T.
192*
193* NV (global input) INTEGER
194* The number of rows of work array WV available for
195* workspace. NV.GE.NW.
196*
197* WV (global workspace) REAL array, dimension
198* (DESCW(LLD_),*)
199*
200* DESCW (global and local input) INTEGER array of dimension DLEN_.
201* The array descriptor for the distributed matrix WV.
202*
203* WORK (local workspace) REAL array, dimension LWORK.
204* On exit, WORK(1) is set to an estimate of the optimal value
205* of LWORK for the given values of N, NW, KTOP and KBOT.
206*
207* LWORK (local input) INTEGER
208* The dimension of the work array WORK. LWORK = 2*NW
209* suffices, but greater efficiency may result from larger
210* values of LWORK.
211*
212* If LWORK = -1, then a workspace query is assumed; PSLAQR3
213* only estimates the optimal workspace size for the given
214* values of N, NW, KTOP and KBOT. The estimate is returned
215* in WORK(1). No error message related to LWORK is issued
216* by XERBLA. Neither H nor Z are accessed.
217*
218* IWORK (local workspace) INTEGER array, dimension (LIWORK)
219*
220* LIWORK (local input) INTEGER
221* The length of the workspace array IWORK
222*
223* ================================================================
224* Based on contributions by
225* Robert Granat and Meiyue Shao,
226* Department of Computing Science and HPC2N,
227* Umea University, Sweden
228*
229* ================================================================
230* .. Parameters ..
231 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
232 $ LLD_, MB_, M_, NB_, N_, RSRC_
233 INTEGER RECMAX
234 LOGICAL SORTGRAD
235 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
236 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
237 $ rsrc_ = 7, csrc_ = 8, lld_ = 9, recmax = 3,
238 $ sortgrad = .false. )
239 REAL ZERO, ONE
240 parameter( zero = 0.0, one = 1.0 )
241* ..
242* .. Local Scalars ..
243 REAL AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S,
244 $ SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP,
245 $ ELEM, ELEM1, ELEM2, ELEM3, R1, ANORM, RNORM,
246 $ RESAED
247 INTEGER I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL,
248 $ KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2, LWK3,
249 $ LWKOPT, NMIN, LLDH, LLDZ, LLDT, LLDV, LLDWV,
250 $ ICTXT, NPROW, NMAX, NPCOL, MYROW, MYCOL, NB,
251 $ IROFFH, M, RCOLS, TAUROWS, RROWS, TAUCOLS,
252 $ ITAU, IR, IPW, NPROCS, MLOC, IROFFHH,
253 $ ICOFFHH, HHRSRC, HHCSRC, HHROWS, HHCOLS,
254 $ IROFFZZ, ICOFFZZ, ZZRSRC, ZZCSRC, ZZROWS,
255 $ ZZCOLS, IERR, TZROWS0, TZCOLS0, IERR0, IPT0,
256 $ IPZ0, IPW0, NB2, ROUND, LILST, KK, LILST0,
257 $ IWRK1, RSRC, CSRC, LWK4, LWK5, IWRK2, LWK6,
258 $ LWK7, LWK8, ILWKOPT, TZROWS, TZCOLS, NSEL,
259 $ NPMIN, ICTXT_NEW, MYROW_NEW, MYCOL_NEW
260 LOGICAL BULGE, SORTED, LQUERY
261* ..
262* .. Local Arrays ..
263 INTEGER PAR( 6 ), DESCR( DLEN_ ),
264 $ DESCTAU( DLEN_ ), DESCHH( DLEN_ ),
265 $ DESCZZ( DLEN_ ), DESCTZ0( DLEN_ ),
266 $ PMAP( 64*64 )
267 REAL DDUM( 1 )
268* ..
269* .. External Functions ..
270 REAL SLAMCH, PSLANGE
271 INTEGER PILAENVX, NUMROC, INDXG2P, ICEIL, BLACS_PNUM
272 EXTERNAL slamch, pilaenvx, numroc, indxg2p, pslange,
273 $ iceil, blacs_pnum
274* ..
275* .. External Subroutines ..
276 EXTERNAL pscopy, psgehrd, psgemm, slabad, pslacpy,
277 $ pslaqr1, slanv2, pslaqr0, pslarf, pslarfg,
279 $ pslamve, blacs_gridinfo, blacs_gridmap,
280 $ blacs_gridexit, psgemr2d
281* ..
282* .. Intrinsic Functions ..
283 INTRINSIC abs, float, int, max, min, sqrt
284* ..
285* .. Executable Statements ..
286 ictxt = desch( ctxt_ )
287 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
288 nprocs = nprow*npcol
289*
290* Extract local leading dimensions, blockfactors, offset for
291* keeping the alignment requirements and size of deflation window.
292*
293 lldh = desch( lld_ )
294 lldz = descz( lld_ )
295 lldt = desct( lld_ )
296 lldv = descv( lld_ )
297 lldwv = descw( lld_ )
298 nb = desch( mb_ )
299 iroffh = mod( ktop - 1, nb )
300 jw = min( nw, kbot-ktop+1 )
301 nsel = nb+jw
302*
303* Extract environment variables for parallel eigenvalue reordering.
304*
305 par(1) = pilaenvx(ictxt, 17, 'PSLAQR3', 'SV', jw, nb, -1, -1)
306 par(2) = pilaenvx(ictxt, 18, 'PSLAQR3', 'SV', jw, nb, -1, -1)
307 par(3) = pilaenvx(ictxt, 19, 'PSLAQR3', 'SV', jw, nb, -1, -1)
308 par(4) = pilaenvx(ictxt, 20, 'PSLAQR3', 'SV', jw, nb, -1, -1)
309 par(5) = pilaenvx(ictxt, 21, 'PSLAQR3', 'SV', jw, nb, -1, -1)
310 par(6) = pilaenvx(ictxt, 22, 'PSLAQR3', 'SV', jw, nb, -1, -1)
311*
312* Check if workspace query.
313*
314 lquery = lwork.EQ.-1 .OR. liwork.EQ.-1
315*
316* Estimate optimal workspace.
317*
318 IF( jw.LE.2 ) THEN
319 lwkopt = 1
320 ELSE
321*
322* Workspace query calls to PSGEHRD and PSORMHR.
323*
324 taurows = numroc( 1, 1, mycol, descv(rsrc_), nprow )
325 taucols = numroc( jw+iroffh, nb, mycol, descv(csrc_),
326 $ npcol )
327 CALL psgehrd( jw, 1, jw, t, 1, 1, desct, work, work, -1,
328 $ info )
329 lwk1 = int( work( 1 ) ) + taurows*taucols
330*
331* Workspace query call to PSORMHR.
332*
333 CALL psormhr( 'Right', 'No', jw, jw, 1, jw, t, 1, 1, desct,
334 $ work, v, 1, 1, descv, work, -1, info )
335 lwk2 = int( work( 1 ) )
336*
337* Workspace query call to PSLAQR0.
338*
339 nmin = pilaenvx( ictxt, 12, 'PSLAQR3', 'SV', jw, 1, jw, lwork )
340 nmax = ( n-1 ) / 3
341 IF( jw+iroffh.GT.nmin .AND. jw+iroffh.LE.nmax
342 $ .AND. reclevel.LT.recmax ) THEN
343 CALL pslaqr0( .true., .true., jw+iroffh, 1+iroffh,
344 $ jw+iroffh, t, desct, sr, si, 1, jw, v, descv,
345 $ work, -1, iwork, liwork-nsel, infqr,
346 $ reclevel+1 )
347 lwk3 = int( work( 1 ) )
348 iwrk1 = iwork( 1 )
349 ELSE
350 rsrc = desct( rsrc_ )
351 csrc = desct( csrc_ )
352 desct( rsrc_ ) = 0
353 desct( csrc_ ) = 0
354 CALL pslaqr1( .true., .true., jw+iroffh, 1, jw+iroffh, t,
355 $ desct, sr, si, 1, jw+iroffh, v, descv, work, -1,
356 $ iwork, liwork-nsel, infqr )
357 desct( rsrc_ ) = rsrc
358 desct( csrc_ ) = csrc
359 lwk3 = int( work( 1 ) )
360 iwrk1 = iwork( 1 )
361 END IF
362*
363* Workspace in case of alignment problems.
364*
365 tzrows0 = numroc( jw+iroffh, nb, myrow, 0, nprow )
366 tzcols0 = numroc( jw+iroffh, nb, mycol, 0, npcol )
367 lwk4 = 2 * tzrows0*tzcols0
368*
369* Workspace check for reordering.
370*
371 CALL pstrord( 'Vectors', iwork, par, jw+iroffh, t, 1, 1,
372 $ desct, v, 1, 1, descv, ddum, ddum, mloc, work, -1,
373 $ iwork, liwork-nsel, info )
374 lwk5 = int( work( 1 ) )
375 iwrk2 = iwork( 1 )
376*
377* Extra workspace for reflecting back spike
378* (workspace for PSLARF approximated for simplicity).
379*
380 rrows = numroc( n+iroffh, nb, myrow, descv(rsrc_), nprow )
381 rcols = numroc( 1, 1, mycol, descv(csrc_), npcol )
382 lwk6 = rrows*rcols + taurows*taucols +
383 $ 2*iceil(iceil(jw+iroffh,nb),nprow)*nb
384 $ *iceil(iceil(jw+iroffh,nb),npcol)*nb
385*
386* Extra workspace needed by PBLAS update calls
387* (also estimated for simplicity).
388*
389 lwk7 = max( iceil(iceil(jw,nb),nprow)*nb *
390 $ iceil(iceil(n-kbot,nb),npcol)*nb,
391 $ iceil(iceil(ihiz-iloz+1,nb),nprow)*nb *
392 $ iceil(iceil(jw,nb),npcol)*nb,
393 $ iceil(iceil(kbot-jw,nb),nprow)*nb *
394 $ iceil(iceil(jw,nb),npcol)*nb )
395*
396* Residual check workspace.
397*
398 tzrows = numroc( jw+iroffh, nb, myrow, desct(rsrc_), nprow )
399 tzcols = numroc( jw+iroffh, nb, mycol, desct(csrc_), npcol )
400 lwk8 = 2*tzrows*tzcols
401*
402* Optimal workspace.
403*
404 lwkopt = max( lwk1, lwk2, lwk3+lwk4, lwk5, lwk6, lwk7, lwk8 )
405 ilwkopt = max( iwrk1, iwrk2 )
406 END IF
407*
408* Quick return in case of workspace query.
409*
410 work( 1 ) = float( lwkopt )
411*
412* IWORK(1:NSEL) is used as the array SELECT for PSTRORD.
413*
414 iwork( 1 ) = ilwkopt + nsel
415 IF( lquery )
416 $ RETURN
417*
418* Nothing to do for an empty active block ...
419 ns = 0
420 nd = 0
421 IF( ktop.GT.kbot )
422 $ RETURN
423* ... nor for an empty deflation window.
424*
425 IF( nw.LT.1 )
426 $ RETURN
427*
428* Machine constants.
429*
430 safmin = slamch( 'SAFE MINIMUM' )
431 safmax = one / safmin
432 CALL slabad( safmin, safmax )
433 ulp = slamch( 'PRECISION' )
434 smlnum = safmin*( float( n ) / ulp )
435*
436* Setup deflation window.
437*
438 jw = min( nw, kbot-ktop+1 )
439 kwtop = kbot - jw + 1
440 IF( kwtop.EQ.ktop ) THEN
441 s = zero
442 ELSE
443 CALL pselget( 'All', '1-Tree', s, h, kwtop, kwtop-1, desch )
444 END IF
445*
446 IF( kbot.EQ.kwtop ) THEN
447*
448* 1-by-1 deflation window: not much to do.
449*
450 CALL pselget( 'All', '1-Tree', sr( kwtop ), h, kwtop, kwtop,
451 $ desch )
452 si( kwtop ) = zero
453 ns = 1
454 nd = 0
455 IF( abs( s ).LE.max( smlnum, ulp*abs( sr( kwtop ) ) ) )
456 $ THEN
457 ns = 0
458 nd = 1
459 IF( kwtop.GT.ktop )
460 $ CALL pselset( h, kwtop, kwtop-1 , desch, zero )
461 END IF
462 RETURN
463 END IF
464*
465 IF( kwtop.EQ.ktop .AND. kbot-kwtop.EQ.1 ) THEN
466*
467* 2-by-2 deflation window: a little more to do.
468*
469 CALL pselget( 'All', '1-Tree', aa, h, kwtop, kwtop, desch )
470 CALL pselget( 'All', '1-Tree', bb, h, kwtop, kwtop+1, desch )
471 CALL pselget( 'All', '1-Tree', cc, h, kwtop+1, kwtop, desch )
472 CALL pselget( 'All', '1-Tree', dd, h, kwtop+1, kwtop+1, desch )
473 CALL slanv2( aa, bb, cc, dd, sr(kwtop), si(kwtop),
474 $ sr(kwtop+1), si(kwtop+1), cs, sn )
475 ns = 0
476 nd = 2
477 IF( cc.EQ.zero ) THEN
478 i = kwtop
479 IF( i+2.LE.n .AND. wantt )
480 $ CALL psrot( n-i-1, h, i, i+2, desch, desch(m_), h, i+1,
481 $ i+2, desch, desch(m_), cs, sn, work, lwork, info )
482 IF( i.GT.1 )
483 $ CALL psrot( i-1, h, 1, i, desch, 1, h, 1, i+1, desch, 1,
484 $ cs, sn, work, lwork, info )
485 IF( wantz )
486 $ CALL psrot( ihiz-iloz+1, z, iloz, i, descz, 1, z, iloz,
487 $ i+1, descz, 1, cs, sn, work, lwork, info )
488 CALL pselset( h, i, i, desch, aa )
489 CALL pselset( h, i, i+1, desch, bb )
490 CALL pselset( h, i+1, i, desch, cc )
491 CALL pselset( h, i+1, i+1, desch, dd )
492 END IF
493 work( 1 ) = float( lwkopt )
494 RETURN
495 END IF
496*
497* Calculate new value for IROFFH in case deflation window
498* was adjusted.
499*
500 iroffh = mod( kwtop - 1, nb )
501*
502* Adjust number of rows and columns of T matrix descriptor
503* to prepare for call to PDBTRORD.
504*
505 desct( m_ ) = jw+iroffh
506 desct( n_ ) = jw+iroffh
507*
508* Convert to spike-triangular form. (In case of a rare QR failure,
509* this routine continues to do aggressive early deflation using that
510* part of the deflation window that converged using INFQR here and
511* there to keep track.)
512*
513* Copy the trailing submatrix to the working space.
514*
515 CALL pslaset( 'All', iroffh, jw+iroffh, zero, one, t, 1, 1,
516 $ desct )
517 CALL pslaset( 'All', jw, iroffh, zero, zero, t, 1+iroffh, 1,
518 $ desct )
519 CALL pslacpy( 'All', 1, jw, h, kwtop, kwtop, desch, t, 1+iroffh,
520 $ 1+iroffh, desct )
521 CALL pslacpy( 'Upper', jw-1, jw-1, h, kwtop+1, kwtop, desch, t,
522 $ 1+iroffh+1, 1+iroffh, desct )
523 IF( jw.GT.2 )
524 $ CALL pslaset( 'Lower', jw-2, jw-2, zero, zero, t, 1+iroffh+2,
525 $ 1+iroffh, desct )
526 CALL pslacpy( 'All', jw-1, 1, h, kwtop+1, kwtop+jw-1, desch, t,
527 $ 1+iroffh+1, 1+iroffh+jw-1, desct )
528*
529* Initialize the working orthogonal matrix.
530*
531 CALL pslaset( 'All', jw+iroffh, jw+iroffh, zero, one, v, 1, 1,
532 $ descv )
533*
534* Compute the Schur form of T.
535*
536 npmin = pilaenvx( ictxt, 23, 'PSLAQR3', 'SV', jw, nb, nprow,
537 $ npcol )
538 nmin = pilaenvx( ictxt, 12, 'PSLAQR3', 'SV', jw, 1, jw, lwork )
539 nmax = ( n-1 ) / 3
540 IF( min(nprow, npcol).LE.npmin+1 .OR. reclevel.GE.1 ) THEN
541*
542* The AED window is large enough.
543* Compute the Schur decomposition with all processors.
544*
545 IF( jw+iroffh.GT.nmin .AND. jw+iroffh.LE.nmax
546 $ .AND. reclevel.LT.recmax ) THEN
547 CALL pslaqr0( .true., .true., jw+iroffh, 1+iroffh,
548 $ jw+iroffh, t, desct, sr( kwtop-iroffh ),
549 $ si( kwtop-iroffh ), 1+iroffh, jw+iroffh, v, descv,
550 $ work, lwork, iwork(nsel+1), liwork-nsel, infqr,
551 $ reclevel+1 )
552 ELSE
553 IF( desct(rsrc_).EQ.0 .AND. desct(csrc_).EQ.0 ) THEN
554 IF( jw+iroffh.GT.desct( mb_ ) ) THEN
555 CALL pslaqr1( .true., .true., jw+iroffh, 1,
556 $ jw+iroffh, t, desct, sr( kwtop-iroffh ),
557 $ si( kwtop-iroffh ), 1, jw+iroffh, v,
558 $ descv, work, lwork, iwork(nsel+1), liwork-nsel,
559 $ infqr )
560 ELSE
561 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
562 CALL slahqr( .true., .true., jw+iroffh, 1+iroffh,
563 $ jw+iroffh, t, desct(lld_),
564 $ sr( kwtop-iroffh ), si( kwtop-iroffh ),
565 $ 1+iroffh, jw+iroffh, v, descv(lld_), infqr )
566 ELSE
567 infqr = 0
568 END IF
569 IF( nprocs.GT.1 )
570 $ CALL igamn2d( ictxt, 'All', '1-Tree', 1, 1, infqr,
571 $ 1, -1, -1, -1, -1, -1 )
572 END IF
573 ELSEIF( jw+iroffh.LE.desct( mb_ ) ) THEN
574 IF( myrow.EQ.desct(rsrc_) .AND. mycol.EQ.desct(csrc_) )
575 $ THEN
576 CALL slahqr( .true., .true., jw+iroffh, 1+iroffh,
577 $ jw+iroffh, t, desct(lld_),
578 $ sr( kwtop-iroffh ), si( kwtop-iroffh ),
579 $ 1+iroffh, jw+iroffh, v, descv(lld_), infqr )
580 ELSE
581 infqr = 0
582 END IF
583 IF( nprocs.GT.1 )
584 $ CALL igamn2d( ictxt, 'All', '1-Tree', 1, 1, infqr,
585 $ 1, -1, -1, -1, -1, -1 )
586 ELSE
587 tzrows0 = numroc( jw+iroffh, nb, myrow, 0, nprow )
588 tzcols0 = numroc( jw+iroffh, nb, mycol, 0, npcol )
589 CALL descinit( desctz0, jw+iroffh, jw+iroffh, nb, nb, 0,
590 $ 0, ictxt, max(1,tzrows0), ierr0 )
591 ipt0 = 1
592 ipz0 = ipt0 + max(1,tzrows0)*tzcols0
593 ipw0 = ipz0 + max(1,tzrows0)*tzcols0
594 CALL pslamve( 'All', jw+iroffh, jw+iroffh, t, 1, 1,
595 $ desct, work(ipt0), 1, 1, desctz0, work(ipw0) )
596 CALL pslaset( 'All', jw+iroffh, jw+iroffh, zero, one,
597 $ work(ipz0), 1, 1, desctz0 )
598 CALL pslaqr1( .true., .true., jw+iroffh, 1,
599 $ jw+iroffh, work(ipt0), desctz0,
600 $ sr( kwtop-iroffh ), si( kwtop-iroffh ),
601 $ 1, jw+iroffh, work(ipz0),
602 $ desctz0, work(ipw0), lwork-ipw0+1, iwork(nsel+1),
603 $ liwork-nsel, infqr )
604 CALL pslamve( 'All', jw+iroffh, jw+iroffh, work(ipt0), 1,
605 $ 1, desctz0, t, 1, 1, desct, work(ipw0) )
606 CALL pslamve( 'All', jw+iroffh, jw+iroffh, work(ipz0), 1,
607 $ 1, desctz0, v, 1, 1, descv, work(ipw0) )
608 END IF
609 END IF
610 ELSE
611*
612* The AED window is too small.
613* Redistribute the AED window to a subgrid
614* and do the computation on the subgrid.
615*
616 ictxt_new = ictxt
617 DO 20 i = 0, npmin-1
618 DO 10 j = 0, npmin-1
619 pmap( j+1+i*npmin ) = blacs_pnum( ictxt, i, j )
620 10 CONTINUE
621 20 CONTINUE
622 CALL blacs_gridmap( ictxt_new, pmap, npmin, npmin, npmin )
623 CALL blacs_gridinfo( ictxt_new, npmin, npmin, myrow_new,
624 $ mycol_new )
625 IF( myrow.GE.npmin .OR. mycol.GE.npmin ) ictxt_new = -1
626 IF( ictxt_new.GE.0 ) THEN
627 tzrows0 = numroc( jw, nb, myrow_new, 0, npmin )
628 tzcols0 = numroc( jw, nb, mycol_new, 0, npmin )
629 CALL descinit( desctz0, jw, jw, nb, nb, 0,
630 $ 0, ictxt_new, max(1,tzrows0), ierr0 )
631 ipt0 = 1
632 ipz0 = ipt0 + max(1,tzrows0)*max(1,tzcols0)
633 ipw0 = ipz0 + max(1,tzrows0)*max(1,tzcols0)
634 ELSE
635 ipt0 = 1
636 ipz0 = 2
637 ipw0 = 3
638 desctz0( ctxt_ ) = -1
639 infqr = 0
640 END IF
641 CALL psgemr2d( jw, jw, t, 1+iroffh, 1+iroffh, desct,
642 $ work(ipt0), 1, 1, desctz0, ictxt )
643 IF( ictxt_new.GE.0 ) THEN
644 CALL pslaset( 'All', jw, jw, zero, one, work(ipz0), 1, 1,
645 $ desctz0 )
646 nmin = pilaenvx( ictxt_new, 12, 'PSLAQR3', 'SV', jw, 1, jw,
647 $ lwork )
648 IF( jw.GT.nmin .AND. jw.LE.nmax .AND. reclevel.LT.1 ) THEN
649 CALL pslaqr0( .true., .true., jw, 1, jw, work(ipt0),
650 $ desctz0, sr( kwtop ), si( kwtop ), 1, jw,
651 $ work(ipz0), desctz0, work(ipw0), lwork-ipw0+1,
652 $ iwork(nsel+1), liwork-nsel, infqr,
653 $ reclevel+1 )
654 ELSE
655 CALL pslaqr1( .true., .true., jw, 1, jw, work(ipt0),
656 $ desctz0, sr( kwtop ), si( kwtop ), 1, jw,
657 $ work(ipz0), desctz0, work(ipw0), lwork-ipw0+1,
658 $ iwork(nsel+1), liwork-nsel, infqr )
659 END IF
660 END IF
661 CALL psgemr2d( jw, jw, work(ipt0), 1, 1, desctz0, t, 1+iroffh,
662 $ 1+iroffh, desct, ictxt )
663 CALL psgemr2d( jw, jw, work(ipz0), 1, 1, desctz0, v, 1+iroffh,
664 $ 1+iroffh, descv, ictxt )
665 IF( ictxt_new.GE.0 )
666 $ CALL blacs_gridexit( ictxt_new )
667 IF( myrow+mycol.GT.0 ) THEN
668 DO 40 j = 0, jw-1
669 sr( kwtop+j ) = zero
670 si( kwtop+j ) = zero
671 40 CONTINUE
672 END IF
673 CALL igamn2d( ictxt, 'All', '1-Tree', 1, 1, infqr, 1, -1, -1,
674 $ -1, -1, -1 )
675 CALL sgsum2d( ictxt, 'All', ' ', jw, 1, sr(kwtop), jw, -1, -1 )
676 CALL sgsum2d( ictxt, 'All', ' ', jw, 1, si(kwtop), jw, -1, -1 )
677 END IF
678*
679* Adjust INFQR for offset from block border in submatrices.
680*
681 IF( infqr.NE.0 )
682 $ infqr = infqr - iroffh
683*
684* PSTRORD needs a clean margin near the diagonal.
685*
686 DO 50 j = 1, jw - 3
687 CALL pselset( t, j+2, j, desct, zero )
688 CALL pselset( t, j+3, j, desct, zero )
689 50 CONTINUE
690 IF( jw.GT.2 )
691 $ CALL pselset( t, jw, jw-2, desct, zero )
692*
693* Check local residual for AED Schur decomposition.
694*
695 resaed = 0.0
696*
697* Clean up the array SELECT for PSTRORD.
698*
699 DO 60 j = 1, nsel
700 iwork( j ) = 0
701 60 CONTINUE
702*
703* Set local M counter to zero.
704*
705 mloc = 0
706*
707* Outer deflation detection loop (label 80).
708* In this loop a bunch of undeflatable eigenvalues
709* are moved simultaneously.
710*
711 DO 70 j = 1, iroffh + infqr
712 iwork( j ) = 1
713 70 CONTINUE
714*
715 ns = jw
716 ilst = infqr + 1 + iroffh
717 IF( ilst.GT.1 ) THEN
718 CALL pselget( 'All', '1-Tree', elem, t, ilst, ilst-1, desct )
719 bulge = elem.NE.zero
720 IF( bulge ) ilst = ilst+1
721 END IF
722*
723 80 CONTINUE
724 IF( ilst.LE.ns+iroffh ) THEN
725*
726* Find the top-left corner of the local window.
727*
728 lilst = max(ilst,ns+iroffh-nb+1)
729 IF( lilst.GT.1 ) THEN
730 CALL pselget( 'All', '1-Tree', elem, t, lilst, lilst-1,
731 $ desct )
732 bulge = elem.NE.zero
733 IF( bulge ) lilst = lilst+1
734 END IF
735*
736* Lock all eigenvalues outside the local window.
737*
738 DO 90 j = iroffh+1, lilst-1
739 iwork( j ) = 1
740 90 CONTINUE
741 lilst0 = lilst
742*
743* Inner deflation detection loop (label 100).
744* In this loop, the undeflatable eigenvalues are moved to the
745* top-left corner of the local window.
746*
747 100 CONTINUE
748 IF( lilst.LE.ns+iroffh ) THEN
749 IF( ns.EQ.1 ) THEN
750 bulge = .false.
751 ELSE
752 CALL pselget( 'All', '1-Tree', elem, t, ns+iroffh,
753 $ ns+iroffh-1, desct )
754 bulge = elem.NE.zero
755 END IF
756*
757* Small spike tip test for deflation.
758*
759 IF( .NOT.bulge ) THEN
760*
761* Real eigenvalue.
762*
763 CALL pselget( 'All', '1-Tree', elem, t, ns+iroffh,
764 $ ns+iroffh, desct )
765 foo = abs( elem )
766 IF( foo.EQ.zero )
767 $ foo = abs( s )
768 CALL pselget( 'All', '1-Tree', elem, v, 1+iroffh,
769 $ ns+iroffh, descv )
770 IF( abs( s*elem ).LE.max( smlnum, ulp*foo ) ) THEN
771*
772* Deflatable.
773*
774 ns = ns - 1
775 ELSE
776*
777* Undeflatable: move it up out of the way.
778*
779 ifst = ns
780 DO 110 j = lilst, jw+iroffh
781 iwork( j ) = 0
782 110 CONTINUE
783 iwork( ifst+iroffh ) = 1
784 CALL pstrord( 'Vectors', iwork, par, jw+iroffh, t, 1,
785 $ 1, desct, v, 1, 1, descv, work,
786 $ work(jw+iroffh+1), mloc,
787 $ work(2*(jw+iroffh)+1), lwork-2*(jw+iroffh),
788 $ iwork(nsel+1), liwork-nsel, info )
789*
790* Adjust the array SELECT explicitly so that it does not
791* rely on the output of PSTRORD.
792*
793 iwork( ifst+iroffh ) = 0
794 iwork( lilst ) = 1
795 lilst = lilst + 1
796*
797* In case of a rare exchange failure, adjust the
798* pointers ILST and LILST to the current place to avoid
799* unexpected behaviors.
800*
801 IF( info.NE.0 ) THEN
802 lilst = max(info, lilst)
803 ilst = max(info, ilst)
804 END IF
805 END IF
806 ELSE
807*
808* Complex conjugate pair.
809*
810 CALL pselget( 'All', '1-Tree', elem1, t, ns+iroffh,
811 $ ns+iroffh, desct )
812 CALL pselget( 'All', '1-Tree', elem2, t, ns+iroffh,
813 $ ns+iroffh-1, desct )
814 CALL pselget( 'All', '1-Tree', elem3, t, ns+iroffh-1,
815 $ ns+iroffh, desct )
816 foo = abs( elem1 ) + sqrt( abs( elem2 ) )*
817 $ sqrt( abs( elem3 ) )
818 IF( foo.EQ.zero )
819 $ foo = abs( s )
820 CALL pselget( 'All', '1-Tree', elem1, v, 1+iroffh,
821 $ ns+iroffh, descv )
822 CALL pselget( 'All', '1-Tree', elem2, v, 1+iroffh,
823 $ ns+iroffh-1, descv )
824 IF( max( abs( s*elem1 ), abs( s*elem2 ) ).LE.
825 $ max( smlnum, ulp*foo ) ) THEN
826*
827* Deflatable.
828*
829 ns = ns - 2
830 ELSE
831*
832* Undeflatable: move them up out of the way.
833*
834 ifst = ns
835 DO 120 j = lilst, jw+iroffh
836 iwork( j ) = 0
837 120 CONTINUE
838 iwork( ifst+iroffh ) = 1
839 iwork( ifst+iroffh-1 ) = 1
840 CALL pstrord( 'Vectors', iwork, par, jw+iroffh, t, 1,
841 $ 1, desct, v, 1, 1, descv, work,
842 $ work(jw+iroffh+1), mloc,
843 $ work(2*(jw+iroffh)+1), lwork-2*(jw+iroffh),
844 $ iwork(nsel+1), liwork-nsel, info )
845*
846* Adjust the array SELECT explicitly so that it does not
847* rely on the output of PSTRORD.
848*
849 iwork( ifst+iroffh ) = 0
850 iwork( ifst+iroffh-1 ) = 0
851 iwork( lilst ) = 1
852 iwork( lilst+1 ) = 1
853 lilst = lilst + 2
854*
855* In case of a rare exchange failure, adjust the
856* pointers ILST and LILST to the current place to avoid
857* unexpected behaviors.
858*
859 IF( info.NE.0 ) THEN
860 lilst = max(info, lilst)
861 ilst = max(info, ilst)
862 END IF
863 END IF
864 END IF
865*
866* End of inner deflation detection loop.
867*
868 GO TO 100
869 END IF
870*
871* Unlock the eigenvalues outside the local window.
872* Then undeflatable eigenvalues are moved to the proper position.
873*
874 DO 130 j = ilst, lilst0-1
875 iwork( j ) = 0
876 130 CONTINUE
877 CALL pstrord( 'Vectors', iwork, par, jw+iroffh, t, 1, 1,
878 $ desct, v, 1, 1, descv, work, work(jw+iroffh+1),
879 $ m, work(2*(jw+iroffh)+1), lwork-2*(jw+iroffh),
880 $ iwork(nsel+1), liwork-nsel, info )
881 ilst = m + 1
882*
883* In case of a rare exchange failure, adjust the pointer ILST to
884* the current place to avoid unexpected behaviors.
885*
886 IF( info.NE.0 )
887 $ ilst = max(info, ilst)
888*
889* End of outer deflation detection loop.
890*
891 GO TO 80
892 END IF
893
894*
895* Post-reordering step: copy output eigenvalues to output.
896*
897 CALL scopy( jw, work(1+iroffh), 1, sr( kwtop ), 1 )
898 CALL scopy( jw, work(jw+2*iroffh+1), 1, si( kwtop ), 1 )
899*
900* Check local residual for reordered AED Schur decomposition.
901*
902 resaed = 0.0
903*
904* Return to Hessenberg form.
905*
906 IF( ns.EQ.0 )
907 $ s = zero
908*
909 IF( ns.LT.jw .AND. sortgrad ) THEN
910*
911* Sorting diagonal blocks of T improves accuracy for
912* graded matrices. Bubble sort deals well with exchange
913* failures. Eigenvalues/shifts from T are also restored.
914*
915 round = 0
916 sorted = .false.
917 i = ns + 1 + iroffh
918 140 CONTINUE
919 IF( sorted )
920 $ GO TO 180
921 sorted = .true.
922 round = round + 1
923*
924 kend = i - 1
925 i = infqr + 1 + iroffh
926 IF( i.EQ.ns+iroffh ) THEN
927 k = i + 1
928 ELSE IF( si( kwtop-iroffh + i-1 ).EQ.zero ) THEN
929 k = i + 1
930 ELSE
931 k = i + 2
932 END IF
933 150 CONTINUE
934 IF( k.LE.kend ) THEN
935 IF( k.EQ.i+1 ) THEN
936 evi = abs( sr( kwtop-iroffh+i-1 ) )
937 ELSE
938 evi = abs( sr( kwtop-iroffh+i-1 ) ) +
939 $ abs( si( kwtop-iroffh+i-1 ) )
940 END IF
941*
942 IF( k.EQ.kend ) THEN
943 evk = abs( sr( kwtop-iroffh+k-1 ) )
944 ELSEIF( si( kwtop-iroffh+k-1 ).EQ.zero ) THEN
945 evk = abs( sr( kwtop-iroffh+k-1 ) )
946 ELSE
947 evk = abs( sr( kwtop-iroffh+k-1 ) ) +
948 $ abs( si( kwtop-iroffh+k-1 ) )
949 END IF
950*
951 IF( evi.GE.evk ) THEN
952 i = k
953 ELSE
954 mloc = 0
955 sorted = .false.
956 ifst = i
957 ilst = k
958 DO 160 j = 1, i-1
959 iwork( j ) = 1
960 mloc = mloc + 1
961 160 CONTINUE
962 IF( k.EQ.i+2 ) THEN
963 iwork( i ) = 0
964 iwork(i+1) = 0
965 ELSE
966 iwork( i ) = 0
967 END IF
968 IF( k.NE.kend .AND. si( kwtop-iroffh+k-1 ).NE.zero ) THEN
969 iwork( k ) = 1
970 iwork(k+1) = 1
971 mloc = mloc + 2
972 ELSE
973 iwork( k ) = 1
974 IF( k.LT.kend ) iwork(k+1) = 0
975 mloc = mloc + 1
976 END IF
977 DO 170 j = k+2, jw+iroffh
978 iwork( j ) = 0
979 170 CONTINUE
980 CALL pstrord( 'Vectors', iwork, par, jw+iroffh, t, 1, 1,
981 $ desct, v, 1, 1, descv, work, work(jw+iroffh+1), m,
982 $ work(2*(jw+iroffh)+1), lwork-2*(jw+iroffh),
983 $ iwork(nsel+1), liwork-nsel, ierr )
984 CALL scopy( jw, work(1+iroffh), 1, sr( kwtop ), 1 )
985 CALL scopy( jw, work(jw+2*iroffh+1), 1, si( kwtop ), 1 )
986 IF( ierr.EQ.0 ) THEN
987 i = ilst
988 ELSE
989 i = k
990 END IF
991 END IF
992 IF( i.EQ.kend ) THEN
993 k = i + 1
994 ELSE IF( si( kwtop-iroffh+i-1 ).EQ.zero ) THEN
995 k = i + 1
996 ELSE
997 k = i + 2
998 END IF
999 GO TO 150
1000 END IF
1001 GO TO 140
1002 180 CONTINUE
1003 END IF
1004*
1005* Restore number of rows and columns of T matrix descriptor.
1006*
1007 desct( m_ ) = nw+iroffh
1008 desct( n_ ) = nh+iroffh
1009*
1010 IF( ns.LT.jw .OR. s.EQ.zero ) THEN
1011 IF( ns.GT.1 .AND. s.NE.zero ) THEN
1012*
1013* Reflect spike back into lower triangle.
1014*
1015 rrows = numroc( ns+iroffh, nb, myrow, descv(rsrc_), nprow )
1016 rcols = numroc( 1, 1, mycol, descv(csrc_), npcol )
1017 CALL descinit( descr, ns+iroffh, 1, nb, 1, descv(rsrc_),
1018 $ descv(csrc_), ictxt, max(1, rrows), info )
1019 taurows = numroc( 1, 1, mycol, descv(rsrc_), nprow )
1020 taucols = numroc( jw+iroffh, nb, mycol, descv(csrc_),
1021 $ npcol )
1022 CALL descinit( desctau, 1, jw+iroffh, 1, nb, descv(rsrc_),
1023 $ descv(csrc_), ictxt, max(1, taurows), info )
1024*
1025 ir = 1
1026 itau = ir + descr( lld_ ) * rcols
1027 ipw = itau + desctau( lld_ ) * taucols
1028*
1029 CALL pslaset( 'All', ns+iroffh, 1, zero, zero, work(itau),
1030 $ 1, 1, desctau )
1031*
1032 CALL pscopy( ns, v, 1+iroffh, 1+iroffh, descv, descv(m_),
1033 $ work(ir), 1+iroffh, 1, descr, 1 )
1034 CALL pslarfg( ns, beta, 1+iroffh, 1, work(ir), 2+iroffh, 1,
1035 $ descr, 1, work(itau+iroffh) )
1036 CALL pselset( work(ir), 1+iroffh, 1, descr, one )
1037*
1038 CALL pslaset( 'Lower', jw-2, jw-2, zero, zero, t, 3+iroffh,
1039 $ 1+iroffh, desct )
1040*
1041 CALL pslarf( 'Left', ns, jw, work(ir), 1+iroffh, 1, descr,
1042 $ 1, work(itau+iroffh), t, 1+iroffh, 1+iroffh,
1043 $ desct, work( ipw ) )
1044 CALL pslarf( 'Right', ns, ns, work(ir), 1+iroffh, 1, descr,
1045 $ 1, work(itau+iroffh), t, 1+iroffh, 1+iroffh,
1046 $ desct, work( ipw ) )
1047 CALL pslarf( 'Right', jw, ns, work(ir), 1+iroffh, 1, descr,
1048 $ 1, work(itau+iroffh), v, 1+iroffh, 1+iroffh,
1049 $ descv, work( ipw ) )
1050*
1051 itau = 1
1052 ipw = itau + desctau( lld_ ) * taucols
1053 CALL psgehrd( jw+iroffh, 1+iroffh, ns+iroffh, t, 1, 1,
1054 $ desct, work(itau), work( ipw ), lwork-ipw+1, info )
1055 END IF
1056*
1057* Copy updated reduced window into place.
1058*
1059 IF( kwtop.GT.1 ) THEN
1060 CALL pselget( 'All', '1-Tree', elem, v, 1+iroffh,
1061 $ 1+iroffh, descv )
1062 CALL pselset( h, kwtop, kwtop-1, desch, s*elem )
1063 END IF
1064 CALL pslacpy( 'Upper', jw-1, jw-1, t, 1+iroffh+1, 1+iroffh,
1065 $ desct, h, kwtop+1, kwtop, desch )
1066 CALL pslacpy( 'All', 1, jw, t, 1+iroffh, 1+iroffh, desct, h,
1067 $ kwtop, kwtop, desch )
1068 CALL pslacpy( 'All', jw-1, 1, t, 1+iroffh+1, 1+iroffh+jw-1,
1069 $ desct, h, kwtop+1, kwtop+jw-1, desch )
1070*
1071* Accumulate orthogonal matrix in order to update
1072* H and Z, if requested.
1073*
1074 IF( ns.GT.1 .AND. s.NE.zero ) THEN
1075 CALL psormhr( 'Right', 'No', jw+iroffh, ns+iroffh, 1+iroffh,
1076 $ ns+iroffh, t, 1, 1, desct, work(itau), v, 1,
1077 $ 1, descv, work( ipw ), lwork-ipw+1, info )
1078 END IF
1079*
1080* Update vertical slab in H.
1081*
1082 IF( wantt ) THEN
1083 ltop = 1
1084 ELSE
1085 ltop = ktop
1086 END IF
1087 kln = max( 0, kwtop-ltop )
1088 iroffhh = mod( ltop-1, nb )
1089 icoffhh = mod( kwtop-1, nb )
1090 hhrsrc = indxg2p( ltop, nb, myrow, desch(rsrc_), nprow )
1091 hhcsrc = indxg2p( kwtop, nb, mycol, desch(csrc_), npcol )
1092 hhrows = numroc( kln+iroffhh, nb, myrow, hhrsrc, nprow )
1093 hhcols = numroc( jw+icoffhh, nb, mycol, hhcsrc, npcol )
1094 CALL descinit( deschh, kln+iroffhh, jw+icoffhh, nb, nb,
1095 $ hhrsrc, hhcsrc, ictxt, max(1, hhrows), ierr )
1096 CALL psgemm( 'No', 'No', kln, jw, jw, one, h, ltop,
1097 $ kwtop, desch, v, 1+iroffh, 1+iroffh, descv, zero,
1098 $ work, 1+iroffhh, 1+icoffhh, deschh )
1099 CALL pslacpy( 'All', kln, jw, work, 1+iroffhh, 1+icoffhh,
1100 $ deschh, h, ltop, kwtop, desch )
1101*
1102* Update horizontal slab in H.
1103*
1104 IF( wantt ) THEN
1105 kln = n-kbot
1106 iroffhh = mod( kwtop-1, nb )
1107 icoffhh = mod( kbot, nb )
1108 hhrsrc = indxg2p( kwtop, nb, myrow, desch(rsrc_), nprow )
1109 hhcsrc = indxg2p( kbot+1, nb, mycol, desch(csrc_), npcol )
1110 hhrows = numroc( jw+iroffhh, nb, myrow, hhrsrc, nprow )
1111 hhcols = numroc( kln+icoffhh, nb, mycol, hhcsrc, npcol )
1112 CALL descinit( deschh, jw+iroffhh, kln+icoffhh, nb, nb,
1113 $ hhrsrc, hhcsrc, ictxt, max(1, hhrows), ierr )
1114 CALL psgemm( 'Tr', 'No', jw, kln, jw, one, v,
1115 $ 1+iroffh, 1+iroffh, descv, h, kwtop, kbot+1,
1116 $ desch, zero, work, 1+iroffhh, 1+icoffhh, deschh )
1117 CALL pslacpy( 'All', jw, kln, work, 1+iroffhh, 1+icoffhh,
1118 $ deschh, h, kwtop, kbot+1, desch )
1119 END IF
1120*
1121* Update vertical slab in Z.
1122*
1123 IF( wantz ) THEN
1124 kln = ihiz-iloz+1
1125 iroffzz = mod( iloz-1, nb )
1126 icoffzz = mod( kwtop-1, nb )
1127 zzrsrc = indxg2p( iloz, nb, myrow, descz(rsrc_), nprow )
1128 zzcsrc = indxg2p( kwtop, nb, mycol, descz(csrc_), npcol )
1129 zzrows = numroc( kln+iroffzz, nb, myrow, zzrsrc, nprow )
1130 zzcols = numroc( jw+icoffzz, nb, mycol, zzcsrc, npcol )
1131 CALL descinit( desczz, kln+iroffzz, jw+icoffzz, nb, nb,
1132 $ zzrsrc, zzcsrc, ictxt, max(1, zzrows), ierr )
1133 CALL psgemm( 'No', 'No', kln, jw, jw, one, z, iloz,
1134 $ kwtop, descz, v, 1+iroffh, 1+iroffh, descv,
1135 $ zero, work, 1+iroffzz, 1+icoffzz, desczz )
1136 CALL pslacpy( 'All', kln, jw, work, 1+iroffzz, 1+icoffzz,
1137 $ desczz, z, iloz, kwtop, descz )
1138 END IF
1139 END IF
1140*
1141* Return the number of deflations (ND) and the number of shifts (NS).
1142* (Subtracting INFQR from the spike length takes care of the case of
1143* a rare QR failure while calculating eigenvalues of the deflation
1144* window.)
1145*
1146 nd = jw - ns
1147 ns = ns - infqr
1148*
1149* Return optimal workspace.
1150*
1151 work( 1 ) = float( lwkopt )
1152 iwork( 1 ) = ilwkopt + nsel
1153*
1154* End of PSLAQR3
1155*
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3
integer function iceil(inum, idenom)
Definition iceil.f:2
integer function indxg2p(indxglob, nb, iproc, isrcproc, nprocs)
Definition indxg2p.f:2
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
integer function pilaenvx(ictxt, ispec, name, opts, n1, n2, n3, n4)
Definition pilaenvx.f:3
subroutine pslaset(uplo, m, n, alpha, beta, a, ia, ja, desca)
Definition psblastst.f:6863
subroutine pselget(scope, top, alpha, a, ia, ja, desca)
Definition pselget.f:2
subroutine pselset(a, ia, ja, desca, alpha)
Definition pselset.f:2
subroutine psgehrd(n, ilo, ihi, a, ia, ja, desca, tau, work, lwork, info)
Definition psgehrd.f:3
subroutine pslacpy(uplo, m, n, a, ia, ja, desca, b, ib, jb, descb)
Definition pslacpy.f:3
subroutine pslamve(uplo, m, n, a, ia, ja, desca, b, ib, jb, descb, dwork)
Definition pslamve.f:3
real function pslange(norm, m, n, a, ia, ja, desca, work)
Definition pslange.f:3
recursive subroutine pslaqr0(wantt, wantz, n, ilo, ihi, h, desch, wr, wi, iloz, ihiz, z, descz, work, lwork, iwork, liwork, info, reclevel)
Definition pslaqr0.f:4
recursive subroutine pslaqr1(wantt, wantz, n, ilo, ihi, a, desca, wr, wi, iloz, ihiz, z, descz, work, lwork, iwork, ilwork, info)
Definition pslaqr1.f:5
subroutine pslarf(side, m, n, v, iv, jv, descv, incv, tau, c, ic, jc, descc, work)
Definition pslarf.f:3
subroutine pslarfg(n, alpha, iax, jax, x, ix, jx, descx, incx, tau)
Definition pslarfg.f:3
subroutine psormhr(side, trans, m, n, ilo, ihi, a, ia, ja, desca, tau, c, ic, jc, descc, work, lwork, info)
Definition psormhr.f:3
subroutine psrot(n, x, ix, jx, descx, incx, y, iy, jy, descy, incy, cs, sn, work, lwork, info)
Definition psrot.f:3
subroutine pstrord(compq, select, para, n, t, it, jt, desct, q, iq, jq, descq, wr, wi, m, work, lwork, iwork, liwork, info)
Definition pstrord.f:4
real function slamch(cmach)
Definition tools.f:867
Here is the call graph for this function:
Here is the caller graph for this function: