ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdlaqr2.f
Go to the documentation of this file.
1  SUBROUTINE pdlaqr2( WANTT, WANTZ, N, KTOP, KBOT, NW, A, DESCA,
2  $ ILOZ, IHIZ, Z, DESCZ, NS, ND, SR, SI, T, LDT,
3  $ V, LDV, WR, WI, WORK, LWORK )
4 *
5 * Contribution from the Department of Computing Science and HPC2N,
6 * Umea University, Sweden
7 *
8 * -- ScaLAPACK routine (version 2.0.2) --
9 * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
10 * May 1 2012
11 *
12  IMPLICIT NONE
13 *
14 * .. Scalar Arguments ..
15  INTEGER IHIZ, ILOZ, KBOT, KTOP, LDT, LDV, LWORK, N, ND,
16  $ NS, NW
17  LOGICAL WANTT, WANTZ
18 * ..
19 * .. Array Arguments ..
20  INTEGER DESCA( * ), DESCZ( * )
21  DOUBLE PRECISION A( * ), SI( KBOT ), SR( KBOT ), T( LDT, * ),
22  $ v( ldv, * ), work( * ), wi( * ), wr( * ),
23  $ z( * )
24 * ..
25 *
26 * Purpose
27 * =======
28 *
29 * Aggressive early deflation:
30 *
31 * PDLAQR2 accepts as input an upper Hessenberg matrix A and performs an
32 * orthogonal similarity transformation designed to detect and deflate
33 * fully converged eigenvalues from a trailing principal submatrix. On
34 * output A has been overwritten by a new Hessenberg matrix that is a
35 * perturbation of an orthogonal similarity transformation of A. It is
36 * to be hoped that the final version of H has many zero subdiagonal
37 * entries.
38 *
39 * This routine handles small deflation windows which is affordable by
40 * one processor. Normally, it is called by PDLAQR1. All the inputs are
41 * assumed to be valid without checking.
42 *
43 * Notes
44 * =====
45 *
46 * Each global data object is described by an associated description
47 * vector. This vector stores the information required to establish
48 * the mapping between an object element and its corresponding process
49 * and memory location.
50 *
51 * Let A be a generic term for any 2D block cyclicly distributed array.
52 * Such a global array has an associated description vector DESCA.
53 * In the following comments, the character _ should be read as
54 * "of the global array".
55 *
56 * NOTATION STORED IN EXPLANATION
57 * --------------- -------------- --------------------------------------
58 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
59 * DTYPE_A = 1.
60 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
61 * the BLACS process grid A is distribu-
62 * ted over. The context itself is glo-
63 * bal, but the handle (the integer
64 * value) may vary.
65 * M_A (global) DESCA( M_ ) The number of rows in the global
66 * array A.
67 * N_A (global) DESCA( N_ ) The number of columns in the global
68 * array A.
69 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
70 * the rows of the array.
71 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
72 * the columns of the array.
73 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
74 * row of the array A is distributed.
75 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
76 * first column of the array A is
77 * distributed.
78 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
79 * array. LLD_A >= MAX(1,LOCr(M_A)).
80 *
81 * Let K be the number of rows or columns of a distributed matrix,
82 * and assume that its process grid has dimension p x q.
83 * LOCr( K ) denotes the number of elements of K that a process
84 * would receive if K were distributed over the p processes of its
85 * process column.
86 * Similarly, LOCc( K ) denotes the number of elements of K that a
87 * process would receive if K were distributed over the q processes of
88 * its process row.
89 * The values of LOCr() and LOCc() may be determined via a call to the
90 * ScaLAPACK tool function, NUMROC:
91 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
92 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
93 * An upper bound for these quantities may be computed by:
94 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
95 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
96 *
97 * Arguments
98 * =========
99 *
100 * WANTT (global input) LOGICAL
101 * If .TRUE., then the Hessenberg matrix H is fully updated
102 * so that the quasi-triangular Schur factor may be
103 * computed (in cooperation with the calling subroutine).
104 * If .FALSE., then only enough of H is updated to preserve
105 * the eigenvalues.
106 *
107 * WANTZ (global input) LOGICAL
108 * If .TRUE., then the orthogonal matrix Z is updated so
109 * so that the orthogonal Schur factor may be computed
110 * (in cooperation with the calling subroutine).
111 * If .FALSE., then Z is not referenced.
112 *
113 * N (global input) INTEGER
114 * The order of the matrix H and (if WANTZ is .TRUE.) the
115 * order of the orthogonal matrix Z.
116 *
117 * KTOP (global input) INTEGER
118 * KBOT (global input) INTEGER
119 * It is assumed without a check that either
120 * KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together
121 * determine an isolated block along the diagonal of the
122 * Hessenberg matrix. However, H(KTOP,KTOP-1)=0 is not
123 * essentially necessary if WANTT is .TRUE. .
124 *
125 * NW (global input) INTEGER
126 * Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1).
127 * Normally NW .GE. 3 if PDLAQR2 is called by PDLAQR1.
128 *
129 * A (local input/output) DOUBLE PRECISION array, dimension
130 * (DESCH(LLD_),*)
131 * On input the initial N-by-N section of A stores the
132 * Hessenberg matrix undergoing aggressive early deflation.
133 * On output A has been transformed by an orthogonal
134 * similarity transformation, perturbed, and the returned
135 * to Hessenberg form that (it is to be hoped) has some
136 * zero subdiagonal entries.
137 *
138 * DESCA (global and local input) INTEGER array of dimension DLEN_.
139 * The array descriptor for the distributed matrix A.
140 *
141 * ILOZ (global input) INTEGER
142 * IHIZ (global input) INTEGER
143 * Specify the rows of Z to which transformations must be
144 * applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
145 *
146 * Z (input/output) DOUBLE PRECISION array, dimension
147 * (DESCH(LLD_),*)
148 * IF WANTZ is .TRUE., then on output, the orthogonal
149 * similarity transformation mentioned above has been
150 * accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
151 * If WANTZ is .FALSE., then Z is unreferenced.
152 *
153 * DESCZ (global and local input) INTEGER array of dimension DLEN_.
154 * The array descriptor for the distributed matrix Z.
155 *
156 * NS (global output) INTEGER
157 * The number of unconverged (ie approximate) eigenvalues
158 * returned in SR and SI that may be used as shifts by the
159 * calling subroutine.
160 *
161 * ND (global output) INTEGER
162 * The number of converged eigenvalues uncovered by this
163 * subroutine.
164 *
165 * SR (global output) DOUBLE PRECISION array, dimension KBOT
166 * SI (global output) DOUBLE PRECISION array, dimension KBOT
167 * On output, the real and imaginary parts of approximate
168 * eigenvalues that may be used for shifts are stored in
169 * SR(KBOT-ND-NS+1) through SR(KBOT-ND) and
170 * SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively.
171 * On proc #0, the real and imaginary parts of converged
172 * eigenvalues are stored in SR(KBOT-ND+1) through SR(KBOT) and
173 * SI(KBOT-ND+1) through SI(KBOT), respectively. On other
174 * processors, these entries are set to zero.
175 *
176 * T (local workspace) DOUBLE PRECISION array, dimension LDT*NW.
177 *
178 * LDT (local input) INTEGER
179 * The leading dimension of the array T.
180 * LDT >= NW.
181 *
182 * V (local workspace) DOUBLE PRECISION array, dimension LDV*NW.
183 *
184 * LDV (local input) INTEGER
185 * The leading dimension of the array V.
186 * LDV >= NW.
187 *
188 * WR (local workspace) DOUBLE PRECISION array, dimension KBOT.
189 * WI (local workspace) DOUBLE PRECISION array, dimension KBOT.
190 *
191 * WORK (local workspace) DOUBLE PRECISION array, dimension LWORK.
192 *
193 * LWORK (local input) INTEGER
194 * WORK(LWORK) is a local array and LWORK is assumed big enough
195 * so that LWORK >= NW*NW.
196 *
197 * ================================================================
198 * Implemented by
199 * Meiyue Shao, Department of Computing Science and HPC2N,
200 * Umea University, Sweden
201 *
202 * ================================================================
203 * References:
204 * B. Kagstrom, D. Kressner, and M. Shao,
205 * On Aggressive Early Deflation in Parallel Variants of the QR
206 * Algorithm.
207 * Para 2010, to appear.
208 *
209 * ================================================================
210 * .. Parameters ..
211  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
212  $ LLD_, MB_, M_, NB_, N_, RSRC_
213  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
214  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
215  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
216  DOUBLE PRECISION ZERO, ONE
217  PARAMETER ( ZERO = 0.0d+0, one = 1.0d+0 )
218 * ..
219 * .. Local Scalars ..
220  INTEGER CONTXT, HBL, I, I1, I2, IAFIRST, ICOL, ICOL1,
221  $ ICOL2, INFO, II, IROW, IROW1, IROW2, ITMP1,
222  $ itmp2, j, jafirst, jj, k, l, lda, ldz, lldtmp,
223  $ mycol, myrow, node, npcol, nprow, dblk,
224  $ hstep, vstep, kkrow, kkcol, kln, ltop, left,
225  $ right, up, down, d1, d2
226 * ..
227 * .. Local Arrays ..
228  INTEGER DESCT( 9 ), DESCV( 9 ), DESCWH( 9 ),
229  $ DESCWV( 9 )
230 * ..
231 * .. External Functions ..
232  INTEGER NUMROC
233  EXTERNAL NUMROC
234 * ..
235 * .. External Subroutines ..
236  EXTERNAL blacs_gridinfo, infog2l, dlaset,
237  $ dlaqr3, descinit, pdgemm, pdgemr2d, dgemm,
238  $ dlamov, dgesd2d, dgerv2d, dgebs2d, dgebr2d,
239  $ igebs2d, igebr2d
240 * ..
241 * .. Intrinsic Functions ..
242  INTRINSIC max, min, mod
243 * ..
244 * .. Executable Statements ..
245 *
246  info = 0
247 *
248  IF( n.EQ.0 )
249  $ RETURN
250 *
251 * NODE (IAFIRST,JAFIRST) OWNS A(1,1)
252 *
253  hbl = desca( mb_ )
254  contxt = desca( ctxt_ )
255  lda = desca( lld_ )
256  iafirst = desca( rsrc_ )
257  jafirst = desca( csrc_ )
258  ldz = descz( lld_ )
259  CALL blacs_gridinfo( contxt, nprow, npcol, myrow, mycol )
260  node = myrow*npcol + mycol
261  left = mod( mycol+npcol-1, npcol )
262  right = mod( mycol+1, npcol )
263  up = mod( myrow+nprow-1, nprow )
264  down = mod( myrow+1, nprow )
265 *
266 * I1 and I2 are the indices of the first row and last column of A
267 * to which transformations must be applied.
268 *
269  i = kbot
270  l = ktop
271  IF( wantt ) THEN
272  i1 = 1
273  i2 = n
274  ltop = 1
275  ELSE
276  i1 = l
277  i2 = i
278  ltop = l
279  END IF
280 *
281 * Begin Aggressive Early Deflation.
282 *
283  dblk = nw
284  CALL infog2l( i-dblk+1, i-dblk+1, desca, nprow, npcol, myrow,
285  $ mycol, irow, icol, ii, jj )
286  IF ( myrow .EQ. ii ) THEN
287  CALL descinit( desct, dblk, dblk, dblk, dblk, ii, jj, contxt,
288  $ ldt, info )
289  CALL descinit( descv, dblk, dblk, dblk, dblk, ii, jj, contxt,
290  $ ldv, info )
291  ELSE
292  CALL descinit( desct, dblk, dblk, dblk, dblk, ii, jj, contxt,
293  $ 1, info )
294  CALL descinit( descv, dblk, dblk, dblk, dblk, ii, jj, contxt,
295  $ 1, info )
296  END IF
297  CALL pdgemr2d( dblk, dblk, a, i-dblk+1, i-dblk+1, desca, t, 1, 1,
298  $ desct, contxt )
299  IF ( myrow .EQ. ii .AND. mycol .EQ. jj ) THEN
300  CALL dlaset( 'All', dblk, dblk, zero, one, v, ldv )
301  CALL dlaqr3( .true., .true., dblk, 1, dblk, dblk-1, t, ldt, 1,
302  $ dblk, v, ldv, ns, nd, wr, wi, work, dblk, dblk,
303  $ work( dblk*dblk+1 ), dblk, dblk, work( 2*dblk*dblk+1 ),
304  $ dblk, work( 3*dblk*dblk+1 ), lwork-3*dblk*dblk )
305  CALL dgebs2d( contxt, 'All', ' ', dblk, dblk, v, ldv )
306  CALL igebs2d( contxt, 'All', ' ', 1, 1, nd, 1 )
307  ELSE
308  CALL dgebr2d( contxt, 'All', ' ', dblk, dblk, v, ldv, ii, jj )
309  CALL igebr2d( contxt, 'All', ' ', 1, 1, nd, 1, ii, jj )
310  END IF
311 *
312  IF( nd .GT. 0 ) THEN
313 *
314 * Copy the local matrix back to the diagonal block.
315 *
316  CALL pdgemr2d( dblk, dblk, t, 1, 1, desct, a, i-dblk+1,
317  $ i-dblk+1, desca, contxt )
318 *
319 * Update T and Z.
320 *
321  IF( mod( i-dblk, hbl )+dblk .LE. hbl ) THEN
322 *
323 * Simplest case: the deflation window is located on one
324 * processor.
325 * Call DGEMM directly to perform the update.
326 *
327  hstep = lwork / dblk
328  vstep = hstep
329 *
330 * Update horizontal slab in A.
331 *
332  IF( wantt ) THEN
333  CALL infog2l( i-dblk+1, i+1, desca, nprow, npcol, myrow,
334  $ mycol, irow, icol, ii, jj )
335  IF( myrow .EQ. ii ) THEN
336  icol1 = numroc( n, hbl, mycol, jafirst, npcol )
337  DO 10 kkcol = icol, icol1, hstep
338  kln = min( hstep, icol1-kkcol+1 )
339  CALL dgemm( 'T', 'N', dblk, kln, dblk, one, v,
340  $ ldv, a( irow+(kkcol-1)*lda ), lda, zero, work,
341  $ dblk )
342  CALL dlamov( 'A', dblk, kln, work, dblk,
343  $ a( irow+(kkcol-1)*lda ), lda )
344  10 CONTINUE
345  END IF
346  END IF
347 *
348 * Update vertical slab in A.
349 *
350  CALL infog2l( ltop, i-dblk+1, desca, nprow, npcol, myrow,
351  $ mycol, irow, icol, ii, jj )
352  IF( mycol .EQ. jj ) THEN
353  CALL infog2l( i-dblk, i-dblk+1, desca, nprow, npcol,
354  $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
355  IF( myrow .NE. itmp1 ) irow1 = irow1-1
356  DO 20 kkrow = irow, irow1, vstep
357  kln = min( vstep, irow1-kkrow+1 )
358  CALL dgemm( 'N', 'N', kln, dblk, dblk, one,
359  $ a( kkrow+(icol-1)*lda ), lda, v, ldv, zero, work,
360  $ kln )
361  CALL dlamov( 'A', kln, dblk, work, kln,
362  $ a( kkrow+(icol-1)*lda ), lda )
363  20 CONTINUE
364  END IF
365 *
366 * Update vertical slab in Z.
367 *
368  IF( wantz ) THEN
369  CALL infog2l( iloz, i-dblk+1, descz, nprow, npcol, myrow,
370  $ mycol, irow, icol, ii, jj )
371  IF( mycol .EQ. jj ) THEN
372  CALL infog2l( ihiz, i-dblk+1, descz, nprow, npcol,
373  $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
374  IF( myrow .NE. itmp1 ) irow1 = irow1-1
375  DO 30 kkrow = irow, irow1, vstep
376  kln = min( vstep, irow1-kkrow+1 )
377  CALL dgemm( 'N', 'N', kln, dblk, dblk, one,
378  $ z( kkrow+(icol-1)*ldz ), ldz, v, ldv, zero,
379  $ work, kln )
380  CALL dlamov( 'A', kln, dblk, work, kln,
381  $ z( kkrow+(icol-1)*ldz ), ldz )
382  30 CONTINUE
383  END IF
384  END IF
385 *
386  ELSE IF( mod( i-dblk, hbl )+dblk .LE. 2*hbl ) THEN
387 *
388 * More complicated case: the deflation window lay on a 2x2
389 * processor mesh.
390 * Call DGEMM locally and communicate by pair.
391 *
392  d1 = hbl - mod( i-dblk, hbl )
393  d2 = dblk - d1
394  hstep = lwork / dblk
395  vstep = hstep
396 *
397 * Update horizontal slab in A.
398 *
399  IF( wantt ) THEN
400  CALL infog2l( i-dblk+1, i+1, desca, nprow, npcol, myrow,
401  $ mycol, irow, icol, ii, jj )
402  IF( myrow .EQ. up ) THEN
403  IF( myrow .EQ. ii ) THEN
404  icol1 = numroc( n, hbl, mycol, jafirst, npcol )
405  DO 40 kkcol = icol, icol1, hstep
406  kln = min( hstep, icol1-kkcol+1 )
407  CALL dgemm( 'T', 'N', dblk, kln, dblk, one, v,
408  $ dblk, a( irow+(kkcol-1)*lda ), lda, zero,
409  $ work, dblk )
410  CALL dlamov( 'A', dblk, kln, work, dblk,
411  $ a( irow+(kkcol-1)*lda ), lda )
412  40 CONTINUE
413  END IF
414  ELSE
415  IF( myrow .EQ. ii ) THEN
416  icol1 = numroc( n, hbl, mycol, jafirst, npcol )
417  DO 50 kkcol = icol, icol1, hstep
418  kln = min( hstep, icol1-kkcol+1 )
419  CALL dgemm( 'T', 'N', d2, kln, d1, one,
420  $ v( 1, d1+1 ), ldv, a( irow+(kkcol-1)*lda ),
421  $ lda, zero, work( d1+1 ), dblk )
422  CALL dgesd2d( contxt, d2, kln, work( d1+1 ),
423  $ dblk, down, mycol )
424  CALL dgerv2d( contxt, d1, kln, work, dblk, down,
425  $ mycol )
426  CALL dgemm( 'T', 'N', d1, kln, d1, one,
427  $ v, ldv, a( irow+(kkcol-1)*lda ), lda, one,
428  $ work, dblk )
429  CALL dlamov( 'A', d1, kln, work, dblk,
430  $ a( irow+(kkcol-1)*lda ), lda )
431  50 CONTINUE
432  ELSE IF( up .EQ. ii ) THEN
433  icol1 = numroc( n, hbl, mycol, jafirst, npcol )
434  DO 60 kkcol = icol, icol1, hstep
435  kln = min( hstep, icol1-kkcol+1 )
436  CALL dgemm( 'T', 'N', d1, kln, d2, one,
437  $ v( d1+1, 1 ), ldv, a( irow+(kkcol-1)*lda ),
438  $ lda, zero, work, dblk )
439  CALL dgesd2d( contxt, d1, kln, work, dblk, up,
440  $ mycol )
441  CALL dgerv2d( contxt, d2, kln, work( d1+1 ),
442  $ dblk, up, mycol )
443  CALL dgemm( 'T', 'N', d2, kln, d2, one,
444  $ v( d1+1, d1+1 ), ldv,
445  $ a( irow+(kkcol-1)*lda ), lda, one,
446  $ work( d1+1 ), dblk )
447  CALL dlamov( 'A', d2, kln, work( d1+1 ), dblk,
448  $ a( irow+(kkcol-1)*lda ), lda )
449  60 CONTINUE
450  END IF
451  END IF
452  END IF
453 *
454 * Update vertical slab in A.
455 *
456  CALL infog2l( ltop, i-dblk+1, desca, nprow, npcol, myrow,
457  $ mycol, irow, icol, ii, jj )
458  IF( mycol .EQ. left ) THEN
459  IF( mycol .EQ. jj ) THEN
460  CALL infog2l( i-dblk, i-dblk+1, desca, nprow, npcol,
461  $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
462  IF( myrow .NE. itmp1 ) irow1 = irow1-1
463  DO 70 kkrow = irow, irow1, vstep
464  kln = min( vstep, irow1-kkrow+1 )
465  CALL dgemm( 'N', 'N', kln, dblk, dblk, one,
466  $ a( kkrow+(icol-1)*lda ), lda, v, ldv, zero,
467  $ work, kln )
468  CALL dlamov( 'A', kln, dblk, work, kln,
469  $ a( kkrow+(icol-1)*lda ), lda )
470  70 CONTINUE
471  END IF
472  ELSE
473  IF( mycol .EQ. jj ) THEN
474  CALL infog2l( i-dblk, i-dblk+1, desca, nprow, npcol,
475  $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
476  IF( myrow .NE. itmp1 ) irow1 = irow1-1
477  DO 80 kkrow = irow, irow1, vstep
478  kln = min( vstep, irow1-kkrow+1 )
479  CALL dgemm( 'N', 'N', kln, d2, d1, one,
480  $ a( kkrow+(icol-1)*lda ), lda,
481  $ v( 1, d1+1 ), ldv, zero, work( 1+d1*kln ),
482  $ kln )
483  CALL dgesd2d( contxt, kln, d2, work( 1+d1*kln ),
484  $ kln, myrow, right )
485  CALL dgerv2d( contxt, kln, d1, work, kln, myrow,
486  $ right )
487  CALL dgemm( 'N', 'N', kln, d1, d1, one,
488  $ a( kkrow+(icol-1)*lda ), lda, v, ldv, one,
489  $ work, kln )
490  CALL dlamov( 'A', kln, d1, work, kln,
491  $ a( kkrow+(icol-1)*lda ), lda )
492  80 CONTINUE
493  ELSE IF ( left .EQ. jj ) THEN
494  CALL infog2l( i-dblk, i-dblk+1, desca, nprow, npcol,
495  $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
496  IF( myrow .NE. itmp1 ) irow1 = irow1-1
497  DO 90 kkrow = irow, irow1, vstep
498  kln = min( vstep, irow1-kkrow+1 )
499  CALL dgemm( 'N', 'N', kln, d1, d2, one,
500  $ a( kkrow+(icol-1)*lda ), lda, v( d1+1, 1 ),
501  $ ldv, zero, work, kln )
502  CALL dgesd2d( contxt, kln, d1, work, kln, myrow,
503  $ left )
504  CALL dgerv2d( contxt, kln, d2, work( 1+d1*kln ),
505  $ kln, myrow, left )
506  CALL dgemm( 'N', 'N', kln, d2, d2, one,
507  $ a( kkrow+(icol-1)*lda ), lda, v( d1+1, d1+1 ),
508  $ ldv, one, work( 1+d1*kln ), kln )
509  CALL dlamov( 'A', kln, d2, work( 1+d1*kln ), kln,
510  $ a( kkrow+(icol-1)*lda ), lda )
511  90 CONTINUE
512  END IF
513  END IF
514 *
515 * Update vertical slab in Z.
516 *
517  IF( wantz ) THEN
518  CALL infog2l( iloz, i-dblk+1, descz, nprow, npcol, myrow,
519  $ mycol, irow, icol, ii, jj )
520  IF( mycol .EQ. left ) THEN
521  IF( mycol .EQ. jj ) THEN
522  CALL infog2l( ihiz, i-dblk+1, descz, nprow, npcol,
523  $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
524  IF( myrow .NE. itmp1 ) irow1 = irow1-1
525  DO 100 kkrow = irow, irow1, vstep
526  kln = min( vstep, irow1-kkrow+1 )
527  CALL dgemm( 'N', 'N', kln, dblk, dblk, one,
528  $ z( kkrow+(icol-1)*ldz ), ldz, v, ldv, zero,
529  $ work, kln )
530  CALL dlamov( 'A', kln, dblk, work, kln,
531  $ z( kkrow+(icol-1)*ldz ), ldz )
532  100 CONTINUE
533  END IF
534  ELSE
535  IF( mycol .EQ. jj ) THEN
536  CALL infog2l( ihiz, i-dblk+1, descz, nprow, npcol,
537  $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
538  IF( myrow .NE. itmp1 ) irow1 = irow1-1
539  DO 110 kkrow = irow, irow1, vstep
540  kln = min( vstep, irow1-kkrow+1 )
541  CALL dgemm( 'N', 'N', kln, d2, d1, one,
542  $ z( kkrow+(icol-1)*ldz ), ldz,
543  $ v( 1, d1+1 ), ldv, zero, work( 1+d1*kln ),
544  $ kln )
545  CALL dgesd2d( contxt, kln, d2, work( 1+d1*kln ),
546  $ kln, myrow, right )
547  CALL dgerv2d( contxt, kln, d1, work, kln, myrow,
548  $ right )
549  CALL dgemm( 'N', 'N', kln, d1, d1, one,
550  $ z( kkrow+(icol-1)*ldz ), ldz, v, ldv, one,
551  $ work, kln )
552  CALL dlamov( 'A', kln, d1, work, kln,
553  $ z( kkrow+(icol-1)*ldz ), ldz )
554  110 CONTINUE
555  ELSE IF( left .EQ. jj ) THEN
556  CALL infog2l( ihiz, i-dblk+1, descz, nprow, npcol,
557  $ myrow, mycol, irow1, icol1, itmp1, itmp2 )
558  IF( myrow .NE. itmp1 ) irow1 = irow1-1
559  DO 120 kkrow = irow, irow1, vstep
560  kln = min( vstep, irow1-kkrow+1 )
561  CALL dgemm( 'N', 'N', kln, d1, d2, one,
562  $ z( kkrow+(icol-1)*ldz ), ldz,
563  $ v( d1+1, 1 ), ldv, zero, work, kln )
564  CALL dgesd2d( contxt, kln, d1, work, kln, myrow,
565  $ left )
566  CALL dgerv2d( contxt, kln, d2, work( 1+d1*kln ),
567  $ kln, myrow, left )
568  CALL dgemm( 'N', 'N', kln, d2, d2, one,
569  $ z( kkrow+(icol-1)*ldz ), ldz,
570  $ v( d1+1, d1+1 ), ldv, one,
571  $ work( 1+d1*kln ), kln )
572  CALL dlamov( 'A', kln, d2, work( 1+d1*kln ),
573  $ kln, z( kkrow+(icol-1)*ldz ), ldz )
574  120 CONTINUE
575  END IF
576  END IF
577  END IF
578 *
579  ELSE
580 *
581 * Most complicated case: the deflation window lay across the
582 * border of the processor mesh.
583 * Treat V as a distributed matrix and call PDGEMM.
584 *
585  hstep = lwork / dblk * npcol
586  vstep = lwork / dblk * nprow
587  lldtmp = numroc( dblk, dblk, myrow, 0, nprow )
588  lldtmp = max( 1, lldtmp )
589  CALL descinit( descv, dblk, dblk, dblk, dblk, 0, 0, contxt,
590  $ lldtmp, info )
591  CALL descinit( descwh, dblk, hstep, dblk, lwork / dblk, 0,
592  $ 0, contxt, lldtmp, info )
593 *
594 * Update horizontal slab in A.
595 *
596  IF( wantt ) THEN
597  DO 130 kkcol = i+1, n, hstep
598  kln = min( hstep, n-kkcol+1 )
599  CALL pdgemm( 'T', 'N', dblk, kln, dblk, one, v, 1, 1,
600  $ descv, a, i-dblk+1, kkcol, desca, zero, work, 1,
601  $ 1, descwh )
602  CALL pdgemr2d( dblk, kln, work, 1, 1, descwh, a,
603  $ i-dblk+1, kkcol, desca, contxt )
604  130 CONTINUE
605  END IF
606 *
607 * Update vertical slab in A.
608 *
609  DO 140 kkrow = ltop, i-dblk, vstep
610  kln = min( vstep, i-dblk-kkrow+1 )
611  lldtmp = numroc( kln, lwork / dblk, myrow, 0, nprow )
612  lldtmp = max( 1, lldtmp )
613  CALL descinit( descwv, kln, dblk, lwork / dblk, dblk, 0,
614  $ 0, contxt, lldtmp, info )
615  CALL pdgemm( 'N', 'N', kln, dblk, dblk, one, a, kkrow,
616  $ i-dblk+1, desca, v, 1, 1, descv, zero, work, 1, 1,
617  $ descwv )
618  CALL pdgemr2d( kln, dblk, work, 1, 1, descwv, a, kkrow,
619  $ i-dblk+1, desca, contxt )
620  140 CONTINUE
621 *
622 * Update vertical slab in Z.
623 *
624  IF( wantz ) THEN
625  DO 150 kkrow = iloz, ihiz, vstep
626  kln = min( vstep, ihiz-kkrow+1 )
627  lldtmp = numroc( kln, lwork / dblk, myrow, 0, nprow )
628  lldtmp = max( 1, lldtmp )
629  CALL descinit( descwv, kln, dblk, lwork / dblk, dblk,
630  $ 0, 0, contxt, lldtmp, info )
631  CALL pdgemm( 'N', 'N', kln, dblk, dblk, one, z, kkrow,
632  $ i-dblk+1, descz, v, 1, 1, descv, zero, work, 1,
633  $ 1, descwv )
634  CALL pdgemr2d( kln, dblk, work, 1, 1, descwv, z,
635  $ kkrow, i-dblk+1, descz, contxt )
636  150 CONTINUE
637  END IF
638  END IF
639 *
640 * Extract converged eigenvalues.
641 *
642  ii = 0
643  160 CONTINUE
644  IF( ii .EQ. nd-1 .OR. wi( dblk-ii ) .EQ. zero ) THEN
645  IF( node .EQ. 0 ) THEN
646  sr( i-ii ) = wr( dblk-ii )
647  ELSE
648  sr( i-ii ) = zero
649  END IF
650  si( i-ii ) = zero
651  ii = ii + 1
652  ELSE
653  IF( node .EQ. 0 ) THEN
654  sr( i-ii-1 ) = wr( dblk-ii-1 )
655  sr( i-ii ) = wr( dblk-ii )
656  si( i-ii-1 ) = wi( dblk-ii-1 )
657  si( i-ii ) = wi( dblk-ii )
658  ELSE
659  sr( i-ii-1 ) = zero
660  sr( i-ii ) = zero
661  si( i-ii-1 ) = zero
662  si( i-ii ) = zero
663  END IF
664  ii = ii + 2
665  END IF
666  IF( ii .LT. nd ) GOTO 160
667  END IF
668 *
669 * END OF PDLAQR2
670 *
671  END
max
#define max(A, B)
Definition: pcgemr.c:180
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
pdlaqr2
subroutine pdlaqr2(WANTT, WANTZ, N, KTOP, KBOT, NW, A, DESCA, ILOZ, IHIZ, Z, DESCZ, NS, ND, SR, SI, T, LDT, V, LDV, WR, WI, WORK, LWORK)
Definition: pdlaqr2.f:4
min
#define min(A, B)
Definition: pcgemr.c:181