ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pzlarz.f
Go to the documentation of this file.
1  SUBROUTINE pzlarz( SIDE, M, N, L, V, IV, JV, DESCV, INCV, TAU, C,
2  $ IC, JC, DESCC, WORK )
3 *
4 * -- ScaLAPACK auxiliary routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 25, 2001
8 *
9 * .. Scalar Arguments ..
10  CHARACTER SIDE
11  INTEGER IC, INCV, IV, JC, JV, L, M, N
12 * ..
13 * .. Array Arguments ..
14  INTEGER DESCC( * ), DESCV( * )
15  COMPLEX*16 C( * ), TAU( * ), V( * ), WORK( * )
16 * ..
17 *
18 * Purpose
19 * =======
20 *
21 * PZLARZ applies a complex elementary reflector Q to a complex M-by-N
22 * distributed matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1), from either the
23 * left or the right. Q is represented in the form
24 *
25 * Q = I - tau * v * v'
26 *
27 * where tau is a complex scalar and v is a complex vector.
28 *
29 * If tau = 0, then Q is taken to be the unit matrix.
30 *
31 * Q is a product of k elementary reflectors as returned by PZTZRZF.
32 *
33 * Notes
34 * =====
35 *
36 * Each global data object is described by an associated description
37 * vector. This vector stores the information required to establish
38 * the mapping between an object element and its corresponding process
39 * and memory location.
40 *
41 * Let A be a generic term for any 2D block cyclicly distributed array.
42 * Such a global array has an associated description vector DESCA.
43 * In the following comments, the character _ should be read as
44 * "of the global array".
45 *
46 * NOTATION STORED IN EXPLANATION
47 * --------------- -------------- --------------------------------------
48 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
49 * DTYPE_A = 1.
50 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
51 * the BLACS process grid A is distribu-
52 * ted over. The context itself is glo-
53 * bal, but the handle (the integer
54 * value) may vary.
55 * M_A (global) DESCA( M_ ) The number of rows in the global
56 * array A.
57 * N_A (global) DESCA( N_ ) The number of columns in the global
58 * array A.
59 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
60 * the rows of the array.
61 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
62 * the columns of the array.
63 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
64 * row of the array A is distributed.
65 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
66 * first column of the array A is
67 * distributed.
68 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
69 * array. LLD_A >= MAX(1,LOCr(M_A)).
70 *
71 * Let K be the number of rows or columns of a distributed matrix,
72 * and assume that its process grid has dimension p x q.
73 * LOCr( K ) denotes the number of elements of K that a process
74 * would receive if K were distributed over the p processes of its
75 * process column.
76 * Similarly, LOCc( K ) denotes the number of elements of K that a
77 * process would receive if K were distributed over the q processes of
78 * its process row.
79 * The values of LOCr() and LOCc() may be determined via a call to the
80 * ScaLAPACK tool function, NUMROC:
81 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
82 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
83 * An upper bound for these quantities may be computed by:
84 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
85 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
86 *
87 * Because vectors may be viewed as a subclass of matrices, a
88 * distributed vector is considered to be a distributed matrix.
89 *
90 * Restrictions
91 * ============
92 *
93 * If SIDE = 'Left' and INCV = 1, then the row process having the first
94 * entry V(IV,JV) must also own C(IC+M-L,JC:JC+N-1). Moreover,
95 * MOD(IV-1,MB_V) must be equal to MOD(IC+N-L-1,MB_C), if INCV=M_V, only
96 * the last equality must be satisfied.
97 *
98 * If SIDE = 'Right' and INCV = M_V then the column process having the
99 * first entry V(IV,JV) must also own C(IC:IC+M-1,JC+N-L) and
100 * MOD(JV-1,NB_V) must be equal to MOD(JC+N-L-1,NB_C), if INCV = 1 only
101 * the last equality must be satisfied.
102 *
103 * Arguments
104 * =========
105 *
106 * SIDE (global input) CHARACTER
107 * = 'L': form Q * sub( C ),
108 * = 'R': form sub( C ) * Q.
109 *
110 * M (global input) INTEGER
111 * The number of rows to be operated on i.e the number of rows
112 * of the distributed submatrix sub( C ). M >= 0.
113 *
114 * N (global input) INTEGER
115 * The number of columns to be operated on i.e the number of
116 * columns of the distributed submatrix sub( C ). N >= 0.
117 *
118 * L (global input) INTEGER
119 * The columns of the distributed submatrix sub( A ) containing
120 * the meaningful part of the Householder reflectors.
121 * If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0.
122 *
123 * V (local input) COMPLEX*16 pointer into the local memory
124 * to an array of dimension (LLD_V,*) containing the local
125 * pieces of the distributed vectors V representing the
126 * Householder transformation Q,
127 * V(IV:IV+L-1,JV) if SIDE = 'L' and INCV = 1,
128 * V(IV,JV:JV+L-1) if SIDE = 'L' and INCV = M_V,
129 * V(IV:IV+L-1,JV) if SIDE = 'R' and INCV = 1,
130 * V(IV,JV:JV+L-1) if SIDE = 'R' and INCV = M_V,
131 *
132 * The vector v in the representation of Q. V is not used if
133 * TAU = 0.
134 *
135 * IV (global input) INTEGER
136 * The row index in the global array V indicating the first
137 * row of sub( V ).
138 *
139 * JV (global input) INTEGER
140 * The column index in the global array V indicating the
141 * first column of sub( V ).
142 *
143 * DESCV (global and local input) INTEGER array of dimension DLEN_.
144 * The array descriptor for the distributed matrix V.
145 *
146 * INCV (global input) INTEGER
147 * The global increment for the elements of V. Only two values
148 * of INCV are supported in this version, namely 1 and M_V.
149 * INCV must not be zero.
150 *
151 * TAU (local input) COMPLEX*16, array, dimension LOCc(JV) if
152 * INCV = 1, and LOCr(IV) otherwise. This array contains the
153 * Householder scalars related to the Householder vectors.
154 * TAU is tied to the distributed matrix V.
155 *
156 * C (local input/local output) COMPLEX*16 pointer into the
157 * local memory to an array of dimension (LLD_C, LOCc(JC+N-1) ),
158 * containing the local pieces of sub( C ). On exit, sub( C )
159 * is overwritten by the Q * sub( C ) if SIDE = 'L', or
160 * sub( C ) * Q if SIDE = 'R'.
161 *
162 * IC (global input) INTEGER
163 * The row index in the global array C indicating the first
164 * row of sub( C ).
165 *
166 * JC (global input) INTEGER
167 * The column index in the global array C indicating the
168 * first column of sub( C ).
169 *
170 * DESCC (global and local input) INTEGER array of dimension DLEN_.
171 * The array descriptor for the distributed matrix C.
172 *
173 * WORK (local workspace) COMPLEX*16 array, dimension (LWORK)
174 * If INCV = 1,
175 * if SIDE = 'L',
176 * if IVCOL = ICCOL,
177 * LWORK >= NqC0
178 * else
179 * LWORK >= MpC0 + MAX( 1, NqC0 )
180 * end if
181 * else if SIDE = 'R',
182 * LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( NUMROC(
183 * N+ICOFFC,NB_V,0,0,NPCOL ),NB_V,0,0,LCMQ ) )
184 * end if
185 * else if INCV = M_V,
186 * if SIDE = 'L',
187 * LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( NUMROC(
188 * M+IROFFC,MB_V,0,0,NPROW ),MB_V,0,0,LCMP ) )
189 * else if SIDE = 'R',
190 * if IVROW = ICROW,
191 * LWORK >= MpC0
192 * else
193 * LWORK >= NqC0 + MAX( 1, MpC0 )
194 * end if
195 * end if
196 * end if
197 *
198 * where LCM is the least common multiple of NPROW and NPCOL and
199 * LCM = ILCM( NPROW, NPCOL ), LCMP = LCM / NPROW,
200 * LCMQ = LCM / NPCOL,
201 *
202 * IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ),
203 * ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ),
204 * ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ),
205 * MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ),
206 * NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ),
207 *
208 * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions;
209 * MYROW, MYCOL, NPROW and NPCOL can be determined by calling
210 * the subroutine BLACS_GRIDINFO.
211 *
212 * Alignment requirements
213 * ======================
214 *
215 * The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1)
216 * must verify some alignment properties, namely the following
217 * expressions should be true:
218 *
219 * MB_V = NB_V,
220 *
221 * If INCV = 1,
222 * If SIDE = 'Left',
223 * ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW )
224 * If SIDE = 'Right',
225 * ( MB_V.EQ.NB_A .AND. MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC )
226 * else if INCV = M_V,
227 * If SIDE = 'Left',
228 * ( MB_V.EQ.NB_V .AND. MB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC )
229 * If SIDE = 'Right',
230 * ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL )
231 * end if
232 *
233 * =====================================================================
234 *
235 * .. Parameters ..
236  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
237  $ lld_, mb_, m_, nb_, n_, rsrc_
238  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
239  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
240  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
241  COMPLEX*16 ONE, ZERO
242  parameter( one = ( 1.0d+0, 0.0d+0 ),
243  $ zero = ( 0.0d+0, 0.0d+0 ) )
244 * ..
245 * .. Local Scalars ..
246  LOGICAL CCBLCK, CRBLCK, LEFT
247  CHARACTER COLBTOP, ROWBTOP
248  INTEGER ICCOL1, ICCOL2, ICOFFC1, ICOFFC2, ICOFFV,
249  $ icrow1, icrow2, ictxt, iic1, iic2, iiv, ioffc1,
250  $ ioffc2, ioffv, ipw, iroffc1, iroffc2, iroffv,
251  $ ivcol, ivrow, jjc1, jjc2, jjv, ldc, ldv, mpc2,
252  $ mpv, mycol, myrow, ncc, ncv, npcol, nprow,
253  $ nqc2, nqv, rdest
254  COMPLEX*16 TAULOC
255 * ..
256 * .. External Subroutines ..
257  EXTERNAL blacs_gridinfo, infog2l, pb_topget, pbztrnv,
258  $ zaxpy, zcopy, zgebr2d, zgebs2d,
259  $ zgemv, zgerc, zgerv2d, zgesd2d,
260  $ zgsum2d, zlaset
261 * ..
262 * .. External Functions ..
263  LOGICAL LSAME
264  INTEGER NUMROC
265  EXTERNAL lsame, numroc
266 * ..
267 * .. Intrinsic Functions ..
268  INTRINSIC min, mod
269 * ..
270 * .. Executable Statements ..
271 *
272 * Quick return if possible
273 *
274  IF( m.LE.0 .OR. n.LE.0 )
275  $ RETURN
276 *
277 * Get grid parameters.
278 *
279  ictxt = descc( ctxt_ )
280  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
281 *
282 * Figure local indexes
283 *
284  left = lsame( side, 'L' )
285  CALL infog2l( iv, jv, descv, nprow, npcol, myrow, mycol, iiv, jjv,
286  $ ivrow, ivcol )
287  iroffv = mod( iv-1, descv( nb_ ) )
288  mpv = numroc( l+iroffv, descv( mb_ ), myrow, ivrow, nprow )
289  IF( myrow.EQ.ivrow )
290  $ mpv = mpv - iroffv
291  icoffv = mod( jv-1, descv( nb_ ) )
292  nqv = numroc( l+icoffv, descv( nb_ ), mycol, ivcol, npcol )
293  IF( mycol.EQ.ivcol )
294  $ nqv = nqv - icoffv
295  ldv = descv( lld_ )
296  ncv = numroc( descv( n_ ), descv( nb_ ), mycol, descv( csrc_ ),
297  $ npcol )
298  ldv = descv( lld_ )
299  iiv = min( iiv, ldv )
300  jjv = min( jjv, ncv )
301  ioffv = iiv+(jjv-1)*ldv
302  ncc = numroc( descc( n_ ), descc( nb_ ), mycol, descc( csrc_ ),
303  $ npcol )
304  CALL infog2l( ic, jc, descc, nprow, npcol, myrow, mycol,
305  $ iic1, jjc1, icrow1, iccol1 )
306  iroffc1 = mod( ic-1, descc( mb_ ) )
307  icoffc1 = mod( jc-1, descc( nb_ ) )
308  ldc = descc( lld_ )
309  iic1 = min( iic1, ldc )
310  jjc1 = min( jjc1, max( 1, ncc ) )
311  ioffc1 = iic1 + ( jjc1-1 ) * ldc
312 *
313  IF( left ) THEN
314  CALL infog2l( ic+m-l, jc, descc, nprow, npcol, myrow, mycol,
315  $ iic2, jjc2, icrow2, iccol2 )
316  iroffc2 = mod( ic+m-l-1, descc( mb_ ) )
317  icoffc2 = mod( jc-1, descc( nb_ ) )
318  nqc2 = numroc( n+icoffc2, descc( nb_ ), mycol, iccol2, npcol )
319  IF( mycol.EQ.iccol2 )
320  $ nqc2 = nqc2 - icoffc2
321  ELSE
322  CALL infog2l( ic, jc+n-l, descc, nprow, npcol, myrow, mycol,
323  $ iic2, jjc2, icrow2, iccol2 )
324  iroffc2 = mod( ic-1, descc( mb_ ) )
325  mpc2 = numroc( m+iroffc2, descc( mb_ ), myrow, icrow2, nprow )
326  IF( myrow.EQ.icrow2 )
327  $ mpc2 = mpc2 - iroffc2
328  icoffc2 = mod( jc+n-l-1, descc( nb_ ) )
329  END IF
330  iic2 = min( iic2, ldc )
331  jjc2 = min( jjc2, ncc )
332  ioffc2 = iic2 + ( jjc2-1 ) * ldc
333 *
334 * Is sub( C ) only distributed over a process row ?
335 *
336  crblck = ( m.LE.(descc( mb_ )-iroffc1) )
337 *
338 * Is sub( C ) only distributed over a process column ?
339 *
340  ccblck = ( n.LE.(descc( nb_ )-icoffc1) )
341 *
342  IF( left ) THEN
343 *
344  IF( crblck ) THEN
345  rdest = icrow2
346  ELSE
347  rdest = -1
348  END IF
349 *
350  IF( ccblck ) THEN
351 *
352 * sub( C ) is distributed over a process column
353 *
354  IF( descv( m_ ).EQ.incv ) THEN
355 *
356 * Transpose row vector V (ICOFFV = IROFFC2)
357 *
358  ipw = mpv+1
359  CALL pbztrnv( ictxt, 'Rowwise', 'Transpose', m,
360  $ descv( nb_ ), iroffc2, v( ioffv ), ldv,
361  $ zero,
362  $ work, 1, ivrow, ivcol, icrow2, iccol2,
363  $ work( ipw ) )
364 *
365 * Perform the local computation within a process column
366 *
367  IF( mycol.EQ.iccol2 ) THEN
368 *
369  IF( myrow.EQ.ivrow ) THEN
370 *
371  CALL zgebs2d( ictxt, 'Columnwise', ' ', 1, 1,
372  $ tau( iiv ), 1 )
373  tauloc = tau( iiv )
374 *
375  ELSE
376 *
377  CALL zgebr2d( ictxt, 'Columnwise', ' ', 1, 1,
378  $ tauloc, 1, ivrow, mycol )
379 *
380  END IF
381 *
382  IF( tauloc.NE.zero ) THEN
383 *
384 * w := sub( C )' * v
385 *
386  IF( mpv.GT.0 ) THEN
387  CALL zgemv( 'Conjugate transpose', mpv, nqc2,
388  $ one, c( ioffc2 ), ldc, work, 1,
389  $ zero, work( ipw ), 1 )
390  ELSE
391  CALL zlaset( 'All', nqc2, 1, zero, zero,
392  $ work( ipw ), max( 1, nqc2 ) )
393  END IF
394  IF( myrow.EQ.icrow1 )
395  $ CALL zaxpy( nqc2, one, c( ioffc1 ), ldc,
396  $ work( ipw ), max( 1, nqc2 ) )
397 *
398  CALL zgsum2d( ictxt, 'Columnwise', ' ', nqc2, 1,
399  $ work( ipw ), max( 1, nqc2 ), rdest,
400  $ mycol )
401 *
402 * sub( C ) := sub( C ) - v * w'
403 *
404  IF( myrow.EQ.icrow1 )
405  $ CALL zaxpy( nqc2, -tauloc, work( ipw ),
406  $ max( 1, nqc2 ), c( ioffc1 ), ldc )
407  CALL zgerc( mpv, nqc2, -tauloc, work, 1,
408  $ work( ipw ), 1, c( ioffc2 ), ldc )
409  END IF
410 *
411  END IF
412 *
413  ELSE
414 *
415 * V is a column vector
416 *
417  IF( ivcol.EQ.iccol2 ) THEN
418 *
419 * Perform the local computation within a process column
420 *
421  IF( mycol.EQ.iccol2 ) THEN
422 *
423  tauloc = tau( jjv )
424 *
425  IF( tauloc.NE.zero ) THEN
426 *
427 * w := sub( C )' * v
428 *
429  IF( mpv.GT.0 ) THEN
430  CALL zgemv( 'Conjugate transpose', mpv, nqc2,
431  $ one, c( ioffc2 ), ldc, v( ioffv ),
432  $ 1, zero, work, 1 )
433  ELSE
434  CALL zlaset( 'All', nqc2, 1, zero, zero,
435  $ work, max( 1, nqc2 ) )
436  END IF
437  IF( myrow.EQ.icrow1 )
438  $ CALL zaxpy( nqc2, one, c( ioffc1 ), ldc,
439  $ work, max( 1, nqc2 ) )
440 *
441  CALL zgsum2d( ictxt, 'Columnwise', ' ', nqc2, 1,
442  $ work, max( 1, nqc2 ), rdest,
443  $ mycol )
444 *
445 * sub( C ) := sub( C ) - v * w'
446 *
447  IF( myrow.EQ.icrow1 )
448  $ CALL zaxpy( nqc2, -tauloc, work,
449  $ max( 1, nqc2 ), c( ioffc1 ),
450  $ ldc )
451  CALL zgerc( mpv, nqc2, -tauloc, v( ioffv ), 1,
452  $ work, 1, c( ioffc2 ), ldc )
453  END IF
454 *
455  END IF
456 *
457  ELSE
458 *
459 * Send V and TAU to the process column ICCOL2
460 *
461  IF( mycol.EQ.ivcol ) THEN
462 *
463  ipw = mpv+1
464  CALL zcopy( mpv, v( ioffv ), 1, work, 1 )
465  work( ipw ) = tau( jjv )
466  CALL zgesd2d( ictxt, ipw, 1, work, ipw, myrow,
467  $ iccol2 )
468 *
469  ELSE IF( mycol.EQ.iccol2 ) THEN
470 *
471  ipw = mpv+1
472  CALL zgerv2d( ictxt, ipw, 1, work, ipw, myrow,
473  $ ivcol )
474  tauloc = work( ipw )
475 *
476  IF( tauloc.NE.zero ) THEN
477 *
478 * w := sub( C )' * v
479 *
480  IF( mpv.GT.0 ) THEN
481  CALL zgemv( 'Conjugate transpose', mpv, nqc2,
482  $ one, c( ioffc2 ), ldc, work, 1,
483  $ zero, work( ipw ), 1 )
484  ELSE
485  CALL zlaset( 'All', nqc2, 1, zero, zero,
486  $ work( ipw ), max( 1, nqc2 ) )
487  END IF
488  IF( myrow.EQ.icrow1 )
489  $ CALL zaxpy( nqc2, one, c( ioffc1 ), ldc,
490  $ work( ipw ), max( 1, nqc2 ) )
491 *
492  CALL zgsum2d( ictxt, 'Columnwise', ' ', nqc2, 1,
493  $ work( ipw ), max( 1, nqc2 ),
494  $ rdest, mycol )
495 *
496 * sub( C ) := sub( C ) - v * w'
497 *
498  IF( myrow.EQ.icrow1 )
499  $ CALL zaxpy( nqc2, -tauloc, work( ipw ),
500  $ max( 1, nqc2 ), c( ioffc1 ),
501  $ ldc )
502  CALL zgerc( mpv, nqc2, -tauloc, work, 1,
503  $ work( ipw ), 1, c( ioffc2 ), ldc )
504  END IF
505 *
506  END IF
507 *
508  END IF
509 *
510  END IF
511 *
512  ELSE
513 *
514 * sub( C ) is a proper distributed matrix
515 *
516  IF( descv( m_ ).EQ.incv ) THEN
517 *
518 * Transpose and broadcast row vector V (ICOFFV=IROFFC2)
519 *
520  ipw = mpv+1
521  CALL pbztrnv( ictxt, 'Rowwise', 'Transpose', m,
522  $ descv( nb_ ), iroffc2, v( ioffv ), ldv,
523  $ zero,
524  $ work, 1, ivrow, ivcol, icrow2, -1,
525  $ work( ipw ) )
526 *
527 * Perform the local computation within a process column
528 *
529  IF( myrow.EQ.ivrow ) THEN
530 *
531  CALL zgebs2d( ictxt, 'Columnwise', ' ', 1, 1,
532  $ tau( iiv ), 1 )
533  tauloc = tau( iiv )
534 *
535  ELSE
536 *
537  CALL zgebr2d( ictxt, 'Columnwise', ' ', 1, 1, tauloc,
538  $ 1, ivrow, mycol )
539 *
540  END IF
541 *
542  IF( tauloc.NE.zero ) THEN
543 *
544 * w := sub( C )' * v
545 *
546  IF( mpv.GT.0 ) THEN
547  CALL zgemv( 'Conjugate transpose', mpv, nqc2, one,
548  $ c( ioffc2 ), ldc, work, 1, zero,
549  $ work( ipw ), 1 )
550  ELSE
551  CALL zlaset( 'All', nqc2, 1, zero, zero,
552  $ work( ipw ), max( 1, nqc2 ) )
553  END IF
554  IF( myrow.EQ.icrow1 )
555  $ CALL zaxpy( nqc2, one, c( ioffc1 ), ldc,
556  $ work( ipw ), max( 1, nqc2 ) )
557 *
558  CALL zgsum2d( ictxt, 'Columnwise', ' ', nqc2, 1,
559  $ work( ipw ), max( 1, nqc2 ), rdest,
560  $ mycol )
561 *
562 * sub( C ) := sub( C ) - v * w'
563 *
564  IF( myrow.EQ.icrow1 )
565  $ CALL zaxpy( nqc2, -tauloc, work( ipw ),
566  $ max( 1, nqc2 ), c( ioffc1 ), ldc )
567  CALL zgerc( mpv, nqc2, -tauloc, work, 1, work( ipw ),
568  $ 1, c( ioffc2 ), ldc )
569  END IF
570 *
571  ELSE
572 *
573 * Broadcast column vector V
574 *
575  CALL pb_topget( ictxt, 'Broadcast', 'Rowwise', rowbtop )
576  IF( mycol.EQ.ivcol ) THEN
577 *
578  ipw = mpv+1
579  CALL zcopy( mpv, v( ioffv ), 1, work, 1 )
580  work( ipw ) = tau( jjv )
581  CALL zgebs2d( ictxt, 'Rowwise', rowbtop, ipw, 1,
582  $ work, ipw )
583  tauloc = tau( jjv )
584 *
585  ELSE
586 *
587  ipw = mpv+1
588  CALL zgebr2d( ictxt, 'Rowwise', rowbtop, ipw, 1, work,
589  $ ipw, myrow, ivcol )
590  tauloc = work( ipw )
591 *
592  END IF
593 *
594  IF( tauloc.NE.zero ) THEN
595 *
596 * w := sub( C )' * v
597 *
598  IF( mpv.GT.0 ) THEN
599  CALL zgemv( 'Conjugate transpose', mpv, nqc2, one,
600  $ c( ioffc2 ), ldc, work, 1, zero,
601  $ work( ipw ), 1 )
602  ELSE
603  CALL zlaset( 'All', nqc2, 1, zero, zero,
604  $ work( ipw ), max( 1, nqc2 ) )
605  END IF
606  IF( myrow.EQ.icrow1 )
607  $ CALL zaxpy( nqc2, one, c( ioffc1 ), ldc,
608  $ work( ipw ), max( 1, nqc2 ) )
609 *
610  CALL zgsum2d( ictxt, 'Columnwise', ' ', nqc2, 1,
611  $ work( ipw ), max( 1, nqc2 ), rdest,
612  $ mycol )
613 *
614 * sub( C ) := sub( C ) - v * w'
615 *
616  IF( myrow.EQ.icrow1 )
617  $ CALL zaxpy( nqc2, -tauloc, work( ipw ),
618  $ max( 1, nqc2 ), c( ioffc1 ), ldc )
619  CALL zgerc( mpv, nqc2, -tauloc, work, 1, work( ipw ),
620  $ 1, c( ioffc2 ), ldc )
621  END IF
622 *
623  END IF
624 *
625  END IF
626 *
627  ELSE
628 *
629  IF( ccblck ) THEN
630  rdest = myrow
631  ELSE
632  rdest = -1
633  END IF
634 *
635  IF( crblck ) THEN
636 *
637 * sub( C ) is distributed over a process row
638 *
639  IF( descv( m_ ).EQ.incv ) THEN
640 *
641 * V is a row vector
642 *
643  IF( ivrow.EQ.icrow2 ) THEN
644 *
645 * Perform the local computation within a process row
646 *
647  IF( myrow.EQ.icrow2 ) THEN
648 *
649  tauloc = tau( iiv )
650 *
651  IF( tauloc.NE.zero ) THEN
652 *
653 * w := sub( C ) * v
654 *
655  IF( nqv.GT.0 ) THEN
656  CALL zgemv( 'No transpose', mpc2, nqv, one,
657  $ c( ioffc2 ), ldc, v( ioffv ),
658  $ ldv, zero, work, 1 )
659  ELSE
660  CALL zlaset( 'All', mpc2, 1, zero, zero,
661  $ work, max( 1, mpc2 ) )
662  END IF
663  IF( mycol.EQ.iccol1 )
664  $ CALL zaxpy( mpc2, one, c( ioffc1 ), 1,
665  $ work, 1 )
666 *
667  CALL zgsum2d( ictxt, 'Rowwise', ' ', mpc2, 1,
668  $ work, max( 1, mpc2 ), rdest,
669  $ iccol2 )
670 *
671  IF( mycol.EQ.iccol1 )
672  $ CALL zaxpy( mpc2, -tauloc, work, 1,
673  $ c( ioffc1 ), 1 )
674 *
675 * sub( C ) := sub( C ) - w * v'
676 *
677  IF( mpc2.GT.0 .AND. nqv.GT.0 )
678  $ CALL zgerc( mpc2, nqv, -tauloc, work, 1,
679  $ v( ioffv ), ldv, c( ioffc2 ),
680  $ ldc )
681  END IF
682 *
683  END IF
684 *
685  ELSE
686 *
687 * Send V and TAU to the process row ICROW2
688 *
689  IF( myrow.EQ.ivrow ) THEN
690 *
691  ipw = nqv+1
692  CALL zcopy( nqv, v( ioffv ), ldv, work, 1 )
693  work( ipw ) = tau( iiv )
694  CALL zgesd2d( ictxt, ipw, 1, work, ipw, icrow2,
695  $ mycol )
696 *
697  ELSE IF( myrow.EQ.icrow2 ) THEN
698 *
699  ipw = nqv+1
700  CALL zgerv2d( ictxt, ipw, 1, work, ipw, ivrow,
701  $ mycol )
702  tauloc = work( ipw )
703 *
704  IF( tauloc.NE.zero ) THEN
705 *
706 * w := sub( C ) * v
707 *
708  IF( nqv.GT.0 ) THEN
709  CALL zgemv( 'No transpose', mpc2, nqv, one,
710  $ c( ioffc2 ), ldc, work, 1, zero,
711  $ work( ipw ), 1 )
712  ELSE
713  CALL zlaset( 'All', mpc2, 1, zero, zero,
714  $ work( ipw ), max( 1, mpc2 ) )
715  END IF
716  IF( mycol.EQ.iccol1 )
717  $ CALL zaxpy( mpc2, one, c( ioffc1 ), 1,
718  $ work( ipw ), 1 )
719  CALL zgsum2d( ictxt, 'Rowwise', ' ', mpc2, 1,
720  $ work( ipw ), max( 1, mpc2 ),
721  $ rdest, iccol2 )
722  IF( mycol.EQ.iccol1 )
723  $ CALL zaxpy( mpc2, -tauloc, work( ipw ), 1,
724  $ c( ioffc1 ), 1 )
725 *
726 * sub( C ) := sub( C ) - w * v'
727 *
728  CALL zgerc( mpc2, nqv, -tauloc, work( ipw ), 1,
729  $ work, 1, c( ioffc2 ), ldc )
730  END IF
731 *
732  END IF
733 *
734  END IF
735 *
736  ELSE
737 *
738 * Transpose column vector V (IROFFV = ICOFFC2)
739 *
740  ipw = nqv+1
741  CALL pbztrnv( ictxt, 'Columnwise', 'Transpose', n,
742  $ descv( mb_ ), icoffc2, v( ioffv ), 1, zero,
743  $ work, 1, ivrow, ivcol, icrow2, iccol2,
744  $ work( ipw ) )
745 *
746 * Perform the local computation within a process column
747 *
748  IF( myrow.EQ.icrow2 ) THEN
749 *
750  IF( mycol.EQ.ivcol ) THEN
751 *
752  CALL zgebs2d( ictxt, 'Rowwise', ' ', 1, 1,
753  $ tau( jjv ), 1 )
754  tauloc = tau( jjv )
755 *
756  ELSE
757 *
758  CALL zgebr2d( ictxt, 'Rowwise', ' ', 1, 1, tauloc,
759  $ 1, myrow, ivcol )
760 *
761  END IF
762 *
763  IF( tauloc.NE.zero ) THEN
764 *
765 * w := sub( C ) * v
766 *
767  IF( nqv.GT.0 ) THEN
768  CALL zgemv( 'No transpose', mpc2, nqv, one,
769  $ c( ioffc2 ), ldc, work, 1, zero,
770  $ work( ipw ), 1 )
771  ELSE
772  CALL zlaset( 'All', mpc2, 1, zero, zero,
773  $ work( ipw ), max( 1, mpc2 ) )
774  END IF
775  IF( mycol.EQ.iccol1 )
776  $ CALL zaxpy( mpc2, one, c( ioffc1 ), 1,
777  $ work( ipw ), 1 )
778  CALL zgsum2d( ictxt, 'Rowwise', ' ', mpc2, 1,
779  $ work( ipw ), max( 1, mpc2 ), rdest,
780  $ iccol2 )
781  IF( mycol.EQ.iccol1 )
782  $ CALL zaxpy( mpc2, -tauloc, work( ipw ), 1,
783  $ c( ioffc1 ), 1 )
784 *
785 * sub( C ) := sub( C ) - w * v'
786 *
787  CALL zgerc( mpc2, nqv, -tauloc, work( ipw ), 1,
788  $ work, 1, c( ioffc2 ), ldc )
789  END IF
790 *
791  END IF
792 *
793  END IF
794 *
795  ELSE
796 *
797 * sub( C ) is a proper distributed matrix
798 *
799  IF( descv( m_ ).EQ.incv ) THEN
800 *
801 * Broadcast row vector V
802 *
803  CALL pb_topget( ictxt, 'Broadcast', 'Columnwise',
804  $ colbtop )
805  IF( myrow.EQ.ivrow ) THEN
806 *
807  ipw = nqv+1
808  CALL zcopy( nqv, v( ioffv ), ldv, work, 1 )
809  work( ipw ) = tau( iiv )
810  CALL zgebs2d( ictxt, 'Columnwise', colbtop, ipw, 1,
811  $ work, ipw )
812  tauloc = tau( iiv )
813 *
814  ELSE
815 *
816  ipw = nqv+1
817  CALL zgebr2d( ictxt, 'Columnwise', colbtop, ipw, 1,
818  $ work, ipw, ivrow, mycol )
819  tauloc = work( ipw )
820 *
821  END IF
822 *
823  IF( tauloc.NE.zero ) THEN
824 *
825 * w := sub( C ) * v
826 *
827  IF( nqv.GT.0 ) THEN
828  CALL zgemv( 'No Transpose', mpc2, nqv, one,
829  $ c( ioffc2 ), ldc, work, 1, zero,
830  $ work( ipw ), 1 )
831  ELSE
832  CALL zlaset( 'All', mpc2, 1, zero, zero,
833  $ work( ipw ), max( 1, mpc2 ) )
834  END IF
835  IF( mycol.EQ.iccol1 )
836  $ CALL zaxpy( mpc2, one, c( ioffc1 ), 1,
837  $ work( ipw ), 1 )
838 *
839  CALL zgsum2d( ictxt, 'Rowwise', ' ', mpc2, 1,
840  $ work( ipw ), max( 1, mpc2 ), rdest,
841  $ iccol2 )
842  IF( mycol.EQ.iccol1 )
843  $ CALL zaxpy( mpc2, -tauloc, work( ipw ), 1,
844  $ c( ioffc1 ), 1 )
845 *
846 * sub( C ) := sub( C ) - w * v'
847 *
848  CALL zgerc( mpc2, nqv, -tauloc, work( ipw ), 1, work,
849  $ 1, c( ioffc2 ), ldc )
850  END IF
851 *
852  ELSE
853 *
854 * Transpose and broadcast column vector V (ICOFFC2=IROFFV)
855 *
856  ipw = nqv+1
857  CALL pbztrnv( ictxt, 'Columnwise', 'Transpose', n,
858  $ descv( mb_ ), icoffc2, v( ioffv ), 1, zero,
859  $ work, 1, ivrow, ivcol, -1, iccol2,
860  $ work( ipw ) )
861 *
862 * Perform the local computation within a process column
863 *
864  IF( mycol.EQ.ivcol ) THEN
865 *
866  CALL zgebs2d( ictxt, 'Rowwise', ' ', 1, 1, tau( jjv ),
867  $ 1 )
868  tauloc = tau( jjv )
869 *
870  ELSE
871 *
872  CALL zgebr2d( ictxt, 'Rowwise', ' ', 1, 1, tauloc, 1,
873  $ myrow, ivcol )
874 *
875  END IF
876 *
877  IF( tauloc.NE.zero ) THEN
878 *
879 * w := sub( C ) * v
880 *
881  IF( nqv.GT.0 ) THEN
882  CALL zgemv( 'No transpose', mpc2, nqv, one,
883  $ c( ioffc2 ), ldc, work, 1, zero,
884  $ work( ipw ), 1 )
885  ELSE
886  CALL zlaset( 'All', mpc2, 1, zero, zero,
887  $ work( ipw ), max( 1, mpc2 ) )
888  END IF
889  IF( mycol.EQ.iccol1 )
890  $ CALL zaxpy( mpc2, one, c( ioffc1 ), 1,
891  $ work( ipw ), 1 )
892  CALL zgsum2d( ictxt, 'Rowwise', ' ', mpc2, 1,
893  $ work( ipw ), max( 1, mpc2 ), rdest,
894  $ iccol2 )
895  IF( mycol.EQ.iccol1 )
896  $ CALL zaxpy( mpc2, -tauloc, work( ipw ), 1,
897  $ c( ioffc1 ), 1 )
898 *
899 * sub( C ) := sub( C ) - w * v'
900 *
901  CALL zgerc( mpc2, nqv, -tauloc, work( ipw ), 1, work,
902  $ 1, c( ioffc2 ), ldc )
903  END IF
904 *
905  END IF
906 *
907  END IF
908 *
909  END IF
910 *
911  RETURN
912 *
913 * End of PZLARZ
914 *
915  END
max
#define max(A, B)
Definition: pcgemr.c:180
pbztrnv
subroutine pbztrnv(ICONTXT, XDIST, TRANS, N, NB, NZ, X, INCX, BETA, Y, INCY, IXROW, IXCOL, IYROW, IYCOL, WORK)
Definition: pbztrnv.f:4
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pzlarz
subroutine pzlarz(SIDE, M, N, L, V, IV, JV, DESCV, INCV, TAU, C, IC, JC, DESCC, WORK)
Definition: pzlarz.f:3
min
#define min(A, B)
Definition: pcgemr.c:181