SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pdlarzb.f
Go to the documentation of this file.
1 SUBROUTINE pdlarzb( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V,
2 $ IV, JV, DESCV, T, C, IC, JC, DESCC, WORK )
3*
4* -- ScaLAPACK auxiliary routine (version 2.0.2) --
5* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
6* May 1 2012
7*
8* .. Scalar Arguments ..
9 CHARACTER DIRECT, SIDE, STOREV, TRANS
10 INTEGER IC, IV, JC, JV, K, L, M, N
11* ..
12* .. Array Arguments ..
13 INTEGER DESCC( * ), DESCV( * )
14 DOUBLE PRECISION C( * ), T( * ), V( * ), WORK( * )
15* ..
16*
17* Purpose
18* =======
19*
20* PDLARZB applies a real block reflector Q or its transpose Q**T to
21* a real distributed M-by-N matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1)
22* from the left or the right.
23*
24* Q is a product of k elementary reflectors as returned by PDTZRZF.
25*
26* Currently, only STOREV = 'R' and DIRECT = 'B' are supported.
27*
28* Notes
29* =====
30*
31* Each global data object is described by an associated description
32* vector. This vector stores the information required to establish
33* the mapping between an object element and its corresponding process
34* and memory location.
35*
36* Let A be a generic term for any 2D block cyclicly distributed array.
37* Such a global array has an associated description vector DESCA.
38* In the following comments, the character _ should be read as
39* "of the global array".
40*
41* NOTATION STORED IN EXPLANATION
42* --------------- -------------- --------------------------------------
43* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
44* DTYPE_A = 1.
45* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
46* the BLACS process grid A is distribu-
47* ted over. The context itself is glo-
48* bal, but the handle (the integer
49* value) may vary.
50* M_A (global) DESCA( M_ ) The number of rows in the global
51* array A.
52* N_A (global) DESCA( N_ ) The number of columns in the global
53* array A.
54* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
55* the rows of the array.
56* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
57* the columns of the array.
58* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
59* row of the array A is distributed.
60* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
61* first column of the array A is
62* distributed.
63* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
64* array. LLD_A >= MAX(1,LOCr(M_A)).
65*
66* Let K be the number of rows or columns of a distributed matrix,
67* and assume that its process grid has dimension p x q.
68* LOCr( K ) denotes the number of elements of K that a process
69* would receive if K were distributed over the p processes of its
70* process column.
71* Similarly, LOCc( K ) denotes the number of elements of K that a
72* process would receive if K were distributed over the q processes of
73* its process row.
74* The values of LOCr() and LOCc() may be determined via a call to the
75* ScaLAPACK tool function, NUMROC:
76* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
77* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
78* An upper bound for these quantities may be computed by:
79* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
80* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
81*
82* Arguments
83* =========
84*
85* SIDE (global input) CHARACTER
86* = 'L': apply Q or Q**T from the Left;
87* = 'R': apply Q or Q**T from the Right.
88*
89* TRANS (global input) CHARACTER
90* = 'N': No transpose, apply Q;
91* = 'T': Transpose, apply Q**T.
92*
93* DIRECT (global input) CHARACTER
94* Indicates how H is formed from a product of elementary
95* reflectors
96* = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet)
97* = 'B': H = H(k) . . . H(2) H(1) (Backward)
98*
99* STOREV (global input) CHARACTER
100* Indicates how the vectors which define the elementary
101* reflectors are stored:
102* = 'C': Columnwise (not supported yet)
103* = 'R': Rowwise
104*
105* M (global input) INTEGER
106* The number of rows to be operated on i.e the number of rows
107* of the distributed submatrix sub( C ). M >= 0.
108*
109* N (global input) INTEGER
110* The number of columns to be operated on i.e the number of
111* columns of the distributed submatrix sub( C ). N >= 0.
112*
113* K (global input) INTEGER
114* The order of the matrix T (= the number of elementary
115* reflectors whose product defines the block reflector).
116*
117* L (global input) INTEGER
118* The columns of the distributed submatrix sub( A ) containing
119* the meaningful part of the Householder reflectors.
120* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0.
121*
122* V (local input) DOUBLE PRECISION pointer into the local memory
123* to an array of dimension (LLD_V, LOCc(JV+M-1)) if SIDE = 'L',
124* (LLD_V, LOCc(JV+N-1)) if SIDE = 'R'. It contains the local
125* pieces of the distributed vectors V representing the
126* Householder transformation as returned by PDTZRZF.
127* LLD_V >= LOCr(IV+K-1).
128*
129* IV (global input) INTEGER
130* The row index in the global array V indicating the first
131* row of sub( V ).
132*
133* JV (global input) INTEGER
134* The column index in the global array V indicating the
135* first column of sub( V ).
136*
137* DESCV (global and local input) INTEGER array of dimension DLEN_.
138* The array descriptor for the distributed matrix V.
139*
140* T (local input) DOUBLE PRECISION array, dimension MB_V by MB_V
141* The lower triangular matrix T in the representation of the
142* block reflector.
143*
144* C (local input/local output) DOUBLE PRECISION pointer into the
145* local memory to an array of dimension (LLD_C,LOCc(JC+N-1)).
146* On entry, the M-by-N distributed matrix sub( C ). On exit,
147* sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) or
148* sub( C )*Q or sub( C )*Q'.
149*
150* IC (global input) INTEGER
151* The row index in the global array C indicating the first
152* row of sub( C ).
153*
154* JC (global input) INTEGER
155* The column index in the global array C indicating the
156* first column of sub( C ).
157*
158* DESCC (global and local input) INTEGER array of dimension DLEN_.
159* The array descriptor for the distributed matrix C.
160*
161* WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK)
162* If STOREV = 'C',
163* if SIDE = 'L',
164* LWORK >= ( NqC0 + MpC0 ) * K
165* else if SIDE = 'R',
166* LWORK >= ( NqC0 + MAX( NpV0 + NUMROC( NUMROC( N+ICOFFC,
167* NB_V, 0, 0, NPCOL ), NB_V, 0, 0, LCMQ ),
168* MpC0 ) ) * K
169* end if
170* else if STOREV = 'R',
171* if SIDE = 'L',
172* LWORK >= ( MpC0 + MAX( MqV0 + NUMROC( NUMROC( M+IROFFC,
173* MB_V, 0, 0, NPROW ), MB_V, 0, 0, LCMP ),
174* NqC0 ) ) * K
175* else if SIDE = 'R',
176* LWORK >= ( MpC0 + NqC0 ) * K
177* end if
178* end if
179*
180* where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ),
181*
182* IROFFV = MOD( IV-1, MB_V ), ICOFFV = MOD( JV-1, NB_V ),
183* IVROW = INDXG2P( IV, MB_V, MYROW, RSRC_V, NPROW ),
184* IVCOL = INDXG2P( JV, NB_V, MYCOL, CSRC_V, NPCOL ),
185* MqV0 = NUMROC( M+ICOFFV, NB_V, MYCOL, IVCOL, NPCOL ),
186* NpV0 = NUMROC( N+IROFFV, MB_V, MYROW, IVROW, NPROW ),
187*
188* IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ),
189* ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ),
190* ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ),
191* MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ),
192* NpC0 = NUMROC( N+ICOFFC, MB_C, MYROW, ICROW, NPROW ),
193* NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ),
194*
195* ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions;
196* MYROW, MYCOL, NPROW and NPCOL can be determined by calling
197* the subroutine BLACS_GRIDINFO.
198*
199* Alignment requirements
200* ======================
201*
202* The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1)
203* must verify some alignment properties, namely the following
204* expressions should be true:
205*
206* If STOREV = 'Columnwise'
207* If SIDE = 'Left',
208* ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW )
209* If SIDE = 'Right',
210* ( MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC )
211* else if STOREV = 'Rowwise'
212* If SIDE = 'Left',
213* ( NB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC )
214* If SIDE = 'Right',
215* ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL )
216* end if
217*
218* =====================================================================
219*
220* .. Parameters ..
221 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
222 $ lld_, mb_, m_, nb_, n_, rsrc_
223 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
224 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
225 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
226 DOUBLE PRECISION ONE, ZERO
227 parameter( one = 1.0d+0, zero = 0.0d+0 )
228* ..
229* .. Local Scalars ..
230 LOGICAL LEFT
231 CHARACTER COLBTOP, TRANST
232 INTEGER ICCOL1, ICCOL2, ICOFFC1, ICOFFC2, ICOFFV,
233 $ icrow1, icrow2, ictxt, iibeg, iic1, iic2,
234 $ iiend, iinxt, iiv, ileft, info, ioffc2, ioffv,
235 $ ipt, ipv, ipw, iroffc1, iroffc2, itop, ivcol,
236 $ ivrow, jjbeg, jjend, jjnxt, jjc1, jjc2, jjv,
237 $ ldc, ldv, lv, lw, mbc, mbv, mpc1, mpc2, mpc20,
238 $ mqv, mqv0, mycol, mydist, myrow, nbc, nbv,
239 $ npcol, nprow, nqc1, nqc2, nqcall, nqv
240* ..
241* .. External Subroutines ..
242 EXTERNAL blacs_abort, blacs_gridinfo, dgebr2d,
243 $ dgebs2d,dgemm, dgsum2d, dlamov,
244 $ dlaset, dtrbr2d, dtrbs2d, dtrmm,
245 $ infog2l, pbdmatadd, pbdtran, pb_topget, pxerbla
246* ..
247* .. Intrinsic Functions ..
248 INTRINSIC max, min, mod
249* ..
250* .. External Functions ..
251 LOGICAL LSAME
252 INTEGER ICEIL, NUMROC
253 EXTERNAL iceil, lsame, numroc
254* ..
255* .. Executable Statements ..
256*
257* Quick return if possible
258*
259 IF( m.LE.0 .OR. n.LE.0 .OR. k.LE.0 )
260 $ RETURN
261*
262* Get grid parameters
263*
264 ictxt = descc( ctxt_ )
265 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
266*
267* Check for currently supported options
268*
269 info = 0
270 IF( .NOT.lsame( direct, 'B' ) ) THEN
271 info = -3
272 ELSE IF( .NOT.lsame( storev, 'R' ) ) THEN
273 info = -4
274 END IF
275 IF( info.NE.0 ) THEN
276 CALL pxerbla( ictxt, 'PDLARZB', -info )
277 CALL blacs_abort( ictxt, 1 )
278 RETURN
279 END IF
280*
281 left = lsame( side, 'L' )
282 IF( lsame( trans, 'N' ) ) THEN
283 transt = 'T'
284 ELSE
285 transt = 'N'
286 END IF
287*
288 CALL infog2l( iv, jv, descv, nprow, npcol, myrow, mycol, iiv, jjv,
289 $ ivrow, ivcol )
290 mbv = descv( mb_ )
291 nbv = descv( nb_ )
292 icoffv = mod( jv-1, nbv )
293 nqv = numroc( l+icoffv, nbv, mycol, ivcol, npcol )
294 IF( mycol.EQ.ivcol )
295 $ nqv = nqv - icoffv
296 ldv = descv( lld_ )
297 iiv = min( iiv, ldv )
298 jjv = min( jjv, max( 1, numroc( descv( n_ ), nbv, mycol,
299 $ descv( csrc_ ), npcol ) ) )
300 ioffv = iiv + ( jjv-1 ) * ldv
301 mbc = descc( mb_ )
302 nbc = descc( nb_ )
303 nqcall = numroc( descc( n_ ), nbc, mycol, descc( csrc_ ), npcol )
304 CALL infog2l( ic, jc, descc, nprow, npcol, myrow, mycol, iic1,
305 $ jjc1, icrow1, iccol1 )
306 ldc = descc( lld_ )
307 iic1 = min( iic1, ldc )
308 jjc1 = min( jjc1, max( 1, nqcall ) )
309*
310 IF( left ) THEN
311 iroffc1 = mod( ic-1, mbc )
312 mpc1 = numroc( k+iroffc1, mbc, myrow, icrow1, nprow )
313 IF( myrow.EQ.icrow1 )
314 $ mpc1 = mpc1 - iroffc1
315 icoffc1 = mod( jc-1, nbc )
316 nqc1 = numroc( n+icoffc1, nbc, mycol, iccol1, npcol )
317 IF( mycol.EQ.iccol1 )
318 $ nqc1 = nqc1 - icoffc1
319 CALL infog2l( ic+m-l, jc, descc, nprow, npcol, myrow, mycol,
320 $ iic2, jjc2, icrow2, iccol2 )
321 iroffc2 = mod( ic+m-l-1, mbc )
322 mpc2 = numroc( l+iroffc2, mbc, myrow, icrow2, nprow )
323 IF( myrow.EQ.icrow2 )
324 $ mpc2 = mpc2 - iroffc2
325 icoffc2 = icoffc1
326 nqc2 = nqc1
327 ELSE
328 iroffc1 = mod( ic-1, mbc )
329 mpc1 = numroc( m+iroffc1, mbc, myrow, icrow1, nprow )
330 IF( myrow.EQ.icrow1 )
331 $ mpc1 = mpc1 - iroffc1
332 icoffc1 = mod( jc-1, nbc )
333 nqc1 = numroc( k+icoffc1, nbc, mycol, iccol1, npcol )
334 IF( mycol.EQ.iccol1 )
335 $ nqc1 = nqc1 - icoffc1
336 CALL infog2l( ic, jc+n-l, descc, nprow, npcol, myrow, mycol,
337 $ iic2, jjc2, icrow2, iccol2 )
338 iroffc2 = iroffc1
339 mpc2 = mpc1
340 icoffc2 = mod( jc+n-l-1, nbc )
341 nqc2 = numroc( l+icoffc2, nbc, mycol, iccol2, npcol )
342 IF( mycol.EQ.iccol2 )
343 $ nqc2 = nqc2 - icoffc2
344 END IF
345 iic2 = min( iic2, ldc )
346 jjc2 = min( jjc2, nqcall )
347 ioffc2 = iic2 + ( jjc2-1 ) * ldc
348*
349 IF( lsame( side, 'L' ) ) THEN
350*
351* Form Q*sub( C ) or Q'*sub( C )
352*
353* IROFFC2 = ICOFFV is required by the current transposition
354* routine PBDTRAN
355*
356 mqv0 = numroc( m+icoffv, nbv, mycol, ivcol, npcol )
357 IF( mycol.EQ.ivcol ) THEN
358 mqv = mqv0 - icoffv
359 ELSE
360 mqv = mqv0
361 END IF
362 IF( myrow.EQ.icrow2 ) THEN
363 mpc20 = mpc2 + iroffc2
364 ELSE
365 mpc20 = mpc2
366 END IF
367*
368* Locally V( IOFFV ) is K x MQV, C( IOFFC2 ) is MPC2 x NQC2
369* WORK( IPV ) is MPC20 x K = [ . V( IOFFV ) ]'
370* WORK( IPW ) is K x MQV0 = [ . V( IOFFV ) ]
371* WORK( IPT ) is the workspace for PBDTRAN
372*
373 ipv = 1
374 ipw = ipv + mpc20 * k
375 ipt = ipw + k * mqv0
376 lv = max( 1, mpc20 )
377 lw = max( 1, k )
378*
379 IF( myrow.EQ.ivrow ) THEN
380 IF( mycol.EQ.ivcol ) THEN
381 CALL dlamov( 'All', k, mqv, v( ioffv ), ldv,
382 $ work( ipw+icoffv*lw ), lw )
383 ELSE
384 CALL dlamov( 'All', k, mqv, v( ioffv ), ldv,
385 $ work( ipw ), lw )
386 END IF
387 END IF
388*
389* WORK( IPV ) = WORK( IPW )' (replicated) is MPC20 x K
390*
391 CALL pbdtran( ictxt, 'Rowwise', 'Transpose', k, m+icoffv,
392 $ descv( nb_ ), work( ipw ), lw, zero,
393 $ work( ipv ), lv, ivrow, ivcol, icrow2, -1,
394 $ work( ipt ) )
395*
396* WORK( IPV ) = ( . V )' -> WORK( IPV ) = V' is MPC2 x K
397*
398 IF( myrow.EQ.icrow2 )
399 $ ipv = ipv + iroffc2
400*
401* WORK( IPW ) becomes NQC2 x K = C( IOFFC2 )' * V'
402* WORK( IPW ) = C( IOFFC2 )' * V' (NQC2 x MPC2 x K) -> NQC2 x K
403*
404 lw = max( 1, nqc2 )
405*
406 IF( mpc2.GT.0 ) THEN
407 CALL dgemm( 'Transpose', 'No transpose', nqc2, k, mpc2,
408 $ one, c( ioffc2 ), ldc, work( ipv ), lv, zero,
409 $ work( ipw ), lw )
410 ELSE
411 CALL dlaset( 'All', nqc2, k, zero, zero, work( ipw ), lw )
412 END IF
413*
414* WORK( IPW ) = WORK( IPW ) + C1 ( NQC1 = NQC2 )
415*
416 IF( mpc1.GT.0 ) THEN
417 mydist = mod( myrow-icrow1+nprow, nprow )
418 itop = max( 0, mydist * mbc - iroffc1 )
419 iibeg = iic1
420 iiend = iic1 + mpc1 - 1
421 iinxt = min( iceil( iibeg, mbc ) * mbc, iiend )
422*
423 10 CONTINUE
424 IF( iibeg.LE.iinxt ) THEN
425 CALL pbdmatadd( ictxt, 'Transpose', nqc2, iinxt-iibeg+1,
426 $ one, c( iibeg+(jjc1-1)*ldc ), ldc, one,
427 $ work( ipw+itop ), lw )
428 mydist = mydist + nprow
429 itop = mydist * mbc - iroffc1
430 iibeg = iinxt +1
431 iinxt = min( iinxt+mbc, iiend )
432 GO TO 10
433 END IF
434 END IF
435*
436 CALL dgsum2d( ictxt, 'Columnwise', ' ', nqc2, k, work( ipw ),
437 $ lw, ivrow, mycol )
438*
439* WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
440*
441 IF( myrow.EQ.ivrow ) THEN
442 IF( mycol.EQ.ivcol ) THEN
443*
444* Broadcast the block reflector to the other columns.
445*
446 CALL dtrbs2d( ictxt, 'Rowwise', ' ', 'Lower', 'Non unit',
447 $ k, k, t, mbv )
448 ELSE
449 CALL dtrbr2d( ictxt, 'Rowwise', ' ', 'Lower', 'Non unit',
450 $ k, k, t, mbv, myrow, ivcol )
451 END IF
452 CALL dtrmm( 'Right', 'Lower', transt, 'Non unit', nqc2, k,
453 $ one, t, mbv, work( ipw ), lw )
454*
455 CALL dgebs2d( ictxt, 'Columnwise', ' ', nqc2, k,
456 $ work( ipw ), lw )
457 ELSE
458 CALL dgebr2d( ictxt, 'Columnwise', ' ', nqc2, k,
459 $ work( ipw ), lw, ivrow, mycol )
460 END IF
461*
462* C1 = C1 - WORK( IPW )
463*
464 IF( mpc1.GT.0 ) THEN
465 mydist = mod( myrow-icrow1+nprow, nprow )
466 itop = max( 0, mydist * mbc - iroffc1 )
467 iibeg = iic1
468 iiend = iic1 + mpc1 - 1
469 iinxt = min( iceil( iibeg, mbc ) * mbc, iiend )
470*
471 20 CONTINUE
472 IF( iibeg.LE.iinxt ) THEN
473 CALL pbdmatadd( ictxt, 'Transpose', iinxt-iibeg+1, nqc2,
474 $ -one, work( ipw+itop ), lw, one,
475 $ c( iibeg+(jjc1-1)*ldc ), ldc )
476 mydist = mydist + nprow
477 itop = mydist * mbc - iroffc1
478 iibeg = iinxt +1
479 iinxt = min( iinxt+mbc, iiend )
480 GO TO 20
481 END IF
482 END IF
483*
484* C2 C2 - V' * W'
485* C( IOFFC2 ) = C( IOFFC2 ) - WORK( IPV ) * WORK( IPW )'
486* MPC2 x NQC2 MPC2 x K K x NQC2
487*
488 CALL dgemm( 'No transpose', 'Transpose', mpc2, nqc2, k, -one,
489 $ work( ipv ), lv, work( ipw ), lw, one,
490 $ c( ioffc2 ), ldc )
491*
492 ELSE
493*
494* Form sub( C ) * Q or sub( C ) * Q'
495*
496* Locally V( IOFFV ) is K x NQV, C( IOFFC2 ) is MPC2 x NQC2
497* WORK( IPV ) is K x NQV = V( IOFFV ), NQV = NQC2
498* WORK( IPW ) is MPC2 x K = C( IOFFC2 ) * V( IOFFV )'
499*
500 ipv = 1
501 ipw = ipv + k * nqc2
502 lv = max( 1, k )
503 lw = max( 1, mpc2 )
504*
505* Broadcast V to the other process rows.
506*
507 CALL pb_topget( ictxt, 'Broadcast', 'Columnwise', colbtop )
508 IF( myrow.EQ.ivrow ) THEN
509 CALL dgebs2d( ictxt, 'Columnwise', colbtop, k, nqc2,
510 $ v( ioffv ), ldv )
511 IF( mycol.EQ.ivcol )
512 $ CALL dtrbs2d( ictxt, 'Columnwise', colbtop, 'Lower',
513 $ 'Non unit', k, k, t, mbv )
514 CALL dlamov( 'All', k, nqc2, v( ioffv ), ldv, work( ipv ),
515 $ lv )
516 ELSE
517 CALL dgebr2d( ictxt, 'Columnwise', colbtop, k, nqc2,
518 $ work( ipv ), lv, ivrow, mycol )
519 IF( mycol.EQ.ivcol )
520 $ CALL dtrbr2d( ictxt, 'Columnwise', colbtop, 'Lower',
521 $ 'Non unit', k, k, t, mbv, ivrow, mycol )
522 END IF
523*
524* WORK( IPV ) is K x NQC2 = V = V( IOFFV )
525* WORK( IPW ) = C( IOFFC2 ) * V' (MPC2 x NQC2 x K) -> MPC2 x K
526*
527 IF( nqc2.GT.0 ) THEN
528 CALL dgemm( 'No Transpose', 'Transpose', mpc2, k, nqc2,
529 $ one, c( ioffc2 ), ldc, work( ipv ), lv, zero,
530 $ work( ipw ), lw )
531 ELSE
532 CALL dlaset( 'All', mpc2, k, zero, zero, work( ipw ), lw )
533 END IF
534*
535* WORK( IPW ) = WORK( IPW ) + C1 ( MPC1 = MPC2 )
536*
537 IF( nqc1.GT.0 ) THEN
538 mydist = mod( mycol-iccol1+npcol, npcol )
539 ileft = max( 0, mydist * nbc - icoffc1 )
540 jjbeg = jjc1
541 jjend = jjc1 + nqc1 - 1
542 jjnxt = min( iceil( jjbeg, nbc ) * nbc, jjend )
543*
544 30 CONTINUE
545 IF( jjbeg.LE.jjnxt ) THEN
546 CALL pbdmatadd( ictxt, 'No transpose', mpc2,
547 $ jjnxt-jjbeg+1, one,
548 $ c( iic1+(jjbeg-1)*ldc ), ldc, one,
549 $ work( ipw+ileft*lw ), lw )
550 mydist = mydist + npcol
551 ileft = mydist * nbc - icoffc1
552 jjbeg = jjnxt +1
553 jjnxt = min( jjnxt+nbc, jjend )
554 GO TO 30
555 END IF
556 END IF
557*
558 CALL dgsum2d( ictxt, 'Rowwise', ' ', mpc2, k, work( ipw ),
559 $ lw, myrow, ivcol )
560*
561* WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
562*
563 IF( mycol.EQ.ivcol ) THEN
564 CALL dtrmm( 'Right', 'Lower', trans, 'Non unit', mpc2, k,
565 $ one, t, mbv, work( ipw ), lw )
566 CALL dgebs2d( ictxt, 'Rowwise', ' ', mpc2, k, work( ipw ),
567 $ lw )
568 ELSE
569 CALL dgebr2d( ictxt, 'Rowwise', ' ', mpc2, k, work( ipw ),
570 $ lw, myrow, ivcol )
571 END IF
572*
573* C1 = C1 - WORK( IPW )
574*
575 IF( nqc1.GT.0 ) THEN
576 mydist = mod( mycol-iccol1+npcol, npcol )
577 ileft = max( 0, mydist * nbc - icoffc1 )
578 jjbeg = jjc1
579 jjend = jjc1 + nqc1 - 1
580 jjnxt = min( iceil( jjbeg, nbc ) * nbc, jjend )
581*
582 40 CONTINUE
583 IF( jjbeg.LE.jjnxt ) THEN
584 CALL pbdmatadd( ictxt, 'No transpose', mpc2,
585 $ jjnxt-jjbeg+1, -one,
586 $ work( ipw+ileft*lw ), lw, one,
587 $ c( iic1+(jjbeg-1)*ldc ), ldc )
588 mydist = mydist + npcol
589 ileft = mydist * nbc - icoffc1
590 jjbeg = jjnxt +1
591 jjnxt = min( jjnxt+nbc, jjend )
592 GO TO 40
593 END IF
594 END IF
595*
596* C2 C2 - W * V
597* C( IOFFC ) = C( IOFFC ) - WORK( IPW ) * WORK( IPV )
598* MPC2 x NQC2 MPC2 x K K x NQC2
599*
600 IF( ioffc2.GT.0 )
601 $ CALL dgemm( 'No transpose', 'No transpose', mpc2, nqc2, k,
602 $ -one, work( ipw ), lw, work( ipv ), lv, one,
603 $ c( ioffc2 ), ldc )
604*
605 END IF
606*
607 RETURN
608*
609* End of PDLARZB
610*
611 END
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
subroutine pbdmatadd(icontxt, mode, m, n, alpha, a, lda, beta, b, ldb)
Definition pbdmatadd.f:3
subroutine pbdtran(icontxt, adist, trans, m, n, nb, a, lda, beta, c, ldc, iarow, iacol, icrow, iccol, work)
Definition pbdtran.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pdlarzb(side, trans, direct, storev, m, n, k, l, v, iv, jv, descv, t, c, ic, jc, descc, work)
Definition pdlarzb.f:3
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2