SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ PB_CptrmmB()

void PB_CptrmmB ( PBTYP_T TYPE,
char *  DIRECB,
char *  SIDE,
char *  UPLO,
char *  TRANSA,
char *  DIAG,
Int  M,
Int  N,
char *  ALPHA,
char *  A,
Int  IA,
Int  JA,
Int DESCA,
char *  B,
Int  IB,
Int  JB,
Int DESCB 
)

Definition at line 25 of file PB_CptrmmB.c.

40{
41/*
42* Purpose
43* =======
44*
45* PB_CptrmmB performs one of the matrix-matrix operations
46*
47* sub( B ) := alpha * op( sub( A ) ) * sub( B ),
48*
49* or
50*
51* sub( B ) := alpha * sub( B ) * op( sub( A ) ),
52*
53* where
54*
55* sub( A ) denotes A(IA:IA+M-1,JA:JA+M-1) if SIDE = 'L',
56* A(IA:IA+N-1,JA:JA+N-1) if SIDE = 'R', and,
57*
58* sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1).
59*
60* Alpha is a scalar, sub( B ) is an m by n submatrix, sub( A ) is a
61* unit, or non-unit, upper or lower triangular submatrix and op( X ) is
62* one of
63*
64* op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ).
65*
66* This is the inner-product algorithm using the logical LCM hybrid
67* and static blocking techniques. The submatrix operand sub( A ) stays
68* in place.
69*
70* Notes
71* =====
72*
73* A description vector is associated with each 2D block-cyclicly dis-
74* tributed matrix. This vector stores the information required to
75* establish the mapping between a matrix entry and its corresponding
76* process and memory location.
77*
78* In the following comments, the character _ should be read as
79* "of the distributed matrix". Let A be a generic term for any 2D
80* block cyclicly distributed matrix. Its description vector is DESC_A:
81*
82* NOTATION STORED IN EXPLANATION
83* ---------------- --------------- ------------------------------------
84* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
85* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
86* the NPROW x NPCOL BLACS process grid
87* A is distributed over. The context
88* itself is global, but the handle
89* (the integer value) may vary.
90* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
91* ted matrix A, M_A >= 0.
92* N_A (global) DESCA[ N_ ] The number of columns in the distri-
93* buted matrix A, N_A >= 0.
94* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
95* block of the matrix A, IMB_A > 0.
96* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
97* left block of the matrix A,
98* INB_A > 0.
99* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
100* bute the last M_A-IMB_A rows of A,
101* MB_A > 0.
102* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
103* bute the last N_A-INB_A columns of
104* A, NB_A > 0.
105* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
106* row of the matrix A is distributed,
107* NPROW > RSRC_A >= 0.
108* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
109* first column of A is distributed.
110* NPCOL > CSRC_A >= 0.
111* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
112* array storing the local blocks of
113* the distributed matrix A,
114* IF( Lc( 1, N_A ) > 0 )
115* LLD_A >= MAX( 1, Lr( 1, M_A ) )
116* ELSE
117* LLD_A >= 1.
118*
119* Let K be the number of rows of a matrix A starting at the global in-
120* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
121* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
122* receive if these K rows were distributed over NPROW processes. If K
123* is the number of columns of a matrix A starting at the global index
124* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
125* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
126* these K columns were distributed over NPCOL processes.
127*
128* The values of Lr() and Lc() may be determined via a call to the func-
129* tion PB_Cnumroc:
130* Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
131* Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
132*
133* Arguments
134* =========
135*
136* TYPE (local input) pointer to a PBTYP_T structure
137* On entry, TYPE is a pointer to a structure of type PBTYP_T,
138* that contains type information (See pblas.h).
139*
140* DIRECB (global input) pointer to CHAR
141* On entry, DIRECB specifies the direction in which the rows
142* or columns of sub( B ) should be looped over as follows:
143* DIRECB = 'F' or 'f' forward or increasing,
144* DIRECB = 'B' or 'b' backward or decreasing.
145*
146* SIDE (global input) pointer to CHAR
147* On entry, SIDE specifies whether op( sub( A ) ) multiplies
148* sub( B ) from the left or right as follows:
149*
150* SIDE = 'L' or 'l' sub( B ) := alpha*op( sub( A ) )*sub( B ),
151*
152* SIDE = 'R' or 'r' sub( B ) := alpha*sub( B )*op( sub( A ) ).
153*
154* UPLO (global input) pointer to CHAR
155* On entry, UPLO specifies whether the submatrix sub( A ) is
156* an upper or lower triangular submatrix as follows:
157*
158* UPLO = 'U' or 'u' sub( A ) is an upper triangular
159* submatrix,
160*
161* UPLO = 'L' or 'l' sub( A ) is a lower triangular
162* submatrix.
163*
164* TRANSA (global input) pointer to CHAR
165* On entry, TRANSA specifies the form of op( sub( A ) ) to be
166* used in the matrix multiplication as follows:
167*
168* TRANSA = 'N' or 'n' op( sub( A ) ) = sub( A ),
169*
170* TRANSA = 'T' or 't' op( sub( A ) ) = sub( A )',
171*
172* TRANSA = 'C' or 'c' op( sub( A ) ) = conjg( sub( A )' ).
173*
174* DIAG (global input) pointer to CHAR
175* On entry, DIAG specifies whether or not sub( A ) is unit
176* triangular as follows:
177*
178* DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian-
179* gular,
180*
181* DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri-
182* angular.
183*
184* M (global input) INTEGER
185* On entry, M specifies the number of rows of the submatrix
186* sub( B ). M must be at least zero.
187*
188* N (global input) INTEGER
189* On entry, N specifies the number of columns of the submatrix
190* sub( B ). N must be at least zero.
191*
192* ALPHA (global input) pointer to CHAR
193* On entry, ALPHA specifies the scalar alpha. When ALPHA is
194* supplied as zero then the local entries of the array B
195* corresponding to the entries of the submatrix sub( B ) need
196* not be set on input.
197*
198* A (local input) pointer to CHAR
199* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
200* at least Lc( 1, JA+M-1 ) when SIDE = 'L' or 'l' and is at
201* least Lc( 1, JA+N-1 ) otherwise. Before entry, this array
202* contains the local entries of the matrix A.
203* Before entry with UPLO = 'U' or 'u', this array contains the
204* local entries corresponding to the entries of the upper tri-
205* angular submatrix sub( A ), and the local entries correspon-
206* ding to the entries of the strictly lower triangular part of
207* the submatrix sub( A ) are not referenced.
208* Before entry with UPLO = 'L' or 'l', this array contains the
209* local entries corresponding to the entries of the lower tri-
210* angular submatrix sub( A ), and the local entries correspon-
211* ding to the entries of the strictly upper triangular part of
212* the submatrix sub( A ) are not referenced.
213* Note that when DIAG = 'U' or 'u', the local entries corres-
214* ponding to the diagonal elements of the submatrix sub( A )
215* are not referenced either, but are assumed to be unity.
216*
217* IA (global input) INTEGER
218* On entry, IA specifies A's global row index, which points to
219* the beginning of the submatrix sub( A ).
220*
221* JA (global input) INTEGER
222* On entry, JA specifies A's global column index, which points
223* to the beginning of the submatrix sub( A ).
224*
225* DESCA (global and local input) INTEGER array
226* On entry, DESCA is an integer array of dimension DLEN_. This
227* is the array descriptor for the matrix A.
228*
229* B (local input/local output) pointer to CHAR
230* On entry, B is an array of dimension (LLD_B, Kb), where Kb is
231* at least Lc( 1, JB+N-1 ). Before entry, this array contains
232* the local entries of the matrix B.
233* On exit, the local entries of this array corresponding to the
234* to the entries of the submatrix sub( B ) are overwritten by
235* the local entries of the m by n transformed submatrix.
236*
237* IB (global input) INTEGER
238* On entry, IB specifies B's global row index, which points to
239* the beginning of the submatrix sub( B ).
240*
241* JB (global input) INTEGER
242* On entry, JB specifies B's global column index, which points
243* to the beginning of the submatrix sub( B ).
244*
245* DESCB (global and local input) INTEGER array
246* On entry, DESCB is an integer array of dimension DLEN_. This
247* is the array descriptor for the matrix B.
248*
249* -- Written on April 1, 1998 by
250* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
251*
252* ---------------------------------------------------------------------
253*/
254/*
255* .. Local Scalars ..
256*/
257 char Broc, GemmTa, GemmTb, TranOp, WBroc, WCroc, conjg, * one,
258 * talpha, * tbeta, top, * zero;
259 Int Acol, Aii, Aimb1, Ainb1, Ajj, Alcmb, Ald, Alp, Alp0, Alq,
260 Alq0, Amb, Amp, An, Anb, Anq, Arow, BcurrocR, Bfwd, BiiD,
261 BiiR, Binb1D, Binb1R, BisR, Bld, BmyprocD, BmyprocR, BnD,
262 BnR, BnbD, BnbR, BnpR, BnprocsD, BnprocsR, BrocD, BrocR,
263 BsrcR, LNorRT, WBfr, WBld, WCfr, WCld, WCpbY, WCsum, ctxt,
264 l, lb, lside, ltmp, mycol, myrow, n, nb, nbb, notran, npcol,
265 nprow, p=0, size, tmp, upper;
266 GEMM_T gemm;
267 GSUM2D_T gsum2d;
268/*
269* .. Local Arrays ..
270*/
271 Int Ad0[DLEN_], DBUFB[DLEN_], WCd[DLEN_], WBd[DLEN_];
272 char * Aptr = NULL, * Bptr = NULL, * WB = NULL, * WC = NULL;
273/* ..
274* .. Executable Statements ..
275*
276*/
277/*
278* Retrieve process grid information
279*/
280 Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
281
282 Bfwd = ( Mupcase( DIRECB[0] ) == CFORWARD );
283 lside = ( Mupcase( SIDE [0] ) == CLEFT );
284 upper = ( Mupcase( UPLO [0] ) == CUPPER );
285 notran = ( ( TranOp = Mupcase( TRANSA[0] ) ) == CNOTRAN );
286 LNorRT = ( lside && notran ) || ( !( lside ) && !( notran ) );
287
288 size = TYPE->size; one = TYPE->one; zero = TYPE->zero;
289 gemm = TYPE->Fgemm; gsum2d = TYPE->Cgsum2d;
290 nb = pilaenv_( &ctxt, C2F_CHAR( &TYPE->type ) );
291/*
292* Compute local information for sub( A ) and sub( B )
293*/
294 if( lside )
295 {
296 BnD = An = M; BnR = N; Broc = CCOLUMN;
297 BmyprocD = myrow; BnprocsD = nprow;
298 BmyprocR = mycol; BnprocsR = npcol;
299 BnbD = DESCB[MB_ ]; BnbR = DESCB[NB_ ];
300 BsrcR = DESCB[CSRC_]; Bld = DESCB[LLD_];
301 PB_Cinfog2l( IB, JB, DESCB, BnprocsD, BnprocsR, BmyprocD, BmyprocR,
302 &BiiD, &BiiR, &BrocD, &BrocR );
303 Binb1D = PB_Cfirstnb( BnD, IB, DESCB[IMB_], BnbD );
304 Binb1R = PB_Cfirstnb( BnR, JB, DESCB[INB_], BnbR );
305 }
306 else
307 {
308 BnD = An = N; BnR = M; Broc = CROW;
309 BmyprocD = mycol; BnprocsD = npcol;
310 BmyprocR = myrow; BnprocsR = nprow;
311 BnbR = DESCB[MB_ ]; BnbD = DESCB[NB_ ];
312 BsrcR = DESCB[RSRC_]; Bld = DESCB[LLD_];
313 PB_Cinfog2l( IB, JB, DESCB, BnprocsR, BnprocsD, BmyprocR, BmyprocD,
314 &BiiR, &BiiD, &BrocR, &BrocD );
315 Binb1D = PB_Cfirstnb( BnD, JB, DESCB[INB_], BnbD );
316 Binb1R = PB_Cfirstnb( BnR, IB, DESCB[IMB_], BnbR );
317 }
318/*
319* Compute descriptor Ad0 for sub( A )
320*/
321 PB_Cdescribe( An, An, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj,
322 &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 );
323
324 Amp = PB_Cnumroc( An, 0, Aimb1, Amb, myrow, Arow, nprow );
325 Anq = PB_Cnumroc( An, 0, Ainb1, Anb, mycol, Acol, npcol );
326 if( ( Amp > 0 ) && ( Anq > 0 ) ) Aptr = Mptr( A, Aii, Ajj, Ald, size );
327/*
328* Compute conjugate of alpha for the conjugate transpose cases
329*/
330 if( TranOp == CCOTRAN )
331 {
332 conjg = CCONJG; talpha = PB_Cmalloc( size );
333 PB_Cconjg( TYPE, ALPHA, talpha );
334 }
335 else { conjg = CNOCONJG; talpha = ALPHA; }
336/*
337* Retrieve BLACS combine topology, set the transpose parameters to be passed
338* to the BLAS matrix multiply routine and finally describe the form of the
339* input and output operands.
340*/
341 if( LNorRT )
342 {
343 top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET );
344 GemmTa = CNOTRAN; GemmTb = ( lside ? CTRAN : TranOp );
345 WCroc = CCOLUMN; WBroc = CROW;
346 }
347 else
348 {
349 top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET );
350 GemmTb = CNOTRAN; GemmTa = ( lside ? TranOp : CTRAN );
351 WCroc = CROW; WBroc = CCOLUMN;
352 }
353/*
354* Computational partitioning size is computed as the product of the logical
355* value returned by pilaenv_ and 2 * lcm( nprow, npcol ).
356*/
357 Alcmb = 2 * nb * PB_Clcm( ( Arow >= 0 ? nprow : 1 ),
358 ( Acol >= 0 ? npcol : 1 ) );
359/*
360* When sub( B ) is not replicated and backward pass on sub( B ), find the
361* virtual process p owning the last row or column of sub( B ).
362*/
363 if( !( BisR = ( ( BsrcR < 0 ) || ( BnprocsR == 1 ) ) ) && !Bfwd )
364 {
365 tmp = PB_Cindxg2p( BnR-1, Binb1R, BnbR, BrocR, BrocR, BnprocsR );
366 p = MModSub( tmp, BrocR, BnprocsR );
367 }
368/*
369* Loop over the processes rows or columns owning the BnR rows or columns of
370* sub( B ) to be processed.
371*/
372 n = BnR;
373
374 while( n > 0 )
375 {
376/*
377* Find out who is the active process row or column as well as the number of
378* rows or columns of sub( B ) it owns.
379*/
380 BcurrocR = ( BisR ? -1 : MModAdd( BrocR, p, BnprocsR ) );
381 BnpR = PB_Cnumroc( BnR, 0, Binb1R, BnbR, BcurrocR, BrocR, BnprocsR );
382
383 n -= BnpR;
384/*
385* Re-adjust the number of rows or columns to be handled at each step, in order
386* to average the message sizes and the computational granularity.
387*/
388 if( BnpR ) nbb = BnpR / ( ( BnpR - 1 ) / nb + 1 );
389
390 while( BnpR )
391 {
392 nbb = MIN( nbb, BnpR );
393/*
394* Describe the local contiguous panel of sub( B )
395*/
396 if( lside )
397 {
398 PB_Cdescset( DBUFB, BnD, nbb, Binb1D, nbb, BnbD, BnbR, BrocD,
399 BcurrocR, ctxt, Bld );
400 if( BisR || ( BmyprocR == BcurrocR ) )
401 Bptr = Mptr( B, BiiD, BiiR, Bld, size );
402 }
403 else
404 {
405 PB_Cdescset( DBUFB, nbb, BnD, nbb, Binb1D, BnbR, BnbD, BcurrocR,
406 BrocD, ctxt, Bld );
407 if( BisR || ( BmyprocR == BcurrocR ) )
408 Bptr = Mptr( B, BiiR, BiiD, Bld, size );
409 }
410/*
411* Replicate this panel in the process rows or columns spanned by sub( A ): WB
412*/
413 PB_CInV( TYPE, NOCONJG, &WBroc, An, An, Ad0, nbb, Bptr, 0, 0, DBUFB,
414 &Broc, &WB, WBd, &WBfr );
415/*
416* Reuse sub( B ) and/or create vector WC in process columns or rows spanned by
417* sub( A )
418*/
419 PB_CInOutV( TYPE, &WCroc, An, An, Ad0, nbb, one, Bptr, 0, 0, DBUFB,
420 &Broc, &tbeta, &WC, WCd, &WCfr, &WCsum, &WCpbY );
421/*
422* When the input data is first transposed, zero it now for later accumulation
423*/
424 if( notran )
425 PB_Cplapad( TYPE, ALL, NOCONJG, DBUFB[M_], DBUFB[N_], zero, zero,
426 Bptr, 0, 0, DBUFB );
427/*
428* Local matrix-matrix multiply iff I own some data
429*/
430 Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_]; Anb = Ad0[NB_];
431 Arow = Ad0[RSRC_]; Acol = Ad0[CSRC_];
432 Amp = PB_Cnumroc( An, 0, Aimb1, Amb, myrow, Arow, nprow );
433 Anq = PB_Cnumroc( An, 0, Ainb1, Anb, mycol, Acol, npcol );
434
435 WCld = WCd[LLD_];
436
437 if( ( Amp > 0 ) && ( Anq > 0 ) )
438 {
439 WBld = WBd[LLD_];
440
441 if( upper )
442 {
443/*
444* sub( A ) is upper triangular
445*/
446 if( LNorRT )
447 {
448 for( l = 0; l < An; l += Alcmb )
449 {
450 lb = An - l; lb = MIN( lb, Alcmb );
451 Alp = PB_Cnumroc( l, 0, Aimb1, Amb, myrow, Arow, nprow );
452 Alq = PB_Cnumroc( l, 0, Ainb1, Anb, mycol, Acol, npcol );
453 if( Alp > 0 )
454 {
455 Alq0 = PB_Cnumroc( lb, l, Ainb1, Anb, mycol, Acol,
456 npcol );
457 gemm( C2F_CHAR( &GemmTa ), C2F_CHAR( &GemmTb ), &Alp,
458 &nbb, &Alq0, talpha, Mptr( Aptr, 0, Alq, Ald,
459 size ), &Ald, Mptr( WB, 0, Alq, WBld, size ),
460 &WBld, one, WC, &WCld );
461 }
462 PB_Cptrm( TYPE, TYPE, SIDE, UPLO, TRANSA, DIAG, lb, nbb,
463 talpha, Aptr, l, l, Ad0, Mptr( WB, 0, Alq, WBld,
464 size ), WBld, Mptr( WC, Alp, 0, WCld, size ),
465 WCld, PB_Ctztrmm );
466 }
467 }
468 else
469 {
470 for( l = 0; l < An; l += Alcmb )
471 {
472 lb = An - l; lb = MIN( lb, Alcmb );
473 Alp = PB_Cnumroc( l, 0, Aimb1, Amb, myrow, Arow, nprow );
474 Alq = PB_Cnumroc( l, 0, Ainb1, Anb, mycol, Acol, npcol );
475 Alq0 = PB_Cnumroc( lb, l, Ainb1, Anb, mycol, Acol, npcol );
476 if( Alq0 > 0 )
477 gemm( C2F_CHAR( &GemmTa ), C2F_CHAR( &GemmTb ), &nbb,
478 &Alq0, &Alp, talpha, WB, &WBld, Mptr( Aptr, 0,
479 Alq, Ald, size ), &Ald, one, Mptr( WC, 0, Alq,
480 WCld, size ), &WCld );
481 PB_Cptrm( TYPE, TYPE, SIDE, UPLO, TRANSA, DIAG, lb, nbb,
482 talpha, Aptr, l, l, Ad0, Mptr( WB, Alp, 0, WBld,
483 size ), WBld, Mptr( WC, 0, Alq, WCld, size ),
484 WCld, PB_Ctztrmm );
485 }
486 }
487 }
488 else
489 {
490/*
491* sub( A ) is lower triangular
492*/
493 if( LNorRT )
494 {
495 for( l = 0; l < An; l += Alcmb )
496 {
497 lb = An - l; ltmp = l + ( lb = MIN( lb, Alcmb ) );
498 Alp = PB_Cnumroc( l, 0, Aimb1, Amb, myrow, Arow, nprow );
499 Alq = PB_Cnumroc( l, 0, Ainb1, Anb, mycol, Acol, npcol );
500 PB_Cptrm( TYPE, TYPE, SIDE, UPLO, TRANSA, DIAG, lb, nbb,
501 talpha, Aptr, l, l, Ad0, Mptr( WB, 0, Alq, WBld,
502 size ), WBld, Mptr( WC, Alp, 0, WCld, size ),
503 WCld, PB_Ctztrmm );
504 Alp = PB_Cnumroc( ltmp, 0, Aimb1, Amb, myrow, Arow,
505 nprow );
506 Alp0 = Amp - Alp;
507 Alq0 = PB_Cnumroc( lb, l, Ainb1, Anb, mycol, Acol,
508 npcol );
509 if( Alp0 > 0 )
510 gemm( C2F_CHAR( &GemmTa ), C2F_CHAR( &GemmTb ), &Alp0,
511 &nbb, &Alq0, talpha, Mptr( Aptr, Alp, Alq, Ald,
512 size ), &Ald, Mptr( WB, 0, Alq, WBld, size ),
513 &WBld, one, Mptr( WC, Alp, 0, WCld, size ),
514 &WCld );
515 }
516 }
517 else
518 {
519 for( l = 0; l < An; l += Alcmb )
520 {
521 lb = An - l; ltmp = l + ( lb = MIN( lb, Alcmb ) );
522 Alp = PB_Cnumroc( l, 0, Aimb1, Amb, myrow, Arow, nprow );
523 Alq = PB_Cnumroc( l, 0, Ainb1, Anb, mycol, Acol, npcol );
524 PB_Cptrm( TYPE, TYPE, SIDE, UPLO, TRANSA, DIAG, lb, nbb,
525 talpha, Aptr, l, l, Ad0, Mptr( WB, Alp, 0, WBld,
526 size ), WBld, Mptr( WC, 0, Alq, WCld, size ),
527 WCld, PB_Ctztrmm );
528 Alp = PB_Cnumroc( ltmp, 0, Aimb1, Amb, myrow, Arow,
529 nprow );
530 Alp0 = Amp - Alp;
531 Alq0 = PB_Cnumroc( lb, l, Ainb1, Anb, mycol, Acol,
532 npcol );
533 if( Alq0 > 0 )
534 gemm( C2F_CHAR( &GemmTa ), C2F_CHAR( &GemmTb ), &nbb,
535 &Alq0, &Alp0, talpha, Mptr( WB, Alp, 0, WBld,
536 size ), &WBld, Mptr( Aptr, Alp, Alq, Ald, size ),
537 &Ald, one, Mptr( WC, 0, Alq, WCld, size ),
538 &WCld );
539 }
540 }
541 }
542 }
543 if( WBfr ) free( WB );
544
545 if( LNorRT )
546 {
547/*
548* Combine the partial column results into WC
549*/
550 if( WCsum && ( Amp > 0 ) )
551 gsum2d( ctxt, ROW, &top, Amp, nbb, WC, WCld, myrow, WCd[CSRC_] );
552/*
553* sub( B ) := WC (if necessary)
554*/
555 if( WCpbY )
556 PB_Cpaxpby( TYPE, &conjg, An, nbb, one, WC, 0, 0, WCd, &WCroc,
557 zero, Bptr, 0, 0, DBUFB, &Broc );
558 }
559 else
560 {
561/*
562* Combine the partial row results into WC
563*/
564 if( WCsum && ( Anq > 0 ) )
565 gsum2d( ctxt, COLUMN, &top, nbb, Anq, WC, WCld, WCd[RSRC_],
566 mycol );
567/*
568* sub( B ) := WC (if necessary)
569*/
570 if( WCpbY )
571 PB_Cpaxpby( TYPE, &conjg, nbb, An, one, WC, 0, 0, WCd, &WCroc,
572 zero, Bptr, 0, 0, DBUFB, &Broc );
573 }
574 if( WCfr ) free( WC );
575/*
576* Go to the next contiguous panel if any residing in this process row or column
577*/
578 BnpR -= nbb;
579
580 if( BisR || ( BmyprocR == BcurrocR ) ) BiiR += nbb;
581 }
582/*
583* Go to next or previous process row or column owning some of sub( B )
584*/
585 if( !BisR )
586 p = ( Bfwd ? MModAdd1( p, BnprocsR ) : MModSub1( p, BnprocsR ) );
587 }
588
589 if( TranOp == CCOTRAN ) free( talpha );
590/*
591* End of PB_CptrmmB
592*/
593}
#define Int
Definition Bconfig.h:22
F_VOID_FCT(* GEMM_T)()
Definition pblas.h:317
void(* GSUM2D_T)()
Definition pblas.h:286
#define C2F_CHAR(a)
Definition pblas.h:125
#define CCOLUMN
Definition PBblacs.h:20
#define TOP_GET
Definition PBblacs.h:50
#define COLUMN
Definition PBblacs.h:45
#define COMBINE
Definition PBblacs.h:49
#define CROW
Definition PBblacs.h:21
#define ROW
Definition PBblacs.h:46
void Cblacs_gridinfo()
#define CCONJG
Definition PBblas.h:21
#define ALL
Definition PBblas.h:50
#define CLEFT
Definition PBblas.h:29
#define CNOCONJG
Definition PBblas.h:19
#define NOCONJG
Definition PBblas.h:45
#define CUPPER
Definition PBblas.h:26
#define CNOTRAN
Definition PBblas.h:18
#define CTRAN
Definition PBblas.h:20
#define CCOTRAN
Definition PBblas.h:22
#define CFORWARD
Definition PBblas.h:38
#define pilaenv_
Definition PBpblas.h:44
#define CTXT_
Definition PBtools.h:38
Int PB_Cfirstnb()
#define MB_
Definition PBtools.h:43
char * PB_Cmalloc()
void PB_Cinfog2l()
#define MModSub(I1, I2, d)
Definition PBtools.h:102
void PB_Cptrm()
#define MIN(a_, b_)
Definition PBtools.h:76
#define Mptr(a_, i_, j_, lda_, siz_)
Definition PBtools.h:132
#define LLD_
Definition PBtools.h:47
Int PB_Cnumroc()
void PB_Ctztrmm()
char * PB_Ctop()
void PB_CInV()
void PB_Cplapad()
void PB_CInOutV()
#define RSRC_
Definition PBtools.h:45
void PB_Cdescset()
#define MModAdd1(I, d)
Definition PBtools.h:100
#define M_
Definition PBtools.h:39
#define MModAdd(I1, I2, d)
Definition PBtools.h:97
#define INB_
Definition PBtools.h:42
#define MModSub1(I, d)
Definition PBtools.h:105
#define CSRC_
Definition PBtools.h:46
Int PB_Clcm()
#define IMB_
Definition PBtools.h:41
Int PB_Cindxg2p()
#define Mupcase(C)
Definition PBtools.h:83
#define DLEN_
Definition PBtools.h:48
#define NB_
Definition PBtools.h:44
void PB_Cconjg()
void PB_Cpaxpby()
void PB_Cdescribe()
#define N_
Definition PBtools.h:40
#define TYPE
Definition clamov.c:7
Here is the call graph for this function: