ScaLAPACK 2.1  2.1 ScaLAPACK: Scalable Linear Algebra PACKage
Go to the documentation of this file.
1 /* ---------------------------------------------------------------------
2 *
3 * -- PBLAS auxiliary routine (version 2.0) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * April 1, 1998
7 *
8 * ---------------------------------------------------------------------
9 */
10 /*
11 * Include files
12 */
13 #include "../pblas.h"
14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
18
19 #ifdef __STDC__
20 void PB_Cpgeadd( PBTYP_T * TYPE, char * DIRECA, char * DIRECC,
21  char * CONJUG, int M, int N, char * ALPHA, char * A,
22  int IA, int JA, int * DESCA, char * BETA, char * C,
23  int IC, int JC, int * DESCC )
24 #else
25 void PB_Cpgeadd( TYPE, DIRECA, DIRECC, CONJUG, M, N, ALPHA, A, IA, JA,
26  DESCA, BETA, C, IC, JC, DESCC )
27 /*
28 * .. Scalar Arguments ..
29 */
30  char * CONJUG, * DIRECA, * DIRECC;
31  int IA, IC, JA, JC, M, N;
32  char * ALPHA, * BETA;
33  PBTYP_T * TYPE;
34 /*
35 * .. Array Arguments ..
36 */
37  int * DESCA, * DESCC;
38  char * A, * C;
39 #endif
40 {
41 /*
42 * Purpose
43 * =======
44 *
45 * PB_Cpgeadd adds a matrix to another
46 *
47 * sub( C ) := beta*sub( C ) + alpha*op( sub( A ) )
48 *
49 * where
50 *
51 * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), and, op( X ) is one of
52 *
53 * op( X ) = X or op( X ) = conjg( X ).
54 *
55 * Thus, op( sub( A ) ) denotes A(IA:IA+M-1,JA:JA+N-1) if CONJUG = 'N',
56 * conjg(A(IA:IA+N-1,JA:JA+M-1)) if CONJUG = 'C'.
57 *
58 * Alpha and beta are scalars, sub( C ) and op( sub( A ) ) are m by n
59 * submatrices.
60 *
61 * Notes
62 * =====
63 *
64 * A description vector is associated with each 2D block-cyclicly dis-
65 * tributed matrix. This vector stores the information required to
66 * establish the mapping between a matrix entry and its corresponding
67 * process and memory location.
68 *
69 * In the following comments, the character _ should be read as
70 * "of the distributed matrix". Let A be a generic term for any 2D
71 * block cyclicly distributed matrix. Its description vector is DESC_A:
72 *
73 * NOTATION STORED IN EXPLANATION
74 * ---------------- --------------- ------------------------------------
75 * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
76 * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
77 * the NPROW x NPCOL BLACS process grid
78 * A is distributed over. The context
79 * itself is global, but the handle
80 * (the integer value) may vary.
81 * M_A (global) DESCA[ M_ ] The number of rows in the distribu-
82 * ted matrix A, M_A >= 0.
83 * N_A (global) DESCA[ N_ ] The number of columns in the distri-
84 * buted matrix A, N_A >= 0.
85 * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
86 * block of the matrix A, IMB_A > 0.
87 * INB_A (global) DESCA[ INB_ ] The number of columns of the upper
88 * left block of the matrix A,
89 * INB_A > 0.
90 * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
91 * bute the last M_A-IMB_A rows of A,
92 * MB_A > 0.
93 * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
94 * bute the last N_A-INB_A columns of
95 * A, NB_A > 0.
96 * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
97 * row of the matrix A is distributed,
98 * NPROW > RSRC_A >= 0.
99 * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
100 * first column of A is distributed.
101 * NPCOL > CSRC_A >= 0.
102 * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
103 * array storing the local blocks of
104 * the distributed matrix A,
105 * IF( Lc( 1, N_A ) > 0 )
106 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
107 * ELSE
108 * LLD_A >= 1.
109 *
110 * Let K be the number of rows of a matrix A starting at the global in-
111 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
112 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
113 * receive if these K rows were distributed over NPROW processes. If K
114 * is the number of columns of a matrix A starting at the global index
115 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
116 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
117 * these K columns were distributed over NPCOL processes.
118 *
119 * The values of Lr() and Lc() may be determined via a call to the func-
120 * tion PB_Cnumroc:
121 * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
122 * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
123 *
124 * Arguments
125 * =========
126 *
127 * TYPE (local input) pointer to a PBTYP_T structure
128 * On entry, TYPE is a pointer to a structure of type PBTYP_T,
129 * that contains type information (See pblas.h).
130 *
131 * DIRECA (global input) pointer to CHAR
132 * On entry, DIRECA specifies the direction in which the rows
133 * or columns of sub( A ) should be looped over as follows:
134 * DIRECA = 'F' or 'f' forward or increasing,
135 * DIRECA = 'B' or 'b' backward or decreasing.
136 *
137 * DIRECC (global input) pointer to CHAR
138 * On entry, DIRECC specifies the direction in which the rows
139 * or columns of sub( C ) should be looped over as follows:
140 * DIRECC = 'F' or 'f' forward or increasing,
141 * DIRECC = 'B' or 'b' backward or decreasing.
142 *
143 * CONJUG (global input) pointer to CHAR
144 * On entry, CONJUG specifies whether conjg( sub( A ) ) or
145 * sub( A ) should be added to sub( C ) as follows:
146 * CONJUG = 'N' or 'n':
147 * sub( C ) := beta*sub( C ) + alpha*sub( A )'
148 * otherwise
149 * sub( C ) := beta*sub( C ) + alpha*conjg( sub( A ) )'.
150 *
151 * M (global input) INTEGER
152 * On entry, M specifies the number of rows of the submatrices
153 * sub( A ) and sub( C ). M must be at least zero.
154 *
155 * N (global input) INTEGER
156 * On entry, N specifies the number of columns of the submatri-
157 * ces sub( A ) and sub( C ). N must be at least zero.
158 *
159 * ALPHA (global input) pointer to CHAR
160 * On entry, ALPHA specifies the scalar alpha. When ALPHA is
161 * supplied as zero then the local entries of the array A
162 * corresponding to the entries of the submatrix sub( A ) need
163 * not be set on input.
164 *
165 * A (local input) pointer to CHAR
166 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is
167 * at least Lc( 1, JA+N-1 ). Before entry, this array contains
168 * the local entries of the matrix A.
169 *
170 * IA (global input) INTEGER
171 * On entry, IA specifies A's global row index, which points to
172 * the beginning of the submatrix sub( A ).
173 *
174 * JA (global input) INTEGER
175 * On entry, JA specifies A's global column index, which points
176 * to the beginning of the submatrix sub( A ).
177 *
178 * DESCA (global and local input) INTEGER array
179 * On entry, DESCA is an integer array of dimension DLEN_. This
180 * is the array descriptor for the matrix A.
181 *
182 * BETA (global input) pointer to CHAR
183 * On entry, BETA specifies the scalar beta. When BETA is
184 * supplied as zero then the local entries of the array C
185 * corresponding to the entries of the submatrix sub( C ) need
186 * not be set on input.
187 *
188 * C (local input/local output) pointer to CHAR
189 * On entry, C is an array of dimension (LLD_C, Kc), where Kc is
190 * at least Lc( 1, JC+N-1 ). Before entry, this array contains
191 * the local entries of the matrix C.
192 * On exit, the entries of this array corresponding to the local
193 * entries of the submatrix sub( C ) are overwritten by the
194 * local entries of the m by n updated submatrix.
195 *
196 * IC (global input) INTEGER
197 * On entry, IC specifies C's global row index, which points to
198 * the beginning of the submatrix sub( C ).
199 *
200 * JC (global input) INTEGER
201 * On entry, JC specifies C's global column index, which points
202 * to the beginning of the submatrix sub( C ).
203 *
204 * DESCC (global and local input) INTEGER array
205 * On entry, DESCC is an integer array of dimension DLEN_. This
206 * is the array descriptor for the matrix C.
207 *
208 * -- Written on April 1, 1998 by
209 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
210 *
211 * ---------------------------------------------------------------------
212 */
213 /*
214 * .. Local Scalars ..
215 */
216  char ACroc, * one, * talpha, * tbeta, * zero;
217  int ACmyprocD, ACmyprocR, ACnD, ACnR, ACnprocsD, ACnprocsR,
218  Abufld, AcurrocR, Afr, Afwd, AiD, AiR, AiiD, AiiR, AinbD,
219  AinbR, Ainb1D, Ainb1R, AisR, Akk, Ald, AnbD, AnbR, AnpD,
220  AnpR, Aoff, ArocD, ArocR, AsrcR, Cbufld, CcurrocR, Cfr,
221  Cfwd, CiD, CiR, CiiD, CiiR, CinbD, CinbR, Cinb1D, Cinb1R,
222  CisR, Ckk, Cld, CnbD, CnbR, CnpD, CnpR, Coff, CrocD, CrocR,
223  CsrcR, ctxt, k, kb, kbb, lcmb, maxp, maxpm1, maxpq, maxq,
224  mycol, myrow, npcol, npq, nprow, ncpq, nrpq, p=0, q=0,
225  row2row, size, tmp;
226  PB_VM_T VM;
227 /*
228 * .. Local Arrays ..
229 */
230  int DBUFA[DLEN_], DBUFC[DLEN_];
231  char * Abuf = NULL, * Cbuf = NULL;
232 /* ..
233 * .. Executable Statements ..
234 *
235 */
236 /*
237 * Retrieve process grid information
238 */
239  Cblacs_gridinfo( ( ctxt = DESCC[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
240 /*
241 * Loop over the rows of sub( C ) when M <= N, and the columns of sub( C )
242 * otherwise.
243 */
244  row2row = ( ( M <= N ) || ( npcol == 1 ) || ( DESCA[CSRC_] == -1 ) );
245
246  if( row2row )
247  {
248  AinbR = DESCA[IMB_]; AnbR = DESCA[MB_]; AsrcR = DESCA[RSRC_];
249  CinbR = DESCC[IMB_]; CnbR = DESCC[MB_]; CsrcR = DESCC[RSRC_];
250 /*
251 * If sub( A ) and sub( C ) span only one process row, then there is no need
252 * to pack the data.
253 */
254  if( !( PB_Cspan( M, IA, AinbR, AnbR, AsrcR, nprow ) ) &&
255  !( PB_Cspan( M, IC, CinbR, CnbR, CsrcR, nprow ) ) )
256  {
257  PB_Cpaxpby( TYPE, CONJUG, M, N, ALPHA, A, IA, JA, DESCA, ROW, BETA,
258  C, IC, JC, DESCC, ROW );
259  return;
260  }
261 /*
262 * Compute local information for sub( A ) and sub( C )
263 */
264  ACnR = M; ACnD = N;
265  ACmyprocR = myrow; ACnprocsR = nprow;
266  ACmyprocD = mycol; ACnprocsD = npcol; ACroc = CROW;
267  AiR = IA; AiD = JA;
268  AinbD = DESCA[INB_]; AnbD = DESCA[NB_]; Ald = DESCA[LLD_];
269  PB_Cinfog2l( IA, JA, DESCA, ACnprocsR, ACnprocsD, ACmyprocR, ACmyprocD,
270  &AiiR, &AiiD, &ArocR, &ArocD );
271  CiR = IC; CiD = JC;
272  CinbD = DESCC[INB_]; CnbD = DESCC[NB_]; Cld = DESCC[LLD_];
273  PB_Cinfog2l( IC, JC, DESCC, ACnprocsR, ACnprocsD, ACmyprocR, ACmyprocD,
274  &CiiR, &CiiD, &CrocR, &CrocD );
275  }
276  else
277  {
278  AinbR = DESCA[INB_]; AnbR = DESCA[NB_]; AsrcR = DESCA[CSRC_];
279  CinbR = DESCC[INB_]; CnbR = DESCC[NB_]; CsrcR = DESCC[CSRC_];
280 /*
281 * If sub( A ) and sub( C ) span only one process column, then there is no need
282 * to pack the data.
283 */
284  if( !( PB_Cspan( N, JA, AinbR, AnbR, AsrcR, npcol ) ) &&
285  !( PB_Cspan( N, JC, CinbR, CnbR, CsrcR, npcol ) ) )
286  {
287  PB_Cpaxpby( TYPE, CONJUG, M, N, ALPHA, A, IA, JA, DESCA, COLUMN, BETA,
288  C, IC, JC, DESCC, COLUMN );
289  return;
290  }
291 /*
292 * Compute local information for sub( A ) and sub( C )
293 */
294  ACnR = N; ACnD = M;
295  ACmyprocR = mycol; ACnprocsR = npcol;
296  ACmyprocD = myrow; ACnprocsD = nprow; ACroc = CCOLUMN;
297  AiR = JA; AiD = IA;
298  AinbD = DESCA[IMB_]; AnbD = DESCA[MB_]; Ald = DESCA[LLD_];
299  PB_Cinfog2l( IA, JA, DESCA, ACnprocsD, ACnprocsR, ACmyprocD, ACmyprocR,
300  &AiiD, &AiiR, &ArocD, &ArocR );
301  CiR = JC; CiD = IC;
302  CinbD = DESCC[IMB_]; CnbD = DESCC[MB_]; Cld = DESCC[LLD_];
303  PB_Cinfog2l( IC, JC, DESCC, ACnprocsD, ACnprocsR, ACmyprocD, ACmyprocR,
304  &CiiD, &CiiR, &CrocD, &CrocR );
305  }
306
307  size = TYPE->size; one = TYPE->one; zero = TYPE->zero;
308  kb = pilaenv_( &ctxt, C2F_CHAR( &TYPE->type ) );
309
310  Ainb1D = PB_Cfirstnb( ACnD, AiD, AinbD, AnbD );
311  AnpD = PB_Cnumroc( ACnD, 0, Ainb1D, AnbD, ACmyprocD, ArocD, ACnprocsD );
312  Ainb1R = PB_Cfirstnb( ACnR, AiR, AinbR, AnbR );
313  AisR = ( ( AsrcR < 0 ) || ( ACnprocsR == 1 ) );
314
315  Cinb1D = PB_Cfirstnb( ACnD, CiD, CinbD, CnbD );
316  CnpD = PB_Cnumroc( ACnD, 0, Cinb1D, CnbD, ACmyprocD, CrocD, ACnprocsD );
317  Cinb1R = PB_Cfirstnb( ACnR, CiR, CinbR, CnbR );
318  CisR = ( ( CsrcR < 0 ) || ( ACnprocsR == 1 ) );
319
320  lcmb = PB_Clcm( ( maxp = ( CisR ? 1 : ACnprocsR ) ) * CnbR,
321  ( maxq = ( AisR ? 1 : ACnprocsR ) ) * AnbR );
322
323  Afwd = ( Mupcase( DIRECA[0] ) == CFORWARD );
324  Cfwd = ( Mupcase( DIRECC[0] ) == CFORWARD );
325 /*
326 * When sub( A ) is not replicated and backward pass on sub( A ), find the
327 * virtual process q owning the last row or column of sub( A ).
328 */
329  if( !( AisR ) && !( Afwd ) )
330  {
331  tmp = PB_Cindxg2p( ACnR-1, Ainb1R, AnbR, ArocR, ArocR, ACnprocsR );
332  q = MModSub( tmp, ArocR, ACnprocsR );
333  }
334 /*
335 * When sub( C ) is not replicated and backward pass on sub( C ), find the
336 * virtual process p owning the last row or column of sub( C ).
337 */
338  if( !( CisR ) && !( Cfwd ) )
339  {
340  tmp = PB_Cindxg2p( ACnR-1, Cinb1R, CnbR, CrocR, CrocR, ACnprocsR );
341  p = MModSub( tmp, CrocR, ACnprocsR );
342  }
343 /*
344 * Loop over the processes of the virtual grid
345 */
346  maxpm1 = maxp - 1; maxpq = maxp * maxq;
347
348  for( k = 0; k < maxpq; k++ )
349  {
350  AcurrocR = ( AisR ? -1 : MModAdd( ArocR, q, ACnprocsR ) );
351  CcurrocR = ( CisR ? -1 : MModAdd( CrocR, p, ACnprocsR ) );
352
353  if( ( AisR || ( ACmyprocR == AcurrocR ) ) ||
354  ( CisR || ( ACmyprocR == CcurrocR ) ) )
355  {
356  Ckk = CiiR; Akk = AiiR;
357 /*
358 * Initialize local virtual matrix in process (p,q)
359 */
360  AnpR = PB_Cnumroc( ACnR, 0, Ainb1R, AnbR, AcurrocR, ArocR, ACnprocsR );
361  CnpR = PB_Cnumroc( ACnR, 0, Cinb1R, CnbR, CcurrocR, CrocR, ACnprocsR );
362  PB_CVMinit( &VM, 0, CnpR, AnpR, Cinb1R, Ainb1R, CnbR, AnbR, p, q,
363  maxp, maxq, lcmb );
364 /*
365 * Figure out how many diagonal entries in this new virtual process (npq).
366 */
367  npq = PB_CVMnpq( &VM );
368 /*
369 * Re-adjust the number of rows or columns to be (un)packed, in order to average
370 * the message sizes.
371 */
372  if( npq ) kbb = npq / ( ( npq - 1 ) / kb + 1 );
373
374  if( row2row )
375  {
376  while( npq )
377  {
378  kbb = MIN( kbb, npq );
379 /*
380 * Find out how many rows of sub( A ) and sub( C ) are contiguous
381 */
382  PB_CVMcontig( &VM, &nrpq, &ncpq, &Coff, &Aoff );
383 /*
384 * Compute the descriptor DBUFA for the buffer that will contained the packed
385 * rows of sub( A ).
386 */
387  if( ( Afr = ( ncpq < kbb ) ) != 0 )
388  {
389 /*
390 * If rows of sub( A ) are not contiguous, then allocate the buffer and pack
391 * the kbb rows of sub( A ).
392 */
393  Abufld = kbb;
394  if( AisR || ( ACmyprocR == AcurrocR ) )
395  {
396  Abuf = PB_Cmalloc( AnpD * kbb * size );
397  PB_CVMpack( TYPE, &VM, COLUMN, &ACroc, PACKING, NOTRAN,
398  kbb, AnpD, one, Mptr( A, Akk, AiiD, Ald,
399  size ), Ald, zero, Abuf, Abufld );
400  }
401  }
402  else
403  {
404 /*
405 * Otherwise, re-use sub( A ) directly.
406 */
407  Abufld = Ald;
408  if( AisR || ( ACmyprocR == AcurrocR ) )
409  Abuf = Mptr( A, Akk+Aoff, AiiD, Ald, size );
410  }
411  PB_Cdescset( DBUFA, kbb, ACnD, kbb, Ainb1D, kbb, AnbD, AcurrocR,
412  ArocD, ctxt, Abufld );
413 /*
414 * Compute the descriptor DBUFC for the buffer that will contained the packed
415 * rows of sub( C ). Allocate it.
416 */
417  if( ( Cfr = ( nrpq < kbb ) ) != 0 )
418  {
419 /*
420 * If rows of sub( C ) are not contiguous, then allocate receiving buffer.
421 */
422  Cbufld = kbb; talpha = one; tbeta = zero;
423  if( CisR || ( ACmyprocR == CcurrocR ) )
424  Cbuf = PB_Cmalloc( CnpD * kbb * size );
425  }
426  else
427  {
428 /*
429 * Otherwise, re-use sub( C ) directly.
430 */
431  Cbufld = Cld; talpha = ALPHA; tbeta = BETA;
432  if( CisR || ( ACmyprocR == CcurrocR ) )
433  Cbuf = Mptr( C, Ckk+Coff, CiiD, Cld, size );
434  }
435  PB_Cdescset( DBUFC, kbb, ACnD, kbb, Cinb1D, kbb, CnbD, CcurrocR,
436  CrocD, ctxt, Cbufld );
437 /*
438 * Add the one-dimensional buffer Abuf into Cbuf.
439 */
440  PB_Cpaxpby( TYPE, CONJUG, kbb, ACnD, talpha, Abuf, 0, 0, DBUFA,
441  &ACroc, tbeta, Cbuf, 0, 0, DBUFC, &ACroc );
442 /*
443 * Release the buffer containing the packed rows of sub( A )
444 */
445  if( Afr && ( AisR || ( ACmyprocR == AcurrocR ) ) )
446  if( Abuf ) free( Abuf );
447 /*
448 * Unpack the kbb rows of sub( C ) and release the buffer containing them.
449 */
450  if( Cfr && ( CisR || ( ACmyprocR == CcurrocR ) ) )
451  {
452  PB_CVMpack( TYPE, &VM, ROW, &ACroc, UNPACKING, NOTRAN, kbb,
453  CnpD, BETA, Mptr( C, Ckk, CiiD, Cld, size ), Cld,
454  ALPHA, Cbuf, Cbufld );
455  if( Cbuf ) free( Cbuf );
456  }
457 /*
458 * Update the local row indexes of sub( A ) and sub( C )
459 */
460  PB_CVMupdate( &VM, kbb, &Ckk, &Akk );
461  npq -= kbb;
462  }
463  }
464  else
465  {
466  while( npq )
467  {
468  kbb = MIN( kbb, npq );
469 /*
470 * Find out how many columns of sub( A ) and sub( C ) are contiguous
471 */
472  PB_CVMcontig( &VM, &nrpq, &ncpq, &Coff, &Aoff );
473 /*
474 * Compute the descriptor DBUFA for the buffer that will contained the packed
475 * columns of sub( A ).
476 */
477  if( ( Afr = ( ncpq < kbb ) ) != 0 )
478  {
479 /*
480 * If columns of sub( A ) are not contiguous, then allocate the buffer and
481 * pack the kbb columns of sub( A ).
482 */
483  Abufld = MAX( 1, AnpD );
484  if( AisR || ( ACmyprocR == AcurrocR ) )
485  {
486  Abuf = PB_Cmalloc( AnpD * kbb * size );
487  PB_CVMpack( TYPE, &VM, COLUMN, &ACroc, PACKING, NOTRAN,
488  kbb, AnpD, one, Mptr( A, AiiD, Akk, Ald,
489  size ), Ald, zero, Abuf, Abufld );
490  }
491  }
492  else
493  {
494 /*
495 * Otherwise, re-use sub( A ) directly.
496 */
497  Abufld = Ald;
498  if( AisR || ( ACmyprocR == AcurrocR ) )
499  Abuf = Mptr( A, AiiD, Akk+Aoff, Ald, size );
500  }
501  PB_Cdescset( DBUFA, ACnD, kbb, Ainb1D, kbb, AnbD, kbb, ArocD,
502  AcurrocR, ctxt, Abufld );
503 /*
504 * Compute the descriptor DBUFC for the buffer that will contained the packed
505 * columns of sub( C ). Allocate it.
506 */
507  if( ( Cfr = ( nrpq < kbb ) ) != 0 )
508  {
509 /*
510 * If columns of sub( C ) are not contiguous, then allocate receiving buffer.
511 */
512  Cbufld = MAX( 1, CnpD ); talpha = one; tbeta = zero;
513  if( CisR || ( ACmyprocR == CcurrocR ) )
514  Cbuf = PB_Cmalloc( CnpD * kbb * size );
515  }
516  else
517  {
518  Cbufld = Cld; talpha = ALPHA; tbeta = BETA;
519  if( CisR || ( ACmyprocR == CcurrocR ) )
520  Cbuf = Mptr( C, CiiD, Ckk+Coff, Cld, size );
521  }
522  PB_Cdescset( DBUFC, ACnD, kbb, Cinb1D, kbb, CnbD, kbb, CrocD,
523  CcurrocR, ctxt, Cbufld );
524 /*
525 * Add the one-dimensional buffer Abuf into Cbuf.
526 */
527  PB_Cpaxpby( TYPE, CONJUG, ACnD, kbb, talpha, Abuf, 0, 0, DBUFA,
528  &ACroc, tbeta, Cbuf, 0, 0, DBUFC, &ACroc );
529 /*
530 * Release the buffer containing the packed columns of sub( A )
531 */
532  if( Afr && ( AisR || ( ACmyprocR == AcurrocR ) ) )
533  if( Abuf ) free( Abuf );
534 /*
535 * Unpack the kbb columns of sub( C ) and release the buffer containing them.
536 */
537  if( Cfr && ( CisR || ( ACmyprocR == CcurrocR ) ) )
538  {
539  PB_CVMpack( TYPE, &VM, ROW, &ACroc, UNPACKING, NOTRAN, kbb,
540  CnpD, BETA, Mptr( C, CiiD, Ckk, Cld, size ), Cld,
541  ALPHA, Cbuf, Cbufld );
542  if( Cbuf ) free( Cbuf );
543  }
544 /*
545 * Update the local row index of sub( A ) and the local column index of sub( C )
546 */
547  PB_CVMupdate( &VM, kbb, &Ckk, &Akk );
548  npq -= kbb;
549  }
550  }
551  }
552 /*
553 * Go to the next virtual process (p,q)
554 */
555  if( ( Cfwd && ( p == maxpm1 ) ) || ( !( Cfwd ) && ( p == 0 ) ) )
556  q = ( Afwd ? MModAdd1( q, maxq ) : MModSub1( q, maxq ) );
557  p = ( Cfwd ? MModAdd1( p, maxp ) : MModSub1( p, maxp ) );
558  }
559 /*
560 * End of PB_Cpgeadd
561 */
562 }
TYPE
#define TYPE
Definition: clamov.c:7
ROW
#define ROW
Definition: PBblacs.h:46
MB_
#define MB_
Definition: PBtools.h:43
PB_Cpaxpby
void PB_Cpaxpby()
PB_CVMcontig
void PB_CVMcontig()
NB_
#define NB_
Definition: PBtools.h:44
COLUMN
#define COLUMN
Definition: PBblacs.h:45
CSRC_
#define CSRC_
Definition: PBtools.h:46
UNPACKING
#define UNPACKING
Definition: PBtools.h:54
void PB_Cpgeadd(PBTYP_T *TYPE, char *DIRECA, char *DIRECC, char *CONJUG, int M, int N, char *ALPHA, char *A, int IA, int JA, int *DESCA, char *BETA, char *C, int IC, int JC, int *DESCC)
PB_Cfirstnb
int PB_Cfirstnb()
DLEN_
#define DLEN_
Definition: PBtools.h:48
NOTRAN
#define NOTRAN
Definition: PBblas.h:44
LLD_
#define LLD_
Definition: PBtools.h:47
PB_CVMinit
void PB_CVMinit()
PB_CVMpack
int PB_CVMpack()
#define MModAdd(I1, I2, d)
Definition: PBtools.h:97
CROW
#define CROW
Definition: PBblacs.h:21
IMB_
#define IMB_
Definition: PBtools.h:41
MModSub
#define MModSub(I1, I2, d)
Definition: PBtools.h:102
pilaenv_
int pilaenv_()
PB_Cdescset
void PB_Cdescset()
PB_CVMupdate
void PB_CVMupdate()
Definition: PBtools.h:100
PB_Cindxg2p
int PB_Cindxg2p()
RSRC_
#define RSRC_
Definition: PBtools.h:45
PB_Cinfog2l
void PB_Cinfog2l()
PB_Cnumroc
int PB_Cnumroc()
PB_Cmalloc
char * PB_Cmalloc()
CFORWARD
#define CFORWARD
Definition: PBblas.h:38
PB_VM_T
Definition: pblas.h:432
PB_CVMnpq
int PB_CVMnpq()
MIN
#define MIN(a_, b_)
Definition: PBtools.h:76
CCOLUMN
#define CCOLUMN
Definition: PBblacs.h:20
PACKING
#define PACKING
Definition: PBtools.h:53
INB_
#define INB_
Definition: PBtools.h:42
C2F_CHAR
#define C2F_CHAR(a)
Definition: pblas.h:121
PB_Cspan
int PB_Cspan()
MModSub1
#define MModSub1(I, d)
Definition: PBtools.h:105
MAX
#define MAX(a_, b_)
Definition: PBtools.h:77
Cblacs_gridinfo
void Cblacs_gridinfo()
PBTYP_T
Definition: pblas.h:325
Mupcase
#define Mupcase(C)
Definition: PBtools.h:83
Mptr
#define Mptr(a_, i_, j_, lda_, siz_)
Definition: PBtools.h:132
CTXT_
#define CTXT_
Definition: PBtools.h:38
PB_Clcm
int PB_Clcm()