ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
PB_CScatterV.c
Go to the documentation of this file.
1 /* ---------------------------------------------------------------------
2 *
3 * -- PBLAS auxiliary routine (version 2.0) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * April 1, 1998
7 *
8 * ---------------------------------------------------------------------
9 */
10 /*
11 * Include files
12 */
13 #include "../pblas.h"
14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
18 
19 #ifdef __STDC__
20 void PB_CScatterV( PBTYP_T * TYPE, char * DIRECA, int M, int N,
21  char * A, int IA, int JA, int * DESCA, char * AROC,
22  char * ALPHA, char * B, int IB, int JB, int * DESCB,
23  char * BROC )
24 #else
25 void PB_CScatterV( TYPE, DIRECA, M, N, A, IA, JA, DESCA, AROC,
26  ALPHA, B, IB, JB, DESCB, BROC )
27 /*
28 * .. Scalar Arguments ..
29 */
30  char * ALPHA, * AROC, * BROC, * DIRECA;
31  int IA, IB, JA, JB, M, N;
32  PBTYP_T * TYPE;
33 /*
34 * .. Array Arguments ..
35 */
36  int * DESCA, * DESCB;
37  char * A, * B;
38 #endif
39 {
40 /*
41 * Purpose
42 * =======
43 *
44 * PB_CScatterV disaggregates the one-dimensional submatrix sub( A ) de-
45 * noting A( IA:IA+M-1, JA:JA+N-1 ) into a two-dimensional submatrix
46 * sub( B ) denoting B( IB:IB+M-1, JB:JB+N-1 ) when AROC is equal to
47 * BROC and B( IB:IB+N-1, JB:JB+M-1 ) otherwise:
48 *
49 * sub( B ) := alpha * sub( B ) + sub( A ).
50 *
51 * Notes
52 * =====
53 *
54 * A description vector is associated with each 2D block-cyclicly dis-
55 * tributed matrix. This vector stores the information required to
56 * establish the mapping between a matrix entry and its corresponding
57 * process and memory location.
58 *
59 * In the following comments, the character _ should be read as
60 * "of the distributed matrix". Let A be a generic term for any 2D
61 * block cyclicly distributed matrix. Its description vector is DESC_A:
62 *
63 * NOTATION STORED IN EXPLANATION
64 * ---------------- --------------- ------------------------------------
65 * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
66 * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
67 * the NPROW x NPCOL BLACS process grid
68 * A is distributed over. The context
69 * itself is global, but the handle
70 * (the integer value) may vary.
71 * M_A (global) DESCA[ M_ ] The number of rows in the distribu-
72 * ted matrix A, M_A >= 0.
73 * N_A (global) DESCA[ N_ ] The number of columns in the distri-
74 * buted matrix A, N_A >= 0.
75 * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
76 * block of the matrix A, IMB_A > 0.
77 * INB_A (global) DESCA[ INB_ ] The number of columns of the upper
78 * left block of the matrix A,
79 * INB_A > 0.
80 * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
81 * bute the last M_A-IMB_A rows of A,
82 * MB_A > 0.
83 * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
84 * bute the last N_A-INB_A columns of
85 * A, NB_A > 0.
86 * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
87 * row of the matrix A is distributed,
88 * NPROW > RSRC_A >= 0.
89 * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
90 * first column of A is distributed.
91 * NPCOL > CSRC_A >= 0.
92 * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
93 * array storing the local blocks of
94 * the distributed matrix A,
95 * IF( Lc( 1, N_A ) > 0 )
96 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
97 * ELSE
98 * LLD_A >= 1.
99 *
100 * Let K be the number of rows of a matrix A starting at the global in-
101 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
102 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
103 * receive if these K rows were distributed over NPROW processes. If K
104 * is the number of columns of a matrix A starting at the global index
105 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
106 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
107 * these K columns were distributed over NPCOL processes.
108 *
109 * The values of Lr() and Lc() may be determined via a call to the func-
110 * tion PB_Cnumroc:
111 * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
112 * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
113 *
114 * Arguments
115 * =========
116 *
117 * TYPE (local input) pointer to a PBTYP_T structure
118 * On entry, TYPE is a pointer to a structure of type PBTYP_T,
119 * that contains type information (See pblas.h).
120 *
121 * DIRECA (global input) pointer to CHAR
122 * On entry, DIRECA specifies the direction in which the rows
123 * or columns of sub( A ) should be disaggregated as follows:
124 * DIRECA = 'F' or 'f' forward or increasing,
125 * DIRECA = 'B' or 'b' backward or decreasing.
126 *
127 * M (global input) INTEGER
128 * On entry, M specifies the number of rows of the submatrix
129 * sub( A ). M must be at least zero.
130 *
131 * N (global input) INTEGER
132 * On entry, N specifies the number of columns of the submatrix
133 * sub( A ). N must be at least zero.
134 *
135 * A (local input) pointer to CHAR
136 * On entry, A is an array of dimension (LLD_A, Ka), where LLD_A
137 * is DESCA[LLD_], i.e. at least MAX( 1, Lr( M, IA ) ), and,
138 * Ka is at least Lc( N, JA ). Before entry, this array contains
139 * the local entries of the matrix A.
140 *
141 * IA (global input) INTEGER
142 * On entry, IA specifies A's global row index, which points to
143 * the beginning of the submatrix sub( A ).
144 *
145 * JA (global input) INTEGER
146 * On entry, JA specifies A's global column index, which points
147 * to the beginning of the submatrix sub( A ).
148 *
149 * DESCA (global and local input) INTEGER array
150 * On entry, DESCA is an integer array of dimension DLEN_. This
151 * is the array descriptor for the matrix A.
152 *
153 * AROC (global input) pointer to CHAR
154 * On entry, AROC specifies the orientation of the submatrix
155 * sub( A ). When AROC is 'R' or 'r', sub( A ) is a row matrix,
156 * and a column matrix otherwise.
157 *
158 * ALPHA (local input) pointer to CHAR
159 * On entry, ALPHA specifies the scalar alpha.
160 *
161 * B (local output) pointer to CHAR
162 * On entry, A is an array of dimension (LLD_B, Kb), where LLD_B
163 * is DESCB[LLD_], i.e. at least MAX( 1, Lr( M, IB ) ) when AROC
164 * and BROC are equal, and MAX( 1, Lr( N, IB ) ) otherwise, and,
165 * Kb is at least Lc( N, JB ) when AROC and BROC are equal, and
166 * Lc( M, JB ) otherwise. On exit, this array contains the local
167 * entries of the disaggregated submatrix sub( A ).
168 *
169 * IB (global input) INTEGER
170 * On entry, IB specifies B's global row index, which points to
171 * the beginning of the submatrix sub( B ).
172 *
173 * JB (global input) INTEGER
174 * On entry, JB specifies B's global column index, which points
175 * to the beginning of the submatrix sub( B ).
176 *
177 * DESCB (global and local input) INTEGER array
178 * On entry, DESCB is an integer array of dimension DLEN_. This
179 * is the array descriptor for the matrix B.
180 *
181 * BROC (global input) pointer to CHAR
182 * On entry, BROC specifies the orientation of the submatrix
183 * sub( B ). When BROC is 'R' or 'r', sub( B ) is a row matrix,
184 * and a column matrix otherwise.
185 *
186 * -- Written on April 1, 1998 by
187 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
188 *
189 * ---------------------------------------------------------------------
190 */
191 /*
192 * .. Local Scalars ..
193 */
194  char * one;
195  int Afwd, Bbufld, Bcol, Bcurcol, Bcurrow, Bii, Bimb, Bimb1, Binb,
196  Binb1, BisRow, Bjj, Bld, Bm, Bmb, Bmp, Bn, Bnb, Bnnxt, BnnxtL,
197  Bnpre, Bnq, Brow, WAfr, ctxt, kb, mycol, mydist, mydistnb,
198  myrow, nlen, npcol, nprow, offset, size, srcdist, stride,
199  stridenb, tmp;
200  MMADD_T add;
201  MMSHFT_T shft;
202 /*
203 * .. Local Arrays ..
204 */
205  int Bd0[DLEN_], WAd[DLEN_];
206  char * Bptr = NULL, * Bbuf = NULL, * Bbufptr = NULL, * WA = NULL;
207 /* ..
208 * .. Executable Statements ..
209 *
210 */
211 /*
212 * Quick return if possible
213 */
214  if( ( M <= 0 ) || ( N <= 0 ) ) return;
215 /*
216 * Retrieve process grid information
217 */
218  Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
219 
220  Afwd = ( Mupcase( DIRECA[0] ) == CFORWARD );
221  BisRow = ( Mupcase( BROC [0] ) == CROW );
222  if( Mupcase( AROC[0] ) == Mupcase( BROC[0] ) ) { Bm = M; Bn = N; }
223  else { Bm = N; Bn = M; }
224 /*
225 * Retrieve sub( B )'s local information: Bii, Bjj, Brow, Bcol ...
226 */
227  PB_Cinfog2l( IB, JB, DESCB, nprow, npcol, myrow, mycol, &Bii, &Bjj,
228  &Brow, &Bcol );
229  Bimb = DESCB[IMB_]; Binb = DESCB[INB_]; Bmb = DESCB[MB_]; Bnb = DESCB[NB_];
230  Bimb1 = PB_Cfirstnb( Bm, IB, Bimb, Bmb );
231  Bmp = PB_Cnumroc( Bm, 0, Bimb1, Bmb, myrow, Brow, nprow );
232  Binb1 = PB_Cfirstnb( Bn, JB, Binb, Bnb );
233  Bnq = PB_Cnumroc( Bn, 0, Binb1, Bnb, mycol, Bcol, npcol );
234  Bld = DESCB[LLD_]; size = TYPE->size; one = TYPE->one;
235  if( ( Bmp > 0 ) && ( Bnq > 0 ) ) Bptr = Mptr( B, Bii, Bjj, Bld, size );
236 
237  if( BisRow )
238  {
239 /*
240 * Compute descriptor Bd0 for sub( B ).
241 */
242  if( Afwd ) { Bcurrow = Brow; }
243  else { Bcurrow = PB_Cindxg2p( Bm-1, Bimb1, Bmb, Brow, Brow, nprow ); }
244  PB_Cdescset( Bd0, Bm, Bn, Bm, Binb1, Bmb, Bnb, Bcurrow, Bcol, ctxt, Bld );
245 /*
246 * Align sub( A ) with sub( B )
247 */
248  PB_CInV( TYPE, NOCONJG, BROC, Bm, Bn, Bd0, Bm, A, IA, JA, DESCA, AROC,
249  &WA, WAd, &WAfr );
250 /*
251 * Disaggregate WA = sub( A )
252 */
253  if( ( Brow == -1 ) || ( nprow == 1 ) )
254  {
255 /*
256 * sub( B ) is replicated
257 */
258  if( Bnq > 0 )
259  TYPE->Fmmadd( &Bm, &Bnq, one, WA, &WAd[LLD_], ALPHA, Bptr, &Bld );
260  if( WAfr ) free( WA );
261  return;
262  }
263 
264  if( !( PB_Cspan( Bm, 0, Bimb1, Bmb, Brow, nprow ) ) )
265  {
266 /*
267 * sub( B ) spans only one process row
268 */
269  if( ( myrow == Brow ) && ( Bnq > 0 ) )
270  TYPE->Fmmadd( &Bm, &Bnq, one, WA, &WAd[LLD_], ALPHA, Bptr, &Bld );
271  if( WAfr ) free( WA );
272  return;
273  }
274 /*
275 * sub( B ) spans more than one process row
276 */
277  if( Afwd )
278  {
279 /*
280 * sub( B ) is not replicated and spans more than one process row. Forward row
281 * dissagregation starts in the process row where the global row IB resides.
282 */
283  if( ( Bmp > 0 ) && ( Bnq > 0 ) )
284  {
285 /*
286 * Compute how may rows are before and after me (Bnpre and Bnnxt).
287 */
288  Bnpre = PB_Cnpreroc( Bm, 0, Bimb1, Bmb, myrow, Brow, nprow );
289  Bnnxt = PB_Cnnxtroc( Bm, 0, Bimb1, Bmb, myrow, Brow, nprow );
290  nlen = Bmp + Bnnxt;
291 
292  if( Bnpre > 0 )
293  {
294 /*
295 * If I don't own the row IB, then allocate and receive a buffer of length
296 * ( Bmp + Bnnxt ) * Bnq from the previous process row.
297 */
298  Bbufptr = Bbuf = PB_Cmalloc( nlen * Bnq * size );
299  Bbufld = nlen;
300  TYPE->Cgerv2d( ctxt, nlen, Bnq, Bbuf, Bbufld, MModSub1( myrow,
301  nprow ), mycol );
302  kb = Bmb;
303  }
304  else
305  {
306 /*
307 * Otherwise, reuse WA.
308 */
309  Bbufptr = Bbuf = WA;
310  Bbufld = WAd[LLD_];
311  kb = Bimb1;
312  }
313 /*
314 * Unpack the received data
315 */
316  if( Bnnxt > 0 )
317  {
318 /*
319 * If some rows reside in the process row following mine, then unpack my piece,
320 * sort the buffer and send those Bnnxt rows to the next process row.
321 */
322  add = TYPE->Fmmadd; shft = TYPE->Frshft;
323  mydistnb = ( nprow - MModSub( myrow, Brow, nprow ) - 1 );
324  stride = ( mydistnb *= Bmb ) * size;
325 
326  do
327  {
328  kb = MIN( kb, nlen );
329  add( &kb, &Bnq, one, Bbufptr, &Bbufld, ALPHA, Bptr, &Bld );
330  nlen -= kb;
331  offset = -kb;
332  shft( &nlen, &Bnq, &offset, Bbufptr, &Bbufld );
333  Bptr += kb*size;
334  Bbufptr += stride;
335  nlen -= mydistnb;
336  kb = Bmb;
337  } while( nlen > 0 );
338 /*
339 * send buffer of length Bnnxt * Bnq to the next process row.
340 */
341  TYPE->Cgesd2d( ctxt, Bnnxt, Bnq, Bbuf, Bbufld, MModAdd1( myrow,
342  nprow ), mycol );
343  }
344  else
345  {
346 /*
347 * Otherwise, I must be the last process involved in the operation, so no
348 * unpacking is necessary.
349 */
350  TYPE->Fmmadd( &Bmp, &Bnq, one, Bbufptr, &Bbufld, ALPHA, Bptr,
351  &Bld );
352  }
353 /*
354 * If I don't own the row IB, then release the dynamically allocated buffer.
355 */
356  if( Bnpre > 0 ) free( Bbuf );
357  }
358  if( WAfr ) free( WA );
359  }
360  else
361  {
362  if( ( Bmp > 0 ) && ( Bnq > 0 ) )
363  {
364 /*
365 * Compute how may rows are before and after me (Bnpre, Bnnxt).
366 */
367  Bnnxt = PB_Cnnxtroc( Bm, 0, Bimb1, Bmb, myrow, Brow, nprow );
368  BnnxtL = PB_Cnnxtroc( Bm, 0, Bimb1, Bmb, Bcurrow, Brow, nprow );
369  Bnnxt = MModSub( Bnnxt, BnnxtL, Bm );
370  Bnpre = ( nlen = Bm - Bnnxt ) - Bmp;
371 
372  if( Bnnxt > 0 )
373  {
374 /*
375 * If I don't own the row IB+M-1, then allocate and receive a buffer of length
376 * ( Bm - Bnnxt ) * Bnq from the next process row.
377 */
378  Bbufptr = Bbuf = PB_Cmalloc( nlen * Bnq * size );
379  Bbufld = nlen;
380  TYPE->Cgerv2d( ctxt, nlen, Bnq, Bbuf, Bbufld, MModAdd1( myrow,
381  nprow ), mycol );
382  }
383  else
384  {
385 /*
386 * Otherwise, reuse WA.
387 */
388  Bbufptr = Bbuf = WA;
389  Bbufld = WAd[LLD_];
390  }
391 /*
392 * Unpack the received data
393 */
394  if( Bnpre > 0 )
395  {
396 /*
397 * If some rows reside in the process row preceeding mine, then unpack my piece,
398 * sort the buffer and send those Bnpre rows to the previous process row.
399 */
400  add = TYPE->Fmmadd; shft = TYPE->Frshft;
401  mydist = MModSub( Bcurrow, myrow, nprow );
402  srcdist = MModSub( Bcurrow, Brow, nprow );
403  stridenb = ( nprow - mydist - 1 ) * Bmb;
404 
405  if( mydist < srcdist )
406  {
407  tmp = ( Bimb1 + ( srcdist - mydist - 1 ) * Bmb );
408  Bbufptr += tmp * size;
409  nlen -= tmp;
410  kb = Bmb;
411  }
412  else if( mydist == srcdist )
413  {
414  kb = Bimb1;
415  }
416  else
417  {
418  Bbufptr += stridenb * size;
419  nlen -= stridenb;
420  kb = Bmb;
421  }
422 
423  do
424  {
425  kb = MIN( kb, nlen );
426  add( &kb, &Bnq, one, Bbufptr, &Bbufld, ALPHA, Bptr, &Bld );
427  nlen -= kb;
428  offset = -kb;
429  shft( &nlen, &Bnq, &offset, Bbufptr, &Bbufld );
430  Bptr += kb*size;
431  Bbufptr += stridenb*size;
432  nlen -= stridenb;
433  kb = Bmb;
434  } while( nlen > 0 );
435 /*
436 * send buffer of length Bnpre * Bnq to the previous process row.
437 */
438  TYPE->Cgesd2d( ctxt, Bnpre, Bnq, Bbuf, Bbufld, MModSub1( myrow,
439  nprow ), mycol );
440  }
441  else
442  {
443 /*
444 * Otherwise, I must be the last process involved in the operation, so no
445 * unpacking is necessary.
446 */
447  TYPE->Fmmadd( &Bmp, &Bnq, one, Bbufptr, &Bbufld, ALPHA, Bptr,
448  &Bld );
449  }
450 /*
451 * If I don't own the row IB+M-1, then release the dynamically allocated buffer.
452 */
453  if( Bnnxt > 0 ) free( Bbuf );
454  }
455  if( WAfr ) free( WA );
456  }
457  }
458  else
459  {
460 /*
461 * Compute descriptor Bd0 for sub( B ).
462 */
463  if( Afwd ) { Bcurcol = Bcol; }
464  else { Bcurcol = PB_Cindxg2p( Bn-1, Binb1, Bnb, Bcol, Bcol, npcol ); }
465  PB_Cdescset( Bd0, Bm, Bn, Bimb1, Bn, Bmb, Bnb, Brow, Bcurcol, ctxt, Bld );
466 /*
467 * Align sub( A ) with sub( B )
468 */
469  PB_CInV( TYPE, NOCONJG, BROC, Bm, Bn, Bd0, Bn, A, IA, JA, DESCA, AROC,
470  &WA, WAd, &WAfr );
471 /*
472 * Disaggregate WA = sub( A )
473 */
474  if( ( Bcol == -1 ) || ( npcol == 1 ) )
475  {
476 /*
477 * sub( B ) is replicated
478 */
479  if( Bmp > 0 )
480  TYPE->Fmmadd( &Bmp, &Bn, one, WA, &WAd[LLD_], ALPHA, Bptr, &Bld );
481  if( WAfr ) free( WA );
482  return;
483  }
484 
485  if( !( PB_Cspan( Bn, 0, Binb1, Bnb, Bcol, npcol ) ) )
486  {
487 /*
488 * sub( B ) spans only one process column
489 */
490  if( ( mycol == Bcol ) && ( Bmp > 0 ) )
491  TYPE->Fmmadd( &Bmp, &Bn, one, WA, &WAd[LLD_], ALPHA, Bptr, &Bld );
492  if( WAfr ) free( WA );
493  return;
494  }
495 /*
496 * sub( B ) spans more than one process column
497 */
498  if( Afwd )
499  {
500 /*
501 * sub( B ) is not replicated and spans more than one process column. Forward
502 * column dissagregation starts in the process column where the global column
503 * JB resides.
504 */
505  if( ( Bmp > 0 ) && ( Bnq > 0 ) )
506  {
507 /*
508 * Compute how may columns are before and after me (Bnpre and Bnnxt).
509 */
510  Bnpre = PB_Cnpreroc( Bn, 0, Binb1, Bnb, mycol, Bcol, npcol );
511  Bnnxt = PB_Cnnxtroc( Bn, 0, Binb1, Bnb, mycol, Bcol, npcol );
512  nlen = Bnq + Bnnxt;
513 
514  if( Bnpre > 0 )
515  {
516 /*
517 * If I don't own the column JB, then allocate and receive a buffer of length
518 * Bmp * ( Bnq + Bnnxt ) from the previous process column.
519 */
520  Bbufptr = Bbuf = PB_Cmalloc( Bmp * nlen * size );
521  Bbufld = Bmp;
522  TYPE->Cgerv2d( ctxt, Bmp, nlen, Bbuf, Bbufld, myrow,
523  MModSub1( mycol, npcol ) );
524  kb = Bnb;
525  }
526  else
527  {
528 /*
529 * Otherwise, reuse WA.
530 */
531  Bbufptr = Bbuf = WA;
532  Bbufld = WAd[LLD_];
533  kb = Binb1;
534  }
535 /*
536 * Unpack the received data
537 */
538  if( Bnnxt > 0 )
539  {
540 /*
541 * If some columns reside in the process column following mine, then unpack my
542 * piece, sort the buffer and send those Bnnxt columns to the next process
543 * column.
544 */
545  add = TYPE->Fmmadd; shft = TYPE->Fcshft;
546  mydistnb = ( npcol - MModSub( mycol, Bcol, npcol ) - 1 );
547  stride = ( mydistnb *= Bnb ) * Bbufld * size;
548 
549  do
550  {
551  kb = MIN( kb, nlen );
552  add( &Bmp, &kb, one, Bbufptr, &Bbufld, ALPHA, Bptr, &Bld );
553  nlen -= kb;
554  offset = -kb;
555  shft( &Bmp, &nlen, &offset, Bbufptr, &Bbufld );
556  Bptr += kb*Bld*size;
557  Bbufptr += stride;
558  nlen -= mydistnb;
559  kb = Bnb;
560  } while( nlen > 0 );
561 /*
562 * send buffer of length Bmp * Bnnxt to the next process column.
563 */
564  TYPE->Cgesd2d( ctxt, Bmp, Bnnxt, Bbuf, Bbufld, myrow,
565  MModAdd1( mycol, npcol ) );
566  }
567  else
568  {
569 /*
570 * Otherwise, I must be the last process involved in the operation, so no
571 * unpacking is necessary.
572 */
573  TYPE->Fmmadd( &Bmp, &Bnq, one, Bbufptr, &Bbufld, ALPHA, Bptr,
574  &Bld );
575  }
576 /*
577 * If I don't own the column JB, then release the dynamically allocated buffer.
578 */
579  if( Bnpre > 0 ) free( Bbuf );
580  }
581  if( WAfr ) free( WA );
582  }
583  else
584  {
585  if( ( Bmp > 0 ) && ( Bnq > 0 ) )
586  {
587 /*
588 * Compute how may rows are before and after me (Bnpre, Bnnxt).
589 */
590  Bnnxt = PB_Cnnxtroc( Bn, 0, Binb1, Bnb, mycol, Bcol, npcol );
591  BnnxtL = PB_Cnnxtroc( Bn, 0, Binb1, Bnb, Bcurcol, Bcol, npcol );
592  Bnnxt = MModSub( Bnnxt, BnnxtL, Bn );
593  Bnpre = ( nlen = Bn - Bnnxt ) - Bnq;
594 
595  if( Bnnxt > 0 )
596  {
597 /*
598 * If I don't own the column JB+N-1, then allocate and receive a buffer of
599 * length Bmp * ( Bn - Bnnxt ) from the next process column.
600 */
601  Bbufptr = Bbuf = PB_Cmalloc( Bmp * nlen * size );
602  Bbufld = Bmp;
603  TYPE->Cgerv2d( ctxt, Bmp, nlen, Bbuf, Bbufld, myrow,
604  MModAdd1( mycol, npcol ) );
605  }
606  else
607  {
608 /*
609 * Otherwise, reuse WA.
610 */
611  Bbufptr = Bbuf = WA;
612  Bbufld = WAd[LLD_];
613  }
614 /*
615 * Unpack the received data
616 */
617  if( Bnpre > 0 )
618  {
619 /*
620 * If some columns reside in the process column preceeding mine, then unpack my
621 * piece, sort the buffer and send those Bnpre columns to the previous process
622 * column.
623 */
624  add = TYPE->Fmmadd; shft = TYPE->Fcshft;
625  mydist = MModSub( Bcurcol, mycol, npcol );
626  srcdist = MModSub( Bcurcol, Bcol, npcol );
627  stridenb = ( npcol - mydist - 1 ) * Bnb;
628 
629  if( mydist < srcdist )
630  {
631  tmp = ( Binb1 + ( srcdist - mydist - 1 ) * Bnb );
632  Bbufptr += tmp * Bbufld * size;
633  nlen -= tmp;
634  kb = Bnb;
635  }
636  else if( mydist == srcdist )
637  {
638  kb = Binb1;
639  }
640  else
641  {
642  Bbufptr += stridenb * Bbufld * size;
643  nlen -= stridenb;
644  kb = Bnb;
645  }
646 
647  do
648  {
649  kb = MIN( kb, nlen );
650  add( &Bmp, &kb, one, Bbufptr, &Bbufld, ALPHA, Bptr, &Bld );
651  nlen -= kb;
652  offset = -kb;
653  shft( &Bmp, &nlen, &offset, Bbufptr, &Bbufld );
654  Bptr += kb * Bld * size;
655  Bbufptr += stridenb * Bbufld * size;
656  nlen -= stridenb;
657  kb = Bnb;
658  } while( nlen > 0 );
659 /*
660 * send buffer of length Bmp * Bnpre to the previous process column.
661 */
662  TYPE->Cgesd2d( ctxt, Bmp, Bnpre, Bbuf, Bbufld, myrow,
663  MModSub1( mycol, npcol ) );
664  }
665  else
666  {
667 /*
668 * Otherwise, I must be the last process involved in the operation, so no
669 * unpacking is necessary.
670 */
671  TYPE->Fmmadd( &Bmp, &Bnq, one, Bbufptr, &Bbufld, ALPHA, Bptr,
672  &Bld );
673  }
674 /*
675 * If I don't own the column JB+N-1, then release the dynamically allocated
676 * buffer.
677 */
678  if( Bnnxt > 0 ) free( Bbuf );
679  }
680  if( WAfr ) free( WA );
681  }
682  }
683 /*
684 * End of PB_CScatterV
685 */
686 }
TYPE
#define TYPE
Definition: clamov.c:7
MB_
#define MB_
Definition: PBtools.h:43
NB_
#define NB_
Definition: PBtools.h:44
PB_Cnpreroc
int PB_Cnpreroc()
NOCONJG
#define NOCONJG
Definition: PBblas.h:45
PB_Cfirstnb
int PB_Cfirstnb()
DLEN_
#define DLEN_
Definition: PBtools.h:48
LLD_
#define LLD_
Definition: PBtools.h:47
PB_CScatterV
void PB_CScatterV(PBTYP_T *TYPE, char *DIRECA, int M, int N, char *A, int IA, int JA, int *DESCA, char *AROC, char *ALPHA, char *B, int IB, int JB, int *DESCB, char *BROC)
Definition: PB_CScatterV.c:25
PB_Cnnxtroc
int PB_Cnnxtroc()
CROW
#define CROW
Definition: PBblacs.h:21
IMB_
#define IMB_
Definition: PBtools.h:41
MModSub
#define MModSub(I1, I2, d)
Definition: PBtools.h:102
PB_Cdescset
void PB_Cdescset()
MModAdd1
#define MModAdd1(I, d)
Definition: PBtools.h:100
MMADD_T
F_VOID_FCT(* MMADD_T)()
Definition: pblas.h:284
PB_Cindxg2p
int PB_Cindxg2p()
PB_Cinfog2l
void PB_Cinfog2l()
PB_Cnumroc
int PB_Cnumroc()
PB_Cmalloc
char * PB_Cmalloc()
CFORWARD
#define CFORWARD
Definition: PBblas.h:38
PB_CInV
void PB_CInV()
MIN
#define MIN(a_, b_)
Definition: PBtools.h:76
INB_
#define INB_
Definition: PBtools.h:42
PB_Cspan
int PB_Cspan()
MModSub1
#define MModSub1(I, d)
Definition: PBtools.h:105
Cblacs_gridinfo
void Cblacs_gridinfo()
PBTYP_T
Definition: pblas.h:325
Mupcase
#define Mupcase(C)
Definition: PBtools.h:83
Mptr
#define Mptr(a_, i_, j_, lda_, siz_)
Definition: PBtools.h:132
MMSHFT_T
F_VOID_FCT(* MMSHFT_T)()
Definition: pblas.h:285
CTXT_
#define CTXT_
Definition: PBtools.h:38