SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
PB_CGatherV.c
Go to the documentation of this file.
1/* ---------------------------------------------------------------------
2*
3* -- PBLAS auxiliary routine (version 2.0) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* April 1, 1998
7*
8* ---------------------------------------------------------------------
9*/
10/*
11* Include files
12*/
13#include "../pblas.h"
14#include "../PBpblas.h"
15#include "../PBtools.h"
16#include "../PBblacs.h"
17#include "../PBblas.h"
18
19#ifdef __STDC__
20void PB_CGatherV( PBTYP_T * TYPE, char * ALLOC, char * DIRECA, Int M, Int N,
21 char * A, Int IA, Int JA, Int * DESCA, char * AROC,
22 char * * B, Int * DESCB, Int * BFREE )
23#else
24void PB_CGatherV( TYPE, ALLOC, DIRECA, M, N, A, IA, JA, DESCA, AROC, B,
25 DESCB, BFREE )
26/*
27* .. Scalar Arguments ..
28*/
29 char * ALLOC, * AROC, * DIRECA;
30 Int * BFREE, IA, JA, M, N;
31 PBTYP_T * TYPE;
32/*
33* .. Array Arguments ..
34*/
35 Int * DESCA, * DESCB;
36 char * A, * * B;
37#endif
38{
39/*
40* Purpose
41* =======
42*
43* PB_CGatherV aggregates a submatrix sub( A ) = A(IA:IA+M-1, JA:JA+N-1)
44* into a one-dimensional multivector B. The submatrix sub( A ) is spe-
45* cified on input to this routine that is reused whenever possible. On
46* return, the one-dimensional multivector is specified by a pointer to
47* some data, a descriptor array describing its layout and a logical va-
48* lue indicating if this local piece of data has been dynamically allo-
49* cated by this function.
50*
51* Notes
52* =====
53*
54* A description vector is associated with each 2D block-cyclicly dis-
55* tributed matrix. This vector stores the information required to
56* establish the mapping between a matrix entry and its corresponding
57* process and memory location.
58*
59* In the following comments, the character _ should be read as
60* "of the distributed matrix". Let A be a generic term for any 2D
61* block cyclicly distributed matrix. Its description vector is DESC_A:
62*
63* NOTATION STORED IN EXPLANATION
64* ---------------- --------------- ------------------------------------
65* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
66* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
67* the NPROW x NPCOL BLACS process grid
68* A is distributed over. The context
69* itself is global, but the handle
70* (the integer value) may vary.
71* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
72* ted matrix A, M_A >= 0.
73* N_A (global) DESCA[ N_ ] The number of columns in the distri-
74* buted matrix A, N_A >= 0.
75* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
76* block of the matrix A, IMB_A > 0.
77* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
78* left block of the matrix A,
79* INB_A > 0.
80* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
81* bute the last M_A-IMB_A rows of A,
82* MB_A > 0.
83* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
84* bute the last N_A-INB_A columns of
85* A, NB_A > 0.
86* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
87* row of the matrix A is distributed,
88* NPROW > RSRC_A >= 0.
89* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
90* first column of A is distributed.
91* NPCOL > CSRC_A >= 0.
92* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
93* array storing the local blocks of
94* the distributed matrix A,
95* IF( Lc( 1, N_A ) > 0 )
96* LLD_A >= MAX( 1, Lr( 1, M_A ) )
97* ELSE
98* LLD_A >= 1.
99*
100* Let K be the number of rows of a matrix A starting at the global in-
101* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
102* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
103* receive if these K rows were distributed over NPROW processes. If K
104* is the number of columns of a matrix A starting at the global index
105* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
106* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
107* these K columns were distributed over NPCOL processes.
108*
109* The values of Lr() and Lc() may be determined via a call to the func-
110* tion PB_Cnumroc:
111* Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
112* Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
113*
114* Arguments
115* =========
116*
117* TYPE (local input) pointer to a PBTYP_T structure
118* On entry, TYPE is a pointer to a structure of type PBTYP_T,
119* that contains type information (See pblas.h).
120*
121* ALLOC (global input) pointer to CHAR
122* On entry, ALLOC specifies if data should be allocated even
123* when unnecessary as follows:
124* ALLOC = 'A' or 'a' data allocation is enforced,
125* ALLOC = 'R' or 'r' data is reused when possible.
126*
127* DIRECA (global input) pointer to CHAR
128* On entry, DIRECA specifies the direction in which the rows
129* or columns of sub( A ) should be aggregated as follows:
130* DIRECA = 'F' or 'f' forward or increasing,
131* DIRECA = 'B' or 'b' backward or decreasing.
132*
133* M (global input) INTEGER
134* On entry, M specifies the number of rows of the submatrix
135* sub( A ). M must be at least zero.
136*
137* N (global input) INTEGER
138* On entry, N specifies the number of columns of the submatrix
139* sub( A ). N must be at least zero.
140*
141* A (local input) pointer to CHAR
142* On entry, A is an array of dimension (LLD_A, Ka), where LLD_A
143* is DESCA[LLD_], i.e. at least MAX( 1, Lr( M, IA ) ), and,
144* Ka is at least Lc( N, JA ). Before entry, this array contains
145* the local entries of the matrix A.
146*
147* IA (global input) INTEGER
148* On entry, IA specifies A's global row index, which points to
149* the beginning of the submatrix sub( A ).
150*
151* JA (global input) INTEGER
152* On entry, JA specifies A's global column index, which points
153* to the beginning of the submatrix sub( A ).
154*
155* DESCA (global and local input) INTEGER array
156* On entry, DESCA is an integer array of dimension DLEN_. This
157* is the array descriptor for the matrix A.
158*
159* AROC (global input) pointer to CHAR
160* On entry, AROC specifies the orientation of the submatrix
161* sub( A ). When AROC is 'R' or 'r', sub( A ) is a row matrix,
162* and a column matrix otherwise.
163*
164* B (local output) pointer to pointer to CHAR
165* On exit, * B is an array containing the aggregated submatrix
166* sub( A ).
167*
168* DESCB (global and local output) INTEGER array
169* On exit, DESCB is a descriptor array of dimension DLEN_ des-
170* cribing the data layout of the data pointed to by * B.
171*
172* BFREE (local output) INTEGER
173* On exit, BFREE specifies if it has been possible to reuse
174* the submatrix sub( A ), i.e., if some dynamic memory was al-
175* located for the data pointed to by *B or not. When BFREE is
176* zero, no dynamic memory was allocated. Otherwise, some dyna-
177* mic memory was allocated by this function that one MUST re-
178* lease as soon as possible.
179*
180* -- Written on April 1, 1998 by
181* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
182*
183* ---------------------------------------------------------------------
184*/
185/*
186* .. Local Scalars ..
187*/
188 char * one, * zero;
189 Int Afwd, AggRow, AiiD, AiiR, Ainb1D, Ainb1R, Ald, AmyprocD,
190 AmyprocR, AnR, AnbD, AnbR, AnnxtL, AnnxtR, AnpD, AnpR, AnpreR,
191 AnprocsR, ArocR, AsrcD, AsrcR, Bld, Bsrc_, ctxt, k, kb, kblks,
192 kn, ktmp, mycol, mydist, mydistnb, myrow, nlen, npcol, nprow,
193 offset, size, srcdist;
194 MMADD_T add;
195 MMSHFT_T shft;
196/*
197* .. Local Arrays ..
198*/
199 char * Aptr = NULL, * Bptr = NULL;
200/* ..
201* .. Executable Statements ..
202*
203*/
204/*
205* Initialize the output parameters to a default value
206*/
207 *BFREE = 0;
208 *B = NULL;
209/*
210* Quick return if possible
211*/
212 if( ( M <= 0 ) || ( N <= 0 ) )
213 {
214 PB_Cdescset( DESCB, M, N, DESCA[IMB_], DESCA[INB_], DESCA[MB_],
215 DESCA[NB_], DESCA[RSRC_], DESCA[CSRC_], DESCA[CTXT_], 1 );
216 return;
217 }
218/*
219* Retrieve process grid information
220*/
221 Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
222
223 if( ( AggRow = ( Mupcase( AROC[0] ) == CROW ) ) != 0 )
224 {
225/*
226* Accumulate rows of sub( A )
227*/
228 AnbR = DESCA[MB_]; AnbD = DESCA[NB_]; Ald = DESCA[LLD_];
229 PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &AiiR, &AiiD,
230 &AsrcR, &AsrcD );
231 Ainb1D = PB_Cfirstnb( N, JA, DESCA[INB_], AnbD );
232 AnpD = PB_Cnumroc( N, 0, Ainb1D, AnbD, mycol, AsrcD, npcol );
233/*
234* If sub( A ) is either replicated or spans only one process row, no data needs
235* to be exchanged by the processes, the operation is purely local.
236*/
237 if( !( PB_Cspan( M, IA, DESCA[IMB_], AnbR, AsrcR, nprow ) ) )
238 {
239 if( Mupcase( ALLOC[0] ) == CREUSE )
240 {
241/*
242* sub( A ) can be reused
243*/
244 if( ( ( myrow == AsrcR ) || ( AsrcR < 0 ) ) && ( AnpD > 0 ) )
245 {
246/*
247* If I own some entries of sub( A ), set *B
248*/
249 Bld = Ald;
250 *B = Mptr( A, AiiR, AiiD, Ald, TYPE->size );
251 }
252 else { Bld = 1; }
253 }
254 else
255 {
256/*
257* sub( A ) cannot be reused, make a copy of it.
258*/
259 if( ( ( myrow == AsrcR ) || ( AsrcR < 0 ) ) && ( AnpD > 0 ) )
260 {
261/*
262* If I own some entries of sub( A ), allocate space for the copy, and copy the
263* data.
264*/
265 Bld = M;
266 if( AnpD > 0 )
267 {
268 size = TYPE->size;
269 *B = PB_Cmalloc( AnpD * M * size );
270 *BFREE = 1;
271 TYPE->Fmmadd( &M, &AnpD, TYPE->one, Mptr( A, AiiR, AiiD, Ald,
272 size ), &Ald, TYPE->zero, *B, &Bld );
273 }
274 }
275 else { Bld = 1; }
276 }
277/*
278* Describe the resulting operand
279*/
280 PB_Cdescset( DESCB, M, N, M, Ainb1D, AnbR, AnbD, AsrcR, AsrcD, ctxt,
281 Bld );
282 return;
283 }
284
285 AnR = M; Bsrc_ = RSRC_;
286 AmyprocR = myrow; AmyprocD = mycol; AnprocsR = nprow;
287 Ainb1R = PB_Cfirstnb( M, IA, DESCA[IMB_], AnbR );
288 AnpR = PB_Cnumroc( M, 0, Ainb1R, AnbR, myrow, AsrcR, nprow );
289 }
290 else
291 {
292/*
293* Accumulate columns of sub( A )
294*/
295 AnbD = DESCA[MB_ ]; AnbR = DESCA[NB_ ]; Ald = DESCA[LLD_];
296 PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &AiiD, &AiiR,
297 &AsrcD, &AsrcR );
298 Ainb1D = PB_Cfirstnb( M, IA, DESCA[IMB_], AnbD );
299 AnpD = PB_Cnumroc( M, 0, Ainb1D, AnbD, myrow, AsrcD, nprow );
300/*
301* If sub( A ) is either replicated or spans only one process column, no data
302* needs to be exchanged by the processes, the operation is purely local.
303*/
304 if( !( PB_Cspan( N, JA, DESCA[INB_], AnbR, AsrcR, npcol ) ) )
305 {
306 if( Mupcase( ALLOC[0] ) == CREUSE )
307 {
308/*
309* sub( A ) can be reused
310*/
311 Bld = Ald;
312 if( ( ( mycol == AsrcR ) || ( AsrcR < 0 ) ) && ( AnpD > 0 ) )
313/*
314* If I own some entries of sub( A ), set *B
315*/
316 *B = Mptr( A, AiiD, AiiR, Ald, TYPE->size );
317 }
318 else
319 {
320/*
321* sub( A ) cannot be reused, make a copy of it.
322*/
323 Bld = MAX( 1, AnpD );
324 if( ( ( mycol == AsrcR ) || ( AsrcR < 0 ) ) && ( AnpD > 0 ) )
325 {
326/*
327* If I own some entries of sub( A ), allocate space for the copy, and copy the
328* data.
329*/
330 if( AnpD > 0 )
331 {
332 size = TYPE->size;
333 *B = PB_Cmalloc( AnpD * N * size );
334 *BFREE = 1;
335 TYPE->Fmmadd( &AnpD, &N, TYPE->one, Mptr( A, AiiD, AiiR, Ald,
336 size ), &Ald, TYPE->zero, *B, &Bld );
337 }
338 }
339 }
340/*
341* Describe the resulting operand
342*/
343 PB_Cdescset( DESCB, M, N, Ainb1D, N, AnbD, AnbR, AsrcD, AsrcR, ctxt,
344 Bld );
345 return;
346 }
347
348 AnR = N; Bsrc_ = CSRC_;
349 AmyprocR = mycol; AmyprocD = myrow; AnprocsR = npcol;
350 Ainb1R = PB_Cfirstnb( N, JA, DESCA[INB_], AnbR );
351 AnpR = PB_Cnumroc( N, 0, Ainb1R, AnbR, mycol, AsrcR, npcol );
352 }
353/*
354* sub( A ) is not replicated and spans more than one process row or column.
355* Forward row (resp. column) accumulation will leave the resulting operand in
356* the process(es) where the global row IA+M-1 (resp. global column JA+N-1)
357* resides.
358*/
359 if( ( Afwd = ( Mupcase( DIRECA[0] ) == CFORWARD ) ) != 0 )
360 {
361 if( ( AnpD > 0 ) && ( AnpR > 0 ) )
362 {
363/*
364* Compute how may rows or columns are before me -> AnpreR
365*/
366 AnpreR = PB_Cnpreroc( AnR, 0, Ainb1R, AnbR, AmyprocR, AsrcR,
367 AnprocsR );
368
369 if( AnpreR == 0 )
370 {
371/*
372* If zero rows or columns are before me, I must be the source, so send my piece
373* to the process after me in the grid.
374*/
375 if( AggRow )
376 {
377 TYPE->Cgesd2d( ctxt, AnpR, AnpD, Mptr( A, AiiR, AiiD, Ald,
378 TYPE->size ), Ald, MModAdd1( AmyprocR, AnprocsR ),
379 AmyprocD );
380 }
381 else
382 {
383 TYPE->Cgesd2d( ctxt, AnpD, AnpR, Mptr( A, AiiD, AiiR,
384 Ald, TYPE->size ), Ald, AmyprocD,
385 MModAdd1( AmyprocR, AnprocsR ) );
386 }
387 }
388 else if( AnpreR > 0 )
389 {
390/*
391* Otherwise, allocate some space for the rows or columns I have and the ones
392* globally preceeding the ones I have, that I am about to receive.
393*/
394 size = TYPE->size; one = TYPE->one; zero = TYPE->zero;
395 add = TYPE->Fmmadd;
396 *B = Bptr = PB_Cmalloc( ( AnpreR + AnpR ) * AnpD * size );
397 nlen = AnpreR;
398 mydistnb = MModSub( AmyprocR, AsrcR, AnprocsR ) * AnbR;
399 kblks = ( ( ( ktmp = AnR - Ainb1R - 1 ) >= 0 ) ?
400 ( ( ktmp / AnbR ) + 1 ) / AnprocsR : 0 );
401 offset = kblks * AnbR;
402 kn = Ainb1R + mydistnb - AnbR;
403 kn = MIN( kn, AnpreR ) +
404 ( MAX( 1, kblks ) - 1 ) * mydistnb;
405 if( AggRow )
406 {
407 shft = TYPE->Frshft;
408 Aptr = Mptr( A, AiiR, AiiD, Ald, size );
409 Bld = AnpreR + AnpR;
410/*
411* Receive the rows globally preceeding the ones I have
412*/
413 TYPE->Cgerv2d( ctxt, AnpreR, AnpD, *B, Bld, MModSub1( AmyprocR,
414 AnprocsR ), AmyprocD );
415/*
416* Sort the received buffer and insert at the correct place the rows of sub( A )
417* I own (from bottom to top).
418*/
419 if( ( ( AnpR - 1 ) / AnbR ) == kblks )
420 {
421 kb = AnpR - offset;
422 add( &kb, &AnpD, one, Mptr( Aptr, offset, 0, Ald, size ),
423 &Ald, zero, Mptr( Bptr, nlen+offset, 0, Bld, size ),
424 &Bld );
425 }
426
427 for( k = kblks; k >= 1; k-- )
428 {
429 kb = nlen - kn;
430 shft( &kb, &AnpD, &offset, Mptr( Bptr, kn, 0, Bld, size ),
431 &Bld );
432 offset -= AnbR;
433 add( &AnbR, &AnpD, one, Mptr( Aptr, offset, 0, Ald, size ),
434 &Ald, zero, Mptr( Bptr, kn+offset, 0, Bld, size ),
435 &Bld );
436 kn -= mydistnb;
437 nlen -= kb;
438 }
439
440 if( AnpreR + AnpR != AnR )
441 {
442/*
443* If I am not the last process, i.e I am not supposed to own all of the AnR
444* rows by the end of the operation, then send the sorted buffer to the next
445* process and release the dynamically allocated buffer.
446*/
447 TYPE->Cgesd2d( ctxt, AnpreR+AnpR, AnpD, *B, Bld,
448 MModAdd1( AmyprocR, AnprocsR ), AmyprocD );
449 if( *B ) free( *B );
450 }
451 }
452 else
453 {
454 shft = TYPE->Fcshft;
455 Aptr = Mptr( A, AiiD, AiiR, Ald, size );
456 Bld = MAX( 1, AnpD );
457/*
458* Receive the columns globally preceeding the ones I have
459*/
460 TYPE->Cgerv2d( ctxt, AnpD, AnpreR, *B, Bld, AmyprocD,
461 MModSub1( AmyprocR, AnprocsR ) );
462/*
463* Sort the received buffer and insert at the correct place the columns of
464* sub( A ) I own (from right to left).
465*/
466 if( ( ( AnpR - 1 ) / AnbR ) == kblks )
467 {
468 kb = AnpR - offset;
469 add( &AnpD, &kb, one, Mptr( Aptr, 0, offset, Ald, size ),
470 &Ald, zero, Mptr( Bptr, 0, nlen+offset, Bld, size ),
471 &Bld );
472 }
473
474 for( k = kblks; k >= 1; k-- )
475 {
476 kb = nlen - kn;
477 shft( &AnpD, &kb, &offset, Mptr( Bptr, 0, kn, Bld, size ),
478 &Bld );
479 offset -= AnbR;
480 add( &AnpD, &AnbR, one, Mptr( Aptr, 0, offset, Ald, size ),
481 &Ald, zero, Mptr( Bptr, 0, kn + offset, Bld, size ),
482 &Bld );
483 kn -= mydistnb;
484 nlen -= kb;
485 }
486
487 if( AnpreR + AnpR != AnR )
488 {
489/*
490* If I am not the last process, i.e I am not supposed to own all of the AnR
491* columns by the end of the operation, then send the sorted buffer to the next
492* process and release the dynamically allocated buffer.
493*/
494 TYPE->Cgesd2d( ctxt, AnpD, AnpreR+AnpR, *B, Bld, AmyprocD,
495 MModAdd1( AmyprocR, AnprocsR ) );
496 if( *B ) free( *B );
497 }
498 }
499 }
500 }
501 }
502 else
503 {
504/*
505* Backward accumulation, compute the process row or column coordinate ArocR,
506* that is going to have the resulting operand.
507*/
508 ArocR = PB_Cindxg2p( AnR-1, Ainb1R, AnbR, AsrcR, AsrcR, AnprocsR );
509
510 if( ( AnpD > 0 ) && ( AnpR > 0 ) )
511 {
512/*
513* Compute how may rows or columns are after me -> AnnxtR
514*/
515 AnnxtR = PB_Cnnxtroc( AnR, 0, Ainb1R, AnbR, AmyprocR, AsrcR,
516 AnprocsR );
517 AnnxtL = PB_Cnnxtroc( AnR, 0, Ainb1R, AnbR, ArocR, AsrcR,
518 AnprocsR );
519
520 if( ( AnnxtR = MModSub( AnnxtR, AnnxtL, AnR ) ) == 0 )
521 {
522/*
523* If zero rows or columns are after me, I must be the source, so send my piece
524* to the process before me in the grid.
525*/
526 if( AggRow )
527 {
528 TYPE->Cgesd2d( ctxt, AnpR, AnpD, Mptr( A, AiiR, AiiD, Ald,
529 TYPE->size ), Ald, MModSub1( AmyprocR, AnprocsR ),
530 AmyprocD );
531 }
532 else
533 {
534 TYPE->Cgesd2d( ctxt, AnpD, AnpR, Mptr( A, AiiD, AiiR, Ald,
535 TYPE->size ), Ald, AmyprocD, MModSub1( AmyprocR,
536 AnprocsR ) );
537 }
538 }
539 else if( AnnxtR > 0 )
540 {
541/*
542* Otherwise, allocate some space for the rows or columns I have and the ones
543* globally following the ones I have, that I am about to receive.
544*/
545 size = TYPE->size; one = TYPE->one; zero = TYPE->zero;
546 add = TYPE->Fmmadd;
547 *B = Bptr = PB_Cmalloc( ( AnnxtR + AnpR ) * AnpD * size );
548 kblks = ( ( ( ktmp = AnR - Ainb1R - 1 ) >= 0 ) ?
549 ( ( ktmp / AnbR ) + 1 ) / AnprocsR : 0 );
550 mydist = MModSub( ArocR, AmyprocR, AnprocsR );
551 mydistnb = mydist * AnbR;
552 srcdist = MModSub( ArocR, AsrcR, AnprocsR );
553
554 if( AggRow )
555 {
556 shft = TYPE->Frshft;
557 Aptr = Mptr( A, AiiR, AiiD, Ald, size );
558 Bld = AnnxtR + AnpR;
559/*
560* Receive the rows globally following the ones I have
561*/
562 TYPE->Cgerv2d( ctxt, AnnxtR, AnpD, Mptr( *B, AnpR, 0, Bld,
563 size ), Bld, MModAdd1( AmyprocR, AnprocsR ),
564 AmyprocD );
565/*
566* Sort the received buffer and insert at the correct place the rows of sub( A )
567* I own (from top to bottom).
568*/
569 if( mydist > srcdist )
570 {
571 offset = -AnpR;
572 kb = Ainb1R + srcdist*AnbR;
573 }
574 else if( mydist == srcdist )
575 {
576 add( &Ainb1R, &AnpD, one, Aptr, &Ald, zero, Bptr, &Bld );
577 Aptr = Mptr( Aptr, Ainb1R, 0, Ald, size );
578 Bptr = Mptr( Bptr, Ainb1R, 0, Ald, size );
579 offset = Ainb1R - AnpR;
580 kb = mydistnb;
581 }
582 else
583 {
584 add( &AnbR, &AnpD, one, Aptr, &Ald, zero, Bptr, &Bld );
585 Aptr = Mptr( Aptr, AnbR, 0, Ald, size );
586 Bptr = Mptr( Bptr, AnbR, 0, Ald, size );
587 offset = AnbR - AnpR;
588 kb = mydistnb;
589 }
590
591 for( k = kblks; k >= 1; k-- )
592 {
593 shft( &kb, &AnpD, &offset, Bptr, &Bld );
594 Bptr = Mptr( Bptr, kb, 0, Bld, size );
595 add( &AnbR, &AnpD, one, Aptr, &Ald, zero, Bptr, &Bld );
596 Aptr = Mptr( Aptr, AnbR, 0, Ald, size );
597 Bptr = Mptr( Bptr, AnbR, 0, Ald, size );
598 offset += AnbR;
599 kb = mydistnb;
600 }
601
602 if( AnnxtR + AnpR != AnR )
603 {
604/*
605* If I am not the last process, i.e I am not supposed to own all of the AnR
606* rows by the end of the operation, then send the sorted buffer to the previous
607* process and release the dynamically allocated buffer.
608*/
609 TYPE->Cgesd2d( ctxt, AnnxtR+AnpR, AnpD, *B, Bld,
610 MModSub1( AmyprocR, AnprocsR ), AmyprocD );
611 if( *B ) free( *B );
612 }
613 }
614 else
615 {
616 shft = TYPE->Fcshft;
617 Aptr = Mptr( A, AiiD, AiiR, Ald, size );
618 Bld = MAX( 1, AnpD );
619/*
620* Receive the columns globally following the ones I have
621*/
622 TYPE->Cgerv2d( ctxt, AnpD, AnnxtR, Mptr( *B, 0, AnpR, Bld,
623 size ), Bld, AmyprocD, MModAdd1( AmyprocR,
624 AnprocsR ) );
625/*
626* Sort the received buffer and insert at the correct place the columns of
627* sub( A ) I own (from left to right).
628*/
629 if( mydist > srcdist )
630 {
631 offset = -AnpR;
632 kb = Ainb1R + srcdist*AnbR;
633 }
634 else if( mydist == srcdist )
635 {
636 add( &AnpD, &Ainb1R, one, Aptr, &Ald, zero, Bptr, &Bld );
637 Aptr = Mptr( Aptr, 0, Ainb1R, Ald, size );
638 Bptr = Mptr( Bptr, 0, Ainb1R, Bld, size );
639 offset = Ainb1R - AnpR;
640 kb = mydistnb;
641 }
642 else
643 {
644 add( &AnpD, &AnbR, one, Aptr, &Ald, zero, Bptr, &Bld );
645 Aptr = Mptr( Aptr, 0, AnbR, Ald, size );
646 Bptr = Mptr( Bptr, 0, AnbR, Bld, size );
647 offset = AnbR - AnpR;
648 kb = mydistnb;
649 }
650
651 for( k = kblks; k >= 1; k-- )
652 {
653 shft( &AnpD, &kb, &offset, Bptr, &Bld );
654 Bptr = Mptr( Bptr, 0, kb, Bld, size );
655 add( &AnpD, &AnbR, one, Aptr, &Ald, zero, Bptr, &Bld );
656 Aptr = Mptr( Aptr, 0, AnbR, Ald, size );
657 Bptr = Mptr( Bptr, 0, AnbR, Bld, size );
658 offset += AnbR;
659 kb = mydistnb;
660 }
661
662 if( AnnxtR + AnpR != AnR )
663 {
664/*
665* If I am not the last process, i.e I am not supposed to own all of the AnR
666* columns by the end of the operation, then send the sorted buffer to the
667* previous process and release the dynamically allocated buffer.
668*/
669 TYPE->Cgesd2d( ctxt, AnpD, AnnxtR+AnpR, *B, Bld, AmyprocD,
670 MModSub1( AmyprocR, AnprocsR ) );
671 if( *B ) free( *B );
672 }
673 }
674 }
675 }
676 }
677/*
678* Describe the resulting operand
679*/
680 if( AggRow )
681 {
682 PB_Cdescset( DESCB, M, N, M, Ainb1D, AnbR, AnbD, AsrcR, AsrcD, ctxt, M );
683 }
684 else
685 {
686 PB_Cdescset( DESCB, M, N, Ainb1D, N, AnbD, AnbR, AsrcD, AsrcR, ctxt,
687 MAX( 1, AnpD ) );
688 }
689/*
690* Compute globally in which process row or column the resulting operand is
691* residing and set *BFREE accordingly.
692*/
693 if( Afwd )
694 {
695 if( AnR + AnbR > Ainb1R + ( AnprocsR - 1 ) * AnbR )
696 {
697/*
698* If sub( A ) is spanning all process rows or columns of the grid, the result
699* must be in the process row or column preceeding the one owning IA or JA,
700* don't you think ?
701*/
702 DESCB[Bsrc_] = MModSub1( AsrcR, AnprocsR );
703 }
704 else
705 {
706/*
707* Otherwise, the result is in the process row or column where the row IA+M-1
708* or column JA+N-1 of sub( A ) resides.
709*/
710 DESCB[Bsrc_] = PB_Cindxg2p( AnR-1, Ainb1R, AnbR, AsrcR, AsrcR,
711 AnprocsR );
712 }
713 if( ( AnpD > 0 ) && ( AnpR > 0 ) && ( AmyprocR == DESCB[Bsrc_] ) )
714 *BFREE = 1;
715 }
716 else
717 {
718 if( AnR + AnbR > Ainb1R + ( AnprocsR - 1 ) * AnbR )
719 {
720/*
721* If sub( A ) is spanning all process rows or columns of the grid, the result
722* must be in the process row or column following the one owning IA+M-1 or
723* JA+N-1, don't you think ?
724*/
725 DESCB[Bsrc_] = MModAdd1( ArocR, AnprocsR );
726 }
727 else
728 {
729/*
730* Otherwise, the result is in the process row or column where the row IA or
731* column JA of sub( A ) resides.
732*/
733 DESCB[Bsrc_] = AsrcR;
734 }
735 if( ( AnpD > 0 ) && ( AnpR > 0 ) && ( AmyprocR == DESCB[Bsrc_] ) )
736 *BFREE = 1;
737 }
738/*
739* End of PB_CGatherV
740*/
741}
#define Int
Definition Bconfig.h:22
F_VOID_FCT(* MMSHFT_T)()
Definition pblas.h:289
F_VOID_FCT(* MMADD_T)()
Definition pblas.h:288
#define CROW
Definition PBblacs.h:21
void Cblacs_gridinfo()
#define CREUSE
Definition PBblas.h:41
#define CFORWARD
Definition PBblas.h:38
#define CTXT_
Definition PBtools.h:38
Int PB_Cfirstnb()
#define MAX(a_, b_)
Definition PBtools.h:77
#define MB_
Definition PBtools.h:43
char * PB_Cmalloc()
void PB_Cinfog2l()
#define MModSub(I1, I2, d)
Definition PBtools.h:102
#define MIN(a_, b_)
Definition PBtools.h:76
#define Mptr(a_, i_, j_, lda_, siz_)
Definition PBtools.h:132
Int PB_Cnnxtroc()
#define LLD_
Definition PBtools.h:47
void PB_CGatherV()
Int PB_Cnumroc()
#define RSRC_
Definition PBtools.h:45
void PB_Cdescset()
#define MModAdd1(I, d)
Definition PBtools.h:100
#define INB_
Definition PBtools.h:42
#define MModSub1(I, d)
Definition PBtools.h:105
#define CSRC_
Definition PBtools.h:46
#define IMB_
Definition PBtools.h:41
Int PB_Cindxg2p()
#define Mupcase(C)
Definition PBtools.h:83
#define NB_
Definition PBtools.h:44
Int PB_Cnpreroc()
Int PB_Cspan()
#define TYPE
Definition clamov.c:7