SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
PB_CInV2.c
Go to the documentation of this file.
1/* ---------------------------------------------------------------------
2*
3* -- PBLAS auxiliary routine (version 2.0) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* April 1, 1998
7*
8* ---------------------------------------------------------------------
9*/
10/*
11* Include files
12*/
13#include "../pblas.h"
14#include "../PBpblas.h"
15#include "../PBtools.h"
16#include "../PBblacs.h"
17#include "../PBblas.h"
18
19#ifdef __STDC__
20void PB_CInV2( PBTYP_T * TYPE, char * CONJUG, char * ROWCOL, Int M,
21 Int N, Int * DESCA, Int K, char * X, Int IX, Int JX,
22 Int * DESCX, char * XROC, char * XAPTR, Int IJXA,
23 Int * DXA )
24#else
25void PB_CInV2( TYPE, CONJUG, ROWCOL, M, N, DESCA, K, X, IX, JX, DESCX,
26 XROC, XAPTR, IJXA, DXA )
27/*
28* .. Scalar Arguments ..
29*/
30 char * CONJUG, * ROWCOL, * XROC;
31 Int IJXA, IX, JX, K, M, N;
32 PBTYP_T * TYPE;
33/*
34* .. Array Arguments ..
35*/
36 Int * DESCA, * DESCX, * DXA;
37 char * X, * XAPTR;
38#endif
39{
40/*
41* Purpose
42* =======
43*
44* PB_CInV2 adds data to an array that contains a one-dimensional input
45* only subvector which is replicated over the rows or columns of a sub-
46* matrix described by DESCA. A subvector is specified on input to this
47* routine that is added to the replicated buffer. This routine is spe-
48* cifically designed for LCM hybrid variants.
49*
50* Notes
51* =====
52*
53* A description vector is associated with each 2D block-cyclicly dis-
54* tributed matrix. This vector stores the information required to
55* establish the mapping between a matrix entry and its corresponding
56* process and memory location.
57*
58* In the following comments, the character _ should be read as
59* "of the distributed matrix". Let A be a generic term for any 2D
60* block cyclicly distributed matrix. Its description vector is DESC_A:
61*
62* NOTATION STORED IN EXPLANATION
63* ---------------- --------------- ------------------------------------
64* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
65* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
66* the NPROW x NPCOL BLACS process grid
67* A is distributed over. The context
68* itself is global, but the handle
69* (the integer value) may vary.
70* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
71* ted matrix A, M_A >= 0.
72* N_A (global) DESCA[ N_ ] The number of columns in the distri-
73* buted matrix A, N_A >= 0.
74* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
75* block of the matrix A, IMB_A > 0.
76* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
77* left block of the matrix A,
78* INB_A > 0.
79* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
80* bute the last M_A-IMB_A rows of A,
81* MB_A > 0.
82* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
83* bute the last N_A-INB_A columns of
84* A, NB_A > 0.
85* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
86* row of the matrix A is distributed,
87* NPROW > RSRC_A >= 0.
88* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
89* first column of A is distributed.
90* NPCOL > CSRC_A >= 0.
91* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
92* array storing the local blocks of
93* the distributed matrix A,
94* IF( Lc( 1, N_A ) > 0 )
95* LLD_A >= MAX( 1, Lr( 1, M_A ) )
96* ELSE
97* LLD_A >= 1.
98*
99* Let K be the number of rows of a matrix A starting at the global in-
100* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
101* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
102* receive if these K rows were distributed over NPROW processes. If K
103* is the number of columns of a matrix A starting at the global index
104* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
105* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
106* these K columns were distributed over NPCOL processes.
107*
108* The values of Lr() and Lc() may be determined via a call to the func-
109* tion PB_Cnumroc:
110* Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
111* Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
112*
113* Arguments
114* =========
115*
116* TYPE (local input) pointer to a PBTYP_T structure
117* On entry, TYPE is a pointer to a structure of type PBTYP_T,
118* that contains type information (See pblas.h).
119*
120* CONJUG (global input) pointer to CHAR
121* On entry, CONJUG specifies if this routine should conjugate
122* the subvector as follows:
123* = 'N' or 'n': The initial subvector is copied,
124* = 'Z' or 'z': The conjugate subvector is copied.
125*
126* ROWCOL (global input) pointer to CHAR
127* On entry, ROWCOL specifies if the existing buffer pointed to
128* XAPTR is a row or column subvector replicated over the under-
129* lying submatrix as follows:
130* = 'R' or 'r': XAPTR is a row subvector,
131* = 'C' or 'c': XAPTR is a column subvector.
132*
133* M (global input) INTEGER
134* On entry, M specifies the number of rows of the underlying
135* submatrix described by DESCA. M must be at least zero.
136*
137* N (global input) INTEGER
138* On entry, N specifies the number of columns of the underlying
139* submatrix described by DESCA. N must be at least zero.
140*
141* DESCA (global and local input) INTEGER array
142* On entry, DESCA is an integer array of dimension DLEN_. This
143* is the array descriptor for the matrix A.
144*
145* K (global input) INTEGER
146* On entry, K specifies the length of the non-distributed di-
147* mension of the subvector sub( X ). K must be at least zero.
148*
149* X (local input) pointer to CHAR
150* On entry, X is an array of dimension (LLD_X, Kx), where LLD_X
151* is at least MAX( 1, Lr( K, IX ) ) when XROC is 'R' or 'r'
152* and MAX( 1, Lr( 1, IX+Lx-1 ) ) otherwise, and, Kx is at least
153* Lc( 1, JX+Lx-1 ) when INCX = M_X and Lc( K, JX ) otherwise.
154* Lx is N when ROWCOL = 'R' or 'r' and M otherwise. Before en-
155* try, this array contains the local entries of the matrix X.
156*
157* IX (global input) INTEGER
158* On entry, IX specifies X's global row index, which points to
159* the beginning of the submatrix sub( X ).
160*
161* JX (global input) INTEGER
162* On entry, JX specifies X's global column index, which points
163* to the beginning of the submatrix sub( X ).
164*
165* DESCX (global and local input) INTEGER array
166* On entry, DESCX is an integer array of dimension DLEN_. This
167* is the array descriptor for the matrix X.
168*
169* XROC (global input) pointer to CHAR
170* On entry, XROC specifies the orientation of the subvector
171* sub( X ). When XROC is 'R' or 'r', sub( X ) is a row vector,
172* and a column vector otherwise.
173*
174* XAPTR (local input/local output) pointer to CHAR
175* On entry, XAPTR is an array containing some initial data. On
176* exit, the subvector sub( X ) is copied into this array which
177* is replicated over the rows or columns of the underlying ma-
178* trix as specified by ROWCOL and DESCA.
179*
180* IJXA (global input) INTEGER
181* On entry, IJXA specifies XA global row or column index depen-
182* ding on ROWCOL in the array pointed to by XAPTR, where the
183* subvector sub( X ) should copied.
184*
185* DXA (global and local input) INTEGER array
186* On entry, DXA is a descriptor array of dimension DLEN_ des-
187* cribing the data layout of the data pointed to by XAPTR.
188*
189* -- Written on April 1, 1998 by
190* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
191*
192* ---------------------------------------------------------------------
193*/
194/*
195* .. Local Scalars ..
196*/
197 char * Xptr = NULL, * top;
198 Int AColSpan, ARowSpan, Acol, Aimb, Ainb, AisD, Amb, Amp, Anb,
199 Anq, Arow, XAld, Xcol, Xii, Ximb1, Xinb1, XisD, XisR, XisRow,
200 Xjj, Xld=1, Xmb, Xnb, Xrow, ctxt, mycol, myrow, npcol, nprow,
201 size;
202/* ..
203* .. Executable Statements ..
204*
205*/
206/*
207* Quick return if possible
208*/
209 if( ( M <= 0 ) || ( N <= 0 ) || ( K <= 0 ) ) return;
210/*
211* Retrieve process grid information
212*/
213 Cblacs_gridinfo( ( ctxt = DESCX[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
214/*
215* Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Ycol
216*/
217 Minfog2l( IX, JX, DESCX, nprow, npcol, myrow, mycol, Xii, Xjj, Xrow, Xcol );
218/*
219* Is sub( X ) distributed or not, replicated or not ?
220*/
221 if( ( XisRow = ( Mupcase( XROC[0] ) == CROW ) ) != 0 )
222 {
223 XisD = ( ( Xcol >= 0 ) && ( npcol > 1 ) );
224 XisR = ( ( Xrow == -1 ) || ( nprow == 1 ) );
225 }
226 else
227 {
228 XisD = ( ( Xrow >= 0 ) && ( nprow > 1 ) );
229 XisR = ( ( Xcol == -1 ) || ( npcol == 1 ) );
230 }
231
232 Arow = DESCA[ RSRC_ ]; Acol = DESCA[ CSRC_ ];
233
234 if( Mupcase( ROWCOL[0] ) == CROW )
235 {
236/*
237* Want a row vector. It is possible to reuse sub( X ) iff sub( X ) is already
238* a row vector and the data does not need to be conjugated.
239*/
240 if( XisRow && ( Mupcase( CONJUG[0] ) == CNOCONJG ) )
241 {
242 AisD = ( ( Acol >= 0 ) && ( npcol > 1 ) );
243 Ainb = DESCA[INB_]; Anb = DESCA[NB_]; Xnb = DESCX[NB_];
244 Mfirstnb( Xinb1, N, JX, DESCX[INB_], Xnb );
245/*
246* sub( X ) is aligned with A (reuse condition) iff both operands are not
247* distributed, or both of them are distributed and start in the same process
248* column and either N is smaller than the first blocksize of sub( X ) and A,
249* or their column blocking factors match.
250*/
251 if( ( !AisD && !XisD ) ||
252 ( ( AisD && XisD ) &&
253 ( ( Acol == Xcol ) &&
254 ( ( ( Ainb >= N ) && ( Xinb1 >= N ) ) ||
255 ( ( Ainb == Xinb1 ) && ( Anb == Xnb ) ) ) ) ) )
256 {
257/*
258* sub( X ) is aligned with A. Does A spans multiples process rows ? It does
259* if Arow < 0.
260*/
261 ARowSpan = ( Arow < 0 ) ||
262 Mspan( M, 0, DESCA[IMB_], DESCA[MB_], Arow, nprow );
263
264 Mnumroc( Anq, N, 0, Ainb, Anb, mycol, Acol, npcol );
265
266 if( XisR || ( !ARowSpan && ( Arow == Xrow ) ) )
267 {
268/*
269* If sub( X ) is replicated, or, A spans only one process row and either
270* sub( X ) is replicated or resides in the same process row than A, then
271* sub( X ) is already at the correct place.
272*/
273 if( ( Anq > 0 ) && ( ARowSpan || ( myrow == Arow ) ) )
274 {
275 size = TYPE->size; Xld = DESCX[ LLD_ ]; XAld = DXA[LLD_];
276 TYPE->Fmmadd( &K, &Anq, TYPE->one, Mptr( X, Xii, Xjj, Xld,
277 size ), &Xld, TYPE->zero, Mptr( XAPTR, IJXA,
278 0, XAld, size ), &XAld );
279 }
280 }
281 else if( ARowSpan )
282 {
283/*
284* Otherwise, we know that sub( X ) cannot be replicated, let suppose in
285* addition that A spans all process rows. sub( X ) need simply to be broadcast
286* over A.
287*/
288 if( myrow == Xrow )
289 {
290 if( Anq > 0 )
291 {
292 top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
293 size = TYPE->size; Xld = DESCX[LLD_]; XAld = DXA[LLD_];
294 Xptr = Mptr( XAPTR, IJXA, 0, XAld, size );
295 TYPE->Fmmadd( &K, &Anq, TYPE->one, Mptr( X, Xii, Xjj, Xld,
296 size ), &Xld, TYPE->zero, Xptr, &XAld );
297 TYPE->Cgebs2d( ctxt, COLUMN, top, K, Anq, Xptr, XAld );
298 }
299 }
300 else
301 {
302 if( Anq > 0 )
303 {
304 top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
305 XAld = DXA[LLD_];
306 TYPE->Cgebr2d( ctxt, COLUMN, top, K, Anq, Mptr( XAPTR,
307 IJXA, 0, XAld, TYPE->size ), XAld, Xrow,
308 mycol );
309 }
310 }
311 }
312 else
313 {
314/*
315* Finally, sub( X ) is not replicated and A spans only one process row. There
316* is no need to broadcast, a send/recv is sufficient.
317*/
318 if( myrow == Xrow )
319 {
320 if( Anq > 0 )
321 {
322 Xld = DESCX[LLD_];
323 TYPE->Cgesd2d( ctxt, K, Anq, Mptr( X, Xii, Xjj, Xld,
324 TYPE->size ), Xld, Arow, mycol );
325 }
326 }
327 else if( myrow == Arow )
328 {
329 if( Anq > 0 )
330 {
331 XAld = DXA[LLD_];
332 TYPE->Cgerv2d( ctxt, K, Anq, Mptr( XAPTR, IJXA, 0, XAld,
333 TYPE->size ), XAld, Xrow, mycol );
334 }
335 }
336 }
337 return;
338 }
339 }
340/*
341* sub( X ) cannot be reused, too bad ... redistribute
342*/
343 if( XisRow )
344 {
345 PB_Cpaxpby( TYPE, CONJUG, K, N, TYPE->one, X, IX, JX, DESCX, XROC,
346 TYPE->zero, XAPTR, IJXA, 0, DXA, ROW );
347 }
348 else
349 {
350 PB_Cpaxpby( TYPE, CONJUG, N, K, TYPE->one, X, IX, JX, DESCX, XROC,
351 TYPE->zero, XAPTR, IJXA, 0, DXA, ROW );
352 }
353 }
354 else
355 {
356/*
357* Want a column vector. It is possible to reuse sub( X ) iff sub( X ) is
358* already a column vector and the data does not need to be conjugated
359*/
360 if( !( XisRow ) && ( Mupcase( CONJUG[0] ) == CNOCONJG ) )
361 {
362 AisD = ( ( Arow >= 0 ) && ( nprow > 1 ) );
363 Aimb = DESCA[IMB_]; Amb = DESCA[MB_]; Xmb = DESCX[MB_];
364 Mfirstnb( Ximb1, M, IX, DESCX[IMB_], Xmb );
365/*
366* sub( X ) is aligned with A (reuse condition) iff both operands are not
367* distributed, or both of them are distributed and start in the same process
368* row and either M is smaller than the first blocksize of sub( X ) and A, or
369* their row blocking factors match.
370*/
371 if( ( !AisD && !XisD ) ||
372 ( ( AisD && XisD ) &&
373 ( ( Arow == Xrow ) &&
374 ( ( ( Aimb >= M ) && ( Ximb1 >= M ) ) ||
375 ( ( Aimb == Ximb1 ) && ( Amb == Xmb ) ) ) ) ) )
376 {
377/*
378* sub( X ) is aligned with A. Does A spans multiples process columns ? It
379* does if Acol < 0.
380*/
381 AColSpan = ( Acol < 0 ) ||
382 Mspan( N, 0, DESCA[INB_], DESCA[NB_], Acol, npcol );
383
384 Mnumroc( Amp, M, 0, Aimb, Amb, myrow, Arow, nprow );
385
386 if( XisR || ( !AColSpan && ( Acol == Xcol ) ) )
387 {
388/*
389* If sub( X ) is replicated, or, A spans only one process column and either
390* sub( X ) is replicated or resides in the same process columns than A, then
391* sub( X ) is already at the correct place.
392*/
393 if( ( Amp > 0 ) && ( AColSpan || ( mycol == Acol ) ) )
394 {
395 size = TYPE->size; Xld = DESCX[ LLD_ ]; XAld = DXA[LLD_];
396 TYPE->Fmmadd( &Amp, &K, TYPE->one, Mptr( X, Xii, Xjj, Xld,
397 size ), &Xld, TYPE->zero, Mptr( XAPTR, 0, IJXA,
398 XAld, size ), &XAld );
399 }
400 }
401 else if( AColSpan )
402 {
403/*
404* Otherwise, we know that sub( X ) is not be replicated, let suppose in
405* addition that A spans all process columns. sub( X ) need simply to be
406* broadcast over A.
407*/
408 if( mycol == Xcol )
409 {
410 if( Amp > 0 )
411 {
412 top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET );
413 size = TYPE->size; Xld = DESCX[LLD_]; XAld = DXA[LLD_];
414 Xptr = Mptr( XAPTR, 0, IJXA, XAld, size );
415 TYPE->Fmmadd( &Amp, &K, TYPE->one, Mptr( X, Xii, Xjj, Xld,
416 size ), &Xld, TYPE->zero, Xptr, &XAld );
417 TYPE->Cgebs2d( ctxt, ROW, top, Amp, K, Xptr, XAld );
418 }
419 }
420 else
421 {
422 if( Amp > 0 )
423 {
424 top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET );
425 XAld = DXA[LLD_];
426 TYPE->Cgebr2d( ctxt, ROW, top, Amp, K, Mptr( XAPTR, 0,
427 IJXA, XAld, TYPE->size ), XAld, myrow,
428 Xcol );
429 }
430 }
431 }
432 else
433 {
434/*
435* Finally, sub( X ) is not replicated and A spans only one process column.
436* There is no need to broadcast, a send/recv is sufficient.
437*/
438 if( mycol == Xcol )
439 {
440 if( Amp > 0 )
441 {
442 Xld = DESCX[LLD_];
443 TYPE->Cgesd2d( ctxt, Amp, K, Mptr( X, Xii, Xjj, Xld,
444 TYPE->size ), Xld, myrow, Acol );
445 }
446 }
447 else if( mycol == Acol )
448 {
449 if( Amp > 0 )
450 {
451 XAld = DXA[LLD_];
452 TYPE->Cgerv2d( ctxt, Amp, K, Mptr( XAPTR, 0, IJXA, XAld,
453 TYPE->size ), XAld, myrow, Xcol );
454 }
455 }
456 }
457 return;
458 }
459 }
460/*
461* sub( X ) cannot be reused, too bad ... redistribute
462*/
463 if( XisRow )
464 {
465 PB_Cpaxpby( TYPE, CONJUG, K, M, TYPE->one, X, IX, JX, DESCX, XROC,
466 TYPE->zero, XAPTR, 0, IJXA, DXA, COLUMN );
467 }
468 else
469 {
470 PB_Cpaxpby( TYPE, CONJUG, M, K, TYPE->one, X, IX, JX, DESCX, XROC,
471 TYPE->zero, XAPTR, 0, IJXA, DXA, COLUMN );
472 }
473 }
474/*
475* End of PB_CInV2
476*/
477}
#define Int
Definition Bconfig.h:22
#define TOP_GET
Definition PBblacs.h:50
#define COLUMN
Definition PBblacs.h:45
#define CROW
Definition PBblacs.h:21
#define ROW
Definition PBblacs.h:46
void Cblacs_gridinfo()
#define BCAST
Definition PBblacs.h:48
#define CNOCONJG
Definition PBblas.h:19
#define CTXT_
Definition PBtools.h:38
#define MB_
Definition PBtools.h:43
void PB_CInV2()
#define Mspan(n_, i_, inb_, nb_, srcproc_, nprocs_)
Definition PBtools.h:160
#define Mptr(a_, i_, j_, lda_, siz_)
Definition PBtools.h:132
#define LLD_
Definition PBtools.h:47
#define Mnumroc(np_, n_, i_, inb_, nb_, proc_, srcproc_, nprocs_)
Definition PBtools.h:222
char * PB_Ctop()
#define RSRC_
Definition PBtools.h:45
#define Mfirstnb(inbt_, n_, i_, inb_, nb_)
Definition PBtools.h:139
#define INB_
Definition PBtools.h:42
#define Minfog2l(i_, j_, desc_, nr_, nc_, r_, c_, ii_, jj_, pr_, pc_)
Definition PBtools.h:428
#define CSRC_
Definition PBtools.h:46
#define IMB_
Definition PBtools.h:41
#define Mupcase(C)
Definition PBtools.h:83
#define NB_
Definition PBtools.h:44
void PB_Cpaxpby()
#define TYPE
Definition clamov.c:7