SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ pcgemv_()

void pcgemv_ ( F_CHAR_T  TRANS,
Int M,
Int N,
float *  ALPHA,
float *  A,
Int IA,
Int JA,
Int DESCA,
float *  X,
Int IX,
Int JX,
Int DESCX,
Int INCX,
float *  BETA,
float *  Y,
Int IY,
Int JY,
Int DESCY,
Int INCY 
)

Definition at line 26 of file pcgemv_.c.

41{
42/*
43* Purpose
44* =======
45*
46* PCGEMV performs one of the matrix-vector operations
47*
48* sub( Y ) := alpha*sub( A ) *sub( X ) + beta*sub( Y ), or
49* sub( Y ) := alpha*sub( A )'*sub( X ) + beta*sub( Y ), or
50* sub( Y ) := alpha*conjg( sub( A )' )*sub( X ) + beta*sub( Y ),
51*
52* where
53*
54* sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1).
55*
56* When TRANS = 'N',
57*
58* sub( X ) denotes X(IX:IX,JX:JX+N-1), if INCX = M_X,
59* X(IX:IX+N-1,JX:JX), if INCX = 1 and INCX <> M_X,
60* and,
61*
62* sub( Y ) denotes Y(IY:IY,JY:JY+M-1), if INCY = M_Y,
63* Y(IY:IY+M-1,JY:JY), if INCY = 1 and INCY <> M_Y,
64* and, otherwise
65*
66* sub( X ) denotes X(IX:IX,JX:JX+M-1), if INCX = M_X,
67* X(IX:IX+M-1,JX:JX), if INCX = 1 and INCX <> M_X,
68* and,
69*
70* sub( Y ) denotes Y(IY:IY,JY:JY+N-1), if INCY = M_Y,
71* Y(IY:IY+N-1,JY:JY), if INCY = 1 and INCY <> M_Y.
72*
73* Alpha and beta are scalars, and sub( X ) and sub( Y ) are subvectors
74* and sub( A ) is an m by n submatrix.
75*
76* Notes
77* =====
78*
79* A description vector is associated with each 2D block-cyclicly dis-
80* tributed matrix. This vector stores the information required to
81* establish the mapping between a matrix entry and its corresponding
82* process and memory location.
83*
84* In the following comments, the character _ should be read as
85* "of the distributed matrix". Let A be a generic term for any 2D
86* block cyclicly distributed matrix. Its description vector is DESC_A:
87*
88* NOTATION STORED IN EXPLANATION
89* ---------------- --------------- ------------------------------------
90* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
91* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
92* the NPROW x NPCOL BLACS process grid
93* A is distributed over. The context
94* itself is global, but the handle
95* (the integer value) may vary.
96* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
97* ted matrix A, M_A >= 0.
98* N_A (global) DESCA[ N_ ] The number of columns in the distri-
99* buted matrix A, N_A >= 0.
100* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
101* block of the matrix A, IMB_A > 0.
102* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
103* left block of the matrix A,
104* INB_A > 0.
105* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
106* bute the last M_A-IMB_A rows of A,
107* MB_A > 0.
108* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
109* bute the last N_A-INB_A columns of
110* A, NB_A > 0.
111* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
112* row of the matrix A is distributed,
113* NPROW > RSRC_A >= 0.
114* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
115* first column of A is distributed.
116* NPCOL > CSRC_A >= 0.
117* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
118* array storing the local blocks of
119* the distributed matrix A,
120* IF( Lc( 1, N_A ) > 0 )
121* LLD_A >= MAX( 1, Lr( 1, M_A ) )
122* ELSE
123* LLD_A >= 1.
124*
125* Let K be the number of rows of a matrix A starting at the global in-
126* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
127* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
128* receive if these K rows were distributed over NPROW processes. If K
129* is the number of columns of a matrix A starting at the global index
130* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
131* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
132* these K columns were distributed over NPCOL processes.
133*
134* The values of Lr() and Lc() may be determined via a call to the func-
135* tion PB_Cnumroc:
136* Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
137* Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
138*
139* Arguments
140* =========
141*
142* TRANS (global input) CHARACTER*1
143* On entry, TRANS specifies the operation to be performed as
144* follows:
145*
146* TRANS = 'N' or 'n'
147* sub( Y ) := alpha*sub( A ) * sub( X ) + beta*sub( Y ),
148*
149* TRANS = 'T' or 't',
150* sub( Y ) := alpha*sub( A )' * sub( X ) + beta*sub( Y ),
151*
152* TRANS = 'C' or 'c',
153* sub( Y ) := alpha*conjg( sub( A )' )*sub( X ) +
154* beta*sub( Y ).
155*
156* M (global input) INTEGER
157* On entry, M specifies the number of rows of the submatrix
158* sub( A ). M must be at least zero.
159*
160* N (global input) INTEGER
161* On entry, N specifies the number of columns of the submatrix
162* sub( A ). N must be at least zero.
163*
164* ALPHA (global input) COMPLEX
165* On entry, ALPHA specifies the scalar alpha. When ALPHA is
166* supplied as zero then the local entries of the arrays A
167* and X corresponding to the entries of the submatrix sub( A )
168* and the subvector sub( X ) need not be set on input.
169*
170* A (local input) COMPLEX array
171* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
172* at least Lc( 1, JA+N-1 ). Before entry, this array contains
173* the local entries of the matrix A.
174*
175* IA (global input) INTEGER
176* On entry, IA specifies A's global row index, which points to
177* the beginning of the submatrix sub( A ).
178*
179* JA (global input) INTEGER
180* On entry, JA specifies A's global column index, which points
181* to the beginning of the submatrix sub( A ).
182*
183* DESCA (global and local input) INTEGER array
184* On entry, DESCA is an integer array of dimension DLEN_. This
185* is the array descriptor for the matrix A.
186*
187* X (local input) COMPLEX array
188* On entry, X is an array of dimension (LLD_X, Kx), where LLD_X
189* is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and
190* MAX( 1, Lr( 1, IX+Lx-1 ) ) otherwise, and, Kx is at least
191* Lc( 1, JX+Lx-1 ) when INCX = M_X and Lc( 1, JX ) otherwise.
192* Lx is N when TRANS = 'N' or 'n' and M otherwise. Before en-
193* try, this array contains the local entries of the matrix X.
194*
195* IX (global input) INTEGER
196* On entry, IX specifies X's global row index, which points to
197* the beginning of the submatrix sub( X ).
198*
199* JX (global input) INTEGER
200* On entry, JX specifies X's global column index, which points
201* to the beginning of the submatrix sub( X ).
202*
203* DESCX (global and local input) INTEGER array
204* On entry, DESCX is an integer array of dimension DLEN_. This
205* is the array descriptor for the matrix X.
206*
207* INCX (global input) INTEGER
208* On entry, INCX specifies the global increment for the
209* elements of X. Only two values of INCX are supported in
210* this version, namely 1 and M_X. INCX must not be zero.
211*
212* BETA (global input) COMPLEX
213* On entry, BETA specifies the scalar beta. When BETA is
214* supplied as zero then the local entries of the array Y
215* corresponding to the entries of the subvector sub( Y ) need
216* not be set on input.
217*
218* Y (local input/local output) COMPLEX array
219* On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y
220* is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and
221* MAX( 1, Lr( 1, IY+Ly-1 ) ) otherwise, and, Ky is at least
222* Lc( 1, JY+Ly-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise.
223* Ly is M when TRANS = 'N' or 'n' and N otherwise. Before en-
224* try, this array contains the local entries of the matrix Y.
225* On exit, sub( Y ) is overwritten by the updated subvector.
226*
227* IY (global input) INTEGER
228* On entry, IY specifies Y's global row index, which points to
229* the beginning of the submatrix sub( Y ).
230*
231* JY (global input) INTEGER
232* On entry, JY specifies Y's global column index, which points
233* to the beginning of the submatrix sub( Y ).
234*
235* DESCY (global and local input) INTEGER array
236* On entry, DESCY is an integer array of dimension DLEN_. This
237* is the array descriptor for the matrix Y.
238*
239* INCY (global input) INTEGER
240* On entry, INCY specifies the global increment for the
241* elements of Y. Only two values of INCY are supported in
242* this version, namely 1 and M_Y. INCY must not be zero.
243*
244* -- Written on April 1, 1998 by
245* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
246*
247* ---------------------------------------------------------------------
248*/
249/*
250* .. Local Scalars ..
251*/
252 char TrA, Yroc, * tbeta, top;
253 Int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Ald, Amb, Amp, Anb,
254 Anq, Arow, XAfr, Xi, Xj, YAfr, YApbY, YAsum, Ycol, Yi, Yii,
255 Yj, Yjj, Yld, Ynp, Ynq, Yrow, ctxt, info, ione=1, mycol,
256 myrow, nota, npcol, nprow;
257 PBTYP_T * type;
258/*
259* .. Local Arrays ..
260*/
261 Int Ad [DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_],
262 Yd [DLEN_];
263 char * XA = NULL, * YA = NULL;
264/* ..
265* .. Executable Statements ..
266*
267*/
268 nota = ( ( TrA = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN );
269 PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad );
270 PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd );
271 PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd );
272#ifndef NO_ARGCHK
273/*
274* Test the input parameters
275*/
276 Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
277 if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) )
278 {
279 if( ( !nota ) && ( TrA != CTRAN ) && ( TrA != CCOTRAN ) )
280 {
281 PB_Cwarn( ctxt, __LINE__, "PCGEMV", "Illegal TRANS=%c\n", TrA );
282 info = -1;
283 }
284 PB_Cchkmat( ctxt, "PCGEMV", "A", *M, 2, *N, 3, Ai, Aj, Ad, 8,
285 &info );
286 if( nota )
287 {
288 PB_Cchkvec( ctxt, "PCGEMV", "X", *N, 3, Xi, Xj, Xd, *INCX, 12,
289 &info );
290 PB_Cchkvec( ctxt, "PCGEMV", "Y", *M, 2, Yi, Yj, Yd, *INCY, 18,
291 &info );
292 }
293 else
294 {
295 PB_Cchkvec( ctxt, "PCGEMV", "X", *M, 2, Xi, Xj, Xd, *INCX, 12,
296 &info );
297 PB_Cchkvec( ctxt, "PCGEMV", "Y", *N, 3, Yi, Yj, Yd, *INCY, 18,
298 &info );
299 }
300 }
301 if( info ) { PB_Cabort( ctxt, "PCGEMV", info ); return; }
302#endif
303/*
304* Quick return if possible
305*/
306 if( ( *M == 0 ) || ( *N == 0 ) ||
307 ( ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) &&
308 ( ( BETA [REAL_PART] == ONE ) && ( BETA [IMAG_PART] == ZERO ) ) ) )
309 return;
310/*
311* Retrieve process grid information
312*/
313#ifdef NO_ARGCHK
314 Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
315#endif
316/*
317* Get type structure
318*/
319 type = PB_Cctypeset();
320/*
321* When alpha is zero
322*/
323 if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) )
324 {
325/*
326* Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol
327*/
328 PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj,
329 &Yrow, &Ycol );
330
331 if( *INCY == Yd[M_] )
332 {
333/*
334* sub( Y ) resides in (a) process row(s)
335*/
336 if( ( myrow == Yrow ) || ( Yrow < 0 ) )
337 {
338/*
339* Make sure I own some data and scale sub( Y )
340*/
341 Ynq = PB_Cnumroc( ( nota ? *M : *N ), Yj, Yd[INB_], Yd[NB_], mycol,
342 Yd[CSRC_], npcol );
343 if( Ynq > 0 )
344 {
345 Yld = Yd[LLD_];
346 if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) )
347 {
348 cset_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii,
349 Yjj, Yld, type->size ), &Yld );
350 }
351 else
352 {
353 cscal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii,
354 Yjj, Yld, type->size ), &Yld );
355 }
356 }
357 }
358 }
359 else
360 {
361/*
362* sub( Y ) resides in (a) process column(s)
363*/
364 if( ( mycol == Ycol ) || ( Ycol < 0 ) )
365 {
366/*
367* Make sure I own some data and scale sub( Y )
368*/
369 Ynp = PB_Cnumroc( ( nota ? *M : *N ), Yi, Yd[IMB_], Yd[MB_], myrow,
370 Yd[RSRC_], nprow );
371 if( Ynp > 0 )
372 {
373 if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) )
374 {
375 cset_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii,
376 Yjj, Yd[LLD_], type->size ), INCY );
377 }
378 else
379 {
380 cscal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii,
381 Yjj, Yd[LLD_], type->size ), INCY );
382 }
383 }
384 }
385 }
386 return;
387 }
388/*
389* Compute descriptor Ad0 for sub( A )
390*/
391 PB_Cdescribe( *M, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj,
392 &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 );
393
394 Yroc = ( *INCY == Yd[M_] ? CROW : CCOLUMN );
395
396 if( nota )
397 {
398/*
399* Reuse sub( Y ) and/or create vector YA in process columns spanned by sub( A )
400*/
401 PB_CInOutV( type, COLUMN, *M, *N, Ad0, 1, ((char *) BETA), ((char *) Y),
402 Yi, Yj, Yd, &Yroc, &tbeta, &YA, YAd, &YAfr, &YAsum, &YApbY );
403/*
404* Replicate sub( X ) in process rows spanned by sub( A ) -> XA
405*/
406 PB_CInV( type, NOCONJG, ROW, *M, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd,
407 ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr );
408/*
409* Local matrix-vector multiply iff I own some data
410*/
411 Amp = PB_Cnumroc( *M, 0, Ad0[IMB_], Ad0[MB_], myrow, Ad0[RSRC_], nprow );
412 Anq = PB_Cnumroc( *N, 0, Ad0[INB_], Ad0[NB_], mycol, Ad0[CSRC_], npcol );
413 if( ( Amp > 0 ) && ( Anq > 0 ) )
414 {
415 cgemv_( TRANS, &Amp, &Anq, ((char *) ALPHA), Mptr( ((char *)A),
416 Aii, Ajj, Ald, type->size ), &Ald, XA, &XAd[LLD_], tbeta,
417 YA, &ione );
418 }
419 if( XAfr ) free( XA );
420/*
421* Combine the partial column results into YA
422*/
423 if( YAsum && ( Amp > 0 ) )
424 {
425 top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET );
426 Ccgsum2d( ctxt, ROW, &top, Amp, 1, YA, YAd[LLD_], myrow,
427 YAd[CSRC_] );
428 }
429/*
430* sub( Y ) := beta * sub( Y ) + YA (if necessary)
431*/
432 if( YApbY )
433 {
434 PB_Cpaxpby( type, NOCONJG, *M, 1, type->one, YA, 0, 0, YAd, COLUMN,
435 ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc );
436 }
437 }
438 else
439 {
440/*
441* Reuse sub( Y ) and/or create vector YA in process rows spanned by sub( A )
442*/
443 PB_CInOutV( type, ROW, *M, *N, Ad0, 1, ((char *) BETA), ((char *) Y), Yi,
444 Yj, Yd, &Yroc, &tbeta, &YA, YAd, &YAfr, &YAsum, &YApbY );
445/*
446* Replicate sub( X ) in process columns spanned by sub( A ) -> XA
447*/
448 PB_CInV( type, NOCONJG, COLUMN, *M, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd,
449 ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr );
450/*
451* Local matrix-vector multiply iff I own some data
452*/
453 Amp = PB_Cnumroc( *M, 0, Ad0[IMB_], Ad0[MB_], myrow, Ad0[RSRC_], nprow );
454 Anq = PB_Cnumroc( *N, 0, Ad0[INB_], Ad0[NB_], mycol, Ad0[CSRC_], npcol );
455 if( ( Amp > 0 ) && ( Anq > 0 ) )
456 {
457 cgemv_( TRANS, &Amp, &Anq, ((char *) ALPHA), Mptr( ((char *)A),
458 Aii, Ajj, Ald, type->size ), &Ald, XA, &ione, tbeta,
459 YA, &YAd[LLD_] );
460 }
461 if( XAfr ) free( XA );
462/*
463* Combine the partial row results into YA
464*/
465 if( YAsum && ( Anq > 0 ) )
466 {
467 top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET );
468 Ccgsum2d( ctxt, COLUMN, &top, 1, Anq, YA, YAd[LLD_], YAd[RSRC_],
469 mycol );
470 }
471/*
472* sub( Y ) := beta * sub( Y ) + YA (if necessary)
473*/
474 if( YApbY )
475 {
476 PB_Cpaxpby( type, NOCONJG, 1, *N, type->one, YA, 0, 0, YAd, ROW,
477 ((char *) BETA), ((char *) Y), Yi, Yj, Yd, &Yroc );
478 }
479 }
480 if( YAfr ) free( YA );
481/*
482* End of PCGEMV
483*/
484}
#define Int
Definition Bconfig.h:22
#define REAL_PART
Definition pblas.h:139
#define F2C_CHAR(a)
Definition pblas.h:124
#define IMAG_PART
Definition pblas.h:140
#define CCOLUMN
Definition PBblacs.h:20
#define TOP_GET
Definition PBblacs.h:50
#define COLUMN
Definition PBblacs.h:45
#define COMBINE
Definition PBblacs.h:49
#define CROW
Definition PBblacs.h:21
#define ROW
Definition PBblacs.h:46
void Cblacs_gridinfo()
void Ccgsum2d()
#define cgemv_
Definition PBblas.h:141
#define NOCONJG
Definition PBblas.h:45
#define CNOTRAN
Definition PBblas.h:18
#define CTRAN
Definition PBblas.h:20
#define cscal_
Definition PBblas.h:112
#define CCOTRAN
Definition PBblas.h:22
#define CTXT_
Definition PBtools.h:38
#define MB_
Definition PBtools.h:43
void PB_Cabort()
#define ONE
Definition PBtools.h:64
void PB_Cchkvec()
void PB_Cinfog2l()
void PB_Cchkmat()
#define Mptr(a_, i_, j_, lda_, siz_)
Definition PBtools.h:132
void PB_Cwarn()
#define LLD_
Definition PBtools.h:47
Int PB_Cnumroc()
char * PB_Ctop()
void PB_CInV()
void PB_CInOutV()
#define RSRC_
Definition PBtools.h:45
#define cset_
Definition PBtools.h:665
#define M_
Definition PBtools.h:39
#define INB_
Definition PBtools.h:42
void PB_CargFtoC()
#define CSRC_
Definition PBtools.h:46
PBTYP_T * PB_Cctypeset()
#define IMB_
Definition PBtools.h:41
#define ZERO
Definition PBtools.h:66
#define Mupcase(C)
Definition PBtools.h:83
#define DLEN_
Definition PBtools.h:48
#define NB_
Definition PBtools.h:44
void PB_Cpaxpby()
void PB_Cdescribe()
Int size
Definition pblas.h:333
char * one
Definition pblas.h:336
Here is the call graph for this function: