ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdatrmv_.c
Go to the documentation of this file.
1 /* ---------------------------------------------------------------------
2 *
3 * -- PBLAS routine (version 2.0) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * April 1, 1998
7 *
8 * ---------------------------------------------------------------------
9 */
10 /*
11 * Include files
12 */
13 #include "pblas.h"
14 #include "PBpblas.h"
15 #include "PBtools.h"
16 #include "PBblacs.h"
17 #include "PBblas.h"
18 
19 #ifdef __STDC__
20 void pdatrmv_( F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG, int * N,
21  double * ALPHA,
22  double * A, int * IA, int * JA, int * DESCA,
23  double * X, int * IX, int * JX, int * DESCX,
24  int * INCX, double * BETA,
25  double * Y, int * IY, int * JY, int * DESCY,
26  int * INCY )
27 #else
28 void pdatrmv_( UPLO, TRANS, DIAG, N, ALPHA, A, IA, JA, DESCA, X, IX,
29  JX, DESCX, INCX, BETA, Y, IY, JY, DESCY, INCY )
30 /*
31 * .. Scalar Arguments ..
32 */
33  F_CHAR_T DIAG, TRANS, UPLO;
34  int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY,
35  * N;
36  double * ALPHA, * BETA;
37 /*
38 * .. Array Arguments ..
39 */
40  int * DESCA, * DESCX, * DESCY;
41  double * A, * X, * Y;
42 #endif
43 {
44 /*
45 * Purpose
46 * =======
47 *
48 * PDATRMV performs one of the matrix-vector operations
49 *
50 * sub( Y ) := abs( alpha )*abs( sub( A ) )*abs( sub( X ) ) +
51 * abs( beta*sub( Y ) ),
52 * or
53 *
54 * sub( Y ) := abs( alpha )*abs( sub( A )' )*abs( sub( X ) ) +
55 * abs( beta*sub( Y ) ),
56 *
57 * where
58 *
59 * sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1),
60 *
61 * sub( X ) denotes X(IX:IX,JX:JX+N-1), if INCX = M_X,
62 * X(IX:IX+N-1,JX:JX), if INCX = 1 and INCX <> M_X,
63 * and,
64 *
65 * sub( Y ) denotes Y(IY:IY,JY:JY+N-1), if INCY = M_Y,
66 * Y(IY:IY+N-1,JY:JY), if INCY = 1 and INCY <> M_Y.
67 *
68 * Alpha and beta are real scalars, sub( Y ) is a real subvector,
69 * sub( X ) is a subvector and sub( A ) is an n by n triangular subma-
70 * trix.
71 *
72 * Notes
73 * =====
74 *
75 * A description vector is associated with each 2D block-cyclicly dis-
76 * tributed matrix. This vector stores the information required to
77 * establish the mapping between a matrix entry and its corresponding
78 * process and memory location.
79 *
80 * In the following comments, the character _ should be read as
81 * "of the distributed matrix". Let A be a generic term for any 2D
82 * block cyclicly distributed matrix. Its description vector is DESC_A:
83 *
84 * NOTATION STORED IN EXPLANATION
85 * ---------------- --------------- ------------------------------------
86 * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
87 * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
88 * the NPROW x NPCOL BLACS process grid
89 * A is distributed over. The context
90 * itself is global, but the handle
91 * (the integer value) may vary.
92 * M_A (global) DESCA[ M_ ] The number of rows in the distribu-
93 * ted matrix A, M_A >= 0.
94 * N_A (global) DESCA[ N_ ] The number of columns in the distri-
95 * buted matrix A, N_A >= 0.
96 * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
97 * block of the matrix A, IMB_A > 0.
98 * INB_A (global) DESCA[ INB_ ] The number of columns of the upper
99 * left block of the matrix A,
100 * INB_A > 0.
101 * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
102 * bute the last M_A-IMB_A rows of A,
103 * MB_A > 0.
104 * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
105 * bute the last N_A-INB_A columns of
106 * A, NB_A > 0.
107 * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
108 * row of the matrix A is distributed,
109 * NPROW > RSRC_A >= 0.
110 * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
111 * first column of A is distributed.
112 * NPCOL > CSRC_A >= 0.
113 * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
114 * array storing the local blocks of
115 * the distributed matrix A,
116 * IF( Lc( 1, N_A ) > 0 )
117 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
118 * ELSE
119 * LLD_A >= 1.
120 *
121 * Let K be the number of rows of a matrix A starting at the global in-
122 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
123 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
124 * receive if these K rows were distributed over NPROW processes. If K
125 * is the number of columns of a matrix A starting at the global index
126 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
127 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
128 * these K columns were distributed over NPCOL processes.
129 *
130 * The values of Lr() and Lc() may be determined via a call to the func-
131 * tion PB_Cnumroc:
132 * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
133 * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
134 *
135 * Arguments
136 * =========
137 *
138 * UPLO (global input) CHARACTER*1
139 * On entry, UPLO specifies whether the submatrix sub( A ) is
140 * an upper or lower triangular submatrix as follows:
141 *
142 * UPLO = 'U' or 'u' sub( A ) is an upper triangular
143 * submatrix,
144 *
145 * UPLO = 'L' or 'l' sub( A ) is a lower triangular
146 * submatrix.
147 *
148 * TRANS (global input) CHARACTER*1
149 * On entry, TRANS specifies the operation to be performed as
150 * follows:
151 *
152 * TRANS = 'N' or 'n'
153 * sub( Y ) := |alpha|*|sub( A )|*|sub( X )| +
154 * |beta*sub( Y )|.
155 *
156 * TRANS = 'T' or 't'
157 * sub( Y ) := |alpha|*|sub( A )'|*|sub( X )| +
158 * |beta*sub( Y )|.
159 *
160 * TRANS = 'C' or 'c'
161 * sub( Y ) := |alpha|*|sub( A )'|*|sub( X )| +
162 * |beta*sub( Y )|.
163 *
164 * DIAG (global input) CHARACTER*1
165 * On entry, DIAG specifies whether or not sub( A ) is unit
166 * triangular as follows:
167 *
168 * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian-
169 * gular,
170 *
171 * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri-
172 * angular.
173 *
174 * N (global input) INTEGER
175 * On entry, N specifies the order of the submatrix sub( A ).
176 * N must be at least zero.
177 *
178 * ALPHA (global input) DOUBLE PRECISION
179 * On entry, ALPHA specifies the scalar alpha. When ALPHA is
180 * supplied as zero then the local entries of the arrays A
181 * and X corresponding to the entries of the submatrix sub( A )
182 * and the subvector sub( X ) need not be set on input.
183 *
184 * A (local input) DOUBLE PRECISION array
185 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is
186 * at least Lc( 1, JA+N-1 ). Before entry, this array contains
187 * the local entries of the matrix A.
188 * Before entry with UPLO = 'U' or 'u', this array contains the
189 * local entries corresponding to the entries of the upper tri-
190 * angular submatrix sub( A ), and the local entries correspon-
191 * ding to the entries of the strictly lower triangular part of
192 * the submatrix sub( A ) are not referenced.
193 * Before entry with UPLO = 'L' or 'l', this array contains the
194 * local entries corresponding to the entries of the lower tri-
195 * angular submatrix sub( A ), and the local entries correspon-
196 * ding to the entries of the strictly upper triangular part of
197 * the submatrix sub( A ) are not referenced.
198 * Note that when DIAG = 'U' or 'u', the local entries corres-
199 * ponding to the diagonal elements of the submatrix sub( A )
200 * are not referenced either, but are assumed to be unity.
201 *
202 * IA (global input) INTEGER
203 * On entry, IA specifies A's global row index, which points to
204 * the beginning of the submatrix sub( A ).
205 *
206 * JA (global input) INTEGER
207 * On entry, JA specifies A's global column index, which points
208 * to the beginning of the submatrix sub( A ).
209 *
210 * DESCA (global and local input) INTEGER array
211 * On entry, DESCA is an integer array of dimension DLEN_. This
212 * is the array descriptor for the matrix A.
213 *
214 * X (local input) DOUBLE PRECISION array
215 * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X
216 * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and
217 * MAX( 1, Lr( 1, IX+Lx-1 ) ) otherwise, and, Kx is at least
218 * Lc( 1, JX+Lx-1 ) when INCX = M_X and Lc( 1, JX ) otherwise.
219 * Lx is N when TRANS = 'N' or 'n' and M otherwise. Before en-
220 * try, this array contains the local entries of the matrix X.
221 *
222 * IX (global input) INTEGER
223 * On entry, IX specifies X's global row index, which points to
224 * the beginning of the submatrix sub( X ).
225 *
226 * JX (global input) INTEGER
227 * On entry, JX specifies X's global column index, which points
228 * to the beginning of the submatrix sub( X ).
229 *
230 * DESCX (global and local input) INTEGER array
231 * On entry, DESCX is an integer array of dimension DLEN_. This
232 * is the array descriptor for the matrix X.
233 *
234 * INCX (global input) INTEGER
235 * On entry, INCX specifies the global increment for the
236 * elements of X. Only two values of INCX are supported in
237 * this version, namely 1 and M_X. INCX must not be zero.
238 *
239 * BETA (global input) DOUBLE PRECISION
240 * On entry, BETA specifies the scalar beta. When BETA is
241 * supplied as zero then the local entries of the array Y
242 * corresponding to the entries of the subvector sub( Y ) need
243 * not be set on input.
244 *
245 * Y (local input/local output) DOUBLE PRECISION array
246 * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y
247 * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and
248 * MAX( 1, Lr( 1, IY+Ly-1 ) ) otherwise, and, Ky is at least
249 * Lc( 1, JY+Ly-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise.
250 * Ly is M when TRANS = 'N' or 'n' and N otherwise. Before en-
251 * try, this array contains the local entries of the matrix Y.
252 * On exit, sub( Y ) is overwritten by the updated subvector.
253 *
254 * IY (global input) INTEGER
255 * On entry, IY specifies Y's global row index, which points to
256 * the beginning of the submatrix sub( Y ).
257 *
258 * JY (global input) INTEGER
259 * On entry, JY specifies Y's global column index, which points
260 * to the beginning of the submatrix sub( Y ).
261 *
262 * DESCY (global and local input) INTEGER array
263 * On entry, DESCY is an integer array of dimension DLEN_. This
264 * is the array descriptor for the matrix Y.
265 *
266 * INCY (global input) INTEGER
267 * On entry, INCY specifies the global increment for the
268 * elements of Y. Only two values of INCY are supported in
269 * this version, namely 1 and M_Y. INCY must not be zero.
270 *
271 * -- Written on April 1, 1998 by
272 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
273 *
274 * ---------------------------------------------------------------------
275 */
276 /*
277 * .. Local Scalars ..
278 */
279  char DiagA, TranOp, UploA, Yroc, * one, top;
280  int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb,
281  Amp, Amp0, Anb, Anq, Anq0, Arow, XAfr, XAld, Xi, Xj, YAfr,
282  YAld, YApbY, YAsum, Ycol, Yi, Yii, Yj, Yjj, Yld, Ynp, Ynq,
283  Yrow, ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb,
284  notran, nounit, npcol, nprow, size, upper, usiz;
285  double * tbeta;
286  PBTYP_T * type, * utyp;
287 /*
288 * .. Local Arrays ..
289 */
290  int Ad [DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_],
291  Yd [DLEN_];
292  char * Aptr = NULL, * XA = NULL, * YA = NULL;
293 /* ..
294 * .. Executable Statements ..
295 *
296 */
297  upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER );
298  notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN );
299  nounit = ( ( DiagA = Mupcase( F2C_CHAR( DIAG )[0] ) ) == CNOUNIT );
300  PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad );
301  PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd );
302  PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd );
303 #ifndef NO_ARGCHK
304 /*
305 * Test the input parameters
306 */
307  Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
308  if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) )
309  {
310  if( ( !upper ) && ( UploA != CLOWER ) )
311  {
312  PB_Cwarn( ctxt, __LINE__, "PDATRMV", "Illegal UPLO = %c\n", UploA );
313  info = -1;
314  }
315  else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) )
316  {
317  PB_Cwarn( ctxt, __LINE__, "PDATRMV", "Illegal TRANS = %c\n", TranOp );
318  info = -2;
319  }
320  else if( ( !nounit ) && ( DiagA != CUNIT ) )
321  {
322  PB_Cwarn( ctxt, __LINE__, "PDATRMV", "Illegal DIAG = %c\n", DiagA );
323  info = -3;
324  }
325  PB_Cchkmat( ctxt, "PDATRMV", "A", *N, 4, *N, 4, Ai, Aj, Ad, 9, &info );
326  PB_Cchkvec( ctxt, "PDATRMV", "X", *N, 4, Xi, Xj, Xd, *INCX, 13, &info );
327  PB_Cchkvec( ctxt, "PDATRMV", "Y", *N, 4, Yi, Yj, Yd, *INCY, 19, &info );
328  }
329  if( info ) { PB_Cabort( ctxt, "PDATRMV", info ); return; }
330 #endif
331 /*
332 * Quick return if possible
333 */
334  if( ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) &&
335  ( BETA [REAL_PART] == ONE ) ) ) return;
336 /*
337 * Retrieve process grid information
338 */
339 #ifdef NO_ARGCHK
340  Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
341 #endif
342 /*
343 * Get type structure
344 */
345  type = utyp = PB_Cdtypeset();
346  size = usiz = type->size;
347 /*
348 * and when alpha is zero
349 */
350  if( ALPHA[REAL_PART] == ZERO )
351  {
352 /*
353 * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol
354 */
355  PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj,
356  &Yrow, &Ycol );
357 
358  if( *INCY == Yd[M_] )
359  {
360 /*
361 * sub( Y ) resides in (a) process row(s)
362 */
363  if( ( myrow == Yrow ) || ( Yrow < 0 ) )
364  {
365 /*
366 * Make sure I own some data and scale sub( Y )
367 */
368  Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_],
369  npcol );
370  if( Ynq > 0 )
371  {
372  Yld = Yd[LLD_];
373  dascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii,
374  Yjj, Yld, usiz ), &Yld );
375  }
376  }
377  }
378  else
379  {
380 /*
381 * sub( Y ) resides in (a) process column(s)
382 */
383  if( ( mycol == Ycol ) || ( Ycol < 0 ) )
384  {
385 /*
386 * Make sure I own some data and scale sub( Y )
387 */
388  Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_],
389  nprow );
390  if( Ynp > 0 )
391  {
392  dascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii,
393  Yjj, Yd[LLD_], usiz ), INCY );
394  }
395  }
396  }
397  return;
398  }
399 /*
400 * Compute descriptor Ad0 for sub( A )
401 */
402  PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj,
403  &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 );
404 
405  Yroc = ( *INCY == Yd[M_] ? CROW : CCOLUMN );
406 
407  if( notran )
408  {
409 /*
410 * Reuse sub( Y ) and/or create vector YA in process columns spanned by sub( A )
411 */
412  PB_CInOutV( utyp, COLUMN, *N, *N, Ad0, 1, ((char *) BETA), ((char *) Y),
413  Yi, Yj, Yd, &Yroc, ((char**)(&tbeta)), &YA, YAd, &YAfr,
414  &YAsum, &YApbY );
415 /*
416 * Replicate sub( X ) in process rows spanned by sub( A ) -> XA
417 */
418  PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd,
419  ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr );
420  }
421  else
422  {
423 /*
424 * Reuse sub( Y ) and/or create vector YA in process rows spanned by sub( A )
425 */
426  PB_CInOutV( utyp, ROW, *N, *N, Ad0, 1, ((char *) BETA), ((char *) Y),
427  Yi, Yj, Yd, &Yroc, ((char**)(&tbeta)), &YA, YAd, &YAfr,
428  &YAsum, &YApbY );
429 /*
430 * Replicate sub( X ) in process columns spanned by sub( A ) -> XA
431 */
432  PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd,
433  ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr );
434  }
435 
436  one = type->one;
437 /*
438 * Local matrix-vector multiply iff I own some data
439 */
440  Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_]; Anb = Ad0[NB_];
441  Acol = Ad0[CSRC_]; Arow = Ad0[RSRC_];
442  Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow );
443  Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol );
444 
445  if( ( Amp > 0 ) && ( Anq > 0 ) )
446  {
447  Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size );
448 
449  XAld = XAd[LLD_]; YAld = YAd[LLD_];
450 /*
451 * Scale YA in the case sub( Y ) has been reused
452 */
453  if( notran && !( YApbY ) )
454  {
455 /*
456 * YA resides in (a) process column(s)
457 */
458  if( ( mycol == YAd[CSRC_] ) || ( YAd[CSRC_] < 0 ) )
459  {
460 /*
461 * Make sure I own some data and scale YA
462 */
463  if( Amp > 0 )
464  dascal_( &Amp, ((char *) tbeta), YA, &ione );
465  }
466  }
467  else if( !( notran ) && !( YApbY ) )
468  {
469 /*
470 * YA resides in (a) process row(s)
471 */
472  if( ( myrow == YAd[RSRC_] ) || ( YAd[RSRC_] < 0 ) )
473  {
474 /*
475 * Make sure I own some data and scale YA
476 */
477  if( Anq > 0 )
478  dascal_( &Anq, ((char *) tbeta), YA, &YAld );
479  }
480  }
481 /*
482 * Computational partitioning size is computed as the product of the logical
483 * value returned by pilaenv_ and 2 * lcm( nprow, npcol )
484 */
485  nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &utyp->type ) ) *
486  PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) );
487 
488  if( upper )
489  {
490  if( notran )
491  {
492  for( k = 0; k < *N; k += nb )
493  {
494  kb = *N - k; kb = MIN( kb, nb );
495  Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
496  Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
497  Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
498  if( Akp > 0 && Anq0 > 0 )
499  {
500  dagemv_( TRANS, &Akp, &Anq0, ((char *) ALPHA),
501  Mptr( Aptr, 0, Akq, Ald, size ), &Ald,
502  Mptr( XA, 0, Akq, XAld, size ), &XAld, one,
503  YA, &ione );
504  }
505  PB_Cptrm( type, utyp, LEFT, UPPER, &TranOp, &DiagA, kb, 1,
506  ((char *) ALPHA), Aptr, k, k, Ad0,
507  Mptr( XA, 0, Akq, XAld, size ), XAld,
508  Mptr( YA, Akp, 0, YAld, usiz ), YAld, PB_Ctzatrmv );
509  }
510  }
511  else
512  {
513  for( k = 0; k < *N; k += nb )
514  {
515  kb = *N - k; kb = MIN( kb, nb );
516  Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
517  Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
518  Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
519  if( Akp > 0 && Anq0 > 0 )
520  {
521  dagemv_( TRANS, &Akp, &Anq0, ((char *) ALPHA),
522  Mptr( Aptr, 0, Akq, Ald, size ), &Ald, XA, &ione,
523  one, Mptr( YA, 0, Akq, YAld, usiz ), &YAld );
524  }
525  PB_Cptrm( type, utyp, LEFT, UPPER, &TranOp, &DiagA, kb, 1,
526  ((char *) ALPHA), Aptr, k, k, Ad0,
527  Mptr( XA, Akp, 0, XAld, size ), XAld,
528  Mptr( YA, 0, Akq, YAld, usiz ), YAld, PB_Ctzatrmv );
529  }
530  }
531  }
532  else
533  {
534  if( notran )
535  {
536  for( k = 0; k < *N; k += nb )
537  {
538  kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) );
539  Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
540  Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
541  PB_Cptrm( type, utyp, LEFT, LOWER, &TranOp, &DiagA, kb, 1,
542  ((char *) ALPHA), Aptr, k, k, Ad0,
543  Mptr( XA, 0, Akq, XAld, size ), XAld,
544  Mptr( YA, Akp, 0, YAld, usiz ), YAld, PB_Ctzatrmv );
545  Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow );
546  Amp0 = Amp - Akp;
547  Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
548  if( Amp0 > 0 && Anq0 > 0 )
549  {
550  dagemv_( TRANS, &Amp0, &Anq0, ((char *) ALPHA),
551  Mptr( Aptr, Akp, Akq, Ald, size ), &Ald,
552  Mptr( XA, 0, Akq, XAld, size ), &XAld, one,
553  Mptr( YA, Akp, 0, YAld, usiz ), &ione );
554  }
555  }
556  }
557  else
558  {
559  for( k = 0; k < *N; k += nb )
560  {
561  kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) );
562  Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
563  Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
564  PB_Cptrm( type, utyp, LEFT, LOWER, &TranOp, &DiagA, kb, 1,
565  ((char *) ALPHA), Aptr, k, k, Ad0,
566  Mptr( XA, Akp, 0, XAld, size ), XAld,
567  Mptr( YA, 0, Akq, YAld, usiz ), YAld, PB_Ctzatrmv );
568  Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow );
569  Amp0 = Amp - Akp;
570  Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
571  if( Amp0 > 0 && Anq0 > 0 )
572  {
573  dagemv_( TRANS, &Amp0, &Anq0, one,
574  Mptr( Aptr, Akp, Akq, Ald, size ), &Ald,
575  Mptr( XA, Akp, 0, XAld, size ), &ione, one,
576  Mptr( YA, 0, Akq, YAld, usiz ), &YAld );
577  }
578  }
579  }
580  }
581  }
582  if( XAfr ) free( XA );
583 
584  if( notran )
585  {
586 /*
587 * Combine the partial column results into YA
588 */
589  if( YAsum && ( Amp > 0 ) )
590  {
591  top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET );
592  Cdgsum2d( ctxt, ROW, &top, Amp, 1, YA, YAd[LLD_], myrow,
593  YAd[CSRC_] );
594  }
595  }
596  else
597  {
598 /*
599 * Combine the partial row results into YA
600 */
601  if( YAsum && ( Anq > 0 ) )
602  {
603  top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET );
604  Cdgsum2d( ctxt, COLUMN, &top, 1, Anq, YA, YAd[LLD_], YAd[RSRC_],
605  mycol );
606  }
607  }
608 /*
609 * sub( Y ) := beta * sub( Y ) + YA (if necessary)
610 */
611  if( YApbY )
612  {
613 /*
614 * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol
615 */
616  PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow,
617  &Ycol );
618 
619  if( *INCY == Yd[M_] )
620  {
621 /*
622 * sub( Y ) resides in (a) process row(s)
623 */
624  if( ( myrow == Yrow ) || ( Yrow < 0 ) )
625  {
626 /*
627 * Make sure I own some data and scale sub( Y )
628 */
629  Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_],
630  npcol );
631  if( Ynq > 0 )
632  {
633  Yld = Yd[LLD_];
634  dascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii,
635  Yjj, Yld, usiz ), &Yld );
636  }
637  }
638  }
639  else
640  {
641 /*
642 * sub( Y ) resides in (a) process column(s)
643 */
644  if( ( mycol == Ycol ) || ( Ycol < 0 ) )
645  {
646 /*
647 * Make sure I own some data and scale sub( Y )
648 */
649  Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_],
650  nprow );
651  if( Ynp > 0 )
652  {
653  dascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii,
654  Yjj, Yd[LLD_], usiz ), INCY );
655  }
656  }
657  }
658 
659  if( notran )
660  {
661  PB_Cpaxpby( utyp, NOCONJG, *N, 1, one, YA, 0, 0, YAd, COLUMN, one,
662  ((char *) Y), Yi, Yj, Yd, &Yroc );
663  }
664  else
665  {
666  PB_Cpaxpby( utyp, NOCONJG, 1, *N, one, YA, 0, 0, YAd, ROW, one,
667  ((char *) Y), Yi, Yj, Yd, &Yroc );
668  }
669  }
670  if( YAfr ) free( YA );
671 /*
672 * End of PDATRMV
673 */
674 }
M_
#define M_
Definition: PBtools.h:39
ROW
#define ROW
Definition: PBblacs.h:46
MB_
#define MB_
Definition: PBtools.h:43
PB_Cpaxpby
void PB_Cpaxpby()
PB_Cwarn
void PB_Cwarn()
NB_
#define NB_
Definition: PBtools.h:44
COLUMN
#define COLUMN
Definition: PBblacs.h:45
CSRC_
#define CSRC_
Definition: PBtools.h:46
PBblacs.h
pdatrmv_
void pdatrmv_(F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG, int *N, double *ALPHA, double *A, int *IA, int *JA, int *DESCA, double *X, int *IX, int *JX, int *DESCX, int *INCX, double *BETA, double *Y, int *IY, int *JY, int *DESCY, int *INCY)
Definition: pdatrmv_.c:28
PBtools.h
PBblas.h
CCOTRAN
#define CCOTRAN
Definition: PBblas.h:22
NOCONJG
#define NOCONJG
Definition: PBblas.h:45
REAL_PART
#define REAL_PART
Definition: pblas.h:135
PBTYP_T::type
char type
Definition: pblas.h:327
PBpblas.h
DLEN_
#define DLEN_
Definition: PBtools.h:48
CUNIT
#define CUNIT
Definition: PBblas.h:32
LLD_
#define LLD_
Definition: PBtools.h:47
PB_Cdescribe
void PB_Cdescribe()
dagemv_
F_VOID_FCT dagemv_()
PB_Cdtypeset
PBTYP_T * PB_Cdtypeset()
Definition: PB_Cdtypeset.c:19
F_CHAR_T
char * F_CHAR_T
Definition: pblas.h:118
CROW
#define CROW
Definition: PBblacs.h:21
ZERO
#define ZERO
Definition: PBtools.h:66
PB_Cchkvec
void PB_Cchkvec()
UPPER
#define UPPER
Definition: PBblas.h:52
dascal_
F_VOID_FCT dascal_()
IMB_
#define IMB_
Definition: PBtools.h:41
pilaenv_
int pilaenv_()
PB_Cabort
void PB_Cabort()
CLOWER
#define CLOWER
Definition: PBblas.h:25
LEFT
#define LEFT
Definition: PBblas.h:55
F2C_CHAR
#define F2C_CHAR(a)
Definition: pblas.h:120
PB_Ctzatrmv
void PB_Ctzatrmv()
TOP_GET
#define TOP_GET
Definition: PBblacs.h:50
PB_Ctop
char * PB_Ctop()
ONE
#define ONE
Definition: PBtools.h:64
RSRC_
#define RSRC_
Definition: PBtools.h:45
CNOTRAN
#define CNOTRAN
Definition: PBblas.h:18
PBTYP_T::one
char * one
Definition: pblas.h:331
PB_CargFtoC
void PB_CargFtoC()
COMBINE
#define COMBINE
Definition: PBblacs.h:49
PBTYP_T::size
int size
Definition: pblas.h:329
PB_Cinfog2l
void PB_Cinfog2l()
PB_Cchkmat
void PB_Cchkmat()
PB_Cnumroc
int PB_Cnumroc()
Cdgsum2d
void Cdgsum2d()
PB_CInV
void PB_CInV()
PB_CInOutV
void PB_CInOutV()
PB_Cptrm
void PB_Cptrm()
MIN
#define MIN(a_, b_)
Definition: PBtools.h:76
CCOLUMN
#define CCOLUMN
Definition: PBblacs.h:20
INB_
#define INB_
Definition: PBtools.h:42
LOWER
#define LOWER
Definition: PBblas.h:51
C2F_CHAR
#define C2F_CHAR(a)
Definition: pblas.h:121
Cblacs_gridinfo
void Cblacs_gridinfo()
PBTYP_T
Definition: pblas.h:325
Mupcase
#define Mupcase(C)
Definition: PBtools.h:83
pblas.h
CUPPER
#define CUPPER
Definition: PBblas.h:26
CTRAN
#define CTRAN
Definition: PBblas.h:20
Mptr
#define Mptr(a_, i_, j_, lda_, siz_)
Definition: PBtools.h:132
CTXT_
#define CTXT_
Definition: PBtools.h:38
CNOUNIT
#define CNOUNIT
Definition: PBblas.h:33
PB_Clcm
int PB_Clcm()