ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
PB_Cptrm.c
Go to the documentation of this file.
1 /* ---------------------------------------------------------------------
2 *
3 * -- PBLAS auxiliary routine (version 2.0) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * April 1, 1998
7 *
8 * ---------------------------------------------------------------------
9 */
10 /*
11 * Include files
12 */
13 #include "../pblas.h"
14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
18 
19 #ifdef __STDC__
20 void PB_Cptrm( PBTYP_T * TYPE, PBTYP_T * UTYP, char * SIDE, char * UPLO,
21  char * TRANS, char * DIAG, int N, int K, char * ALPHA,
22  char * A, int IA, int JA, int * DESCA,
23  char * X, int LDX, char * Y, int LDY,
24  TZTRM_T TRM )
25 #else
26 void PB_Cptrm( TYPE, UTYP, SIDE, UPLO, TRANS, DIAG, N, K, ALPHA, A,
27  IA, JA, DESCA, X, LDX, Y, LDY, TRM )
28 /*
29 * .. Scalar Arguments ..
30 */
31  char * DIAG, * SIDE, * TRANS, * UPLO;
32  int IA, JA, K, LDX, LDY, N;
33  char * ALPHA;
34  PBTYP_T * TYPE, * UTYP;
35  TZTRM_T TRM;
36 /*
37 * .. Array Arguments ..
38 */
39  int * DESCA;
40  char * A, * X, * Y;
41 #endif
42 {
43 /*
44 * Purpose
45 * =======
46 *
47 * PB_Cptrm performs a triangular matrix-matrix or matrix-vector multi-
48 * plication. In the following, sub( A ) denotes the triangular subma-
49 * trix operand A( IA:IA+N-1, JA:JA+N-1 ).
50 *
51 * Notes
52 * =====
53 *
54 * A description vector is associated with each 2D block-cyclicly dis-
55 * tributed matrix. This vector stores the information required to
56 * establish the mapping between a matrix entry and its corresponding
57 * process and memory location.
58 *
59 * In the following comments, the character _ should be read as
60 * "of the distributed matrix". Let A be a generic term for any 2D
61 * block cyclicly distributed matrix. Its description vector is DESC_A:
62 *
63 * NOTATION STORED IN EXPLANATION
64 * ---------------- --------------- ------------------------------------
65 * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
66 * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
67 * the NPROW x NPCOL BLACS process grid
68 * A is distributed over. The context
69 * itself is global, but the handle
70 * (the integer value) may vary.
71 * M_A (global) DESCA[ M_ ] The number of rows in the distribu-
72 * ted matrix A, M_A >= 0.
73 * N_A (global) DESCA[ N_ ] The number of columns in the distri-
74 * buted matrix A, N_A >= 0.
75 * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
76 * block of the matrix A, IMB_A > 0.
77 * INB_A (global) DESCA[ INB_ ] The number of columns of the upper
78 * left block of the matrix A,
79 * INB_A > 0.
80 * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
81 * bute the last M_A-IMB_A rows of A,
82 * MB_A > 0.
83 * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
84 * bute the last N_A-INB_A columns of
85 * A, NB_A > 0.
86 * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
87 * row of the matrix A is distributed,
88 * NPROW > RSRC_A >= 0.
89 * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
90 * first column of A is distributed.
91 * NPCOL > CSRC_A >= 0.
92 * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
93 * array storing the local blocks of
94 * the distributed matrix A,
95 * IF( Lc( 1, N_A ) > 0 )
96 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
97 * ELSE
98 * LLD_A >= 1.
99 *
100 * Let K be the number of rows of a matrix A starting at the global in-
101 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
102 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
103 * receive if these K rows were distributed over NPROW processes. If K
104 * is the number of columns of a matrix A starting at the global index
105 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
106 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
107 * these K columns were distributed over NPCOL processes.
108 *
109 * The values of Lr() and Lc() may be determined via a call to the func-
110 * tion PB_Cnumroc:
111 * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
112 * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
113 *
114 * Arguments
115 * =========
116 *
117 * TYPE (local input) pointer to a PBTYP_T structure
118 * On entry, TYPE is a pointer to a structure of type PBTYP_T,
119 * that contains type information (See pblas.h).
120 *
121 * UTYP (local input) pointer to a PBTYP_T structure
122 * On entry, UTYP is a pointer to a structure of type PBTYP_T,
123 * that contains type information for the Y's (See pblas.h).
124 *
125 * SIDE (global input) pointer to CHAR
126 * On entry, SIDE specifies whether op( sub( A ) ) multiplies
127 * its operand X from the left or right as follows:
128 *
129 * SIDE = 'L' or 'l' Y := alpha*op( sub( A ) )*X + Y,
130 *
131 * SIDE = 'R' or 'r' Y := alpha*X*op( sub( A ) ) + Y.
132 *
133 * UPLO (global input) pointer to CHAR
134 * On entry, UPLO specifies whether the submatrix sub( A ) is
135 * an upper or lower triangular submatrix as follows:
136 *
137 * UPLO = 'U' or 'u' sub( A ) is an upper triangular
138 * submatrix,
139 *
140 * UPLO = 'L' or 'l' sub( A ) is a lower triangular
141 * submatrix.
142 *
143 * TRANS (global input) pointer to CHAR
144 * On entry, TRANS specifies the operation to be performed as
145 * follows:
146 *
147 * TRANS = 'N' or 'n' Y := alpha * sub( A ) * X + Y,
148 *
149 * TRANS = 'T' or 't' Y := alpha * sub( A )' * X + Y,
150 *
151 * TRANS = 'C' or 'c' Y := alpha * sub( A )' * X + Y, or
152 * Y := alpha * conjg(sub( A )') * X + Y.
153 *
154 * DIAG (global input) pointer to CHAR
155 * On entry, DIAG specifies whether or not sub( A ) is unit
156 * triangular as follows:
157 *
158 * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian-
159 * gular,
160 *
161 * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri-
162 * angular.
163 *
164 * N (global input) INTEGER
165 * On entry, N specifies the order of the submatrix sub( A ).
166 * N must be at least zero.
167 *
168 * K (global input) INTEGER
169 * On entry, K specifies the local number of columns of the lo-
170 * cal array X and the local number of rows of the local array
171 * Y when SIDE is 'L' or 'l' and TRANS is 'N' or 'n', or SIDE is
172 * 'R' or 'r' and TRANS is 'T', 't', 'C' or 'c'. Otherwise, K
173 * specifies the local number of rows of the local array X and
174 * the local number of columns of the local array Y. K mut be at
175 * least zero.
176 *
177 * ALPHA (global input) pointer to CHAR
178 * On entry, ALPHA specifies the scalar alpha.
179 *
180 * A (local input) pointer to CHAR
181 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is
182 * at least Lc( 1, JA+N-1 ). Before entry, this array contains
183 * the local entries of the matrix A.
184 * Before entry with UPLO = 'U' or 'u', this array contains
185 * the local entries corresponding to the upper triangular part
186 * of the triangular submatrix sub( A ), and the local entries
187 * corresponding to the strictly lower triangular of sub( A )
188 * are not referenced.
189 * Before entry with UPLO = 'L' or 'l', this array contains
190 * the local entries corresponding to the lower triangular part
191 * of the triangular submatrix sub( A ), and the local entries
192 * corresponding to the strictly upper triangular of sub( A )
193 * are not referenced.
194 * Note that when DIAG = 'U' or 'u', the local entries corres-
195 * ponding to the diagonal elements of the submatrix sub( A )
196 * are not referenced either, but are assumed to be unity.
197 *
198 * IA (global input) INTEGER
199 * On entry, IA specifies A's global row index, which points to
200 * the beginning of the submatrix sub( A ).
201 *
202 * JA (global input) INTEGER
203 * On entry, JA specifies A's global column index, which points
204 * to the beginning of the submatrix sub( A ).
205 *
206 * DESCA (global and local input) INTEGER array
207 * On entry, DESCA is an integer array of dimension DLEN_. This
208 * is the array descriptor for the matrix A.
209 *
210 * X (local input) pointer to CHAR
211 * On entry, X is an array of dimension (LDX,Kx), where Kx is
212 * at least Lc( JA, N ) when SIDE is 'L' or 'l' and TRANS is 'N'
213 * or 'n', or SIDE is 'R' or 'r' and TRANS is 'T', 't', 'C' or
214 * 'c', and K otherwise. Before entry, this array contains the
215 * local entries of the matrix X.
216 *
217 * LDX (local input) INTEGER
218 * On entry, LDX specifies the leading dimension of the array X.
219 * LDX must be at least K when SIDE is 'L' or 'l' and TRANS is
220 * 'N' or 'n', or SIDE is 'R' or 'r' and TRANS is 'T', 't', 'C'
221 * or 'c', and max( 1, Lp( IA, N ) ) otherwise.
222 *
223 * Y (local input/local output) pointer to CHAR
224 * On entry, Y is an array of dimension ( LDY, Ky ), where Ky is
225 * at least max( 1, K ) when SIDE is 'L' or 'l' and TRANS is 'N'
226 * or 'n', or SIDE is 'R' or 'r' and TRANS is 'T', 't', 'C' or
227 * 'c', and max( 1, Lc( JA, N ) ) otherwise. Before entry, this
228 * array contains the local entries of the matrix Y. On exit,
229 * this array contains the updated vector Y.
230 *
231 * LDY (local input) INTEGER
232 * On entry, LDY specifies the leading dimension of the array Y.
233 * LDY must be at least max( 1, Lr( IA, N ) ) when SIDE is 'L'
234 * or 'l' and TRANS is 'N' or 'n', or SIDE is 'R' or 'r' and
235 * TRANS is 'T', 't', 'C' or 'c', and max( 1, K ) otherwise.
236 *
237 * TRM (local input) pointer to function of type TZTRM_T
238 * On entry, TRM specifies the function performing the triangu-
239 * lar multiplication of a single block.
240 *
241 * -- Written on April 1, 1998 by
242 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
243 *
244 * ---------------------------------------------------------------------
245 */
246 /*
247 * .. Local Scalars ..
248 */
249  int Acol, Arow, Aii, Aimb1, Ainb1, Ajj, Ald, Amp, Amb, Anb, Anq,
250  Aoffi, Aoffj, Arcol, Arrow, GoEast, GoSouth, IsColRepl,
251  IsRowRepl, Xinc, Yinc, XYii=0, XYjj=0, XYoffi=-1, XYoffj=-1,
252  XisRow, iimax, ilow, imbloc, inbloc, ioffd, ioffxy, iupp,
253  jjmax, joffd, joffxy, lcmt, lcmt00, lmbloc, lnbloc, low,
254  lower, m1, mbloc, mblkd, mblks, mycol, myrow, n1, nbloc,
255  nblkd, nblks, npcol, nprow, pmb, qnb, size, tmp1, upp, upper;
256 /* ..
257 * .. Executable Statements ..
258 *
259 */
260 /*
261 * Quick return if possible
262 */
263  if( N <= 0 ) return;
264 /*
265 * Retrieve process grid information
266 */
267  Cblacs_gridinfo( DESCA[CTXT_], &nprow, &npcol, &myrow, &mycol );
268 /*
269 * Retrieve sub( A )'s local information: Aii, Ajj, Arow, Acol ...
270 */
271  PB_Cainfog2l( N, N, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aimb1,
272  &Ainb1, &Amp, &Anq, &Aii, &Ajj, &Arow, &Acol, &Arrow, &Arcol );
273 /*
274 * Quick return if I don't own any of sub( A ) or if sub( A ) is replicated in
275 * all processes.
276 */
277  if( ( Amp <= 0 ) || ( Anq <= 0 ) ) return;
278 
279  IsRowRepl = ( ( Arow < 0 ) || ( nprow == 1 ) );
280  IsColRepl = ( ( Acol < 0 ) || ( npcol == 1 ) );
281  Amb = DESCA[ MB_ ]; Anb = DESCA[ NB_ ]; Ald = DESCA[ LLD_ ];
282  size = TYPE->size;
283 
284  if( IsRowRepl && IsColRepl )
285  {
286  TRM( TYPE, SIDE, UPLO, TRANS, DIAG, Amp, Anq, K, 0, ALPHA,
287  Mptr( A, Aii, Ajj, Ald, size ), Ald, X, LDX, Y, LDY );
288  return;
289  }
290 
291  if( Mupcase( SIDE[0] ) == CLEFT )
292  {
293  if( Mupcase( TRANS[0] ) == CNOTRAN )
294  { XisRow = 1; Xinc = LDX * size; Yinc = UTYP->size; }
295  else { XisRow = 0; Xinc = size; Yinc = LDY * UTYP->size; }
296  }
297  else
298  {
299  if( Mupcase( TRANS[0] ) == CNOTRAN )
300  { XisRow = 0; Xinc = size; Yinc = LDY * UTYP->size; }
301  else { XisRow = 1; Xinc = LDX * size; Yinc = UTYP->size; }
302  }
303  upper = ( Mupcase( UPLO[0] ) == CUPPER );
304  lower = ( Mupcase( UPLO[0] ) == CLOWER );
305 /*
306 * Initialize lcmt00, mblks, nblks, imbloc, inbloc, lmbloc, lnbloc, ilow, low,
307 * iupp, and upp.
308 */
309  PB_Cbinfo( 0, Amp, Anq, Aimb1, Ainb1, Amb, Anb, Arrow, Arcol, &lcmt00,
310  &mblks, &nblks, &imbloc, &inbloc, &lmbloc, &lnbloc, &ilow, &low,
311  &iupp, &upp );
312 
313  iimax = ( Aoffi = Aii - 1 ) + ( m1 = Amp );
314  jjmax = ( Aoffj = Ajj - 1 ) + ( n1 = Anq );
315  pmb = ( IsRowRepl ? Amb : nprow * Amb );
316  qnb = ( IsColRepl ? Anb : npcol * Anb );
317 /*
318 * Handle separately the first row and/or column of the LCM table. Update the
319 * LCM value of the curent block lcmt00, as well as the number of rows and
320 * columns mblks and nblks remaining in the LCM table.
321 */
322  GoSouth = ( lcmt00 > iupp );
323  GoEast = ( lcmt00 < ilow );
324 
325  if( XisRow )
326  {
327 /*
328 * Go through the table looking for blocks owning diagonal entries.
329 */
330  if( ( !( GoSouth ) ) && ( !( GoEast ) ) )
331  {
332 /*
333 * The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp
334 */
335  TRM( TYPE, SIDE, UPLO, TRANS, DIAG, imbloc, inbloc, K, lcmt00, ALPHA,
336  Mptr( A, Aii, Ajj, Ald, size ), Ald, X+XYjj*Xinc, LDX,
337  Y+XYii*Yinc, LDY );
338 /*
339 * Decide whether one should go south or east in the table: Go east if
340 * the block below the current one only owns lower entries. If this block,
341 * however, owns diagonals, then go south.
342 */
343  GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
344 
345  if( GoSouth )
346  {
347 /*
348 * When the upper triangular part of sub( A ) should be operated with and
349 * one is planning to go south in the table, it is neccessary to take care
350 * of the remaining columns of these imbloc rows immediately.
351 */
352  if( upper && ( Anq > inbloc ) )
353  {
354  tmp1 = Anq - inbloc;
355  TRM( TYPE, SIDE, ALL, TRANS, DIAG, imbloc, tmp1, K, 0, ALPHA,
356  Mptr( A, Aii, Ajj+inbloc, Ald, size ), Ald,
357  X+(XYjj+inbloc)*Xinc, LDX, Y+XYii*Yinc, LDY );
358  }
359  Aii += imbloc; XYii += imbloc; m1 -= imbloc;
360  }
361  else
362  {
363 /*
364 * When the lower triangular part of sub( A ) should be operated with and
365 * one is planning to go east in the table, it is neccessary to take care
366 * of the remaining rows of these inbloc columns immediately.
367 */
368  if( lower && ( Amp > imbloc ) )
369  {
370  tmp1 = Amp - imbloc;
371  TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, inbloc, K, 0, ALPHA,
372  Mptr( A, Aii+imbloc, Ajj, Ald, size ), Ald, X+XYjj*Xinc,
373  LDX, Y+(XYii+imbloc)*Yinc, LDY );
374  }
375  Ajj += inbloc; XYjj += inbloc; n1 -= inbloc;
376  }
377  }
378 
379  if( GoSouth )
380  {
381 /*
382 * Go one step south in the LCM table. Adjust the current LCM value as well as
383 * the local row indexes in A and XC.
384 */
385  lcmt00 -= ( iupp - upp + pmb ); mblks--;
386  Aoffi += imbloc; XYoffi += imbloc;
387 /*
388 * While there are blocks remaining that own upper entries, keep going south.
389 * Adjust the current LCM value as well as the local row indexes in A and XC.
390 */
391  while( ( mblks > 0 ) && ( lcmt00 > upp ) )
392  { lcmt00 -= pmb; mblks--; Aoffi += Amb; XYoffi += Amb; }
393 /*
394 * Operate with the upper triangular part of sub( A ) we just skipped when
395 * necessary.
396 */
397  tmp1 = MIN( Aoffi, iimax ) - Aii + 1;
398  if( upper && ( tmp1 > 0 ) )
399  {
400  TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA,
401  Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+(XYoffj+1)*Xinc,
402  LDX, Y+XYii*Yinc, LDY );
403  Aii += tmp1; XYii += tmp1; m1 -= tmp1;
404  }
405 /*
406 * Return if no more row in the LCM table.
407 */
408  if( mblks <= 0 ) return;
409 /*
410 * lcmt00 <= upp. The current block owns either diagonals or lower entries.
411 * Save the current position in the LCM table. After this column has been
412 * completely taken care of, re-start from this row and the next column of
413 * the LCM table.
414 */
415  lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; ioffxy = XYoffi;
416 
417  mbloc = Amb;
418  while( ( mblkd > 0 ) && ( lcmt >= ilow ) )
419  {
420 /*
421 * A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found.
422 */
423  if( mblkd == 1 ) mbloc = lmbloc;
424  TRM( TYPE, SIDE, UPLO, TRANS, DIAG, mbloc, inbloc, K, lcmt,
425  ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald,
426  X+(XYoffj+1)*Xinc, LDX, Y+(ioffxy+1)*Yinc, LDY );
427  lcmt00 = lcmt; lcmt -= pmb;
428  mblks = mblkd; mblkd--;
429  Aoffi = ioffd; XYoffi = ioffxy;
430  ioffd += mbloc; ioffxy += mbloc;
431  }
432 /*
433 * Operate with the lower triangular part of sub( A ).
434 */
435  tmp1 = m1 - ioffd + Aii - 1;
436  if( lower && ( tmp1 > 0 ) )
437  {
438  TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, inbloc, K, 0, ALPHA,
439  Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald,
440  X+(XYoffj+1)*Xinc, LDX, Y+(ioffxy+1)*Yinc, LDY );
441  }
442  tmp1 = Aoffi - Aii + 1;
443  m1 -= tmp1;
444  n1 -= inbloc;
445  lcmt00 += low - ilow + qnb;
446  nblks--;
447  Aoffj += inbloc;
448  XYoffj += inbloc;
449 /*
450 * Operate with the upper triangular part of sub( A ).
451 */
452  if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) )
453  {
454  TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA,
455  Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+(XYoffj+1)*Xinc,
456  LDX, Y+XYii*Yinc, LDY );
457  }
458  Aii = Aoffi + 1; Ajj = Aoffj + 1;
459  XYii = XYoffi + 1; XYjj = XYoffj + 1;
460  }
461  else if( GoEast )
462  {
463 /*
464 * Go one step east in the LCM table. Adjust the current LCM value as well as
465 * the local column index in A and XR.
466 */
467  lcmt00 += low - ilow + qnb; nblks--;
468  Aoffj += inbloc; XYoffj += inbloc;
469 /*
470 * While there are blocks remaining that own lower entries, keep going east.
471 * Adjust the current LCM value as well as the local column index in A and XR.
472 */
473  while( ( nblks > 0 ) && ( lcmt00 < low ) )
474  { lcmt00 += qnb; nblks--; Aoffj += Anb; XYoffj += Anb; }
475 /*
476 * Operate with the lower triangular part of sub( A ).
477 */
478  tmp1 = MIN( Aoffj, jjmax ) - Ajj + 1;
479  if( lower && ( tmp1 > 0 ) )
480  {
481  TRM( TYPE, SIDE, ALL, TRANS, DIAG, m1, tmp1, K, 0, ALPHA,
482  Mptr( A, Aii, Ajj, Ald, size ), Ald, X+XYjj*Xinc, LDX,
483  Y+XYii*Yinc, LDY );
484  Ajj += tmp1; XYjj += tmp1; n1 -= tmp1;
485  }
486 /*
487 * Return if no more column in the LCM table.
488 */
489  if( nblks <= 0 ) return;
490 /*
491 * lcmt00 >= low. The current block owns either diagonals or upper entries.
492 * Save the current position in the LCM table. After this row has been
493 * completely taken care of, re-start from this column and the next row of
494 * the LCM table.
495 */
496  lcmt = lcmt00; nblkd = nblks; joffd = Aoffj; joffxy = XYoffj;
497 
498  nbloc = Anb;
499  while( ( nblkd > 0 ) && ( lcmt <= iupp ) )
500  {
501 /*
502 * A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found.
503 */
504  if( nblkd == 1 ) nbloc = lnbloc;
505  TRM( TYPE, SIDE, UPLO, TRANS, DIAG, imbloc, nbloc, K, lcmt,
506  ALPHA, Mptr( A, Aii, joffd+1, Ald, size ), Ald,
507  X+(joffxy+1)*Xinc, LDX, Y+XYii*Yinc, LDY );
508  lcmt00 = lcmt; lcmt += qnb;
509  nblks = nblkd; nblkd--;
510  Aoffj = joffd; XYoffj = joffxy;
511  joffd += nbloc; joffxy += nbloc;
512  }
513 /*
514 * Operate with the upper triangular part of sub( A ).
515 */
516  tmp1 = n1 - joffd + Ajj - 1;
517  if( upper && ( tmp1 > 0 ) )
518  {
519  TRM( TYPE, SIDE, ALL, TRANS, DIAG, imbloc, tmp1, K, 0, ALPHA,
520  Mptr( A, Aii, joffd+1, Ald, size ), Ald, X+(joffxy+1)*Xinc,
521  LDX, Y+XYii*Yinc, LDY );
522  }
523  tmp1 = Aoffj - Ajj + 1;
524  m1 -= imbloc;
525  n1 -= tmp1;
526  lcmt00 -= ( iupp - upp + pmb );
527  mblks--;
528  Aoffi += imbloc;
529  XYoffi += imbloc;
530 /*
531 * Operate with the lower triangular part of sub( A ).
532 */
533  if( lower && ( m1 > 0 ) && ( tmp1 > 0 ) )
534  {
535  TRM( TYPE, SIDE, ALL, TRANS, DIAG, m1, tmp1, K, 0, ALPHA,
536  Mptr( A, Aoffi+1, Ajj, Ald, size ), Ald, X+XYjj*Xinc, LDX,
537  Y+(XYoffi+1)*Yinc, LDY );
538  }
539  Aii = Aoffi + 1; Ajj = Aoffj + 1;
540  XYii = XYoffi + 1; XYjj = XYoffj + 1;
541  }
542 /*
543 * Loop over the remaining columns of the LCM table.
544 */
545  nbloc = Anb;
546  while( nblks > 0 )
547  {
548  if( nblks == 1 ) nbloc = lnbloc;
549 /*
550 * While there are blocks remaining that own upper entries, keep going south.
551 * Adjust the current LCM value as well as the local row index in A and XC.
552 */
553  while( ( mblks > 0 ) && ( lcmt00 > upp ) )
554  { lcmt00 -= pmb; mblks--; Aoffi += Amb; XYoffi += Amb; }
555 /*
556 * Operate with the upper triangular part of sub( A ).
557 */
558  tmp1 = MIN( Aoffi, iimax ) - Aii + 1;
559  if( upper && ( tmp1 > 0 ) )
560  {
561  TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA,
562  Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+(XYoffj+1)*Xinc,
563  LDX, Y+XYii*Yinc, LDY );
564  Aii += tmp1;
565  XYii += tmp1;
566  m1 -= tmp1;
567  }
568 /*
569 * Return if no more row in the LCM table.
570 */
571  if( mblks <= 0 ) return;
572 /*
573 * lcmt00 <= upp. The current block owns either diagonals or lower entries.
574 * Save the current position in the LCM table. After this column has been
575 * completely taken care of, re-start from this row and the next column of
576 * the LCM table.
577 */
578  lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; ioffxy = XYoffi;
579 
580  mbloc = Amb;
581  while( ( mblkd > 0 ) && ( lcmt >= low ) )
582  {
583 /*
584 * A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found.
585 */
586  if( mblkd == 1 ) mbloc = lmbloc;
587  TRM( TYPE, SIDE, UPLO, TRANS, DIAG, mbloc, nbloc, K, lcmt,
588  ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald,
589  X+(XYoffj+1)*Xinc, LDX, Y+(ioffxy+1)*Yinc, LDY );
590  lcmt00 = lcmt; lcmt -= pmb;
591  mblks = mblkd; mblkd--;
592  Aoffi = ioffd; XYoffi = ioffxy;
593  ioffd += mbloc; ioffxy += mbloc;
594  }
595 /*
596 * Operate with the lower triangular part of sub( A ).
597 */
598  tmp1 = m1 - ioffd + Aii - 1;
599  if( lower && ( tmp1 > 0 ) )
600  {
601  TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, nbloc, K, 0, ALPHA,
602  Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald,
603  X+(XYoffj+1)*Xinc, LDX, Y+(ioffxy+1)*Yinc, LDY );
604  }
605 
606  tmp1 = MIN( Aoffi, iimax ) - Aii + 1;
607  m1 -= tmp1;
608  n1 -= nbloc;
609  lcmt00 += qnb;
610  nblks--;
611  Aoffj += nbloc;
612  XYoffj += nbloc;
613 /*
614 * Operate with the upper triangular part of sub( A ).
615 */
616  if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) )
617  {
618  TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA,
619  Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+(XYoffj+1)*Xinc,
620  LDX, Y+XYii*Yinc, LDY );
621  }
622  Aii = Aoffi + 1; Ajj = Aoffj + 1;
623  XYii = XYoffi + 1; XYjj = XYoffj + 1;
624  }
625  }
626  else
627  {
628 /*
629 * Go through the table looking for blocks owning diagonal entries.
630 */
631  if( ( !( GoSouth ) ) && ( !( GoEast ) ) )
632  {
633 /*
634 * The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp
635 */
636  TRM( TYPE, SIDE, UPLO, TRANS, DIAG, imbloc, inbloc, K, lcmt00, ALPHA,
637  Mptr( A, Aii, Ajj, Ald, size ), Ald, X+XYii*Xinc, LDX,
638  Y+XYjj*Yinc, LDY );
639 /*
640 * Decide whether one should go south or east in the table: Go east if
641 * the block below the current one only owns lower entries. If this block,
642 * however, owns diagonals, then go south.
643 */
644  GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
645 
646  if( GoSouth )
647  {
648 /*
649 * When the upper triangular part of sub( A ) should be operated with and
650 * one is planning to go south in the table, it is neccessary to take care
651 * of the remaining columns of these imbloc rows immediately.
652 */
653  if( upper && ( Anq > inbloc ) )
654  {
655  tmp1 = Anq - inbloc;
656  TRM( TYPE, SIDE, ALL, TRANS, DIAG, imbloc, tmp1, K, 0, ALPHA,
657  Mptr( A, Aii, Ajj+inbloc, Ald, size ), Ald, X+XYii*Xinc,
658  LDX, Y+(XYjj+inbloc)*Yinc, LDY );
659  }
660  Aii += imbloc; XYii += imbloc; m1 -= imbloc;
661  }
662  else
663  {
664 /*
665 * When the lower triangular part of sub( A ) should be operated with and
666 * one is planning to go east in the table, it is neccessary to take care
667 * of the remaining rows of these inbloc columns immediately.
668 */
669  if( lower && ( Amp > imbloc ) )
670  {
671  tmp1 = Amp - imbloc;
672  TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, inbloc, K, 0, ALPHA,
673  Mptr( A, Aii+imbloc, Ajj, Ald, size ), Ald,
674  X+(XYii+imbloc)*Xinc, LDX, Y+XYjj*Yinc, LDY );
675  }
676  Ajj += inbloc; XYjj += inbloc; n1 -= inbloc;
677  }
678  }
679 
680  if( GoSouth )
681  {
682 /*
683 * Go one step south in the LCM table. Adjust the current LCM value as well as
684 * the local row indexes in A and XC.
685 */
686  lcmt00 -= ( iupp - upp + pmb ); mblks--;
687  Aoffi += imbloc; XYoffi += imbloc;
688 /*
689 * While there are blocks remaining that own upper entries, keep going south.
690 * Adjust the current LCM value as well as the local row indexes in A and XC.
691 */
692  while( ( mblks > 0 ) && ( lcmt00 > upp ) )
693  { lcmt00 -= pmb; mblks--; Aoffi += Amb; XYoffi += Amb; }
694 /*
695 * Operate with the upper triangular part of sub( A ) we just skipped when
696 * necessary.
697 */
698  tmp1 = MIN( Aoffi, iimax ) - Aii + 1;
699  if( upper && ( tmp1 > 0 ) )
700  {
701  TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA,
702  Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+XYii*Xinc, LDX,
703  Y+(XYoffj+1)*Yinc, LDY );
704  Aii += tmp1; XYii += tmp1; m1 -= tmp1;
705  }
706 /*
707 * Return if no more row in the LCM table.
708 */
709  if( mblks <= 0 ) return;
710 /*
711 * lcmt00 <= upp. The current block owns either diagonals or lower entries.
712 * Save the current position in the LCM table. After this column has been
713 * completely taken care of, re-start from this row and the next column of
714 * the LCM table.
715 */
716  lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; ioffxy = XYoffi;
717 
718  mbloc = Amb;
719  while( ( mblkd > 0 ) && ( lcmt >= ilow ) )
720  {
721 /*
722 * A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found.
723 */
724  if( mblkd == 1 ) mbloc = lmbloc;
725  TRM( TYPE, SIDE, UPLO, TRANS, DIAG, mbloc, inbloc, K, lcmt,
726  ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald,
727  X+(ioffxy+1)*Xinc, LDX, Y+(XYoffj+1)*Yinc, LDY );
728  lcmt00 = lcmt; lcmt -= pmb;
729  mblks = mblkd; mblkd--;
730  Aoffi = ioffd; XYoffi = ioffxy;
731  ioffd += mbloc; ioffxy += mbloc;
732  }
733 /*
734 * Operate with the lower triangular part of sub( A ).
735 */
736  tmp1 = m1 - ioffd + Aii - 1;
737  if( lower && ( tmp1 > 0 ) )
738  {
739  TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, inbloc, K, 0, ALPHA,
740  Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald,
741  X+(ioffxy+1)*Xinc, LDX, Y+(XYoffj+1)*Yinc, LDY );
742  }
743  tmp1 = Aoffi - Aii + 1;
744  m1 -= tmp1;
745  n1 -= inbloc;
746  lcmt00 += low - ilow + qnb;
747  nblks--;
748  Aoffj += inbloc;
749  XYoffj += inbloc;
750 /*
751 * Operate with the upper triangular part of sub( A ).
752 */
753  if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) )
754  {
755  TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA,
756  Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+XYii*Xinc, LDX,
757  Y+(XYoffj+1)*Yinc, LDY );
758  }
759  Aii = Aoffi + 1; Ajj = Aoffj + 1;
760  XYii = XYoffi + 1; XYjj = XYoffj + 1;
761  }
762  else if( GoEast )
763  {
764 /*
765 * Go one step east in the LCM table. Adjust the current LCM value as well as
766 * the local column index in A and XR.
767 */
768  lcmt00 += low - ilow + qnb; nblks--;
769  Aoffj += inbloc; XYoffj += inbloc;
770 /*
771 * While there are blocks remaining that own lower entries, keep going east.
772 * Adjust the current LCM value as well as the local column index in A and XR.
773 */
774  while( ( nblks > 0 ) && ( lcmt00 < low ) )
775  { lcmt00 += qnb; nblks--; Aoffj += Anb; XYoffj += Anb; }
776 /*
777 * Operate with the lower triangular part of sub( A ).
778 */
779  tmp1 = MIN( Aoffj, jjmax ) - Ajj + 1;
780  if( lower && ( tmp1 > 0 ) )
781  {
782  TRM( TYPE, SIDE, ALL, TRANS, DIAG, m1, tmp1, K, 0, ALPHA,
783  Mptr( A, Aii, Ajj, Ald, size ), Ald, X+XYii*Xinc, LDX,
784  Y+XYjj*Yinc, LDY );
785  Ajj += tmp1; XYjj += tmp1; n1 -= tmp1;
786  }
787 /*
788 * Return if no more column in the LCM table.
789 */
790  if( nblks <= 0 ) return;
791 /*
792 * lcmt00 >= low. The current block owns either diagonals or upper entries.
793 * Save the current position in the LCM table. After this row has been
794 * completely taken care of, re-start from this column and the next row of
795 * the LCM table.
796 */
797  lcmt = lcmt00; nblkd = nblks; joffd = Aoffj; joffxy = XYoffj;
798 
799  nbloc = Anb;
800  while( ( nblkd > 0 ) && ( lcmt <= iupp ) )
801  {
802 /*
803 * A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found.
804 */
805  if( nblkd == 1 ) nbloc = lnbloc;
806  TRM( TYPE, SIDE, UPLO, TRANS, DIAG, imbloc, nbloc, K, lcmt,
807  ALPHA, Mptr( A, Aii, joffd+1, Ald, size ), Ald, X+XYii*Xinc,
808  LDX, Y+(joffxy+1)*Yinc, LDY );
809  lcmt00 = lcmt; lcmt += qnb;
810  nblks = nblkd; nblkd--;
811  Aoffj = joffd; XYoffj = joffxy;
812  joffd += nbloc; joffxy += nbloc;
813  }
814 /*
815 * Operate with the upper triangular part of sub( A ).
816 */
817  tmp1 = n1 - joffd + Ajj - 1;
818  if( upper && ( tmp1 > 0 ) )
819  {
820  TRM( TYPE, SIDE, ALL, TRANS, DIAG, imbloc, tmp1, K, 0, ALPHA,
821  Mptr( A, Aii, joffd+1, Ald, size ), Ald, X+XYii*Xinc, LDX,
822  Y+(joffxy+1)*Yinc, LDY );
823  }
824  tmp1 = Aoffj - Ajj + 1;
825  m1 -= imbloc;
826  n1 -= tmp1;
827  lcmt00 -= ( iupp - upp + pmb );
828  mblks--;
829  Aoffi += imbloc;
830  XYoffi += imbloc;
831 /*
832 * Operate with the lower triangular part of sub( A ).
833 */
834  if( lower && ( m1 > 0 ) && ( tmp1 > 0 ) )
835  {
836  TRM( TYPE, SIDE, ALL, TRANS, DIAG, m1, tmp1, K, 0, ALPHA,
837  Mptr( A, Aoffi+1, Ajj, Ald, size ), Ald, X+(XYoffi+1)*Xinc,
838  LDX, Y+XYjj*Yinc, LDY );
839  }
840  Aii = Aoffi + 1; Ajj = Aoffj + 1;
841  XYii = XYoffi + 1; XYjj = XYoffj + 1;
842  }
843 /*
844 * Loop over the remaining columns of the LCM table.
845 */
846  nbloc = Anb;
847  while( nblks > 0 )
848  {
849  if( nblks == 1 ) nbloc = lnbloc;
850 /*
851 * While there are blocks remaining that own upper entries, keep going south.
852 * Adjust the current LCM value as well as the local row index in A and XC.
853 */
854  while( ( mblks > 0 ) && ( lcmt00 > upp ) )
855  { lcmt00 -= pmb; mblks--; Aoffi += Amb; XYoffi += Amb; }
856 /*
857 * Operate with the upper triangular part of sub( A ).
858 */
859  tmp1 = MIN( Aoffi, iimax ) - Aii + 1;
860  if( upper && ( tmp1 > 0 ) )
861  {
862  TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA,
863  Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+XYii*Xinc, LDX,
864  Y+(XYoffj+1)*Yinc, LDY );
865  Aii += tmp1;
866  XYii += tmp1;
867  m1 -= tmp1;
868  }
869 /*
870 * Return if no more row in the LCM table.
871 */
872  if( mblks <= 0 ) return;
873 /*
874 * lcmt00 <= upp. The current block owns either diagonals or lower entries.
875 * Save the current position in the LCM table. After this column has been
876 * completely taken care of, re-start from this row and the next column of
877 * the LCM table.
878 */
879  lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; ioffxy = XYoffi;
880 
881  mbloc = Amb;
882  while( ( mblkd > 0 ) && ( lcmt >= low ) )
883  {
884 /*
885 * A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found.
886 */
887  if( mblkd == 1 ) mbloc = lmbloc;
888  TRM( TYPE, SIDE, UPLO, TRANS, DIAG, mbloc, nbloc, K, lcmt,
889  ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald,
890  X+(ioffxy+1)*Xinc, LDX, Y+(XYoffj+1)*Yinc, LDY );
891  lcmt00 = lcmt; lcmt -= pmb;
892  mblks = mblkd; mblkd--;
893  Aoffi = ioffd; XYoffi = ioffxy;
894  ioffd += mbloc; ioffxy += mbloc;
895  }
896 /*
897 * Operate with the lower triangular part of sub( A ).
898 */
899  tmp1 = m1 - ioffd + Aii - 1;
900  if( lower && ( tmp1 > 0 ) )
901  {
902  TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, nbloc, K, 0, ALPHA,
903  Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald,
904  X+(ioffxy+1)*Xinc, LDX, Y+(XYoffj+1)*Yinc, LDY );
905  }
906 
907  tmp1 = MIN( Aoffi, iimax ) - Aii + 1;
908  m1 -= tmp1;
909  n1 -= nbloc;
910  lcmt00 += qnb;
911  nblks--;
912  Aoffj += nbloc;
913  XYoffj += nbloc;
914 /*
915 * Operate with the upper triangular part of sub( A ).
916 */
917  if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) )
918  {
919  TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA,
920  Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+XYii*Xinc, LDX,
921  Y+(XYoffj+1)*Yinc, LDY );
922  }
923  Aii = Aoffi + 1; Ajj = Aoffj + 1;
924  XYii = XYoffi + 1; XYjj = XYoffj + 1;
925  }
926  }
927 /*
928 * End of PB_Cptrm
929 */
930 }
TYPE
#define TYPE
Definition: clamov.c:7
PB_Cptrm
void PB_Cptrm(PBTYP_T *TYPE, PBTYP_T *UTYP, char *SIDE, char *UPLO, char *TRANS, char *DIAG, int N, int K, char *ALPHA, char *A, int IA, int JA, int *DESCA, char *X, int LDX, char *Y, int LDY, TZTRM_T TRM)
Definition: PB_Cptrm.c:26
MB_
#define MB_
Definition: PBtools.h:43
NB_
#define NB_
Definition: PBtools.h:44
PB_Cainfog2l
void PB_Cainfog2l()
LLD_
#define LLD_
Definition: PBtools.h:47
PB_Cbinfo
void PB_Cbinfo()
CLOWER
#define CLOWER
Definition: PBblas.h:25
TZTRM_T
void(* TZTRM_T)()
Definition: pblas.h:427
CNOTRAN
#define CNOTRAN
Definition: PBblas.h:18
PBTYP_T::size
int size
Definition: pblas.h:329
ALL
#define ALL
Definition: PBblas.h:50
MIN
#define MIN(a_, b_)
Definition: PBtools.h:76
Cblacs_gridinfo
void Cblacs_gridinfo()
PBTYP_T
Definition: pblas.h:325
Mupcase
#define Mupcase(C)
Definition: PBtools.h:83
CUPPER
#define CUPPER
Definition: PBblas.h:26
Mptr
#define Mptr(a_, i_, j_, lda_, siz_)
Definition: PBtools.h:132
CLEFT
#define CLEFT
Definition: PBblas.h:29
CTXT_
#define CTXT_
Definition: PBtools.h:38