ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
PB_Cptrsm.c
Go to the documentation of this file.
1 /* ---------------------------------------------------------------------
2 *
3 * -- PBLAS auxiliary routine (version 2.0) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * April 1, 1998
7 *
8 * ---------------------------------------------------------------------
9 */
10 /*
11 * Include files
12 */
13 #include "../pblas.h"
14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
18 
19 #ifdef __STDC__
20 void PB_Cptrsm( PBTYP_T * TYPE, int FBCAST, char * SIDE, char * UPLO,
21  char * TRANS, char * DIAG, int M, int N, char * ALPHA,
22  char * A, int IA, int JA, int * DESCA, char * BC,
23  int LDBC, char * BR, int LDBR )
24 #else
25 void PB_Cptrsm( TYPE, FBCAST, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA,
26  A, IA, JA, DESCA, BC, LDBC, BR, LDBR )
27 /*
28 * .. Scalar Arguments ..
29 */
30  char * ALPHA, * DIAG, * SIDE, * TRANS, * UPLO;
31  int FBCAST, IA, JA, LDBC, LDBR, M, N;
32  PBTYP_T * TYPE;
33 /*
34 * .. Array Arguments ..
35 */
36  int * DESCA;
37  char * A, * BC, * BR;
38 #endif
39 {
40 /*
41 * Purpose
42 * =======
43 *
44 * PB_Cptrsm solves one of the matrix equations
45 *
46 * op( sub( A ) ) * X = B, or X * op( sub( A ) ) = alpha * B,
47 *
48 * where
49 *
50 * sub( A ) denotes A(IA:IA+M-1,JA:JA+M-1) if SIDE = 'L',
51 * A(IA:IA+N-1,JA:JA+N-1) if SIDE = 'R'.
52 *
53 * X and B are m by n submatrices, sub( A ) is a unit, or non-unit,
54 * upper or lower triangular submatrix and op( Y ) is one of
55 *
56 * op( Y ) = Y or op( Y ) = Y' or op( Y ) = conjg( Y' ).
57 *
58 * The submatrix X is overwritten on B.
59 *
60 * No test for singularity or near-singularity is included in this
61 * routine. Such tests must be performed before calling this routine.
62 *
63 * Notes
64 * =====
65 *
66 * A description vector is associated with each 2D block-cyclicly dis-
67 * tributed matrix. This vector stores the information required to
68 * establish the mapping between a matrix entry and its corresponding
69 * process and memory location.
70 *
71 * In the following comments, the character _ should be read as
72 * "of the distributed matrix". Let A be a generic term for any 2D
73 * block cyclicly distributed matrix. Its description vector is DESC_A:
74 *
75 * NOTATION STORED IN EXPLANATION
76 * ---------------- --------------- ------------------------------------
77 * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
78 * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
79 * the NPROW x NPCOL BLACS process grid
80 * A is distributed over. The context
81 * itself is global, but the handle
82 * (the integer value) may vary.
83 * M_A (global) DESCA[ M_ ] The number of rows in the distribu-
84 * ted matrix A, M_A >= 0.
85 * N_A (global) DESCA[ N_ ] The number of columns in the distri-
86 * buted matrix A, N_A >= 0.
87 * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
88 * block of the matrix A, IMB_A > 0.
89 * INB_A (global) DESCA[ INB_ ] The number of columns of the upper
90 * left block of the matrix A,
91 * INB_A > 0.
92 * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
93 * bute the last M_A-IMB_A rows of A,
94 * MB_A > 0.
95 * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
96 * bute the last N_A-INB_A columns of
97 * A, NB_A > 0.
98 * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
99 * row of the matrix A is distributed,
100 * NPROW > RSRC_A >= 0.
101 * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
102 * first column of A is distributed.
103 * NPCOL > CSRC_A >= 0.
104 * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
105 * array storing the local blocks of
106 * the distributed matrix A,
107 * IF( Lc( 1, N_A ) > 0 )
108 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
109 * ELSE
110 * LLD_A >= 1.
111 *
112 * Let K be the number of rows of a matrix A starting at the global in-
113 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
114 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
115 * receive if these K rows were distributed over NPROW processes. If K
116 * is the number of columns of a matrix A starting at the global index
117 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
118 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
119 * these K columns were distributed over NPCOL processes.
120 *
121 * The values of Lr() and Lc() may be determined via a call to the func-
122 * tion PB_Cnumroc:
123 * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
124 * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
125 *
126 * Arguments
127 * =========
128 *
129 * TYPE (local input) pointer to a PBTYP_T structure
130 * On entry, TYPE is a pointer to a structure of type PBTYP_T,
131 * that contains type information (See pblas.h).
132 *
133 * FBCAST (global input) INTEGER
134 * On entry, FBCAST specifies whether the transposed of the vec-
135 * tor solution should be broadcast or not when there is a pos-
136 * sible ambiguity, i.e. when sub( A ) is just one block. When
137 * FBCAST is zero, the solution vector is not broadcast, and the
138 * the solution vector is broadcast otherwise.
139 *
140 * SIDE (global input) pointer to CHAR
141 * On entry, SIDE specifies whether op( sub( A ) ) appears on
142 * the left or right of X as follows:
143 *
144 * SIDE = 'L' or 'l' op( sub( A ) ) * X = B,
145 *
146 * SIDE = 'R' or 'r' X * op( sub( A ) ) = B.
147 *
148 * UPLO (global input) pointer to CHAR
149 * On entry, UPLO specifies whether the submatrix sub( A ) is
150 * an upper or lower triangular submatrix as follows:
151 *
152 * UPLO = 'U' or 'u' sub( A ) is an upper triangular
153 * submatrix,
154 *
155 * UPLO = 'L' or 'l' sub( A ) is a lower triangular
156 * submatrix.
157 *
158 * TRANS (global input) pointer to CHAR
159 * On entry, TRANS specifies the operation to be performed as
160 * follows:
161 *
162 * TRANS = 'N' or 'n' sub( A ) * X = B,
163 *
164 * TRANS = 'T' or 't' sub( A )' * X = B,
165 *
166 * TRANS = 'C' or 'c' conjg( sub( A )' ) * X = B.
167 *
168 * DIAG (global input) pointer to CHAR
169 * On entry, DIAG specifies whether or not sub( A ) is unit
170 * triangular as follows:
171 *
172 * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian-
173 * gular,
174 *
175 * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri-
176 * angular.
177 *
178 * M (global input) INTEGER
179 * On entry, M specifies the number of rows of the submatrix B.
180 * M must be at least zero.
181 *
182 * N (global input) INTEGER
183 * On entry, N specifies the number of columns of the submatrix
184 * B. N must be at least zero.
185 *
186 * A (local input) pointer to CHAR
187 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is
188 * at least Lc( 0, JA+M-1 ) when SIDE = 'L' or 'l' and is at
189 * least Lc( 0, JA+N-1 ) otherwise. Before entry, this array
190 * contains the local entries of the matrix A.
191 * Before entry with UPLO = 'U' or 'u', this array contains the
192 * local entries corresponding to the entries of the upper tri-
193 * angular submatrix sub( A ), and the local entries correspon-
194 * ding to the entries of the strictly lower triangular part of
195 * the submatrix sub( A ) are not referenced.
196 * Before entry with UPLO = 'L' or 'l', this array contains the
197 * local entries corresponding to the entries of the lower tri-
198 * angular submatrix sub( A ), and the local entries correspon-
199 * ding to the entries of the strictly upper triangular part of
200 * the submatrix sub( A ) are not referenced.
201 * Note that when DIAG = 'U' or 'u', the local entries corres-
202 * ponding to the diagonal elements of the submatrix sub( A )
203 * are not referenced either, but are assumed to be unity.
204 *
205 * IA (global input) INTEGER
206 * On entry, IA specifies A's global row index, which points to
207 * the beginning of the submatrix sub( A ).
208 *
209 * JA (global input) INTEGER
210 * On entry, JA specifies A's global column index, which points
211 * to the beginning of the submatrix sub( A ).
212 *
213 * DESCA (global and local input) INTEGER array
214 * On entry, DESCA is an integer array of dimension DLEN_. This
215 * is the array descriptor for the matrix A.
216 *
217 * BC (local input/local output) pointer to CHAR
218 * On entry, BC is an array of dimension (LDBC,Kbc), where Kbc
219 * is at least N when SIDE is 'L' or 'l' and at least M other-
220 * wise. Before entry, when SIDE is 'L' or 'l' and TRANS is 'N'
221 * or 'n' or SIDE is 'R' or 'r' and TRANS is not 'N' or 'n',
222 * this array contains the local entries of the right-hand-side
223 * matrix B. Otherwise, the entries of BC should be zero. On
224 * exit, this array contains the partial solution matrix X.
225 *
226 * LDBC (local input) INTEGER
227 * On entry, LDBC specifies the leading dimension of the array
228 * BC. LDBC must be at least MAX( 1, Lr( IA, M ) ) when SIDE
229 * is 'L' or 'l' and at least MAX( 1, Lr( IA, N ) ) otherwise.
230 *
231 * BR (local input/local output) pointer to CHAR
232 * On entry, BR is an array of dimension (LDBR,Kbr), where Kbr
233 * is at least Lc( JA, M ) when SIDE is 'L' or 'l' and at least
234 * Lc( JA, N ) otherwise. Before entry, when SIDE is 'L' or 'l'
235 * and TRANS is 'N' or 'n' or SIDE is 'R' or 'r' and TRANS is
236 * not 'N' or 'n', the entries of BR should be zero. Otherwise,
237 * this array contains the local entries of the right-hand-side
238 * matrix B. On exit, this array contains the partial solution
239 * matrix X.
240 *
241 * LDBR (local input) INTEGER
242 * On entry, LDBR specifies the leading dimension of the array
243 * BR. LDBR must be at least MAX( 1, N ) when SIDE is 'L' or 'l'
244 * and at least MAX( 1, M ) otherwise.
245 *
246 * -- Written on April 1, 1998 by
247 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
248 *
249 * ---------------------------------------------------------------------
250 */
251 /*
252 * .. Local Scalars ..
253 */
254  char btop, * negone, * one, * talpha1, * talpha2, * zero;
255  int Acol, Aii, Aimb1, Ainb1, Ais1Col, Ais1Row, AisColRep,
256  AisRowRep, Ajj, Alcol, Ald, Alrow, Amb, Anpprev, Anb, Anp,
257  Anq, Arow, Asrc, ChangeRoc=0, LNorRT, Na, Nb, bcst, ctxt,
258  izero=0, k=0, kb, kbprev=0, kbsize, lside, mb1, mycol, myrow,
259  n1, n1last, n1p, n1pprev=0, nb1, nlast, notran, npcol, nprow,
260  rocprev, size, tmp1, tmp2;
261  MMADD_T add, tadd;
262  TZPAD_T pad;
263  GEMM_T gemm;
264  TRSM_T trsm;
265  GESD2D_T send;
266  GERV2D_T recv;
267  GEBS2D_T bsend;
268  GEBR2D_T brecv;
269 /*
270 * .. Local Arrays ..
271 */
272  char * Aprev = NULL, * Bd = NULL, * Bdprev = NULL,
273  * Bprev = NULL, * work = NULL;
274 /* ..
275 * .. Executable Statements ..
276 *
277 */
278  if( ( M <= 0 ) || ( N <= 0 ) ) return;
279 /*
280 * Retrieve process grid information
281 */
282  Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
283 
284  lside = ( Mupcase( SIDE [0] ) == CLEFT );
285  notran = ( Mupcase( TRANS[0] ) == CNOTRAN );
286  LNorRT = ( lside && notran ) || ( !( lside ) && !( notran ) );
287  if( LNorRT ) { Na = M; Nb = N; } else { Na = N; Nb = M; }
288 /*
289 * Retrieve sub( A )'s local information: Aii, Ajj, Arow, Acol ...
290 */
291  PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Arow,
292  &Acol );
293 /*
294 * Determine if sub( A ) spans more than one process row, and/or more than one
295 * process column.
296 */
297  Amb = DESCA[MB_]; Anb = DESCA[NB_]; Ald = DESCA[LLD_ ];
298  Aimb1 = PB_Cfirstnb( Na, IA, DESCA[IMB_], Amb );
299  Anp = PB_Cnumroc( Na, 0, Aimb1, Amb, myrow, Arow, nprow );
300  Ais1Row = !( PB_Cspan( Na, 0, Aimb1, Amb, Arow, nprow ) );
301  Ainb1 = PB_Cfirstnb( Na, JA, DESCA[INB_], Anb );
302  Anq = PB_Cnumroc( Na, 0, Ainb1, Anb, mycol, Acol, npcol );
303  Ais1Col = !( PB_Cspan( Na, 0, Ainb1, Anb, Acol, npcol ) );
304 /*
305 * When sub( A ) spans only one process, solve the system locally and return.
306 */
307  if( Ais1Row && Ais1Col )
308  {
309  if( LNorRT )
310  {
311  if( Anq > 0 )
312  {
313  if( Anp > 0 )
314  {
315  TYPE->Ftrsm( C2F_CHAR( ( notran ? SIDE : ( lside ? RIGHT :
316  LEFT ) ) ), C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ),
317  C2F_CHAR( DIAG ), &M, &N, ALPHA, Mptr( A, Aii, Ajj,
318  Ald, TYPE->size ), &Ald, BC, &LDBC );
319  TYPE->Fmmtadd( &M, &N, TYPE->one, BC, &LDBC, TYPE->zero, BR,
320  &LDBR );
321  }
322  if( ( Arow >= 0 ) && FBCAST )
323  {
324  btop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
325  if( myrow == Arow )
326  TYPE->Cgebs2d( ctxt, COLUMN, &btop, N, M, BR, LDBR );
327  else
328  TYPE->Cgebr2d( ctxt, COLUMN, &btop, N, M, BR, LDBR, Arow,
329  mycol );
330  }
331  }
332  }
333  else
334  {
335  if( Anp > 0 )
336  {
337  if( Anq > 0 )
338  {
339  TYPE->Ftrsm( C2F_CHAR( ( notran ? SIDE : ( lside ? RIGHT :
340  LEFT ) ) ), C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ),
341  C2F_CHAR( DIAG ), &M, &N, ALPHA, Mptr( A, Aii, Ajj,
342  Ald, TYPE->size ), &Ald, BR, &LDBR );
343  TYPE->Fmmtadd( &M, &N, TYPE->one, BR, &LDBR, TYPE->zero, BC,
344  &LDBC );
345  }
346  if( ( Acol >= 0 ) && FBCAST )
347  {
348  btop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET );
349  if( mycol == Acol )
350  TYPE->Cgebs2d( ctxt, ROW, &btop, N, M, BC, LDBC );
351  else
352  TYPE->Cgebr2d( ctxt, ROW, &btop, N, M, BC, LDBC, myrow,
353  Acol );
354  }
355  }
356  }
357  return;
358  }
359 /*
360 * Retrieve from TYPE structure useful BLAS and BLACS functions.
361 */
362  size = TYPE->size;
363  negone = TYPE->negone; one = TYPE->one; zero = TYPE->zero;
364  add = TYPE->Fmmadd; tadd = TYPE->Fmmtadd; pad = TYPE->Ftzpad;
365  gemm = TYPE->Fgemm; trsm = TYPE->Ftrsm;
366  send = TYPE->Cgesd2d; recv = TYPE->Cgerv2d;
367  bsend = TYPE->Cgebs2d; brecv = TYPE->Cgebr2d;
368 
369  if( ( Anp > 0 ) && ( Anq > 0 ) ) A = Mptr( A, Aii, Ajj, Ald, size );
370 
371  if( LNorRT )
372  {
373 /*
374 * Left - No tran or Right - (co)Trans
375 */
376  if( ( Anq <= 0 ) || ( Ais1Row && ( ( Arow >= 0 ) && !( FBCAST ) &&
377  ( myrow != Arow ) ) ) ) return;
378  btop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
379  bcst = ( ( !Ais1Row ) || ( Ais1Row && ( Arow >= 0 ) && FBCAST ) );
380  AisRowRep = ( ( Arow < 0 ) || ( nprow == 1 ) );
381 
382  if( Mupcase( UPLO[0] ) == CUPPER )
383  {
384 /*
385 * Initiate lookahead
386 */
387  nlast = ( npcol - 1 ) * Anb;
388  n1 = MAX( nlast, Anb );
389  nlast += Ainb1;
390  n1last = n1 - Anb + MAX( Ainb1, Anb );
391  work = PB_Cmalloc( Nb * MIN( n1last, Anp ) * size );
392  tmp1 = Na-1;
393  Alrow = PB_Cindxg2p( tmp1, Aimb1, Amb, Arow, Arow, nprow );
394  Alcol = PB_Cindxg2p( tmp1, Ainb1, Anb, Acol, Acol, npcol );
395  rocprev = Alcol; Anpprev = Anp; Bprev = BC; Bdprev = BR;
396  Aprev = A = Mptr( A, 0, Anq, Ald, size );
397  mb1 = PB_Clastnb( Na, 0, Aimb1, Amb );
398  nb1 = PB_Clastnb( Na, 0, Ainb1, Anb );
399  tmp1 = Na - ( kb = MIN( mb1, nb1 ) );
400  n1 = ( ( Ais1Col || ( Na - nb1 < nlast ) ) ? n1last : n1 );
401  tmp2 = n1 + nb1 - kb; tmp1 -= ( tmp2 = MIN( tmp1, tmp2 ) );
402  Asrc = Arow;
403  n1p = PB_Cnumroc( tmp2, MAX( 0, tmp1 ), Aimb1, Amb, myrow, Asrc,
404  nprow );
405  talpha1 = talpha2 = ( ( Ais1Col || ( mycol == Alcol ) ) ?
406  ALPHA : one );
407  while( Na > 0 )
408  {
409  kbsize = kb * size;
410 
411  if( Ais1Col || ( mycol == Alcol ) )
412  { A -= Ald*kbsize; Anq -= kb; Bd = Mptr( BR, 0, Anq, LDBR, size ); }
413  if( ( Arow < 0 ) || ( myrow == Alrow ) ) { Anp -= kb; }
414 /*
415 * Partial update of previous block
416 */
417  if( n1pprev > 0 )
418  {
419  if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) )
420  {
421  tmp1 = ( Anpprev - n1pprev ) * size;
422  gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &n1pprev, &Nb,
423  &kbprev, negone, Aprev+tmp1, &Ald, Bdprev, &LDBR,
424  talpha1, Bprev+tmp1, &LDBC );
425  }
426 /*
427 * Send partial updated result to current column
428 */
429  if( !( Ais1Col ) && ChangeRoc )
430  {
431  if( mycol == rocprev )
432  {
433  send( ctxt, n1pprev, Nb, Mptr( Bprev, Anpprev-n1pprev, 0,
434  LDBC, size ), LDBC, myrow, Alcol );
435  }
436  else if( mycol == Alcol )
437  {
438  recv( ctxt, n1pprev, Nb, work, n1pprev, myrow, rocprev );
439  add( &n1pprev, &Nb, one, work, &n1pprev, one, Mptr( Bprev,
440  Anpprev-n1pprev, 0, LDBC, size ), &LDBC );
441  }
442  }
443  }
444 /*
445 * Solve current diagonal block
446 */
447  if( Ais1Col || ( mycol == Alcol ) )
448  {
449  if( AisRowRep || ( myrow == Alrow ) )
450  {
451  trsm( C2F_CHAR( LEFT ), C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ),
452  C2F_CHAR( DIAG ), &kb, &Nb, talpha2, Mptr( A, Anp, 0,
453  Ald, size ), &Ald, Mptr( BC, Anp, 0, LDBC, size ),
454  &LDBC );
455  tadd( &kb, &Nb, one, Mptr( BC, Anp, 0, LDBC, size ), &LDBC,
456  zero, Mptr( BR, 0, Anq, LDBR, size ), &LDBR );
457  }
458  if( bcst )
459  {
460  if( myrow == Alrow )
461  bsend( ctxt, COLUMN, &btop, Nb, kb, Mptr( BR, 0, Anq, LDBR,
462  size ), LDBR );
463  else
464  brecv( ctxt, COLUMN, &btop, Nb, kb, Mptr( BR, 0, Anq, LDBR,
465  size ), LDBR, Alrow, mycol );
466  }
467  talpha2 = one;
468  }
469  else
470  {
471  if( !( Ais1Col ) && ( AisRowRep || ( myrow == Alrow ) ) )
472  pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &kb, &Nb, &izero,
473  zero, zero, Mptr( BC, Anp, 0, LDBC, size ), &LDBC );
474  }
475 /*
476 * Finish previous update
477 */
478  if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) )
479  {
480  if( ( tmp1 = Anpprev - n1pprev ) > 0 )
481  gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &tmp1, &Nb,
482  &kbprev, negone, Aprev, &Ald, Bdprev, &LDBR, talpha1,
483  Bprev, &LDBC );
484  talpha1 = one;
485  }
486 /*
487 * Save info of current step and update info for the next step
488 */
489  if( Ais1Col || ( mycol == Alcol ) ) { Bdprev = Bd; Aprev = A; }
490  if( AisRowRep || ( myrow == Alrow ) ) { Anpprev -= kb; }
491 
492  n1pprev = n1p;
493  rocprev = Alcol;
494  kbprev = kb;
495  k += kb;
496  Na -= kb;
497 
498  mb1 -= kb;
499  if( mb1 == 0 )
500  {
501  if( !( Ais1Row ) && ( Alrow >= 0 ) )
502  Alrow = MModSub1( Alrow, nprow );
503  mb1 = ( Na > Aimb1 ? Amb : Aimb1 );
504  }
505 
506  nb1 -= kb;
507  ChangeRoc = ( nb1 == 0 );
508 
509  if( ChangeRoc )
510  {
511  if( !( Ais1Col ) && ( Alcol >= 0 ) )
512  Alcol = MModSub1( Alcol, npcol );
513  nb1 = ( Na > Ainb1 ? Anb : Ainb1 );
514  }
515  tmp1 = Na - ( kb = MIN( mb1, nb1 ) );
516  n1 = ( ( Ais1Col || ( Na-nb1 < nlast ) ) ? n1last : n1 );
517  tmp2 = n1 + nb1 - kb; tmp1 -= ( tmp2 = MIN( tmp1, tmp2 ) );
518  n1p = PB_Cnumroc( tmp2, MAX( 0, tmp1 ), Aimb1, Amb, myrow, Asrc,
519  nprow );
520  }
521  }
522  else
523  {
524 /*
525 * Initiate lookahead
526 */
527  n1 = ( MAX( npcol, 2 ) - 1 ) * Anb;
528  work = PB_Cmalloc( Nb*MIN( n1, Anp )*size );
529  Aprev = A; Bprev = BC, Bdprev = BR; Anpprev = Anp;
530  mb1 = Aimb1; nb1 = Ainb1; rocprev = Acol;
531  tmp1 = Na - ( kb = MIN( mb1, nb1 ) ); tmp2 = n1 + nb1 - kb;
532  Asrc = Arow;
533  n1p = PB_Cnumroc( MIN( tmp1, tmp2 ), kb, Aimb1, Amb, myrow, Asrc,
534  nprow );
535  talpha1 = talpha2 = ( ( Ais1Col || ( mycol == Acol ) ) ?
536  ALPHA : one );
537  while( kb > 0 )
538  {
539  kbsize = kb * size;
540 /*
541 * Partial update of previous block
542 */
543  if( n1pprev > 0 )
544  {
545  if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) )
546  gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &n1pprev, &Nb,
547  &kbprev, negone, Aprev, &Ald, Bdprev, &LDBR, talpha1,
548  Bprev, &LDBC );
549 /*
550 * Send partial updated result to current column
551 */
552  if( !( Ais1Col ) && ChangeRoc )
553  {
554  if( mycol == rocprev )
555  {
556  send( ctxt, n1pprev, Nb, Bprev, LDBC, myrow, Acol );
557  }
558  else if( mycol == Acol )
559  {
560  recv( ctxt, n1pprev, Nb, work, n1pprev, myrow, rocprev );
561  add( &n1pprev, &Nb, one, work, &n1pprev, one, Bprev,
562  &LDBC );
563  }
564  }
565  }
566 /*
567 * Solve current diagonal block
568 */
569  if( Ais1Col || ( mycol == Acol ) )
570  {
571  if( AisRowRep || ( myrow == Arow ) )
572  {
573  trsm( C2F_CHAR( LEFT ), C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ),
574  C2F_CHAR( DIAG ), &kb, &Nb, talpha2, A, &Ald, BC,
575  &LDBC );
576  tadd( &kb, &Nb, one, BC, &LDBC, zero, BR, &LDBR );
577  }
578  if( bcst )
579  {
580  if( myrow == Arow )
581  bsend( ctxt, COLUMN, &btop, Nb, kb, BR, LDBR );
582  else
583  brecv( ctxt, COLUMN, &btop, Nb, kb, BR, LDBR, Arow,
584  mycol );
585  }
586  talpha2 = one;
587  }
588  else
589  {
590  if( !( Ais1Col ) && ( AisRowRep || ( myrow == Arow ) ) )
591  pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &kb, &Nb, &izero,
592  zero, zero, BC, &LDBC );
593  }
594 /*
595 * Finish previous update
596 */
597  if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) )
598  {
599  if( ( tmp1 = Anpprev - n1pprev ) > 0 )
600  {
601  tmp2 = n1pprev * size;
602  gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRAN ), &tmp1, &Nb,
603  &kbprev, negone, Aprev+tmp2, &Ald, Bdprev, &LDBR,
604  talpha1, Bprev+tmp2, &LDBC );
605  }
606  Aprev += Ald * kbprev * size; talpha1 = one;
607  }
608 /*
609 * Save info of current step and update info for the next step
610 */
611  if( Ais1Col || ( mycol == Acol ) )
612  { A += Ald*kbsize; Bdprev = Bd = BR; BR += LDBR*kbsize; }
613  if( AisRowRep || ( myrow == Arow ) )
614  {
615  Bprev = ( BC += kbsize );
616  A += kbsize;
617  Aprev += kbsize;
618  Anpprev = ( Anp -= kb );
619  }
620  n1pprev = n1p;
621  rocprev = Acol;
622  kbprev = kb;
623  k += kb;
624  Na -= kb;
625 
626  mb1 -= kb;
627  if( mb1 == 0 )
628  {
629  if( !( Ais1Row ) && ( Arow >= 0 ) )
630  Arow = MModAdd1( Arow, nprow );
631  mb1 = MIN( Amb, Na );
632  }
633 
634  nb1 -= kb;
635  ChangeRoc = ( nb1 == 0 );
636 
637  if( ChangeRoc )
638  {
639  if( !( Ais1Col ) && ( Acol >= 0 ) )
640  Acol = MModAdd1( Acol, npcol );
641  nb1 = MIN( Anb, Na );
642  }
643  tmp1 = Na - ( kb = MIN( mb1, nb1 ) ); tmp2 = n1 + nb1 - kb;
644  n1p = PB_Cnumroc( MIN( tmp2, tmp1 ), k + kb, Aimb1, Amb, myrow,
645  Asrc, nprow );
646  }
647  }
648  }
649  else
650  {
651 /*
652 * Right - No tran or Left - (co)Trans
653 */
654  if( ( Anp <= 0 ) || ( Ais1Col && ( ( Acol >= 0 ) && !( FBCAST ) &&
655  ( mycol != Acol ) ) ) ) return;
656  btop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET );
657  bcst = ( ( !Ais1Col ) || ( Ais1Col && ( Acol >= 0 ) && FBCAST ) );
658  AisColRep = ( ( Acol < 0 ) || ( npcol == 1 ) );
659 
660  if( Mupcase( UPLO[0] ) == CUPPER )
661  {
662 /*
663 * Initiate lookahead
664 */
665  n1 = ( MAX( nprow, 2 ) - 1 ) * Amb;
666  work = PB_Cmalloc( Nb*MIN( n1, Anq )*size );
667  Aprev = A; Bprev = BR, Bdprev = BC; Anpprev = Anq;
668  mb1 = Aimb1; nb1 = Ainb1; rocprev = Arow;
669  tmp1 = Na - ( kb = MIN( mb1, nb1 ) ); tmp2 = n1 + mb1 - kb;
670  Asrc = Acol;
671  n1p = PB_Cnumroc( MIN( tmp1, tmp2 ), kb, Ainb1, Anb, mycol, Asrc,
672  npcol );
673  talpha1 = talpha2 = ( ( Ais1Row || ( myrow == Arow ) ) ?
674  ALPHA : one );
675  while( kb > 0 )
676  {
677  kbsize = kb * size;
678 /*
679 * Partial update of previous block
680 */
681  if( n1pprev > 0 )
682  {
683  if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) )
684  gemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &Nb, &n1pprev,
685  &kbprev, negone, Bdprev, &LDBC, Aprev, &Ald, talpha1,
686  Bprev, &LDBR );
687 /*
688 * Send partial updated result to current row
689 */
690  if( !( Ais1Row ) && ChangeRoc )
691  {
692  if( myrow == rocprev )
693  {
694  send( ctxt, Nb, n1pprev, Bprev, LDBR, Arow, mycol );
695  }
696  else if( myrow == Arow )
697  {
698  recv( ctxt, Nb, n1pprev, work, Nb, rocprev, mycol );
699  add( &Nb, &n1pprev, one, work, &Nb, one, Bprev, &LDBR );
700  }
701  }
702  }
703 /*
704 * Solve current diagonal block
705 */
706  if( Ais1Row || ( myrow == Arow ) )
707  {
708  if( AisColRep || ( mycol == Acol ) )
709  {
710  trsm( C2F_CHAR( RIGHT ), C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ),
711  C2F_CHAR( DIAG ), &Nb, &kb, talpha2, A, &Ald, BR,
712  &LDBR );
713  tadd( &Nb, &kb, one, BR, &LDBR, zero, BC, &LDBC );
714  }
715  if( bcst )
716  {
717  if( mycol == Acol )
718  bsend( ctxt, ROW, &btop, kb, Nb, BC, LDBC );
719  else
720  brecv( ctxt, ROW, &btop, kb, Nb, BC, LDBC, myrow, Acol );
721  }
722  talpha2 = one;
723  }
724  else
725  {
726  if( !( Ais1Row ) && ( AisColRep || ( mycol == Acol ) ) )
727  pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Nb, &kb, &izero,
728  zero, zero, BR, &LDBR );
729  }
730 /*
731 * Finish previous update
732 */
733  if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) )
734  {
735  if( ( tmp1 = Anpprev - n1pprev ) > 0 )
736  {
737  tmp2 = n1pprev * size;
738  gemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &Nb, &tmp1,
739  &kbprev, negone, Bdprev, &LDBC, Aprev+Ald*tmp2, &Ald,
740  talpha1, Bprev+LDBR*tmp2, &LDBR );
741  }
742  Aprev += kbprev * size; talpha1 = one;
743  }
744 /*
745 * Save info of current step and update info for the next step
746 */
747  if( Ais1Row || ( myrow == Arow ) )
748  { A += kbsize; Bdprev = Bd = BC; BC += kbsize; }
749  if( AisColRep || ( mycol == Acol ) )
750  {
751  Bprev = ( BR += LDBR * kbsize );
752  A += Ald * kbsize;
753  Anpprev = ( Anq -= kb );
754  Aprev += Ald * kbsize;
755  }
756  n1pprev = n1p;
757  rocprev = Arow;
758  kbprev = kb;
759  k += kb;
760  Na -= kb;
761 
762  nb1 -= kb;
763  if( nb1 == 0 )
764  {
765  if( !( Ais1Col ) && ( Acol >= 0 ) )
766  Acol = MModAdd1( Acol, npcol );
767  nb1 = MIN( Anb, Na );
768  }
769 
770  mb1 -= kb;
771  ChangeRoc = ( mb1 == 0 );
772 
773  if( ChangeRoc )
774  {
775  if( !( Ais1Row ) && ( Arow >= 0 ) )
776  Arow = MModAdd1( Arow, nprow );
777  mb1 = MIN( Amb, Na );
778  }
779  tmp1 = Na - ( kb = MIN( mb1, nb1 ) ); tmp2 = n1 + mb1 - kb;
780  n1p = PB_Cnumroc( MIN( tmp2, tmp1 ), k + kb, Ainb1, Anb, mycol,
781  Asrc, npcol );
782  }
783  }
784  else
785  {
786 /*
787 * Initiate lookahead
788 */
789  nlast = ( nprow - 1 ) * Amb;
790  n1 = MAX( nlast, Amb );
791  nlast += Aimb1;
792  n1last = n1 - Amb + MAX( Aimb1, Amb );
793  work = PB_Cmalloc( Nb * MIN( n1last, Anq ) * size );
794  tmp1 = Na-1;
795  Alrow = PB_Cindxg2p( tmp1, Aimb1, Amb, Arow, Arow, nprow );
796  Alcol = PB_Cindxg2p( tmp1, Ainb1, Anb, Acol, Acol, npcol );
797  rocprev = Alrow; Anpprev = Anq; Bprev = BR; Bdprev = BC;
798  Aprev = A = Mptr( A, Anp, 0, Ald, size );
799  mb1 = PB_Clastnb( Na, 0, Aimb1, Amb );
800  nb1 = PB_Clastnb( Na, 0, Ainb1, Anb );
801  tmp1 = Na - ( kb = MIN( mb1, nb1 ) );
802  n1 = ( ( Ais1Row || ( Na-mb1 < nlast ) ) ? n1last : n1 );
803  tmp2 = n1 + mb1 - kb; tmp1 -= ( tmp2 = MIN( tmp1, tmp2 ) );
804  Asrc = Acol;
805  n1p = PB_Cnumroc( tmp2, MAX( 0, tmp1 ), Ainb1, Anb, mycol, Asrc,
806  npcol );
807  talpha1 = talpha2 = ( ( Ais1Row || ( myrow == Alrow ) ) ?
808  ALPHA : one );
809  while( Na > 0 )
810  {
811  kbsize = kb * size;
812 
813  if( Ais1Row || ( myrow == Alrow ) )
814  { A -= kbsize; Anp -= kb; Bd = Mptr( BC, Anp, 0, LDBC, size ); }
815  if( ( Acol < 0 ) || ( mycol == Alcol ) ) { Anq -= kb; }
816 /*
817 * Partial update of previous block
818 */
819  if( n1pprev > 0 )
820  {
821  if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) )
822  {
823  tmp1 = ( Anpprev - n1pprev ) * size;
824  TYPE->Fgemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ),
825  &Nb, &n1pprev, &kbprev, negone, Bdprev,
826  &LDBC, Aprev+Ald*tmp1, &Ald, talpha1,
827  Bprev+LDBR*tmp1, &LDBR );
828  }
829 /*
830 * Send partial updated result to current row
831 */
832  if( !( Ais1Row ) && ChangeRoc )
833  {
834  if( myrow == rocprev )
835  {
836  send( ctxt, Nb, n1pprev, Mptr( Bprev, 0, Anpprev-n1pprev,
837  LDBR, size ), LDBR, Alrow, mycol );
838  }
839  else if( myrow == Alrow )
840  {
841  recv( ctxt, Nb, n1pprev, work, Nb, rocprev, mycol );
842  add( &Nb, &n1pprev, one, work, &Nb, one, Mptr( Bprev, 0,
843  Anpprev-n1pprev, LDBR, size ), &LDBR );
844  }
845  }
846  }
847 /*
848 * Solve current diagonal block
849 */
850  if( Ais1Row || ( myrow == Alrow ) )
851  {
852  if( AisColRep || ( mycol == Alcol ) )
853  {
854  trsm( C2F_CHAR( RIGHT ), C2F_CHAR( UPLO ), C2F_CHAR( NOTRAN ),
855  C2F_CHAR( DIAG ), &Nb, &kb, talpha2, Mptr( A, 0, Anq,
856  Ald, size ), &Ald, Mptr( BR, 0, Anq, LDBR, size ),
857  &LDBR );
858  tadd( &Nb, &kb, one, Mptr( BR, 0, Anq, LDBR, size ), &LDBR,
859  zero, Mptr( BC, Anp, 0, LDBC, size ), &LDBC );
860  }
861  if( bcst )
862  {
863  if( mycol == Alcol )
864  bsend( ctxt, ROW, &btop, kb, Nb, Mptr( BC, Anp, 0, LDBC,
865  size ), LDBC );
866  else
867  brecv( ctxt, ROW, &btop, kb, Nb, Mptr( BC, Anp, 0, LDBC,
868  size ), LDBC, myrow, Alcol );
869  }
870  talpha2 = one;
871  }
872  else
873  {
874  if( !( Ais1Row ) && ( AisColRep || ( mycol == Alcol ) ) )
875  pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Nb, &kb, &izero,
876  zero, zero, Mptr( BR, 0, Anq, LDBR, size ), &LDBR );
877  }
878 /*
879 * Finish previous update
880 */
881  if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) )
882  {
883  if( ( tmp1 = Anpprev - n1pprev ) > 0 )
884  gemm( C2F_CHAR( TRAN ), C2F_CHAR( NOTRAN ), &Nb, &tmp1,
885  &kbprev, negone, Bdprev, &LDBC, Aprev, &Ald, talpha1,
886  Bprev, &LDBR );
887  talpha1 = one;
888  }
889 /*
890 * Save info of current step and update info for the next step
891 */
892  if( Ais1Row || ( myrow == Alrow ) ) { Bdprev = Bd; Aprev = A; }
893  if( AisColRep || ( mycol == Alcol ) ) { Anpprev -= kb; }
894 
895  n1pprev = n1p;
896  rocprev = Alrow;
897  kbprev = kb;
898  k += kb;
899  Na -= kb;
900 
901  nb1 -= kb;
902  if( nb1 == 0 )
903  {
904  if( !( Ais1Col ) && ( Alcol >= 0 ) )
905  Alcol = MModSub1( Alcol, npcol );
906  nb1 = ( Na > Ainb1 ? Anb : Ainb1 );
907  }
908 
909  mb1 -= kb;
910  ChangeRoc = ( mb1 == 0 );
911 
912  if( ChangeRoc )
913  {
914  if( !( Ais1Row ) && ( Alrow >= 0 ) )
915  Alrow = MModSub1( Alrow, nprow );
916  mb1 = ( Na > Aimb1 ? Amb : Aimb1 );
917  }
918  tmp1 = Na - ( kb = MIN( mb1, nb1 ) );
919  n1 = ( ( Ais1Row || ( Na-mb1 < nlast ) ) ? n1last : n1 );
920  tmp2 = n1 + mb1 - kb; tmp1 -= ( tmp2 = MIN( tmp1, tmp2 ) );
921  n1p = PB_Cnumroc( tmp2, MAX( 0, tmp1 ), Ainb1, Anb, mycol, Asrc,
922  npcol );
923  }
924  }
925  }
926  if( work ) free( work );
927 /*
928 * End of PB_Cptrsm
929 */
930 }
GEBR2D_T
void(* GEBR2D_T)()
Definition: pblas.h:281
TYPE
#define TYPE
Definition: clamov.c:7
ROW
#define ROW
Definition: PBblacs.h:46
GESD2D_T
void(* GESD2D_T)()
Definition: pblas.h:278
MB_
#define MB_
Definition: PBtools.h:43
RIGHT
#define RIGHT
Definition: PBblas.h:56
PB_Cptrsm
void PB_Cptrsm(PBTYP_T *TYPE, int FBCAST, char *SIDE, char *UPLO, char *TRANS, char *DIAG, int M, int N, char *ALPHA, char *A, int IA, int JA, int *DESCA, char *BC, int LDBC, char *BR, int LDBR)
Definition: PB_Cptrsm.c:25
NB_
#define NB_
Definition: PBtools.h:44
COLUMN
#define COLUMN
Definition: PBblacs.h:45
NOCONJG
#define NOCONJG
Definition: PBblas.h:45
TRSM_T
F_VOID_FCT(* TRSM_T)()
Definition: pblas.h:321
PB_Cfirstnb
int PB_Cfirstnb()
PB_Clastnb
int PB_Clastnb()
GEMM_T
F_VOID_FCT(* GEMM_T)()
Definition: pblas.h:313
TRAN
#define TRAN
Definition: PBblas.h:46
NOTRAN
#define NOTRAN
Definition: PBblas.h:44
LLD_
#define LLD_
Definition: PBtools.h:47
TZPAD_T
F_VOID_FCT(* TZPAD_T)()
Definition: pblas.h:288
IMB_
#define IMB_
Definition: PBtools.h:41
MModAdd1
#define MModAdd1(I, d)
Definition: PBtools.h:100
MMADD_T
F_VOID_FCT(* MMADD_T)()
Definition: pblas.h:284
GERV2D_T
void(* GERV2D_T)()
Definition: pblas.h:279
LEFT
#define LEFT
Definition: PBblas.h:55
PB_Cindxg2p
int PB_Cindxg2p()
TOP_GET
#define TOP_GET
Definition: PBblacs.h:50
PB_Ctop
char * PB_Ctop()
CNOTRAN
#define CNOTRAN
Definition: PBblas.h:18
BCAST
#define BCAST
Definition: PBblacs.h:48
PB_Cinfog2l
void PB_Cinfog2l()
PB_Cnumroc
int PB_Cnumroc()
PB_Cmalloc
char * PB_Cmalloc()
ALL
#define ALL
Definition: PBblas.h:50
MIN
#define MIN(a_, b_)
Definition: PBtools.h:76
INB_
#define INB_
Definition: PBtools.h:42
C2F_CHAR
#define C2F_CHAR(a)
Definition: pblas.h:121
PB_Cspan
int PB_Cspan()
MModSub1
#define MModSub1(I, d)
Definition: PBtools.h:105
MAX
#define MAX(a_, b_)
Definition: PBtools.h:77
Cblacs_gridinfo
void Cblacs_gridinfo()
PBTYP_T
Definition: pblas.h:325
Mupcase
#define Mupcase(C)
Definition: PBtools.h:83
CUPPER
#define CUPPER
Definition: PBblas.h:26
Mptr
#define Mptr(a_, i_, j_, lda_, siz_)
Definition: PBtools.h:132
CLEFT
#define CLEFT
Definition: PBblas.h:29
CTXT_
#define CTXT_
Definition: PBtools.h:38
GEBS2D_T
void(* GEBS2D_T)()
Definition: pblas.h:280