ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
PB_CptrsmAB1.c
Go to the documentation of this file.
1 /* ---------------------------------------------------------------------
2 *
3 * -- PBLAS auxiliary routine (version 2.0) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * April 1, 1998
7 *
8 * ---------------------------------------------------------------------
9 */
10 /*
11 * Include files
12 */
13 #include "../pblas.h"
14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
18 
19 #ifdef __STDC__
20 void PB_CptrsmAB1( PBTYP_T * TYPE, char * SIDE, char * UPLO, char * TRANSA,
21  char * DIAG, int M, int N, char * ALPHA, char * A, int IA,
22  int JA, int * DESCA, char * B, int IB, int JB, int * DESCB,
23  char * C, int * DESCC )
24 #else
25 void PB_CptrsmAB1( TYPE, SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, IA, JA,
26  DESCA, B, IB, JB, DESCB, C, DESCC )
27 /*
28 * .. Scalar Arguments ..
29 */
30  char * DIAG, * SIDE, * TRANSA, * UPLO;
31  int IA, IB, JA, JB, M, N;
32  char * ALPHA;
33  PBTYP_T * TYPE;
34 /*
35 * .. Array Arguments ..
36 */
37  int * DESCA, * DESCB, * DESCC;
38  char * A, * B, * C;
39 #endif
40 {
41 /*
42 * .. Local Scalars ..
43 */
44  char * negone, * one;
45  int Acol, Acurcol, Acurrow, Aii, Aimb, Aimb1, Ainb, Ainb1, Ajj,
46  Ald, Almb1, Alnb1, Amb, Amp0, An, Anb, Anq0, Anxtrow, Anxtcol,
47  Arow, Bcol, Bii, Bimb, Binb, Bjj, Bld, Bmb, Bmp0, Bnb, Bnq0,
48  Brow, Cld, ctxt, k=1, kb, kblks, lside, mycol, myrow, npcol,
49  nprow, size, upper;
50  MMADD_T mmadd;
51  GERV2D_T recv;
52  GESD2D_T send;
53  GEMM_T gemm;
54  TRSM_T trsm;
55 /*
56 * .. Local Arrays ..
57 */
58  char * Aptr = NULL, * Aptr0 = NULL, * Bptr = NULL, * Bptr0 = NULL,
59  * Cptr = NULL;
60 /* ..
61 * .. Executable Statements ..
62 *
63 */
64  size = TYPE->size;
65  lside = ( Mupcase( SIDE[0] ) == CLEFT );
66 /*
67 * Retrieve process grid information
68 */
69  Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
70 /*
71 * Retrieve sub( A )'s local information: Aii, Ajj, Arow, Acol, Ald
72 */
73  Ald = DESCA[LLD_];
74  PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj,
75  &Arow, &Acol );
76 /*
77 * Retrieve sub( B )'s local information: Bii, Bjj, Brow, Bcol, Bld ...
78 */
79  Bimb = DESCB[IMB_]; Binb = DESCB[INB_];
80  Bmb = DESCB[MB_ ]; Bnb = DESCB[NB_ ]; Bld = DESCB[LLD_];
81  PB_Cinfog2l( IB, JB, DESCB, nprow, npcol, myrow, mycol, &Bii, &Bjj,
82  &Brow, &Bcol );
83 /*
84 * Shorcuts when sub( B ) spans only one process row or column
85 */
86  if( lside )
87  {
88  Bnq0 = PB_Cnumroc( N, JB, Binb, Bnb, mycol, DESCB[CSRC_], npcol );
89  if( Bnq0 <= 0 ) return;
90 
91  Bmp0 = PB_Cnumroc( M, IB, Bimb, Bmb, myrow, DESCB[RSRC_], nprow );
92 
93  if( !( PB_Cspan( M, IB, Bimb, Bmb, DESCB[RSRC_], nprow ) ) )
94  {
95  if( Bmp0 > 0 )
96  {
97  Bptr0 = Mptr( B, Bii, Bjj, Bld, size );
98  TYPE->Fmmadd( &M, &Bnq0, TYPE->negone, C, &DESCC[LLD_], ALPHA,
99  Bptr0, &Bld );
100  TYPE->Ftrsm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), C2F_CHAR( TRANSA ),
101  C2F_CHAR( DIAG ), &M, &Bnq0, TYPE->one, Mptr( A, Aii,
102  Ajj, Ald, size ), &Ald, Bptr0, &Bld );
103  }
104  return;
105  }
106  if( Bmp0 > 0 ) Bptr0 = Mptr( B, Bii, Bjj, Bld, size );
107  }
108  else
109  {
110  Bmp0 = PB_Cnumroc( M, IB, Bimb, Bmb, myrow, DESCB[RSRC_], nprow );
111  if( Bmp0 <= 0 ) return;
112 
113  Bnq0 = PB_Cnumroc( N, JB, Binb, Bnb, mycol, DESCB[CSRC_], npcol );
114 
115  if( !( PB_Cspan( N, JB, Binb, Bnb, DESCB[CSRC_], npcol ) ) )
116  {
117  if( Bnq0 > 0 )
118  {
119  Bptr0 = Mptr( B, Bii, Bjj, Bld, size );
120  TYPE->Fmmadd( &Bmp0, &N, TYPE->negone, C, &DESCC[LLD_], ALPHA,
121  Bptr0, &Bld );
122  TYPE->Ftrsm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), C2F_CHAR( TRANSA ),
123  C2F_CHAR( DIAG ), &Bmp0, &N, TYPE->one, Mptr( A, Aii,
124  Ajj, Ald, size ), &Ald, Bptr0, &Bld );
125  }
126  return;
127  }
128  if( Bnq0 > 0 ) Bptr0 = Mptr( B, Bii, Bjj, Bld, size );
129  }
130 /*
131 * Handle the general case now
132 */
133  An = ( lside ? M : N );
134  upper = ( Mupcase( UPLO[0] ) == CUPPER );
135  negone = TYPE->negone; one = TYPE->one;
136  recv = TYPE->Cgerv2d; send = TYPE->Cgesd2d;
137  mmadd = TYPE->Fmmadd; gemm = TYPE->Fgemm; trsm = TYPE->Ftrsm;
138 /*
139 * Compute more local information for sub( A )
140 */
141  Aimb = DESCA[IMB_]; Ainb = DESCA[INB_];
142  Amb = DESCA[MB_ ]; Anb = DESCA[NB_ ];
143  Aimb1 = PB_Cfirstnb( An, IA, Aimb, Amb );
144  Almb1 = PB_Clastnb ( An, IA, Aimb, Amb );
145  Amp0 = PB_Cnumroc( An, 0, Aimb1, Amb, myrow, Arow, nprow );
146  Ainb1 = PB_Cfirstnb( An, JA, Ainb, Anb );
147  Alnb1 = PB_Clastnb ( An, JA, Ainb, Anb );
148  Anq0 = PB_Cnumroc( An, 0, Ainb1, Anb, mycol, Acol, npcol );
149  if( ( Amp0 > 0 ) && ( Anq0 > 0 ) ) Aptr0 = Mptr( A, Aii, Ajj, Ald, size );
150 
151  Cld = DESCC[LLD_];
152 
153  if( lside )
154  {
155  kblks = ( An > Aimb1 ? ( An - Aimb1 - 1 ) / Amb + 2 : 1 );
156 
157  if( upper )
158  {
159  Acurrow = Arow;
160  Anxtrow = MModAdd1( Acurrow, nprow );
161  Aptr = Aptr0;
162  Bptr = Bptr0;
163  Cptr = C;
164 
165  while( k <= kblks )
166  {
167  kb = ( k == 1 ? Aimb1 : ( k == kblks ? Almb1 : Amb ) );
168  An -= kb;
169 
170  if( myrow == Acurrow )
171  {
172 /*
173 * Add contribution of previous blocks of rows of sub( B ) to part of the
174 * current block of rows of sub( B )
175 */
176  mmadd( &kb, &Bnq0, negone, Cptr, &Cld, ALPHA, Bptr, &Bld );
177 /*
178 * Solve updated and current part of block of rows of sub( B )
179 */
180  trsm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), C2F_CHAR( TRANSA ),
181  C2F_CHAR( DIAG ), &kb, &Bnq0, one, Aptr, &Ald, Bptr,
182  &Bld );
183 /*
184 * Add contribution of part of the current block of rows of sub( B ) to the
185 * remaining of the contribution of previous blocks of rows of sub( B ). Send
186 * this remaining part to next process row.
187 */
188  if( An > 0 )
189  {
190  gemm( C2F_CHAR( TRANSA ), C2F_CHAR( NOTRAN ), &An, &Bnq0, &kb,
191  one, Mptr( Aptr, 0, kb, Ald, size ), &Ald, Bptr, &Bld,
192  one, Mptr( Cptr, kb, 0, Cld, size ), &Cld );
193  send( ctxt, An, Bnq0, Mptr( Cptr, kb, 0, Cld, size ), Cld,
194  Anxtrow, mycol );
195  }
196  Aptr = Mptr( Aptr, kb, 0, Ald, size );
197  Bptr = Mptr( Bptr, kb, 0, Bld, size );
198  Cptr = C;
199  }
200  else if( myrow == Anxtrow )
201  {
202 /*
203 * Receive contribution of previous blocks of rows of sub( B ) to be added to
204 * next block of rows of sub( B )
205 */
206  if( An > 0 ) recv( ctxt, An, Bnq0, Cptr, Cld, Acurrow, mycol );
207  }
208 
209  Aptr = Mptr( Aptr, 0, kb, Ald, size );
210  Acurrow = Anxtrow;
211  Anxtrow = MModAdd1( Acurrow, nprow );
212  k += 1;
213  }
214  }
215  else
216  {
217  k = kblks;
218  Acurrow = PB_Cindxg2p( An-1, Aimb1, Amb, Arow, Arow, nprow );
219  Anxtrow = MModSub1( Acurrow, nprow );
220 
221  while( k > 0 )
222  {
223  kb = ( k == 1 ? Aimb1 : ( k == kblks ? Almb1 : Amb ) );
224  An -= kb;
225 
226  if( myrow == Acurrow )
227  {
228  Aptr = Mptr( Aptr0, Amp0 - kb, 0, Ald, size );
229  Bptr = Mptr( Bptr0, Bmp0 - kb, 0, Bld, size );
230  Cptr = Mptr( C, An, 0, Cld, size );
231 /*
232 * Add contribution of previous blocks of rows of sub( B ) to part of the
233 * current block of rows of sub( B )
234 */
235  mmadd( &kb, &Bnq0, negone, Cptr, &Cld, ALPHA, Bptr, &Bld );
236 /*
237 * Solve updated and current part of block of rows of sub( B )
238 */
239  trsm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), C2F_CHAR( TRANSA ),
240  C2F_CHAR( DIAG ), &kb, &Bnq0, one, Mptr( Aptr, 0, Anq0-kb,
241  Ald, size ), &Ald, Bptr, &Bld );
242 /*
243 * Add contribution of part of the current block of rows of sub( B ) to the
244 * remaining of the contribution of previous blocks of rows of sub( B ). Send
245 * this remaining part to next process row.
246 */
247  if( An > 0 )
248  {
249  gemm( C2F_CHAR( TRANSA ), C2F_CHAR( NOTRAN ), &An, &Bnq0, &kb,
250  one, Aptr, &Ald, Bptr, &Bld, one, C, &Cld );
251  send( ctxt, An, Bnq0, C, Cld, Anxtrow, mycol );
252  }
253  Amp0 -= kb;
254  Bmp0 -= kb;
255  }
256  else if( myrow == Anxtrow )
257  {
258 /*
259 * Receive contribution of previous blocks of rows of sub( B ) to be added to
260 * next block of rows of sub( B )
261 */
262  if( An > 0 ) recv( ctxt, An, Bnq0, C, Cld, Acurrow, mycol );
263  }
264 
265  Anq0 -= kb;
266  Acurrow = Anxtrow;
267  Anxtrow = MModSub1( Acurrow, nprow );
268  k -= 1;
269  }
270  }
271  }
272  else
273  {
274  kblks = ( An > Ainb1 ? ( An - Ainb1 - 1 ) / Anb + 2 : 1 );
275 
276  if( upper )
277  {
278  k = kblks;
279  Acurcol = PB_Cindxg2p( An-1, Ainb1, Anb, Acol, Acol, npcol );
280  Anxtcol = MModSub1( Acurcol, npcol );
281 
282  while( k > 0 )
283  {
284  kb = ( k == 1 ? Ainb1 : ( k == kblks ? Alnb1 : Anb ) );
285  An -= kb;
286 
287  if( mycol == Acurcol )
288  {
289  Aptr = Mptr( Aptr0, 0, Anq0 - kb, Ald, size );
290  Bptr = Mptr( Bptr0, 0, Bnq0 - kb, Bld, size );
291  Cptr = Mptr( C, 0, An, Cld, size );
292 /*
293 * Add contribution of previous blocks of columns of sub( B ) to part of the
294 * current block of columns of sub( B )
295 */
296  mmadd( &Bmp0, &kb, negone, Cptr, &Cld, ALPHA, Bptr, &Bld );
297 /*
298 * Solve updated and current part of block of columns of sub( B )
299 */
300  trsm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), C2F_CHAR( TRANSA ),
301  C2F_CHAR( DIAG ), &Bmp0, &kb, one, Mptr( Aptr, Amp0-kb, 0,
302  Ald, size ), &Ald, Bptr, &Bld );
303 /*
304 * Add contribution of part of the current block of columns of sub( B ) to the
305 * remaining of the contribution of previous blocks of columns of sub( B ).
306 * Send this remaining part to next process column.
307 */
308  if( An > 0 )
309  {
310  gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRANSA ), &Bmp0, &An, &kb,
311  one, Bptr, &Bld, Aptr, &Ald, one, C, &Cld );
312  send( ctxt, Bmp0, An, C, Cld, myrow, Anxtcol );
313  }
314  Anq0 -= kb;
315  Bnq0 -= kb;
316  }
317  else if( mycol == Anxtcol )
318  {
319 /*
320 * Receive contribution of previous blocks of columns of sub( B ) to be added
321 * to next block of columns of sub( B )
322 */
323  if( An > 0 ) recv( ctxt, Bmp0, An, C, Cld, myrow, Acurcol );
324  }
325 
326  Amp0 -= kb;
327  Acurcol = Anxtcol;
328  Anxtcol = MModSub1( Acurcol, npcol );
329  k -= 1;
330  }
331  }
332  else
333  {
334  Acurcol = Acol;
335  Anxtcol = MModAdd1( Acurcol, npcol );
336  Aptr = Aptr0;
337  Bptr = Bptr0;
338  Cptr = C;
339 
340  while( k <= kblks )
341  {
342  kb = ( k == 1 ? Ainb1 : ( k == kblks ? Alnb1 : Anb ) );
343  An -= kb;
344 
345  if( mycol == Acurcol )
346  {
347 /*
348 * Add contribution of previous blocks of columns of sub( B ) to part of the
349 * current block of columns of sub( B )
350 */
351  mmadd( &Bmp0, &kb, negone, Cptr, &Cld, ALPHA, Bptr, &Bld );
352 /*
353 * Solve updated and current part of block of columns of sub( B )
354 */
355  trsm( C2F_CHAR( SIDE ), C2F_CHAR( UPLO ), C2F_CHAR( TRANSA ),
356  C2F_CHAR( DIAG ), &Bmp0, &kb, one, Aptr, &Ald, Bptr,
357  &Bld );
358 /*
359 * Add contribution of part of the current block of columns of sub( B ) to the
360 * remaining of the contribution of previous blocks of columns of sub( B ).
361 * Send this remaining part to next process column.
362 */
363  if( An > 0 )
364  {
365  gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( TRANSA ), &Bmp0, &An, &kb,
366  one, Bptr, &Bld, Mptr( Aptr, kb, 0, Ald, size ), &Ald,
367  one, Mptr( Cptr, 0, kb, Cld, size ), &Cld );
368  send( ctxt, Bmp0, An, Mptr( Cptr, 0, kb, Cld, size ), Cld,
369  myrow, Anxtcol );
370  }
371  Aptr = Mptr( Aptr, 0, kb, Ald, size );
372  Bptr = Mptr( Bptr, 0, kb, Bld, size );
373  Cptr = C;
374  }
375  else if( mycol == Anxtcol )
376  {
377 /*
378 * Receive contribution of previous blocks of columns of sub( B ) to be added
379 * to next block of columns of sub( B ).
380 */
381  if( An > 0 ) recv( ctxt, Bmp0, An, Cptr, Cld, myrow, Acurcol );
382  }
383 
384  Aptr = Mptr( Aptr, kb, 0, Ald, size );
385  Acurcol = Anxtcol;
386  Anxtcol = MModAdd1( Acurcol, npcol );
387  k += 1;
388  }
389  }
390  }
391 /*
392 * End of PB_CptrsmAB1
393 */
394 }
TYPE
#define TYPE
Definition: clamov.c:7
GESD2D_T
void(* GESD2D_T)()
Definition: pblas.h:278
MB_
#define MB_
Definition: PBtools.h:43
NB_
#define NB_
Definition: PBtools.h:44
CSRC_
#define CSRC_
Definition: PBtools.h:46
TRSM_T
F_VOID_FCT(* TRSM_T)()
Definition: pblas.h:321
PB_Cfirstnb
int PB_Cfirstnb()
PB_Clastnb
int PB_Clastnb()
GEMM_T
F_VOID_FCT(* GEMM_T)()
Definition: pblas.h:313
NOTRAN
#define NOTRAN
Definition: PBblas.h:44
LLD_
#define LLD_
Definition: PBtools.h:47
PB_CptrsmAB1
void PB_CptrsmAB1(PBTYP_T *TYPE, char *SIDE, char *UPLO, char *TRANSA, char *DIAG, int M, int N, char *ALPHA, char *A, int IA, int JA, int *DESCA, char *B, int IB, int JB, int *DESCB, char *C, int *DESCC)
Definition: PB_CptrsmAB1.c:25
IMB_
#define IMB_
Definition: PBtools.h:41
MModAdd1
#define MModAdd1(I, d)
Definition: PBtools.h:100
MMADD_T
F_VOID_FCT(* MMADD_T)()
Definition: pblas.h:284
GERV2D_T
void(* GERV2D_T)()
Definition: pblas.h:279
PB_Cindxg2p
int PB_Cindxg2p()
RSRC_
#define RSRC_
Definition: PBtools.h:45
PB_Cinfog2l
void PB_Cinfog2l()
PB_Cnumroc
int PB_Cnumroc()
INB_
#define INB_
Definition: PBtools.h:42
C2F_CHAR
#define C2F_CHAR(a)
Definition: pblas.h:121
PB_Cspan
int PB_Cspan()
MModSub1
#define MModSub1(I, d)
Definition: PBtools.h:105
Cblacs_gridinfo
void Cblacs_gridinfo()
PBTYP_T
Definition: pblas.h:325
Mupcase
#define Mupcase(C)
Definition: PBtools.h:83
CUPPER
#define CUPPER
Definition: PBblas.h:26
Mptr
#define Mptr(a_, i_, j_, lda_, siz_)
Definition: PBtools.h:132
CLEFT
#define CLEFT
Definition: PBblas.h:29
CTXT_
#define CTXT_
Definition: PBtools.h:38