14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
21 int M,
int N,
char * ALPHA,
char * A,
int IA,
int JA,
22 int * DESCA,
char * B,
int IB,
int JB,
int * DESCB,
23 char * * C,
int * DESCC,
int * CFREE )
25 void PB_CptrsmAB0(
TYPE, SIDE, UPLO, DIAG, M, N, ALPHA, A, IA, JA, DESCA,
26 B, IB, JB, DESCB, C, DESCC, CFREE )
30 char * DIAG, * SIDE, * UPLO;
31 int * CFREE, IA, IB, JA, JB, M, N;
37 int * DESCA, * DESCB, * DESCC;
44 char btop, * negone, * one, * talpha, * zero;
45 int Acol, Acurcol, Acurrow, Aii, Aimb, Aimb1, Ainb, Ainb1, Ajj,
46 Ald, Almb1, Alnb1, Amb, Amp0, Anq0, An, Anb, Arow, Bcol, Bii,
47 Bimb, Bimb1, Binb, Binb1, Bjj, Bld, Bmb, Bmp0, Bnb, Bnq0,
48 Brow, Cld, ctxt, k=1, kb, kblks, kbprev, ktmp, lside, mycol,
49 myrow, npcol, nprow, size, upper;
50 char * Aptr = NULL, * Aptr0 = NULL, * Bptr = NULL, * Bptr0 = NULL,
71 PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj,
76 Bimb = DESCB[
IMB_]; Binb = DESCB[
INB_];
77 Bmb = DESCB[
MB_ ]; Bnb = DESCB[
NB_ ]; Bld = DESCB[
LLD_];
78 PB_Cinfog2l( IB, JB, DESCB, nprow, npcol, myrow, mycol, &Bii, &Bjj,
89 PB_Cdescset( DESCC, M, N, M, Binb1, Bmb, Bnb, Brow, Bcol, ctxt, Bld );
90 Bnq0 =
PB_Cnumroc( N, 0, Binb1, Bnb, mycol, Bcol, npcol );
93 ( ( ( Brow >= 0 ) && ( myrow == Brow ) ) || ( Brow < 0 ) ) )
95 *C =
Mptr( B, Bii, Bjj, Bld, size );
98 Ald, size ), &Ald, *C, &Bld );
109 PB_Cdescset( DESCC, M, N, Bimb1, N, Bmb, Bnb, Brow, Bcol, ctxt, Bld );
110 Bmp0 =
PB_Cnumroc( M, 0, Bimb1, Bmb, myrow, Brow, nprow );
113 ( ( ( Bcol >= 0 ) && ( mycol == Bcol ) ) || ( Bcol < 0 ) ) )
115 *C =
Mptr( B, Bii, Bjj, Bld, size );
118 Ald, size ), &Ald, *C, &Bld );
126 An = ( lside ? M : N );
129 negone =
TYPE->negone; one =
TYPE->one; zero =
TYPE->zero;
130 brecv =
TYPE->Cgebr2d; bsend =
TYPE->Cgebs2d; mmadd =
TYPE->Fmmadd;
131 gemm =
TYPE->Fgemm; trsm =
TYPE->Ftrsm;
135 Aimb = DESCA[
IMB_]; Ainb = DESCA[
INB_];
136 Amb = DESCA[
MB_ ]; Anb = DESCA[
NB_ ];
139 Amp0 =
PB_Cnumroc( An, 0, Aimb1, Amb, myrow, Arow, nprow );
142 Anq0 =
PB_Cnumroc( An, 0, Ainb1, Anb, mycol, Acol, npcol );
143 if( ( Amp0 > 0 ) && ( Anq0 > 0 ) ) Aptr0 =
Mptr( A, Aii, Ajj, Ald, size );
146 Bmp0 =
PB_Cnumroc( M, 0, Bimb1, Bmb, myrow, Brow, nprow );
148 Bnq0 =
PB_Cnumroc( N, 0, Binb1, Bnb, mycol, Bcol, npcol );
149 if( ( Bmp0 > 0 ) && ( Bnq0 > 0 ) ) Bptr0 =
Mptr( B, Bii, Bjj, Bld, size );
154 PB_Cdescset( DESCC, M, N, M, Binb1, Bmb, Bnb, -1, Bcol, ctxt, Cld );
155 if( Bnq0 > 0 ) { Cptr = *C =
PB_Cmalloc( M * Bnq0 * size ); *CFREE = 1; }
156 else { *C = NULL; *CFREE = 0;
return; }
158 kblks = ( An > Aimb1 ? ( An - Aimb1 - 1 ) / Amb + 2 : 1 );
163 Acurrow =
PB_Cindxg2p( An-1, Aimb1, Amb, Arow, Arow, nprow );
165 Bptr =
Mptr( Bptr0, Bmp0 - kb, 0, Bld, size );
166 Cptr =
Mptr( *C, An - kb, 0, Cld, size );
171 if( myrow == Acurrow )
174 C2F_CHAR( DIAG ), &kb, &Bnq0, ALPHA,
Mptr( Aptr0, Amp0-kb,
175 Anq0-kb, Ald, size ), &Ald, Bptr, &Bld );
176 bsend( ctxt,
COLUMN, &btop, kb, Bnq0, Bptr, Bld );
177 mmadd( &kb, &Bnq0, one, Bptr, &Bld, zero, Cptr, &Cld );
183 brecv( ctxt,
COLUMN, &btop, kb, Bnq0, Cptr, Cld, Acurrow, mycol );
185 Acurrow =
MModSub1( Acurrow, nprow );
186 An -= ( kbprev = kb );
194 kb = ( kblks == 1 ? Aimb1 : Amb );
196 Aptr =
Mptr( Aptr0, 0, Anq0, Ald, size );
197 Bptr =
Mptr( Bptr0, Bmp0 - kb, 0, Bld, size );
198 Cptr =
Mptr( *C, An, 0, Cld, size );
200 if( myrow == Acurrow )
207 &kbprev, negone,
Mptr( Aptr, Amp0-kb, 0, Ald, size ),
208 &Ald, Cptr, &Cld, talpha, Bptr, &Bld );
214 -kb, Ald, size ), &Ald, Bptr, &Bld );
218 bsend( ctxt,
COLUMN, &btop, kb, Bnq0, Bptr, Bld );
219 mmadd( &kb, &Bnq0, one, Bptr, &Bld, zero,
Mptr( Cptr, -kb, 0,
225 if( ( ktmp = Amp0 - kb ) > 0 )
227 &kbprev, negone, Aptr, &Ald, Cptr, &Cld, talpha, Bptr0,
240 &kbprev, negone, Aptr, &Ald, Cptr, &Cld, talpha, Bptr0,
245 brecv( ctxt,
COLUMN, &btop, kb, Bnq0,
Mptr( Cptr, -kb, 0, Cld,
246 size ), Cld, Acurrow, mycol );
249 Acurrow =
MModSub1( Acurrow, nprow );
250 An -= ( kbprev = kb );
264 if( myrow == Acurrow )
267 C2F_CHAR( DIAG ), &kb, &Bnq0, ALPHA, Aptr0, &Ald, Bptr0,
269 bsend( ctxt,
COLUMN, &btop, kb, Bnq0, Bptr0, Bld );
270 mmadd( &kb, &Bnq0, one, Bptr0, &Bld, zero, Cptr, &Cld );
272 Aptr0 =
Mptr( Aptr0, kb, 0, Ald, size );
273 Bptr0 =
Mptr( Bptr0, kb, 0, Bld, size );
277 brecv( ctxt,
COLUMN, &btop, kb, Bnq0, Cptr, Cld, Acurrow, mycol );
279 Acurrow =
MModAdd1( Acurrow, nprow );
281 Cptr =
Mptr( Cptr, kb, 0, Cld, size );
282 Aptr0 =
Mptr( Aptr0, 0, kb, Ald, size );
289 kb = ( k == kblks ? Almb1 : Amb );
291 if( myrow == Acurrow )
298 &kbprev, negone,
Mptr( Aptr0, 0, -kbprev, Ald, size ),
299 &Ald,
Mptr( Cptr, -kbprev, 0, Cld, size ), &Cld, talpha,
305 C2F_CHAR( DIAG ), &kb, &Bnq0, one, Aptr0, &Ald, Bptr0,
310 bsend( ctxt,
COLUMN, &btop, kb, Bnq0, Bptr0, Bld );
311 mmadd( &kb, &Bnq0, one, Bptr0, &Bld, zero, Cptr, &Cld );
316 if( ( ktmp = Amp0 - kb ) > 0 )
318 &kbprev, negone,
Mptr( Aptr0, kb, -kbprev, Ald, size ),
319 &Ald,
Mptr( Cptr, -kbprev, 0, Cld, size ), &Cld, talpha,
320 Mptr( Bptr0, kb, 0, Bld, size ), &Bld );
322 Aptr0 =
Mptr( Aptr0, kb, 0, Ald, size );
323 Bptr0 =
Mptr( Bptr0, kb, 0, Bld, size );
333 &kbprev, negone,
Mptr( Aptr0, 0, -kbprev, Ald, size ),
334 &Ald,
Mptr( Cptr, -kbprev, 0, Cld, size ), &Cld, talpha,
339 brecv( ctxt,
COLUMN, &btop, kb, Bnq0, Cptr, Cld, Acurrow,
343 Acurrow =
MModAdd1( Acurrow, nprow );
345 Cptr =
Mptr( Cptr, kb, 0, Cld, size );
346 Aptr0 =
Mptr( Aptr0, 0, kb, Ald, size );
354 Cld =
MAX( 1, Bmp0 );
355 PB_Cdescset( DESCC, M, N, Bimb1, N, Bmb, Bnb, Brow, -1, ctxt, Cld );
356 if( Bmp0 > 0 ) { Cptr = *C =
PB_Cmalloc( Bmp0 * N * size ); *CFREE = 1; }
357 else { *C = NULL; *CFREE = 0;
return; }
359 kblks = ( An > Ainb1 ? ( An - Ainb1 - 1 ) / Anb + 2 : 1 );
370 if( mycol == Acurcol )
373 C2F_CHAR( DIAG ), &Bmp0, &kb, ALPHA, Aptr0, &Ald, Bptr0,
375 bsend( ctxt,
ROW, &btop, Bmp0, kb, Bptr0, Bld );
376 mmadd( &Bmp0, &kb, one, Bptr0, &Bld, zero, Cptr, &Cld );
378 Aptr0 =
Mptr( Aptr0, 0, kb, Ald, size );
379 Bptr0 =
Mptr( Bptr0, 0, kb, Bld, size );
383 brecv( ctxt,
ROW, &btop, Bmp0, kb, Cptr, Cld, myrow, Acurcol );
385 Acurcol =
MModAdd1( Acurcol, npcol );
388 Cptr =
Mptr( Cptr, 0, kb, Cld, size );
389 Aptr0 =
Mptr( Aptr0, kb, 0, Ald, size );
395 kb = ( k == kblks ? Alnb1 : Anb );
397 if( mycol == Acurcol )
404 &kbprev, negone,
Mptr( Cptr, 0, -kbprev, Cld, size ), &Cld,
405 Mptr( Aptr0, -kbprev, 0, Ald, size ), &Ald, talpha, Bptr0,
411 C2F_CHAR( DIAG ), &Bmp0, &kb, one, Aptr0, &Ald, Bptr0,
416 bsend( ctxt,
ROW, &btop, Bmp0, kb, Bptr0, Bld );
417 mmadd( &Bmp0, &kb, one, Bptr0, &Bld, zero, Cptr, &Cld );
422 if( ( ktmp = Anq0 - kb ) > 0 )
424 &kbprev, negone,
Mptr( Cptr, 0, -kbprev, Cld, size ),
425 &Cld,
Mptr( Aptr0, -kbprev, kb, Ald, size ), &Ald,
426 talpha,
Mptr( Bptr0, 0, kb, Bld, size ), &Bld );
428 Aptr0 =
Mptr( Aptr0, 0, kb, Ald, size );
429 Bptr0 =
Mptr( Bptr0, 0, kb, Bld, size );
439 &kbprev, negone,
Mptr( Cptr, 0, -kbprev, Cld, size ),
440 &Cld,
Mptr( Aptr0, -kbprev, 0, Ald, size ), &Ald,
441 talpha, Bptr0, &Bld );
445 brecv( ctxt,
ROW, &btop, Bmp0, kb, Cptr, Cld, myrow, Acurcol );
448 Acurcol =
MModAdd1( Acurcol, npcol );
450 Cptr =
Mptr( Cptr, 0, kb, Cld, size );
451 Aptr0 =
Mptr( Aptr0, kb, 0, Ald, size );
458 Acurcol =
PB_Cindxg2p( An-1, Ainb1, Anb, Acol, Acol, npcol );
460 Bptr =
Mptr( Bptr0, 0, Bnq0 - kb, Bld, size );
461 Cptr =
Mptr( *C, 0, An - kb, Cld, size );
466 if( mycol == Acurcol )
469 C2F_CHAR( DIAG ), &Bmp0, &kb, ALPHA,
Mptr( Aptr0, Amp0-kb,
470 Anq0-kb, Ald, size ), &Ald, Bptr, &Bld );
471 bsend( ctxt,
ROW, &btop, Bmp0, kb, Bptr, Bld );
472 mmadd( &Bmp0, &kb, one, Bptr, &Bld, zero, Cptr, &Cld );
478 brecv( ctxt,
ROW, &btop, Bmp0, kb, Cptr, Cld, myrow, Acurcol );
480 Acurcol =
MModSub1( Acurcol, npcol );
481 An -= ( kbprev = kb );
489 kb = ( kblks == 1 ? Ainb1 : Anb );
491 Aptr =
Mptr( Aptr0, Amp0, 0, Ald, size );
492 Bptr =
Mptr( Bptr0, 0, Bnq0 - kb, Bld, size );
493 Cptr =
Mptr( *C, 0, An, Cld, size );
495 if( mycol == Acurcol )
502 &kbprev, negone, Cptr, &Cld,
Mptr( Aptr, 0, Anq0-kb, Ald,
503 size ), &Ald, talpha, Bptr, &Bld );
509 Anq0-kb, Ald, size ), &Ald, Bptr, &Bld );
513 bsend( ctxt,
ROW, &btop, Bmp0, kb, Bptr, Bld );
514 mmadd( &Bmp0, &kb, one, Bptr, &Bld, zero,
Mptr( Cptr, 0, -kb,
520 if( ( ktmp = Anq0 - kb ) > 0 )
522 &kbprev, negone, Cptr, &Cld, Aptr, &Ald, talpha, Bptr0,
535 &kbprev, negone, Cptr, &Cld, Aptr, &Ald, talpha, Bptr0,
540 brecv( ctxt,
ROW, &btop, Bmp0, kb,
Mptr( Cptr, 0, -kb, Cld,
541 size ), Cld, myrow, Acurcol );
544 Acurcol =
MModSub1( Acurcol, npcol );
545 An -= ( kbprev = kb );