40{
41
42
43
44 char btop, * negone, * one, * talpha, * zero;
45 Int Acol, Acurcol, Acurrow, Aii, Aimb, Aimb1, Ainb, Ainb1, Ajj,
46 Ald, Almb1, Alnb1, Amb, Amp0, Anq0, An, Anb, Arow, Bcol, Bii,
47 Bimb, Bimb1, Binb, Binb1, Bjj, Bld, Bmb, Bmp0, Bnb, Bnq0,
48 Brow, Cld, ctxt, k=1, kb, kblks, kbprev, ktmp, lside, mycol,
49 myrow, npcol, nprow, size, upper;
50 char * Aptr = NULL, * Aptr0 = NULL, * Bptr = NULL, * Bptr0 = NULL,
51 * Cptr = NULL;
57
58
59
60
63
64
65
67
68
69
71 PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj,
72 &Arow, &Acol );
73
74
75
76 Bimb = DESCB[
IMB_]; Binb = DESCB[
INB_];
77 Bmb = DESCB[
MB_ ]; Bnb = DESCB[
NB_ ]; Bld = DESCB[
LLD_];
78 PB_Cinfog2l( IB, JB, DESCB, nprow, npcol, myrow, mycol, &Bii, &Bjj,
79 &Brow, &Bcol );
80
81
82
83 if( lside )
84 {
86 {
87 *CFREE = 0;
89 PB_Cdescset( DESCC, M, N, M, Binb1, Bmb, Bnb, Brow, Bcol, ctxt, Bld );
90 Bnq0 =
PB_Cnumroc( N, 0, Binb1, Bnb, mycol, Bcol, npcol );
91
92 if( ( Bnq0 > 0 ) &&
93 ( ( ( Brow >= 0 ) && ( myrow == Brow ) ) || ( Brow < 0 ) ) )
94 {
95 *C =
Mptr( B, Bii, Bjj, Bld, size );
98 Ald, size ), &Ald, *C, &Bld );
99 }
100 return;
101 }
102 }
103 else
104 {
106 {
107 *CFREE = 0;
109 PB_Cdescset( DESCC, M, N, Bimb1, N, Bmb, Bnb, Brow, Bcol, ctxt, Bld );
110 Bmp0 =
PB_Cnumroc( M, 0, Bimb1, Bmb, myrow, Brow, nprow );
111
112 if( ( Bmp0 > 0 ) &&
113 ( ( ( Bcol >= 0 ) && ( mycol == Bcol ) ) || ( Bcol < 0 ) ) )
114 {
115 *C =
Mptr( B, Bii, Bjj, Bld, size );
118 Ald, size ), &Ald, *C, &Bld );
119 }
120 return;
121 }
122 }
123
124
125
126 An = ( lside ? M : N );
128 talpha = ALPHA;
129 negone =
TYPE->negone; one =
TYPE->one; zero =
TYPE->zero;
130 brecv =
TYPE->Cgebr2d; bsend =
TYPE->Cgebs2d; mmadd =
TYPE->Fmmadd;
131 gemm =
TYPE->Fgemm; trsm =
TYPE->Ftrsm;
132
133
134
135 Aimb = DESCA[
IMB_]; Ainb = DESCA[
INB_];
136 Amb = DESCA[
MB_ ]; Anb = DESCA[
NB_ ];
139 Amp0 =
PB_Cnumroc( An, 0, Aimb1, Amb, myrow, Arow, nprow );
142 Anq0 =
PB_Cnumroc( An, 0, Ainb1, Anb, mycol, Acol, npcol );
143 if( ( Amp0 > 0 ) && ( Anq0 > 0 ) ) Aptr0 =
Mptr( A, Aii, Ajj, Ald, size );
144
146 Bmp0 =
PB_Cnumroc( M, 0, Bimb1, Bmb, myrow, Brow, nprow );
148 Bnq0 =
PB_Cnumroc( N, 0, Binb1, Bnb, mycol, Bcol, npcol );
149 if( ( Bmp0 > 0 ) && ( Bnq0 > 0 ) ) Bptr0 =
Mptr( B, Bii, Bjj, Bld, size );
150
151 if( lside )
152 {
153 Cld = M;
154 PB_Cdescset( DESCC, M, N, M, Binb1, Bmb, Bnb, -1, Bcol, ctxt, Cld );
155 if( Bnq0 > 0 ) { Cptr = *C =
PB_Cmalloc( M * Bnq0 * size ); *CFREE = 1; }
156 else { *C = NULL; *CFREE = 0; return; }
157
158 kblks = ( An > Aimb1 ? ( An - Aimb1 - 1 ) / Amb + 2 : 1 );
160
161 if( upper )
162 {
163 Acurrow =
PB_Cindxg2p( An-1, Aimb1, Amb, Arow, Arow, nprow );
164 kb = Almb1;
165 Bptr =
Mptr( Bptr0, Bmp0 - kb, 0, Bld, size );
166 Cptr =
Mptr( *C, An - kb, 0, Cld, size );
167
168
169
170
171 if( myrow == Acurrow )
172 {
174 C2F_CHAR( DIAG ), &kb, &Bnq0, ALPHA,
Mptr( Aptr0, Amp0-kb,
175 Anq0-kb, Ald, size ), &Ald, Bptr, &Bld );
176 bsend( ctxt,
COLUMN, &btop, kb, Bnq0, Bptr, Bld );
177 mmadd( &kb, &Bnq0, one, Bptr, &Bld, zero, Cptr, &Cld );
178 Amp0 -= kb;
179 Bmp0 -= kb;
180 }
181 else
182 {
183 brecv( ctxt,
COLUMN, &btop, kb, Bnq0, Cptr, Cld, Acurrow, mycol );
184 }
185 Acurrow =
MModSub1( Acurrow, nprow );
186 An -= ( kbprev = kb );
187 Anq0 -= kb;
188 kblks -= 1;
189
190
191
192 while( kblks > 0 )
193 {
194 kb = ( kblks == 1 ? Aimb1 : Amb );
195
196 Aptr =
Mptr( Aptr0, 0, Anq0, Ald, size );
197 Bptr =
Mptr( Bptr0, Bmp0 - kb, 0, Bld, size );
198 Cptr =
Mptr( *C, An, 0, Cld, size );
199
200 if( myrow == Acurrow )
201 {
202
203
204
205
207 &kbprev, negone,
Mptr( Aptr, Amp0-kb, 0, Ald, size ),
208 &Ald, Cptr, &Cld, talpha, Bptr, &Bld );
209
210
211
214 -kb, Ald, size ), &Ald, Bptr, &Bld );
215
216
217
218 bsend( ctxt,
COLUMN, &btop, kb, Bnq0, Bptr, Bld );
219 mmadd( &kb, &Bnq0, one, Bptr, &Bld, zero,
Mptr( Cptr, -kb, 0,
220 Cld, size ), &Cld );
221
222
223
224
225 if( ( ktmp = Amp0 - kb ) > 0 )
227 &kbprev, negone, Aptr, &Ald, Cptr, &Cld, talpha, Bptr0,
228 &Bld );
229 Amp0 -= kb;
230 Bmp0 -= kb;
231 }
232 else
233 {
234
235
236
237
238 if( Amp0 > 0 )
240 &kbprev, negone, Aptr, &Ald, Cptr, &Cld, talpha, Bptr0,
241 &Bld );
242
243
244
245 brecv( ctxt,
COLUMN, &btop, kb, Bnq0,
Mptr( Cptr, -kb, 0, Cld,
246 size ), Cld, Acurrow, mycol );
247 }
248
249 Acurrow =
MModSub1( Acurrow, nprow );
250 An -= ( kbprev = kb );
251 Anq0 -= kb;
252 talpha = one;
253 kblks -= 1;
254 }
255 }
256 else
257 {
258 Acurrow = Arow;
259 kb = Aimb1;
260
261
262
263
264 if( myrow == Acurrow )
265 {
267 C2F_CHAR( DIAG ), &kb, &Bnq0, ALPHA, Aptr0, &Ald, Bptr0,
268 &Bld );
269 bsend( ctxt,
COLUMN, &btop, kb, Bnq0, Bptr0, Bld );
270 mmadd( &kb, &Bnq0, one, Bptr0, &Bld, zero, Cptr, &Cld );
271 Amp0 -= kb;
272 Aptr0 =
Mptr( Aptr0, kb, 0, Ald, size );
273 Bptr0 =
Mptr( Bptr0, kb, 0, Bld, size );
274 }
275 else
276 {
277 brecv( ctxt,
COLUMN, &btop, kb, Bnq0, Cptr, Cld, Acurrow, mycol );
278 }
279 Acurrow =
MModAdd1( Acurrow, nprow );
280 kbprev = kb;
281 Cptr =
Mptr( Cptr, kb, 0, Cld, size );
282 Aptr0 =
Mptr( Aptr0, 0, kb, Ald, size );
283 k += 1;
284
285
286
287 while( k <= kblks )
288 {
289 kb = ( k == kblks ? Almb1 : Amb );
290
291 if( myrow == Acurrow )
292 {
293
294
295
296
298 &kbprev, negone,
Mptr( Aptr0, 0, -kbprev, Ald, size ),
299 &Ald,
Mptr( Cptr, -kbprev, 0, Cld, size ), &Cld, talpha,
300 Bptr0, &Bld );
301
302
303
305 C2F_CHAR( DIAG ), &kb, &Bnq0, one, Aptr0, &Ald, Bptr0,
306 &Bld );
307
308
309
310 bsend( ctxt,
COLUMN, &btop, kb, Bnq0, Bptr0, Bld );
311 mmadd( &kb, &Bnq0, one, Bptr0, &Bld, zero, Cptr, &Cld );
312
313
314
315
316 if( ( ktmp = Amp0 - kb ) > 0 )
318 &kbprev, negone,
Mptr( Aptr0, kb, -kbprev, Ald, size ),
319 &Ald,
Mptr( Cptr, -kbprev, 0, Cld, size ), &Cld, talpha,
320 Mptr( Bptr0, kb, 0, Bld, size ), &Bld );
321 Amp0 -= kb;
322 Aptr0 =
Mptr( Aptr0, kb, 0, Ald, size );
323 Bptr0 =
Mptr( Bptr0, kb, 0, Bld, size );
324 }
325 else
326 {
327
328
329
330
331 if( Amp0 > 0 )
333 &kbprev, negone,
Mptr( Aptr0, 0, -kbprev, Ald, size ),
334 &Ald,
Mptr( Cptr, -kbprev, 0, Cld, size ), &Cld, talpha,
335 Bptr0, &Bld );
336
337
338
339 brecv( ctxt,
COLUMN, &btop, kb, Bnq0, Cptr, Cld, Acurrow,
340 mycol );
341 }
342
343 Acurrow =
MModAdd1( Acurrow, nprow );
344 kbprev = kb;
345 Cptr =
Mptr( Cptr, kb, 0, Cld, size );
346 Aptr0 =
Mptr( Aptr0, 0, kb, Ald, size );
347 talpha = one;
348 k += 1;
349 }
350 }
351 }
352 else
353 {
354 Cld =
MAX( 1, Bmp0 );
355 PB_Cdescset( DESCC, M, N, Bimb1, N, Bmb, Bnb, Brow, -1, ctxt, Cld );
356 if( Bmp0 > 0 ) { Cptr = *C =
PB_Cmalloc( Bmp0 * N * size ); *CFREE = 1; }
357 else { *C = NULL; *CFREE = 0; return; }
358
359 kblks = ( An > Ainb1 ? ( An - Ainb1 - 1 ) / Anb + 2 : 1 );
361
362 if( upper )
363 {
364 Acurcol = Acol;
365 kb = Ainb1;
366
367
368
369
370 if( mycol == Acurcol )
371 {
373 C2F_CHAR( DIAG ), &Bmp0, &kb, ALPHA, Aptr0, &Ald, Bptr0,
374 &Bld );
375 bsend( ctxt,
ROW, &btop, Bmp0, kb, Bptr0, Bld );
376 mmadd( &Bmp0, &kb, one, Bptr0, &Bld, zero, Cptr, &Cld );
377 Anq0 -= kb;
378 Aptr0 =
Mptr( Aptr0, 0, kb, Ald, size );
379 Bptr0 =
Mptr( Bptr0, 0, kb, Bld, size );
380 }
381 else
382 {
383 brecv( ctxt,
ROW, &btop, Bmp0, kb, Cptr, Cld, myrow, Acurcol );
384 }
385 Acurcol =
MModAdd1( Acurcol, npcol );
386 kbprev = kb;
387 k += 1;
388 Cptr =
Mptr( Cptr, 0, kb, Cld, size );
389 Aptr0 =
Mptr( Aptr0, kb, 0, Ald, size );
390
391
392
393 while( k <= kblks )
394 {
395 kb = ( k == kblks ? Alnb1 : Anb );
396
397 if( mycol == Acurcol )
398 {
399
400
401
402
404 &kbprev, negone,
Mptr( Cptr, 0, -kbprev, Cld, size ), &Cld,
405 Mptr( Aptr0, -kbprev, 0, Ald, size ), &Ald, talpha, Bptr0,
406 &Bld );
407
408
409
411 C2F_CHAR( DIAG ), &Bmp0, &kb, one, Aptr0, &Ald, Bptr0,
412 &Bld );
413
414
415
416 bsend( ctxt,
ROW, &btop, Bmp0, kb, Bptr0, Bld );
417 mmadd( &Bmp0, &kb, one, Bptr0, &Bld, zero, Cptr, &Cld );
418
419
420
421
422 if( ( ktmp = Anq0 - kb ) > 0 )
424 &kbprev, negone,
Mptr( Cptr, 0, -kbprev, Cld, size ),
425 &Cld,
Mptr( Aptr0, -kbprev, kb, Ald, size ), &Ald,
426 talpha,
Mptr( Bptr0, 0, kb, Bld, size ), &Bld );
427 Anq0 -= kb;
428 Aptr0 =
Mptr( Aptr0, 0, kb, Ald, size );
429 Bptr0 =
Mptr( Bptr0, 0, kb, Bld, size );
430 }
431 else
432 {
433
434
435
436
437 if( Anq0 > 0 )
439 &kbprev, negone,
Mptr( Cptr, 0, -kbprev, Cld, size ),
440 &Cld,
Mptr( Aptr0, -kbprev, 0, Ald, size ), &Ald,
441 talpha, Bptr0, &Bld );
442
443
444
445 brecv( ctxt,
ROW, &btop, Bmp0, kb, Cptr, Cld, myrow, Acurcol );
446 }
447
448 Acurcol =
MModAdd1( Acurcol, npcol );
449 kbprev = kb;
450 Cptr =
Mptr( Cptr, 0, kb, Cld, size );
451 Aptr0 =
Mptr( Aptr0, kb, 0, Ald, size );
452 talpha = one;
453 k += 1;
454 }
455 }
456 else
457 {
458 Acurcol =
PB_Cindxg2p( An-1, Ainb1, Anb, Acol, Acol, npcol );
459 kb = Alnb1;
460 Bptr =
Mptr( Bptr0, 0, Bnq0 - kb, Bld, size );
461 Cptr =
Mptr( *C, 0, An - kb, Cld, size );
462
463
464
465
466 if( mycol == Acurcol )
467 {
469 C2F_CHAR( DIAG ), &Bmp0, &kb, ALPHA,
Mptr( Aptr0, Amp0-kb,
470 Anq0-kb, Ald, size ), &Ald, Bptr, &Bld );
471 bsend( ctxt,
ROW, &btop, Bmp0, kb, Bptr, Bld );
472 mmadd( &Bmp0, &kb, one, Bptr, &Bld, zero, Cptr, &Cld );
473 Anq0 -= kb;
474 Bnq0 -= kb;
475 }
476 else
477 {
478 brecv( ctxt,
ROW, &btop, Bmp0, kb, Cptr, Cld, myrow, Acurcol );
479 }
480 Acurcol =
MModSub1( Acurcol, npcol );
481 An -= ( kbprev = kb );
482 Amp0 -= kb;
483 kblks -= 1;
484
485
486
487 while( kblks > 0 )
488 {
489 kb = ( kblks == 1 ? Ainb1 : Anb );
490
491 Aptr =
Mptr( Aptr0, Amp0, 0, Ald, size );
492 Bptr =
Mptr( Bptr0, 0, Bnq0 - kb, Bld, size );
493 Cptr =
Mptr( *C, 0, An, Cld, size );
494
495 if( mycol == Acurcol )
496 {
497
498
499
500
502 &kbprev, negone, Cptr, &Cld,
Mptr( Aptr, 0, Anq0-kb, Ald,
503 size ), &Ald, talpha, Bptr, &Bld );
504
505
506
509 Anq0-kb, Ald, size ), &Ald, Bptr, &Bld );
510
511
512
513 bsend( ctxt,
ROW, &btop, Bmp0, kb, Bptr, Bld );
514 mmadd( &Bmp0, &kb, one, Bptr, &Bld, zero,
Mptr( Cptr, 0, -kb,
515 Cld, size ), &Cld );
516
517
518
519
520 if( ( ktmp = Anq0 - kb ) > 0 )
522 &kbprev, negone, Cptr, &Cld, Aptr, &Ald, talpha, Bptr0,
523 &Bld );
524 Anq0 -= kb;
525 Bnq0 -= kb;
526 }
527 else
528 {
529
530
531
532
533 if( Anq0 > 0 )
535 &kbprev, negone, Cptr, &Cld, Aptr, &Ald, talpha, Bptr0,
536 &Bld );
537
538
539
540 brecv( ctxt,
ROW, &btop, Bmp0, kb,
Mptr( Cptr, 0, -kb, Cld,
541 size ), Cld, myrow, Acurcol );
542 }
543
544 Acurcol =
MModSub1( Acurcol, npcol );
545 An -= ( kbprev = kb );
546 Amp0 -= kb;
547 talpha = one;
548 kblks -= 1;
549 }
550 }
551 }
552
553
554
555}