40{
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257 char Broc, TranOp, conjg, * negone, * one, * talpha, * talph0, top,
258 * zero;
259 Int Acol, Aii, Aimb1, Ainb1, Ajj, Akp, Akq, Alcmb, Ald, Amb, An,
260 Anb, Anp, Anp0, Anq, Anq0, Arow, Asrc, Astart, BcurrocR, Bfwd,
261 BiiD, BiiR, Binb1D, Binb1R, BisR, Bld, BmyprocD, BmyprocR,
262 BnD, BnR, BnbD, BnbR, BnpR, BnprocsD, BnprocsR, BrocD, BrocR,
263 BsrcR, LNorRT, WBCfr, WBCld, WBCapbX, WBCsum, WBRfr, WBRld,
264 WBRapbX, WBRsum, ctxt, izero=0, k, kb, kbnext, kbprev, ktmp,
265 lside, mycol, myrow, n, nb, nbb, notran, npcol, nprow, p=0,
266 size, tmp, upper;
270
271
272
274 char * Aptr = NULL, * Bptr = NULL, * WBC = NULL, * WBR = NULL;
275
276
277
278
279
280
281
283
288 LNorRT = ( lside && notran ) || ( !( lside ) && !( notran ) );
289
291 one =
TYPE->one; zero =
TYPE->zero; negone =
TYPE->negone;
292 pad =
TYPE->Ftzpad; gemm =
TYPE->Fgemm; gsum2d =
TYPE->Cgsum2d;
294
295
296
297 if( lside )
298 {
299 BnD = An = M; BnR = N; Broc =
CCOLUMN;
300 BmyprocD = myrow; BnprocsD = nprow;
301 BmyprocR = mycol; BnprocsR = npcol;
302 BnbD = DESCB[
MB_ ]; BnbR = DESCB[
NB_ ];
304 PB_Cinfog2l( IB, JB, DESCB, BnprocsD, BnprocsR, BmyprocD, BmyprocR,
305 &BiiD, &BiiR, &BrocD, &BrocR );
308 }
309 else
310 {
311 BnD = An = N; BnR = M; Broc =
CROW;
312 BmyprocD = mycol; BnprocsD = npcol;
313 BmyprocR = myrow; BnprocsR = nprow;
314 BnbR = DESCB[
MB_ ]; BnbD = DESCB[
NB_ ];
316 PB_Cinfog2l( IB, JB, DESCB, BnprocsR, BnprocsD, BmyprocR, BmyprocD,
317 &BiiR, &BiiD, &BrocR, &BrocD );
320 }
321
322
323
324 PB_Cdescribe( An, An, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj,
325 &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 );
326
327
328
330 {
333 }
334 else { conjg =
CNOCONJG; talpha = ALPHA; }
335
336
337
338 if( LNorRT )
339 {
341 Astart = ( upper ? An - 1 : 0 );
342 }
343 else
344 {
346 Astart = ( upper ? 0 : An - 1 );
347 }
348
349
350
351
352 Alcmb = 2 * nb *
PB_Clcm( ( Arow >= 0 ? nprow : 1 ),
353 ( Acol >= 0 ? npcol : 1 ) );
354
355
356
357
358 if( !( BisR = ( ( BsrcR < 0 ) || ( BnprocsR == 1 ) ) ) && !( Bfwd ) )
359 {
360 tmp =
PB_Cindxg2p( BnR - 1, Binb1R, BnbR, BrocR, BrocR, BnprocsR );
361 p =
MModSub( tmp, BrocR, BnprocsR );
362 }
363
364
365
366
367 n = BnR;
368
369 while( n > 0 )
370 {
371
372
373
374
375 BcurrocR = ( BisR ? -1 :
MModAdd( BrocR, p, BnprocsR ) );
376 BnpR =
PB_Cnumroc( BnR, 0, Binb1R, BnbR, BcurrocR, BrocR, BnprocsR );
377
378 n -= BnpR;
379
380
381
382
383 if( BnpR ) nbb = BnpR / ( ( BnpR - 1 ) / nb + 1 );
384
385 while( BnpR )
386 {
387 nbb =
MIN( nbb, BnpR );
388
389
390
391 if( lside )
392 {
393 PB_Cdescset( DBUFB, BnD, nbb, Binb1D, nbb, BnbD, BnbR, BrocD,
394 BcurrocR, ctxt, Bld );
395 if( BisR || ( BmyprocR == BcurrocR ) )
396 Bptr =
Mptr( B, BiiD, BiiR, Bld, size );
397 }
398 else
399 {
400 PB_Cdescset( DBUFB, nbb, BnD, nbb, Binb1D, BnbR, BnbD, BcurrocR,
401 BrocD, ctxt, Bld );
402 if( BisR || ( BmyprocR == BcurrocR ) )
403 Bptr =
Mptr( B, BiiR, BiiD, Bld, size );
404 }
405
406 talph0 = talpha;
407
408 if( LNorRT )
409 {
410
411
412
413
415 0, 0, DBUFB, &Broc, &WBC, WBCd, &WBCfr, &WBCsum,
416 &WBCapbX );
417
418
419
420 PB_COutV(
TYPE,
ROW,
INIT, An, An, Ad0, nbb, &WBR, WBRd, &WBRfr,
421 &WBRsum );
422
423
424
425 Aimb1 = Ad0[
IMB_ ]; Ainb1 = Ad0[
INB_ ];
426 Amb = Ad0[
MB_ ]; Anb = Ad0[
NB_ ];
428
429 Anp =
PB_Cnumroc( An, 0, Aimb1, Amb, myrow, Arow, nprow );
430 Anq =
PB_Cnumroc( An, 0, Ainb1, Anb, mycol, Acol, npcol );
431 if( ( Anp > 0 ) && ( Anq > 0 ) )
432 Aptr =
Mptr( A, Aii, Ajj, Ald, size );
433
434 WBCld = WBCd[
LLD_]; WBRld = WBRd[
LLD_];
435
436 if( upper )
437 {
438
439
440
441 for( k = ( Astart / Alcmb ) * Alcmb; k >= 0; k -= Alcmb )
442 {
443 ktmp = An - k; kb =
MIN( ktmp, Alcmb );
444
445
446
447
448 Akp =
PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
449 Akq =
PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
451 talph0, Aptr, k, k, Ad0,
Mptr( WBC, Akp, 0, WBCld,
452 size ), WBCld,
Mptr( WBR, 0, Akq, WBRld, size ),
453 WBRld );
454
455
456
457
458
459 if( Akp > 0 )
460 {
461 Anq0 =
PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
462 if( WBCsum )
463 {
464 kbprev =
MIN( k, Alcmb );
465 ktmp =
PB_Cnumroc( kbprev, k-kbprev, Aimb1, Amb,
466 myrow, Arow, nprow );
467 Akp -= ktmp;
468
469 if( ktmp > 0 )
470 {
471 if( Anq0 > 0 )
473 &nbb, &Anq0, negone,
Mptr( Aptr, Akp, Akq,
474 Ald, size ), &Ald,
Mptr( WBR, 0, Akq, WBRld,
475 size ), &WBRld, talph0,
Mptr( WBC, Akp, 0,
476 WBCld, size ), &WBCld );
478 npcol );
479 gsum2d( ctxt,
ROW, &top, ktmp, nbb,
Mptr( WBC, Akp,
480 0, WBCld, size ), WBCld, myrow, Asrc );
481 if( mycol != Asrc )
483 &nbb, &izero, zero, zero,
Mptr( WBC, Akp, 0,
484 WBCld, size ), &WBCld );
485 }
486 if( ( Akp > 0 ) && ( Anq0 > 0 ) )
488 &nbb, &Anq0, negone,
Mptr( Aptr, 0, Akq, Ald,
489 size ), &Ald,
Mptr( WBR, 0, Akq, WBRld, size ),
490 &WBRld, talph0, WBC, &WBCld );
491 }
492 else
493 {
494 if( Anq0 > 0 )
496 &nbb, &Anq0, negone,
Mptr( Aptr, 0, Akq, Ald,
497 size ), &Ald,
Mptr( WBR, 0, Akq, WBRld, size ),
498 &WBRld, talph0, WBC, &WBCld );
499 }
500 }
501 talph0 = one;
502 }
503 }
504 else
505 {
506
507
508
509 for( k = 0; k < An; k += Alcmb )
510 {
511 ktmp = An - k; kb =
MIN( ktmp, Alcmb );
512
513
514
515
516 Akp =
PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
517 Akq =
PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
519 talph0, Aptr, k, k, Ad0,
Mptr( WBC, Akp, 0, WBCld,
520 size ), WBCld,
Mptr( WBR, 0, Akq, WBRld, size ),
521 WBRld );
522
523
524
525
526
527 Akp =
PB_Cnumroc( k+kb, 0, Aimb1, Amb, myrow, Arow, nprow );
528 if( ( Anp0 = Anp - Akp ) > 0 )
529 {
530 Anq0 =
PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
531 if( WBCsum )
532 {
533 kbnext = ktmp - kb;
534 kbnext =
MIN( kbnext, Alcmb );
535 ktmp =
PB_Cnumroc( kbnext, k+kb, Aimb1, Amb, myrow,
536 Arow, nprow );
537 Anp0 -= ktmp;
538
539 if( ktmp > 0 )
540 {
541 if( Anq0 > 0 )
543 &nbb, &Anq0, negone,
Mptr( Aptr, Akp, Akq,
544 Ald, size ), &Ald,
Mptr( WBR, 0, Akq, WBRld,
545 size ), &WBRld, talph0,
Mptr( WBC, Akp, 0,
546 WBCld, size ), &WBCld );
548 npcol );
549 gsum2d( ctxt,
ROW, &top, ktmp, nbb,
Mptr( WBC, Akp,
550 0, WBCld, size ), WBCld, myrow, Asrc );
551 if( mycol != Asrc )
553 &nbb, &izero, zero, zero,
Mptr( WBC, Akp, 0,
554 WBCld, size ), &WBCld );
555 }
556 if( ( Anp0 > 0 ) && ( Anq0 > 0 ) )
558 &nbb, &Anq0, negone,
Mptr( Aptr, Akp+ktmp, Akq,
559 Ald, size ), &Ald,
Mptr( WBR, 0, Akq, WBRld,
560 size ), &WBRld, talph0,
Mptr( WBC, Akp+ktmp, 0,
561 WBCld, size ), &WBCld );
562 }
563 else
564 {
565 if( Anq0 > 0 )
567 &nbb, &Anq0, negone,
Mptr( Aptr, Akp, Akq, Ald,
568 size ), &Ald,
Mptr( WBR, 0, Akq, WBRld, size ),
569 &WBRld, talph0,
Mptr( WBC, Akp, 0, WBCld,
570 size ), &WBCld );
571 }
572 }
573 talph0 = one;
574 }
575 }
576
577
578
579 if( WBCsum && ( Anp > 0 ) )
580 gsum2d( ctxt,
ROW, &top, Anp, nbb, WBC, WBCld, myrow,
582
583
584
585 if( WBCapbX )
586 PB_Cpaxpby(
TYPE, &conjg, An, nbb, one, WBC, 0, 0, WBCd,
COLUMN,
587 zero, Bptr, 0, 0, DBUFB, &Broc );
588 }
589 else
590 {
591
592
593
594
596 0, 0, DBUFB, &Broc, &WBR, WBRd, &WBRfr, &WBRsum,
597 &WBRapbX );
598
599
600
601 PB_COutV(
TYPE,
COLUMN,
INIT, An, An, Ad0, nbb, &WBC, WBCd, &WBCfr,
602 &WBCsum );
603
604
605
606 Aimb1 = Ad0[
IMB_ ]; Ainb1 = Ad0[
INB_ ];
607 Amb = Ad0[
MB_ ]; Anb = Ad0[
NB_ ];
609
610 Anp =
PB_Cnumroc( An, 0, Aimb1, Amb, myrow, Arow, nprow );
611 Anq =
PB_Cnumroc( An, 0, Ainb1, Anb, mycol, Acol, npcol );
612 if( ( Anp > 0 ) && ( Anq > 0 ) )
613 Aptr =
Mptr( A, Aii, Ajj, Ald, size );
614
615 WBCld = WBCd[
LLD_]; WBRld = WBRd[
LLD_];
616
617 if( upper )
618 {
619
620
621
622 for( k = 0; k < An; k += Alcmb )
623 {
624 ktmp = An - k; kb =
MIN( ktmp, Alcmb );
625
626
627
628
629 Akp =
PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
630 Akq =
PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
632 talph0, Aptr, k, k, Ad0,
Mptr( WBC, Akp, 0, WBCld,
633 size ), WBCld,
Mptr( WBR, 0, Akq, WBRld, size ),
634 WBRld );
635
636
637
638
639
640 Akq =
PB_Cnumroc( k+kb, 0, Ainb1, Anb, mycol, Acol, npcol );
641 if( ( Anq0 = Anq - Akq ) > 0 )
642 {
643 Anp0 =
PB_Cnumroc( kb, k, Aimb1, Amb, myrow, Arow, nprow );
644 if( WBRsum )
645 {
646 kbnext = ktmp - kb;
647 kbnext =
MIN( kbnext, Alcmb );
648 ktmp =
PB_Cnumroc( kbnext, k+kb, Ainb1, Anb, mycol,
649 Acol, npcol );
650 Anq0 -= ktmp;
651
652 if( ktmp > 0 )
653 {
654 if( Anp0 > 0 )
656 &ktmp, &Anp0, negone,
Mptr( WBC, Akp, 0,
657 WBCld, size ), &WBCld,
Mptr( Aptr, Akp, Akq,
658 Ald, size ), &Ald, talph0,
Mptr( WBR, 0,
659 Akq, WBRld, size ), &WBRld );
661 nprow );
662 gsum2d( ctxt,
COLUMN, &top, nbb, ktmp,
Mptr( WBR, 0,
663 Akq, WBRld, size ), WBRld, Asrc, mycol );
664 if( myrow != Asrc )
666 &ktmp, &izero, zero, zero,
Mptr( WBR, 0, Akq,
667 WBRld, size ), &WBRld );
668 }
669 if( ( Anp0 > 0 ) && ( Anq0 > 0 ) )
671 &Anq0, &Anp0, negone,
Mptr( WBC, Akp, 0, WBCld,
672 size ), &WBCld,
Mptr( Aptr, Akp, Akq+ktmp, Ald,
673 size ), &Ald, talph0,
Mptr( WBR, 0, Akq+ktmp,
674 WBRld, size ), &WBRld );
675 }
676 else
677 {
678 if( Anp0 > 0 )
680 &Anq0, &Anp0, negone,
Mptr( WBC, Akp, 0, WBCld,
681 size ), &WBCld,
Mptr( Aptr, Akp, Akq, Ald,
682 size ), &Ald, talph0,
Mptr( WBR, 0, Akq, WBRld,
683 size ), &WBRld );
684 }
685 }
686 talph0 = one;
687 }
688 }
689 else
690 {
691
692
693
694 for( k = ( Astart / Alcmb ) * Alcmb; k >= 0; k -= Alcmb )
695 {
696 ktmp = An - k; kb =
MIN( ktmp, Alcmb );
697
698
699
700
701 Akp =
PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
702 Akq =
PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
704 talph0, Aptr, k, k, Ad0,
Mptr( WBC, Akp, 0, WBCld,
705 size ), WBCld,
Mptr( WBR, 0, Akq, WBRld, size ),
706 WBRld );
707
708
709
710
711
712 if( Akq > 0 )
713 {
714 Anp0 =
PB_Cnumroc( kb, k, Aimb1, Amb, myrow, Arow, nprow );
715 if( WBRsum )
716 {
717 kbprev =
MIN( k, Alcmb );
718 ktmp =
PB_Cnumroc( kbprev, k-kbprev, Ainb1, Anb,
719 mycol, Acol, npcol );
720 Akq -= ktmp;
721
722 if( ktmp > 0 )
723 {
724 if( Anp0 > 0 )
726 &ktmp, &Anp0, negone,
Mptr( WBC, Akp, 0,
727 WBCld, size ), &WBCld,
Mptr( Aptr, Akp, Akq,
728 Ald, size ), &Ald, talph0,
Mptr( WBR, 0,
729 Akq, WBRld, size ), &WBRld );
731 nprow );
732 gsum2d( ctxt,
COLUMN, &top, nbb, ktmp,
Mptr( WBR, 0,
733 Akq, WBRld, size ), WBRld, Asrc, mycol );
734 if( myrow != Asrc )
736 &ktmp, &izero, zero, zero,
Mptr( WBR, 0, Akq,
737 WBRld, size ), &WBRld );
738 }
739 if( ( Anp0 > 0 ) && ( Akq > 0 ) )
741 &Akq, &Anp0, negone,
Mptr( WBC, Akp, 0, WBCld,
742 size ), &WBCld,
Mptr( Aptr, Akp, 0, Ald,
743 size ), &Ald, talph0, WBR, &WBRld );
744 }
745 else
746 {
747 if( Anp0 > 0 )
749 &Akq, &Anp0, negone,
Mptr( WBC, Akp, 0, WBCld,
750 size ), &WBCld,
Mptr( Aptr, Akp, 0, Ald,
751 size ), &Ald, talph0, WBR, &WBRld );
752 }
753 }
754 talph0 = one;
755 }
756 }
757
758
759
760 if( WBRsum && ( Anq > 0 ) )
761 gsum2d( ctxt,
COLUMN, &top, nbb, Anq, WBR, WBRld, WBRd[
RSRC_],
762 mycol );
763
764
765
766 if( WBRapbX )
767 PB_Cpaxpby(
TYPE, &conjg, nbb, An, one, WBR, 0, 0, WBRd,
ROW,
768 zero, Bptr, 0, 0, DBUFB, &Broc );
769 }
770
771 if( WBCfr ) free( WBC );
772 if( WBRfr ) free( WBR );
773
774
775
776 BnpR -= nbb;
777
778 if( BisR || ( BmyprocR == BcurrocR ) ) BiiR += nbb;
779 }
780
781
782
783 if( !( BisR ) )
785 }
786
787 if( TranOp ==
CCOTRAN ) free( talpha );
788
789
790
791}