41{
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218 char scope, * top, * zero;
219 Int Acol, Aii, Ainb1D, AisR, AisRow, Ajj, Ald, AmyprocD, AmyprocR,
220 AnD, AnbD, AnpD, AnprocsD, AprocD, AprocR, Aroc, Arow, Bcol,
221 Bii, BisR, BisRow, Bjj, Bld, Bm, BmyprocD, BmyprocR, Bn,
222 BnprocsD, BprocR, Broc, Brow, RRorCC, ctxt, izero=0, k, kbb,
223 kk, kn, ktmp, mycol, mydist, myproc, myrow, npcol, nprow, p,
224 size;
227
228
229
230 char * buf = NULL;
231
232
233
234
235
236
237
239
240
241
242 PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj,
243 &Arow, &Acol );
244 if( ( AisRow = (
Mupcase( AROC[0] ) ==
CROW ) ) != 0 )
245 {
246 AnD = N; AnbD = DESCA[
NB_]; Ald = DESCA[
LLD_];
247 AprocD = Acol; AprocR = Arow;
248 AmyprocD = mycol; AmyprocR = myrow; AnprocsD = npcol;
249 AisR = ( ( Arow == -1 ) || ( nprow == 1 ) );
251 }
252 else
253 {
254 AnD = M; AnbD = DESCA[
MB_]; Ald = DESCA[
LLD_];
255 AprocD = Arow; AprocR = Acol;
256 AmyprocD = myrow; AmyprocR = mycol; AnprocsD = nprow;
257 AisR = ( ( Acol == -1 ) || ( npcol == 1 ) );
259 }
260
261
262
263 PB_Cinfog2l( IB, JB, DESCB, nprow, npcol, myrow, mycol, &Bii, &Bjj,
264 &Brow, &Bcol );
265 if( ( BisRow = (
Mupcase( BROC[0] ) ==
CROW ) ) != 0 )
266 {
268 BmyprocD = mycol; BnprocsD = npcol;
269 BprocR = Brow; BmyprocR = myrow;
270 BisR = ( ( BprocR == -1 ) || ( nprow == 1 ) );
271 }
272 else
273 {
275 BmyprocD = myrow; BnprocsD = nprow;
276 BprocR = Bcol; BmyprocR = mycol;
277 BisR = ( ( BprocR == -1 ) || ( npcol == 1 ) );
278 }
279
280
281
282 RRorCC = ( ( AisRow && BisRow ) || ( !( AisRow ) && !( BisRow ) ) );
283
284
285
287
288
289
290 if( !( BisR ) )
291 {
292
293
294
295
296
297 if( AisR ) { AprocR = ( ( RRorCC ) ? BprocR : 0 ); }
298
299
300
301
302 if( ( AmyprocR == AprocR ) || ( BmyprocR == BprocR ) )
303 {
304 if( RRorCC )
305 {
306
307
308
311 else add =
TYPE->Fmmadd;
313
314 AnpD =
PB_Cnumroc( AnD, 0, Ainb1D, AnbD, AmyprocD, AprocD,
315 AnprocsD );
316
317
318
319 if( AprocR == BprocR )
320 {
321
322
323
324
325
326 if( AnpD > 0 )
327 {
328 Aroc = AprocD;
329 if( BisRow ) { kk = Ajj; ktmp = JB + N; kn = JB + Ainb1D; }
330 else { kk = Aii; ktmp = IB + M; kn = IB + Ainb1D; }
331
332 if( AmyprocD == Aroc )
333 {
334 if( BisRow )
335 add( &M, &Ainb1D, ALPHA,
Mptr( A, Aii, Ajj, Ald, size ),
336 &Ald, BETA,
Mptr( B, Bii, Bjj, Bld, size ), &Bld );
337 else
338 add( &Ainb1D, &N, ALPHA,
Mptr( A, Aii, Ajj, Ald, size ),
339 &Ald, BETA,
Mptr( B, Bii, Bjj, Bld, size ), &Bld );
340 kk += Ainb1D;
341 }
342 else
343 {
344 if( BisRow )
346 &izero, zero, zero,
Mptr( B, Bii, Bjj, Bld, size ),
347 &Bld );
348 else
350 &izero, zero, zero,
Mptr( B, Bii, Bjj, Bld, size ),
351 &Bld );
352 }
354
355 for( k = kn; k < ktmp; k += AnbD )
356 {
357 kbb = ktmp - k; kbb =
MIN( kbb, AnbD );
358
359 if( AmyprocD == Aroc )
360 {
361 if( BisRow )
362 add( &M, &kbb, ALPHA,
Mptr( A, Aii, kk, Ald, size ),
363 &Ald, BETA,
Mptr( B, Bii, k, Bld, size ),
364 &Bld );
365 else
366 add( &kbb, &N, ALPHA,
Mptr( A, kk, Ajj, Ald, size ),
367 &Ald, BETA,
Mptr( B, k, Bjj, Bld, size ),
368 &Bld );
369 kk += kbb;
370 }
371 else
372 {
373 if( BisRow )
375 &izero, zero, zero,
Mptr( B, Bii, k, Bld,
376 size ), &Bld );
377 else
379 &izero, zero, zero,
Mptr( B, k, Bjj, Bld,
380 size ), &Bld );
381 }
383 }
384 }
385 else
386 {
387
388
389
390
392 zero, zero,
Mptr( B, Bii, Bjj, Bld, size ), &Bld );
393 }
394
395
396
399 TYPE->Cgsum2d( ctxt, &scope, top, M, N,
Mptr( B, Bii, Bjj, Bld,
400 size ), Bld, -1, 0 );
401 }
402 else
403 {
404
405
406
407 if( AmyprocR == AprocR )
408 {
409
410
411
412
413 if( AnpD > 0 )
414 {
415 if( AisRow )
416 TYPE->Cgesd2d( ctxt, M, AnpD,
Mptr( A, Aii, Ajj, Ald,
417 size ), Ald, BprocR, BmyprocD );
418 else
419 TYPE->Cgesd2d( ctxt, AnpD, N,
Mptr( A, Aii, Ajj, Ald,
420 size ), Ald, BmyprocD, BprocR );
421 }
422 }
423
424 if( BmyprocR == BprocR )
425 {
426
427
428
429
430 if( AnpD > 0 )
431 {
432 if( BisRow )
433 {
434 ktmp = JB + N;
435 kn = JB + Ainb1D;
437 TYPE->Cgerv2d( ctxt, M, AnpD, buf, M, AprocR,
438 AmyprocD );
439 }
440 else
441 {
442 ktmp = IB + M;
443 kn = IB + Ainb1D;
445 TYPE->Cgerv2d( ctxt, AnpD, N, buf, AnpD, AmyprocD,
446 AprocR );
447 }
448 Aroc = AprocD;
449 kk = 0;
450
451 if( AmyprocD == Aroc )
452 {
453 if( BisRow )
454 add( &M, &Ainb1D, ALPHA, buf, &M, BETA,
Mptr( B,
455 Bii, Bjj, Bld, size ), &Bld );
456 else
457 add( &Ainb1D, &N, ALPHA, buf, &AnpD, BETA,
Mptr( B,
458 Bii, Bjj, Bld, size ), &Bld );
459 kk += Ainb1D;
460 }
461 else
462 {
463 if( BisRow )
465 &Ainb1D, &izero, zero, zero,
Mptr( B, Bii, Bjj,
466 Bld, size ), &Bld );
467 else
469 &N, &izero, zero, zero,
Mptr( B, Bii, Bjj, Bld,
470 size ), &Bld );
471 }
473
474 for( k = kn; k < ktmp; k += AnbD )
475 {
476 kbb = ktmp - k; kbb =
MIN( kbb, AnbD );
477
478 if( AmyprocD == Aroc )
479 {
480 if( BisRow )
481 add( &M, &kbb, ALPHA,
Mptr( buf, 0, kk, M, size ),
482 &M, BETA,
Mptr( B, Bii, k, Bld, size ),
483 &Bld );
484 else
485 add( &kbb, &N, ALPHA,
Mptr( buf, kk, 0, AnpD,
486 size ), &AnpD, BETA,
Mptr( B, k, Bjj, Bld,
487 size ), &Bld );
488 kk += kbb;
489 }
490 else
491 {
492 if( BisRow )
494 &kbb, &izero, zero, zero,
Mptr( B, Bii, k,
495 Bld, size ), &Bld );
496 else
498 &N, &izero, zero, zero,
Mptr( B, k, Bjj, Bld,
499 size ), &Bld );
500 }
502 }
503 if( buf ) free( buf );
504 }
505 else
506 {
507
508
509
510
512 zero, zero,
Mptr( B, Bii, Bjj, Bld, size ), &Bld );
513 }
514
515
516
519 TYPE->Cgsum2d( ctxt, &scope, top, M, N,
Mptr( B, Bii, Bjj,
520 Bld, size ), Bld, -1, 0 );
521 }
522 }
523 }
524 else
525 {
526
527
528
531 else add =
TYPE->Fmmtadd;
533
534 Broc = 0;
535 if( BisRow ) { ktmp = JB + M; kn = JB + Ainb1D; }
536 else { ktmp = IB + N; kn = IB + Ainb1D; }
537
538
539
540
541 for( p = 0; p < AnprocsD; p++ )
542 {
543 mydist =
MModSub( p, AprocD, AnprocsD );
544 myproc =
MModAdd( AprocD, mydist, AnprocsD );
545
546 if( ( BprocR == p ) && ( AprocR == Broc ) )
547 {
548 if( BmyprocR == p )
549 {
550
551
552
553 AnpD =
PB_Cnumroc( AnD, 0, Ainb1D, AnbD, p, AprocD,
554 AnprocsD );
555 if( AnpD > 0 )
556 {
557 Aroc = AprocD;
558 kk = ( BisRow ? Aii : Ajj );
559
560 if( myproc == Aroc )
561 {
562 if( BmyprocD == Broc )
563 {
564 if( AisRow )
565 add( &M, &Ainb1D, ALPHA,
Mptr( A, Aii, Ajj,
566 Ald, size ), &Ald, BETA,
Mptr( B, Bii,
567 Bjj, Bld, size ), &Bld );
568 else
569 add( &Ainb1D, &N, ALPHA,
Mptr( A, Aii, Ajj,
570 Ald, size ), &Ald, BETA,
Mptr( B, Bii,
571 Bjj, Bld, size ), &Bld );
572 kk += Ainb1D;
573 }
574 else
575 {
576 if( BisRow )
578 &Ainb1D, &izero, zero, zero,
Mptr( B, Bii,
579 Bjj, Bld, size ), &Bld );
580 else
582 &Ainb1D, &M, &izero, zero, zero,
Mptr( B,
583 Bii, Bjj, Bld, size ), &Bld );
584 }
585 }
587
588 for( k = kn; k < ktmp; k += AnbD )
589 {
590 kbb = ktmp - k; kbb =
MIN( kbb, AnbD );
591 if( myproc == Aroc )
592 {
593 if( BmyprocD == Broc )
594 {
595 if( AisRow )
596 add( &M, &kbb, ALPHA,
Mptr( A, Aii, kk, Ald,
597 size ), &Ald, BETA,
Mptr( B, k, Bjj,
598 Bld, size ), &Bld );
599 else
600 add( &kbb, &N, ALPHA,
Mptr( A, kk, Ajj, Ald,
601 size ), &Ald, BETA,
Mptr( B, Bii, k,
602 Bld, size ), &Bld );
603 kk += kbb;
604 }
605 else
606 {
607 if( BisRow )
609 &N, &kbb, &izero, zero, zero,
Mptr( B,
610 Bii, k, Bld, size ), &Bld );
611 else
613 &kbb, &M, &izero, zero, zero,
Mptr( B,
614 k, Bjj, Bld, size ), &Bld );
615 }
616 }
618 }
619 }
620 }
621 }
622 else
623 {
624
625
626
627 if( ( AmyprocR == AprocR ) && ( AmyprocD == p ) )
628 {
629 AnpD =
PB_Cnumroc( AnD, 0, Ainb1D, AnbD, p, AprocD,
630 AnprocsD );
631 if( AnpD > 0 )
632 {
633 if( AisRow )
634 TYPE->Cgesd2d( ctxt, M, AnpD,
Mptr( A, Aii, Ajj, Ald,
635 size ), Ald, Broc, BprocR );
636 else
637 TYPE->Cgesd2d( ctxt, AnpD, N,
Mptr( A, Aii, Ajj, Ald,
638 size ), Ald, BprocR, Broc );
639 }
640 }
641
642 if( BmyprocR == BprocR )
643 {
644 AnpD =
PB_Cnumroc( AnD, 0, Ainb1D, AnbD, p, AprocD,
645 AnprocsD );
646 if( AnpD > 0 )
647 {
648 Aroc = AprocD;
649 kk = 0;
650
651 if( BmyprocD == Broc )
652 {
653 if( AisRow )
654 {
656 TYPE->Cgerv2d( ctxt, M, AnpD, buf, M, AprocR, p );
657 }
658 else
659 {
661 TYPE->Cgerv2d( ctxt, AnpD, N, buf, AnpD, p,
662 AprocR );
663 }
664 }
665
666 if( myproc == Aroc )
667 {
668 if( BmyprocD == Broc )
669 {
670 if( AisRow )
671 add( &M, &Ainb1D, ALPHA, buf, &M, BETA,
672 Mptr( B, Bii, Bjj, Bld, size ), &Bld );
673 else
674 add( &Ainb1D, &N, ALPHA, buf, &AnpD, BETA,
675 Mptr( B, Bii, Bjj, Bld, size ), &Bld );
676 kk += Ainb1D;
677 }
678 else
679 {
680 if( BisRow )
682 &Ainb1D, &izero, zero, zero,
Mptr( B, Bii,
683 Bjj, Bld, size ), &Bld );
684 else
686 &Ainb1D, &M, &izero, zero, zero,
Mptr( B,
687 Bii, Bjj, Bld, size ), &Bld );
688 }
689 }
691
692 for( k = kn; k < ktmp; k += AnbD )
693 {
694 kbb = ktmp - k; kbb =
MIN( kbb, AnbD );
695 if( myproc == Aroc )
696 {
697 if( BmyprocD == Broc )
698 {
699 if( AisRow )
700 add( &M, &kbb, ALPHA,
Mptr( buf, 0, kk, M,
701 size ), &M, BETA,
Mptr( B, k, Bjj,
702 Bld, size ), &Bld );
703 else
704 add( &kbb, &N, ALPHA,
Mptr( buf, kk, 0,
705 AnpD, size ), &AnpD, BETA,
Mptr( B,
706 Bii, k, Bld, size ), &Bld );
707 kk += kbb;
708 }
709 else
710 {
711 if( BisRow )
713 &N, &kbb, &izero, zero, zero,
Mptr( B,
714 Bii, k, Bld, size ), &Bld );
715 else
717 &kbb, &M, &izero, zero, zero,
Mptr( B,
718 k, Bjj, Bld, size ), &Bld );
719 }
720 }
722 }
723 if( ( BmyprocD == Broc ) && ( buf ) ) free( buf );
724 }
725 }
726 }
728 }
729
730 if( BmyprocR == BprocR )
731 {
732
733
734
737 TYPE->Cgsum2d( ctxt, &scope, top, N, M,
Mptr( B, Bii, Bjj, Bld,
738 size ), Bld, -1, 0 );
739 }
740 }
741 }
742
743 if( BisR )
744 {
745
746
747
748 if( BisRow )
749 {
750 if( AisRow ) { Bm = M; Bn = N; }
751 else { Bm = N; Bn = M; }
753 if( BmyprocR == BprocR )
755 size ), Bld );
756 else
758 size ), Bld, BprocR, BmyprocD );
759 }
760 else
761 {
762 if( AisRow ) { Bm = N; Bn = M; }
763 else { Bm = M; Bn = N; }
765 if( BmyprocR == BprocR )
766 TYPE->Cgebs2d( ctxt,
ROW, top, Bm, Bn,
Mptr( B, Bii, Bjj, Bld,
767 size ), Bld );
768 else
769 TYPE->Cgebr2d( ctxt,
ROW, top, Bm, Bn,
Mptr( B, Bii, Bjj, Bld,
770 size ), Bld, BmyprocD, BprocR );
771 }
772 }
773 }
774 else
775 {
776
777
778
779
780 if( AisR || ( AmyprocR == AprocR ) )
781 {
783 if( RRorCC )
784 {
786 else add =
TYPE->Fmmadd;
787 }
788 else
789 {
791 else add =
TYPE->Fmmtadd;
792 }
794
795 AnpD =
PB_Cnumroc( AnD, 0, Ainb1D, AnbD, AmyprocD, AprocD, AnprocsD );
796 if( AnpD > 0 )
797 {
798 Aroc = AprocD;
799 kk = ( AisRow ? Ajj : Aii );
800
801 if( BisRow ) { ktmp = JB + ( RRorCC ? N : M ); kn = JB + Ainb1D; }
802 else { ktmp = IB + ( RRorCC ? M : N ); kn = IB + Ainb1D; }
803
804 if( AmyprocD == Aroc )
805 {
806 if( AisRow )
807 add( &M, &Ainb1D, ALPHA,
Mptr( A, Aii, Ajj, Ald, size ), &Ald,
808 BETA,
Mptr( B, Bii, Bjj, Bld, size ), &Bld );
809 else
810 add( &Ainb1D, &N, ALPHA,
Mptr( A, Aii, Ajj, Ald, size ), &Ald,
811 BETA,
Mptr( B, Bii, Bjj, Bld, size ), &Bld );
812 kk += Ainb1D;
813 }
814 else
815 {
816 if( RRorCC )
817 {
818 if( AisRow ) { Bm = M; Bn = Ainb1D; }
819 else { Bm = Ainb1D; Bn = N; }
820 }
821 else
822 {
823 if( AisRow ) { Bm = Ainb1D; Bn = M; }
824 else { Bm = N; Bn = Ainb1D; }
825 }
827 zero, zero,
Mptr( B, Bii, Bjj, Bld, size ), &Bld );
828 }
830
831 for( k = kn; k < ktmp; k += AnbD )
832 {
833 kbb = ktmp - k; kbb =
MIN( kbb, AnbD );
834
835 if( BisRow ) { buf =
Mptr( B, Bii, k, Bld, size ); }
836 else { buf =
Mptr( B, k, Bjj, Bld, size ); }
837
838 if( AmyprocD == Aroc )
839 {
840 if( AisRow )
841 add( &M, &kbb, ALPHA,
Mptr( A, Aii, kk, Ald, size ), &Ald,
842 BETA, buf, &Bld );
843 else
844 add( &kbb, &N, ALPHA,
Mptr( A, kk, Ajj, Ald, size ), &Ald,
845 BETA, buf, &Bld );
846 kk += kbb;
847 }
848 else
849 {
850 if( RRorCC )
851 {
852 if( AisRow ) { Bm = M; Bn = kbb; }
853 else { Bm = kbb; Bn = N; }
854 }
855 else
856 {
857 if( AisRow ) { Bm = kbb; Bn = M; }
858 else { Bm = N; Bn = kbb; }
859 }
861 zero, zero, buf, &Bld );
862 }
864 }
865 }
866 else
867 {
868 if( RRorCC )
870 zero,
Mptr( B, Bii, Bjj, Bld, size ), &Bld );
871 else
873 zero,
Mptr( B, Bii, Bjj, Bld, size ), &Bld );
874 }
875
876
877
878
881 if( RRorCC )
882 TYPE->Cgsum2d( ctxt, &scope, top, M, N,
Mptr( B, Bii, Bjj, Bld,
883 size ), Bld, -1, 0 );
884 else
885 TYPE->Cgsum2d( ctxt, &scope, top, N, M,
Mptr( B, Bii, Bjj, Bld,
886 size ), Bld, -1, 0 );
887 }
888
889 if( !AisR )
890 {
891
892
893
894
895
896 if( RRorCC ) { Bm = M; Bn = N; }
897 else { Bm = N; Bn = M; }
898
899 if( AisRow )
900 {
902 if( AmyprocR == AprocR )
904 size ), Bld );
905 else
907 size ), Bld, AprocR, AmyprocD );
908 }
909 else
910 {
912 if( AmyprocR == AprocR )
913 TYPE->Cgebs2d( ctxt,
ROW, top, Bm, Bn,
Mptr( B, Bii, Bjj, Bld,
914 size ), Bld );
915 else
916 TYPE->Cgebr2d( ctxt,
ROW, top, Bm, Bn,
Mptr( B, Bii, Bjj, Bld,
917 size ), Bld, AmyprocD, AprocR );
918 }
919 }
920 }
921
922
923
924}