36{
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186 char scope, * top, * zero;
187 Int RRorCC, Xcol, Xii, XisR, XisRow, Xjj, Xld, Xlinc, Xm,
188 XmyprocD, XmyprocR, Xn, XnprocsD, XnprocsR, XprocR, Xroc,
189 Xrow, Ycol, Yii, Yinb1D, YisR, YisRow, Yjj, Yld, Ylinc,
190 YmyprocD, YmyprocR, YnbD, YnpD, YnprocsD, YnprocsR, YprocD,
191 YprocR, Yroc, Yrow, ctxt, ione=1, k, kbb, kk, kn, ktmp, mycol,
192 mydist, myproc, myrow, npcol, nprow, p, size;
193
194
195
196 char * buf = NULL;
197
198
199
200
201
202
203
205
206
207
208 PB_Cinfog2l( IX, JX, DESCX, nprow, npcol, myrow, mycol, &Xii, &Xjj,
209 &Xrow, &Xcol );
210 if( ( XisRow = ( INCX == DESCX[
M_] ) ) != 0 )
211 {
212 Xld = DESCX[
LLD_]; Xlinc = Xld;
213 XmyprocD = mycol; XnprocsD = npcol;
214 XprocR = Xrow; XmyprocR = myrow; XnprocsR = nprow;
215 XisR = ( ( Xrow == -1 ) || ( XnprocsR == 1 ) );
216 }
217 else
218 {
219 Xld = DESCX[
LLD_]; Xlinc = 1;
220 XmyprocD = myrow; XnprocsD = nprow;
221 XprocR = Xcol; XmyprocR = mycol; XnprocsR = npcol;
222 XisR = ( ( Xcol == -1 ) || ( XnprocsR == 1 ) );
223 }
224
225
226
227 PB_Cinfog2l( IY, JY, DESCY, nprow, npcol, myrow, mycol, &Yii, &Yjj,
228 &Yrow, &Ycol );
229 if( ( YisRow = ( INCY == DESCY[
M_] ) ) != 0 )
230 {
231 YnbD = DESCY[
NB_]; Yld = DESCY[
LLD_]; Ylinc = Yld;
232 YprocR = Yrow; YmyprocR = myrow; YnprocsR = nprow;
233 YprocD = Ycol; YmyprocD = mycol; YnprocsD = npcol;
235 }
236 else
237 {
238 YnbD = DESCY[
MB_]; Yld = DESCY[
LLD_]; Ylinc = 1;
239 YprocR = Ycol; YmyprocR = mycol; YnprocsR = npcol;
240 YprocD = Yrow; YmyprocD = myrow; YnprocsD = nprow;
242 }
243
244 YisR = ( ( YprocR == -1 ) || ( YnprocsR == 1 ) );
245
246
247
248 RRorCC = ( ( XisRow && YisRow ) || ( !( XisRow ) && !( YisRow ) ) );
249
250
251
252 size =
TYPE->size; zero =
TYPE->zero;
253
254 if( !( XisR ) )
255 {
256
257
258
259
260
261 if( YisR ) { YprocR = ( ( RRorCC ) ? XprocR : 0 ); }
262
263
264
265
266 if( ( XmyprocR == XprocR ) || ( YmyprocR == YprocR ) )
267 {
268
269
270
271 if( RRorCC )
272 {
273 YnpD =
PB_Cnumroc( N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD );
274
275
276
277 if( XprocR == YprocR )
278 {
279
280
281
282
283
284 if( YnpD > 0 )
285 {
286 Yroc = YprocD;
287 if( XisRow ) { kk = Yjj; ktmp = JX + N; kn = JX + Yinb1D; }
288 else { kk = Yii; ktmp = IX + N; kn = IX + Yinb1D; }
289
290 if( YmyprocD == Yroc )
291 {
292 TYPE->Fswap( &Yinb1D,
Mptr( X, Xii, Xjj, Xld, size ),
293 &Xlinc,
Mptr( Y, Yii, Yjj, Yld, size ),
294 &Ylinc );
295 kk += Yinb1D;
296 }
297 else
298 {
299 TYPE->Fset( &Yinb1D, zero,
Mptr( X, Xii, Xjj, Xld, size ),
300 &Xlinc );
301 }
303
304 for( k = kn; k < ktmp; k += YnbD )
305 {
306 kbb = ktmp - k; kbb =
MIN( kbb, YnbD );
307 if( YmyprocD == Yroc )
308 {
309 if( XisRow )
310 TYPE->Fswap( &kbb,
Mptr( X, Xii, k, Xld, size ),
311 &Xlinc,
Mptr( Y, Yii, kk, Yld, size ),
312 &Ylinc );
313 else
314 TYPE->Fswap( &kbb,
Mptr( X, k, Xjj, Xld, size ),
315 &Xlinc,
Mptr( Y, kk, Yjj, Yld, size ),
316 &Ylinc );
317 kk += kbb;
318 }
319 else
320 {
321 if( XisRow )
322 TYPE->Fset( &kbb, zero,
Mptr( X, Xii, k, Xld, size ),
323 &Xlinc );
324 else
325 TYPE->Fset( &kbb, zero,
Mptr( X, k, Xjj, Xld, size ),
326 &Xlinc );
327 }
329 }
330 }
331 else
332 {
333
334
335
336
337 TYPE->Fset( &N, zero,
Mptr( X, Xii, Xjj, Xld, size ),
338 &Xlinc );
339 }
340
341
342
343 if( XisRow )
344 {
346 TYPE->Cgsum2d( ctxt,
ROW, top, 1, N,
Mptr( X, Xii, Xjj, Xld,
347 size ), Xld, -1, 0 );
348 }
349 else
350 {
353 Xld, size ), Xld, -1, 0 );
354 }
355 }
356 else
357 {
358
359
360
361 if( YmyprocR == YprocR )
362 {
363
364
365
366
367 if( YnpD > 0 )
368 {
369 if( YisRow )
370 {
371 TYPE->Cgesd2d( ctxt, 1, YnpD,
Mptr( Y, Yii, Yjj, Yld,
372 size ), Yld, XprocR, YmyprocD );
373 TYPE->Cgerv2d( ctxt, 1, YnpD,
Mptr( Y, Yii, Yjj, Yld,
374 size ), Yld, XprocR, YmyprocD );
375 }
376 else
377 {
378 TYPE->Cgesd2d( ctxt, YnpD, 1,
Mptr( Y, Yii, Yjj, Yld,
379 size ), Yld, YmyprocD, XprocR );
380 TYPE->Cgerv2d( ctxt, YnpD, 1,
Mptr( Y, Yii, Yjj, Yld,
381 size ), Yld, YmyprocD, XprocR );
382 }
383 }
384 }
385
386 if( XmyprocR == XprocR )
387 {
388
389
390
391
392
393
394 if( YnpD > 0 )
395 {
397 if( YisRow )
398 TYPE->Cgerv2d( ctxt, 1, YnpD, buf, 1, YprocR,
399 XmyprocD );
400 else
401 TYPE->Cgerv2d( ctxt, YnpD, 1, buf, YnpD, XmyprocD,
402 YprocR );
403
404 Yroc = YprocD;
405 kk = 0;
406 if( XisRow ) { ktmp = JX + N; kn = JX + Yinb1D; }
407 else { ktmp = IX + N; kn = IX + Yinb1D; }
408
409 if( YmyprocD == Yroc )
410 {
411 TYPE->Fswap( &Yinb1D,
Mptr( X, Xii, Xjj, Xld, size ),
412 &Xlinc, buf, &ione );
413 kk += Yinb1D;
414 }
415 else
416 {
417 TYPE->Fset( &Yinb1D, zero,
Mptr( X, Xii, Xjj, Xld,
418 size ), &Xlinc );
419 }
421
422 for( k = kn; k < ktmp; k += YnbD )
423 {
424 kbb = ktmp - k; kbb =
MIN( kbb, YnbD );
425
426 if( YmyprocD == Yroc )
427 {
428 if( XisRow )
429 TYPE->Fswap( &kbb,
Mptr( X, Xii, k, Xld, size ),
430 &Xlinc, buf+kk*size, &ione );
431 else
432 TYPE->Fswap( &kbb,
Mptr( X, k, Xjj, Xld, size ),
433 &Xlinc, buf+kk*size, &ione );
434 kk += kbb;
435 }
436 else
437 {
438 if( XisRow )
439 TYPE->Fset( &kbb, zero,
Mptr( X, Xii, k, Xld,
440 size ), &Xlinc );
441 else
442 TYPE->Fset( &kbb, zero,
Mptr( X, k, Xjj, Xld,
443 size ), &Xlinc );
444 }
446 }
447 if( YisRow )
448 TYPE->Cgesd2d( ctxt, 1, YnpD, buf, 1, YprocR,
449 XmyprocD );
450 else
451 TYPE->Cgesd2d( ctxt, YnpD, 1, buf, YnpD, XmyprocD,
452 YprocR );
453 if( buf ) free( buf );
454 }
455 else
456 {
457 TYPE->Fset( &N, zero,
Mptr( X, Xii, Xjj, Xld, size ),
458 &Xlinc );
459 }
460
461
462
463 if( XisRow )
464 {
466 TYPE->Cgsum2d( ctxt,
ROW, top, 1, N,
Mptr( X, Xii, Xjj,
467 Xld, size ), Xld, -1, 0 );
468 }
469 else
470 {
473 Xld, size ), Xld, -1, 0 );
474 }
475 }
476 }
477 }
478 else
479 {
480
481
482
483 Xroc = 0;
484 if( XisRow ) { ktmp = JX + N; kn = JX + Yinb1D; }
485 else { ktmp = IX + N; kn = IX + Yinb1D; }
486
487
488
489
490
491 for( p = 0; p < YnprocsD; p++ )
492 {
493 mydist =
MModSub( p, YprocD, YnprocsD );
494 myproc =
MModAdd( YprocD, mydist, YnprocsD );
495
496 if( ( XprocR == p ) && ( YprocR == Xroc ) )
497 {
498
499
500
501 if( XmyprocR == p )
502 {
503 YnpD =
PB_Cnumroc( N, 0, Yinb1D, YnbD, p, YprocD,
504 YnprocsD );
505 if( YnpD > 0 )
506 {
507 Yroc = YprocD;
508 kk = ( XisRow ? Yii : Yjj );
509
510 if( myproc == Yroc )
511 {
512 if( XmyprocD == Xroc )
513 {
514 TYPE->Fswap( &Yinb1D,
Mptr( X, Xii, Xjj, Xld,
515 size ), &Xlinc,
Mptr( Y, Yii, Yjj,
516 Yld, size ), &Ylinc );
517 kk += Yinb1D;
518 }
519 else
520 {
521 TYPE->Fset( &Yinb1D, zero,
Mptr( X, Xii, Xjj, Xld,
522 size ), &Xlinc );
523 }
524 }
526
527 for( k = kn; k < ktmp; k += YnbD )
528 {
529 kbb = ktmp - k; kbb =
MIN( kbb, YnbD );
530 if( myproc == Yroc )
531 {
532 if( XmyprocD == Xroc )
533 {
534 if( XisRow )
535 TYPE->Fswap( &kbb,
Mptr( X, Xii, k, Xld,
536 size ), &Xlinc,
Mptr( Y, kk,
537 Yjj, Yld, size ), &Ylinc );
538 else
539 TYPE->Fswap( &kbb,
Mptr( X, k, Xjj, Xld,
540 size ), &Xlinc,
Mptr( Y, Yii,
541 kk, Yld, size ), &Ylinc );
542 kk += kbb;
543 }
544 else
545 {
546 if( XisRow )
547 TYPE->Fset( &kbb, zero,
Mptr( X, Xii, k,
548 Xld, size ), &Xlinc );
549 else
550 TYPE->Fset( &kbb, zero,
Mptr( X, k, Xjj,
551 Xld, size ), &Xlinc );
552 }
553 }
555 }
556 }
557 }
558 }
559 else
560 {
561
562
563
564 if( ( YmyprocR == YprocR ) && ( YmyprocD == p ) )
565 {
566 YnpD =
PB_Cnumroc( N, 0, Yinb1D, YnbD, p, YprocD,
567 YnprocsD );
568 if( YnpD > 0 )
569 {
570 if( XisRow )
571 {
572 TYPE->Cgesd2d( ctxt, YnpD, 1,
Mptr( Y, Yii, Yjj,
573 Yld, size ), Yld, XprocR, Xroc );
574 TYPE->Cgerv2d( ctxt, YnpD, 1,
Mptr( Y, Yii, Yjj,
575 Yld, size ), Yld, XprocR, Xroc );
576 }
577 else
578 {
579 TYPE->Cgesd2d( ctxt, 1, YnpD,
Mptr( Y, Yii, Yjj,
580 Yld, size ), Yld, Xroc, XprocR );
581 TYPE->Cgerv2d( ctxt, 1, YnpD,
Mptr( Y, Yii, Yjj,
582 Yld, size ), Yld, Xroc, XprocR );
583 }
584 }
585 }
586
587 if( XmyprocR == XprocR )
588 {
589 YnpD =
PB_Cnumroc( N, 0, Yinb1D, YnbD, p, YprocD,
590 YnprocsD );
591 if( YnpD > 0 )
592 {
593 Yroc = YprocD;
594 kk = 0;
595
596
597
598 if( XmyprocD == Xroc )
599 {
601 if( XisRow )
602 TYPE->Cgerv2d( ctxt, YnpD, 1, buf, YnpD,
603 p, YprocR );
604 else
605 TYPE->Cgerv2d( ctxt, 1, YnpD, buf, 1,
606 YprocR, p );
607 }
608
609 if( myproc == Yroc )
610 {
611 if( XmyprocD == Xroc )
612 {
613 TYPE->Fswap( &Yinb1D,
Mptr( X, Xii, Xjj, Xld,
614 size ), &Xlinc, buf, &ione );
615 kk += Yinb1D;
616 }
617 else
618 {
619 TYPE->Fset( &Yinb1D, zero,
Mptr( X, Xii, Xjj,
620 Xld, size ), &Xlinc );
621 }
622 }
624
625 for( k = kn; k < ktmp; k += YnbD )
626 {
627 kbb = ktmp - k; kbb =
MIN( kbb, YnbD );
628 if( myproc == Yroc )
629 {
630 if( XmyprocD == Xroc )
631 {
632 if( XisRow )
633 TYPE->Fswap( &kbb,
Mptr( X, Xii, k, Xld,
634 size ), &Xlinc, buf+kk*size,
635 &ione );
636 else
637 TYPE->Fswap( &kbb,
Mptr( X, k, Xjj, Xld,
638 size ), &Xlinc, buf+kk*size,
639 &ione );
640 kk += kbb;
641 }
642 else
643 {
644 if( XisRow )
645 TYPE->Fset( &kbb, zero,
Mptr( X, Xii, k,
646 Xld, size ), &Xlinc );
647 else
648 TYPE->Fset( &kbb, zero,
Mptr( X, k, Xjj,
649 Xld, size ), &Xlinc );
650 }
651 }
653 }
654
655 if( XmyprocD == Xroc )
656 {
657 if( XisRow )
658 TYPE->Cgesd2d( ctxt, YnpD, 1, buf, YnpD,
659 p, YprocR );
660 else
661 TYPE->Cgesd2d( ctxt, 1, YnpD, buf, 1,
662 YprocR, p );
663 if( buf ) free( buf );
664 }
665 }
666 }
667 }
669 }
670
671
672
673 if( XmyprocR == XprocR )
674 {
675 if( XisRow )
676 {
678 TYPE->Cgsum2d( ctxt,
ROW, top, 1, N,
Mptr( X, Xii, Xjj,
679 Xld, size ), Xld, -1, 0 );
680 }
681 else
682 {
685 Xld, size ), Xld, -1, 0 );
686 }
687 }
688 }
689 }
690
691 if( YisR )
692 {
693
694
695
696 YnpD =
PB_Cnumroc( N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD );
697 if( YnpD > 0 )
698 {
699 if( YisRow )
700 {
702 if( YmyprocR == YprocR )
704 Yld, size ), Yld );
705 else
707 Yld, size ), Yld, YprocR, YmyprocD );
708 }
709 else
710 {
712 if( YmyprocR == YprocR )
713 TYPE->Cgebs2d( ctxt,
ROW, top, YnpD, 1,
Mptr( Y, Yii, Yjj,
714 Yld, size ), Yld );
715 else
716 TYPE->Cgebr2d( ctxt,
ROW, top, YnpD, 1,
Mptr( Y, Yii, Yjj,
717 Yld, size ), Yld, YmyprocD, YprocR );
718 }
719 }
720 }
721 }
722 else
723 {
724
725
726
727
728 if( YisR || ( YmyprocR == YprocR ) )
729 {
730 YnpD =
PB_Cnumroc( N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD );
731
732 if( YnpD > 0 )
733 {
734 Yroc = YprocD;
735 kk = ( YisRow ? Yjj : Yii );
736
737 if( XisRow ) { ktmp = JX + N; kn = JX + Yinb1D; }
738 else { ktmp = IX + N; kn = IX + Yinb1D; }
739
740 if( YmyprocD == Yroc )
741 {
742 TYPE->Fswap( &Yinb1D,
Mptr( X, Xii, Xjj, Xld, size ), &Xlinc,
743 Mptr( Y, Yii, Yjj, Yld, size ), &Ylinc );
744 kk += Yinb1D;
745 }
746 else
747 {
748 TYPE->Fset( &Yinb1D, zero,
Mptr( X, Xii, Xjj, Xld, size ),
749 &Xlinc );
750 }
752
753 for( k = kn; k < ktmp; k += YnbD )
754 {
755 kbb = ktmp - k; kbb =
MIN( kbb, YnbD );
756 if( YmyprocD == Yroc )
757 {
758 if( YisRow )
759 {
760 if( XisRow )
761 TYPE->Fswap( &kbb,
Mptr( X, Xii, k, Xld, size ), &Xlinc,
762 Mptr( Y, Yii, kk, Yld, size ), &Ylinc );
763 else
764 TYPE->Fswap( &kbb,
Mptr( X, k, Xjj, Xld, size ), &Xlinc,
765 Mptr( Y, Yii, kk, Yld, size ), &Ylinc );
766 }
767 else
768 {
769 if( XisRow )
770 TYPE->Fswap( &kbb,
Mptr( X, Xii, k, Xld, size ), &Xlinc,
771 Mptr( Y, kk, Yjj, Yld, size ), &Ylinc );
772 else
773 TYPE->Fswap( &kbb,
Mptr( X, k, Xjj, Xld, size ), &Xlinc,
774 Mptr( Y, kk, Yjj, Yld, size ), &Ylinc );
775 }
776 kk += kbb;
777 }
778 else
779 {
780 if( XisRow )
781 TYPE->Fset( &kbb, zero,
Mptr( X, Xii, k, Xld, size ),
782 &Xlinc );
783 else
784 TYPE->Fset( &kbb, zero,
Mptr( X, k, Xjj, Xld, size ),
785 &Xlinc );
786 }
788 }
789 }
790 else
791 {
792
793
794
795 TYPE->Fset( &N, zero,
Mptr( X, Xii, Xjj, Xld, size ), &Xlinc );
796 }
797
798
799
800
803 if( XisRow )
804 TYPE->Cgsum2d( ctxt, &scope, top, 1, N,
Mptr( X, Xii, Xjj, Xld,
805 size ), Xld, -1, 0 );
806 else
807 TYPE->Cgsum2d( ctxt, &scope, top, N, 1,
Mptr( X, Xii, Xjj, Xld,
808 size ), Xld, -1, 0 );
809 }
810
811 if( !YisR )
812 {
813
814
815
816
817
818 if( XisRow ) { Xm = 1; Xn = N; }
819 else { Xm = N; Xn = 1; }
820
821 if( YisRow )
822 {
824 if( YmyprocR == YprocR )
826 size ), Xld );
827 else
829 size ), Xld, YprocR, YmyprocD );
830 }
831 else
832 {
834 if( YmyprocR == YprocR )
835 TYPE->Cgebs2d( ctxt,
ROW, top, Xm, Xn,
Mptr( X, Xii, Xjj, Xld,
836 size ), Xld );
837 else
838 TYPE->Cgebr2d( ctxt,
ROW, top, Xm, Xn,
Mptr( X, Xii, Xjj, Xld,
839 size ), Xld, YmyprocD, YprocR );
840 }
841 }
842 }
843
844
845
846}