40{
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222 char * top;
223 Int RRorCC, Xcol, Xii, XisR, XisRow, Xjj, Xld, Xlinc, XmyprocD,
224 XmyprocR, XnprocsD, XnprocsR, XprocR, Xroc, Xrow, Ycol, Yii,
225 Yinb1D, YisR, YisRow, Yjj, Yld, Ylinc, YmyprocD, YmyprocR,
226 YnbD, YnpD, YnprocsD, YnprocsR, YprocD, YprocR, Yroc, Yrow,
227 ctxt, ione=1, k, kbb, kk, kn, ktmp, mycol, mydist, myproc,
228 myrow, npcol, nprow, p, size;
229
230
231
232 char * Xptr = NULL, * Yptr = NULL, * buf = NULL;
233
234
235
236
237
238
239
241
242
243
244 PB_Cinfog2l( IX, JX, DESCX, nprow, npcol, myrow, mycol, &Xii, &Xjj,
245 &Xrow, &Xcol );
246 if( ( XisRow = ( INCX == DESCX[
M_] ) ) != 0 )
247 {
248 Xld = DESCX[
LLD_]; Xlinc = Xld;
249 XmyprocD = mycol; XnprocsD = npcol;
250 XprocR = Xrow; XmyprocR = myrow; XnprocsR = nprow;
251 XisR = ( ( Xrow == -1 ) || ( XnprocsR == 1 ) );
252 }
253 else
254 {
255 Xld = DESCX[
LLD_]; Xlinc = 1;
256 XmyprocD = myrow; XnprocsD = nprow;
257 XprocR = Xcol; XmyprocR = mycol; XnprocsR = npcol;
258 XisR = ( ( Xcol == -1 ) || ( XnprocsR == 1 ) );
259 }
260
261
262
263 PB_Cinfog2l( IY, JY, DESCY, nprow, npcol, myrow, mycol, &Yii, &Yjj,
264 &Yrow, &Ycol );
265 if( ( YisRow = ( INCY == DESCY[
M_] ) ) != 0 )
266 {
267 YnbD = DESCY[
NB_]; Yld = DESCY[
LLD_]; Ylinc = Yld;
268 YprocR = Yrow; YmyprocR = myrow; YnprocsR = nprow;
269 YprocD = Ycol; YmyprocD = mycol; YnprocsD = npcol;
271 }
272 else
273 {
274 YnbD = DESCY[
MB_]; Yld = DESCY[
LLD_]; Ylinc = 1;
275 YprocR = Ycol; YmyprocR = mycol; YnprocsR = npcol;
276 YprocD = Yrow; YmyprocD = myrow; YnprocsD = nprow;
278 }
279 YisR = ( ( YprocR == -1 ) || ( YnprocsR == 1 ) );
280
281
282
283 RRorCC = ( ( XisRow && YisRow ) || ( !( XisRow ) && !( YisRow ) ) );
284
285
286
287 if( !( XisR ) )
288 {
289
290
291
292
293
294 if( YisR ) { YprocR = ( ( RRorCC ) ? XprocR : 0 ); }
295
296
297
298
299 if( RRorCC )
300 {
301
302
303
304 if( XprocR == YprocR )
305 {
306
307
308
309 if( ( XmyprocR == XprocR ) || ( YmyprocR == YprocR ) )
310 {
312 YnpD =
PB_Cnumroc( N, 0, Yinb1D, YnbD, YmyprocD, YprocD,
313 YnprocsD );
314
315
316
317
318
319 if( YnpD > 0 )
320 {
321 Yroc = YprocD;
322 if( XisRow ) { kk = Yjj; ktmp = JX + N; kn = JX + Yinb1D; }
323 else { kk = Yii; ktmp = IX + N; kn = IX + Yinb1D; }
324
325 if( YmyprocD == Yroc )
326 {
327 FDOT( &Yinb1D, DOT,
Mptr( X, Xii, Xjj, Xld, size ), &Xlinc,
328 Mptr( Y, Yii, Yjj, Yld, size ), &Ylinc );
329 kk += Yinb1D;
330 }
332
333 for( k = kn; k < ktmp; k += YnbD )
334 {
335 kbb = ktmp - k; kbb =
MIN( kbb, YnbD );
336 if( YmyprocD == Yroc )
337 {
338 if( XisRow )
339 FDOT( &kbb, DOT,
Mptr( X, Xii, k, Xld, size ),
340 &Xlinc,
Mptr( Y, Yii, kk, Yld, size ),
341 &Ylinc );
342 else
343 FDOT( &kbb, DOT,
Mptr( X, k, Xjj, Xld, size ),
344 &Xlinc,
Mptr( Y, kk, Yjj, Yld, size ),
345 &Ylinc );
346 kk += kbb;
347 }
349 }
350 }
351
352
353
354 if( XisRow )
355 {
357 TYPE->Cgsum2d( ctxt,
ROW, top, 1, 1, DOT, 1, -1, 0 );
358 }
359 else
360 {
362 TYPE->Cgsum2d( ctxt,
COLUMN, top, 1, 1, DOT, 1, -1, 0 );
363 }
364 }
365 }
366 else
367 {
368
369
370
371 if( YmyprocR == YprocR )
372 {
374 YnpD =
PB_Cnumroc( N, 0, Yinb1D, YnbD, YmyprocD, YprocD,
375 YnprocsD );
376
377
378
379
380 if( YisRow )
381 {
382 if( YnpD > 0 )
383 TYPE->Cgesd2d( ctxt, 1, YnpD,
Mptr( Y, Yii, Yjj, Yld,
384 size ), Yld, XprocR, YmyprocD );
385 TYPE->Cgerv2d( ctxt, 1, 1, DOT, 1, XprocR, XmyprocD );
386 }
387 else
388 {
389 if( YnpD > 0 )
390 TYPE->Cgesd2d( ctxt, YnpD, 1,
Mptr( Y, Yii, Yjj, Yld,
391 size ), Yld, YmyprocD, XprocR );
392 TYPE->Cgerv2d( ctxt, 1, 1, DOT, 1, XmyprocD, XprocR );
393 }
394 }
395
396 if( XmyprocR == XprocR )
397 {
399 YnpD =
PB_Cnumroc( N, 0, Yinb1D, YnbD, YmyprocD, YprocD,
400 YnprocsD );
401
402
403
404
405
406
407 if( YnpD > 0 )
408 {
410 if( YisRow )
411 TYPE->Cgerv2d( ctxt, 1, YnpD, buf, 1, YprocR,
412 XmyprocD );
413 else
414 TYPE->Cgerv2d( ctxt, YnpD, 1, buf, YnpD, XmyprocD,
415 YprocR );
416
417 Yroc = YprocD;
418 kk = 0;
419 if( XisRow ) { ktmp = JX + N; kn = JX + Yinb1D; }
420 else { ktmp = IX + N; kn = IX + Yinb1D; }
421
422 if( YmyprocD == Yroc )
423 {
424 FDOT( &Yinb1D, DOT,
Mptr( X, Xii, Xjj, Xld, size ),
425 &Xlinc, buf, &ione );
426 kk += Yinb1D;
427 }
429
430 for( k = kn; k < ktmp; k += YnbD )
431 {
432 kbb = ktmp - k; kbb =
MIN( kbb, YnbD );
433 if( YmyprocD == Yroc )
434 {
435 if( XisRow )
436 FDOT( &kbb, DOT,
Mptr( X, Xii, k, Xld, size ),
437 &Xlinc, buf+kk*size, &ione );
438 else
439 FDOT( &kbb, DOT,
Mptr( X, k, Xjj, Xld, size ),
440 &Xlinc, buf+kk*size, &ione );
441 kk += kbb;
442 }
444 }
445 if( buf ) free( buf );
446 }
447
448
449
450
451
452 if( XisRow )
453 {
455 TYPE->Cgsum2d( ctxt,
ROW, top, 1, 1, DOT, 1, -1, 0 );
456 if( !YisR )
457 TYPE->Cgesd2d( ctxt, 1, 1, DOT, 1, YprocR, YmyprocD );
458 }
459 else
460 {
462 TYPE->Cgsum2d( ctxt,
COLUMN, top, 1, 1, DOT, 1, -1, 0 );
463 if( !YisR )
464 TYPE->Cgesd2d( ctxt, 1, 1, DOT, 1, YmyprocD, YprocR );
465 }
466 }
467 }
468
469 if( YisR )
470 {
471
472
473
474 if( XisRow )
475 {
477 if( XmyprocR == XprocR )
478 TYPE->Cgebs2d( ctxt,
COLUMN, top, 1, 1, DOT, 1 );
479 else
480 TYPE->Cgebr2d( ctxt,
COLUMN, top, 1, 1, DOT, 1, XprocR,
481 XmyprocD );
482 }
483 else
484 {
486 if( XmyprocR == XprocR )
487 TYPE->Cgebs2d( ctxt,
ROW, top, 1, 1, DOT, 1 );
488 else
489 TYPE->Cgebr2d( ctxt,
ROW, top, 1, 1, DOT, 1, XmyprocD,
490 XprocR );
491 }
492 }
493 }
494 else
495 {
496
497
498
499 if( ( XmyprocR == XprocR ) || ( YmyprocR == YprocR ) )
500 {
502 Xroc = 0;
503 if( XisRow ) { ktmp = JX + N; kn = JX + Yinb1D; }
504 else { ktmp = IX + N; kn = IX + Yinb1D; }
505
506
507
508
509
510 for( p = 0; p < YnprocsD; p++ )
511 {
512 mydist =
MModSub( p, YprocD, YnprocsD );
513 myproc =
MModAdd( YprocD, mydist, YnprocsD );
514
515 if( ( XprocR == p ) && ( YprocR == Xroc ) )
516 {
517
518
519
520
521 if( ( XmyprocR == p ) && ( XmyprocD == Xroc ) )
522 {
523 YnpD =
PB_Cnumroc( N, 0, Yinb1D, YnbD, p, YprocD,
524 YnprocsD );
525 if( YnpD > 0 )
526 {
527 Yroc = YprocD;
528 kk = ( XisRow ? Yii : Yjj );
529
530 if( myproc == Yroc )
531 {
532 FDOT( &Yinb1D, DOT,
Mptr( X, Xii, Xjj, Xld, size ),
533 &Xlinc,
Mptr( Y, Yii, Yjj, Yld, size ),
534 &Ylinc );
535 kk += Yinb1D;
536 }
538
539 for( k = kn; k < ktmp; k += YnbD )
540 {
541 kbb = ktmp - k; kbb =
MIN( kbb, YnbD );
542 if( myproc == Yroc )
543 {
544 if( XisRow )
545 FDOT( &kbb, DOT,
Mptr( X, Xii, k, Xld, size ),
546 &Xlinc,
Mptr( Y, kk, Yjj, Yld, size ),
547 &Ylinc );
548 else
549 FDOT( &kbb, DOT,
Mptr( X, k, Xjj, Xld, size ),
550 &Xlinc,
Mptr( Y, Yii, kk, Yld, size ),
551 &Ylinc );
552 kk += kbb;
553 }
555 }
556 }
557 }
558 }
559 else
560 {
561
562
563
564 if( ( YmyprocR == YprocR ) && ( YmyprocD == p ) )
565 {
566 YnpD =
PB_Cnumroc( N, 0, Yinb1D, YnbD, p, YprocD,
567 YnprocsD );
568 if( YnpD > 0 )
569 {
570 if( XisRow )
571 TYPE->Cgesd2d( ctxt, YnpD, 1,
Mptr( Y, Yii, Yjj, Yld,
572 size ), Yld, XprocR, Xroc );
573 else
574 TYPE->Cgesd2d( ctxt, 1, YnpD,
Mptr( Y, Yii, Yjj, Yld,
575 size ), Yld, Xroc, XprocR );
576 }
577 }
578
579 if( ( XmyprocR == XprocR ) && ( XmyprocD == Xroc ) )
580 {
581 YnpD =
PB_Cnumroc( N, 0, Yinb1D, YnbD, p, YprocD,
582 YnprocsD );
583 if( YnpD > 0 )
584 {
586 Yroc = YprocD;
587 kk = 0;
588
589
590
591 if( XisRow )
592 TYPE->Cgerv2d( ctxt, YnpD, 1, buf, YnpD, p, YprocR );
593 else
594 TYPE->Cgerv2d( ctxt, 1, YnpD, buf, 1, YprocR, p );
595
596 if( myproc == Yroc )
597 {
598 FDOT( &Yinb1D, DOT,
Mptr( X, Xii, Xjj, Xld, size ),
599 &Xlinc, buf, &ione );
600 kk += Yinb1D;
601 }
603
604 for( k = kn; k < ktmp; k += YnbD )
605 {
606 kbb = ktmp - k; kbb =
MIN( kbb, YnbD );
607 if( myproc == Yroc )
608 {
609 if( XisRow )
610 FDOT( &kbb, DOT,
Mptr( X, Xii, k, Xld, size ),
611 &Xlinc, buf+kk*size, &ione );
612 else
613 FDOT( &kbb, DOT,
Mptr( X, k, Xjj, Xld, size ),
614 &Xlinc, buf+kk*size, &ione );
615 kk += kbb;
616 }
618 }
619 if( buf ) free( buf );
620 }
621 }
622 }
624 }
625
626
627
628 if( XmyprocR == XprocR )
629 {
630 if( XisRow )
631 {
633 TYPE->Cgsum2d( ctxt,
ROW, top, 1, 1, DOT, 1, -1, 0 );
634 }
635 else
636 {
638 TYPE->Cgsum2d( ctxt,
COLUMN, top, 1, 1, DOT, 1, -1, 0 );
639 }
640 }
641 }
642
643
644
645 if( YisR || ( YmyprocR == YprocR ) )
646 {
647 if( YisRow )
648 {
650 if( XmyprocR == XprocR )
651 TYPE->Cgebs2d( ctxt,
ROW, top, 1, 1, DOT, 1 );
652 else
653 TYPE->Cgebr2d( ctxt,
ROW, top, 1, 1, DOT, 1, YmyprocR,
654 XprocR );
655 }
656 else
657 {
659 if( XmyprocR == XprocR )
660 TYPE->Cgebs2d( ctxt,
COLUMN, top, 1, 1, DOT, 1 );
661 else
662 TYPE->Cgebr2d( ctxt,
COLUMN, top, 1, 1, DOT, 1, XprocR,
663 YmyprocR );
664 }
665 }
666 }
667 }
668 else
669 {
670
671
672
673
674
675 if( YisR || ( YmyprocR == YprocR ) )
676 {
678 Yroc = YprocD;
679 kk = ( YisRow ? Yjj : Yii );
680
681 if( XisRow ) { ktmp = JX + N; kn = JX + Yinb1D; }
682 else { ktmp = IX + N; kn = IX + Yinb1D; }
683
684 if( YmyprocD == Yroc )
685 {
686 FDOT( &Yinb1D, DOT,
Mptr( X, Xii, Xjj, Xld, size ), &Xlinc,
Mptr( Y,
687 Yii, Yjj, Yld, size ), &Ylinc );
688 kk += Yinb1D;
689 }
691
692 for( k = kn; k < ktmp; k += YnbD )
693 {
694 kbb = ktmp - k; kbb =
MIN( kbb, YnbD );
695 if( YmyprocD == Yroc )
696 {
697 if( XisRow ) { Xptr =
Mptr( X, Xii, k, Xld, size ); }
698 else { Xptr =
Mptr( X, k, Xjj, Xld, size ); }
699 if( YisRow ) { Yptr =
Mptr( Y, Yii, kk, Yld, size ); }
700 else { Yptr =
Mptr( Y, kk, Yjj, Yld, size ); }
701 FDOT( &kbb, DOT, Xptr, &Xlinc, Yptr, &Ylinc );
702 kk += kbb;
703 }
705 }
706 }
707
708 if( YisR )
709 {
710
711
712
713 if( YisRow )
714 {
716 TYPE->Cgsum2d( ctxt,
ROW, top, 1, 1, DOT, 1, -1, 0 );
717 }
718 else
719 {
721 TYPE->Cgsum2d( ctxt,
COLUMN, top, 1, 1, DOT, 1, -1, 0 );
722 }
723 }
724 else
725 {
726
727
728
730 TYPE->Cgsum2d( ctxt,
ALL, top, 1, 1, DOT, 1, -1, 0 );
731 }
732 }
733
734
735
736}