39{
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229 char btop, * negone, * one, * zero;
230 Int Acol, Aii, Aimb1, Ainb1, Ais1Col, Ais1Row, AisColRep,
231 AisRowRep, Ajj, Alcol, Ald, Alrow, Amb, Anpprev, Anb, Anp,
232 Anq, Arow, Asrc, ChangeRoc=0, bcst, ctxt, ione=1, k=0, kb,
233 kbprev=0, kbsize, mb1, mycol, myrow, n1, n1last, n1p,
234 n1pprev=0, nb1, nlast, npcol, nprow, rocprev, size, tmp1,
235 tmp2;
245
246
247
248 char * Aprev = NULL, * Xd = NULL, * Xdprev = NULL,
249 * Xprev = NULL, * work = NULL;
250
251
252
253
254 if( N <= 0 ) return;
255
256
257
259
260
261
262 PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Arow,
263 &Acol );
264
265
266
267
268 Amb = DESCA[
MB_]; Anb = DESCA[
NB_]; Ald = DESCA[
LLD_ ];
270 Anp =
PB_Cnumroc( N, 0, Aimb1, Amb, myrow, Arow, nprow );
271 Ais1Row = !(
PB_Cspan( N, 0, Aimb1, Amb, Arow, nprow ) );
273 Anq =
PB_Cnumroc( N, 0, Ainb1, Anb, mycol, Acol, npcol );
274 Ais1Col = !(
PB_Cspan( N, 0, Ainb1, Anb, Acol, npcol ) );
275
276
277
278 if( Ais1Row && Ais1Col )
279 {
281 {
282 if( Anq > 0 )
283 {
284 if( Anp > 0 )
285 {
288 TYPE->size ), &Ald, XC, &INCXC );
289 TYPE->Fcopy( &Anp, XC, &INCXC, XR, &INCXR );
290 }
291 if( ( Arow >= 0 ) && FBCAST )
292 {
294 if( myrow == Arow )
295 TYPE->Cgebs2d( ctxt,
COLUMN, &btop, 1, Anq, XR, INCXR );
296 else
297 TYPE->Cgebr2d( ctxt,
COLUMN, &btop, 1, Anq, XR, INCXR, Arow,
298 mycol );
299 }
300 }
301 }
302 else
303 {
304 if( Anp > 0 )
305 {
306 if( Anq > 0 )
307 {
310 TYPE->size ), &Ald, XR, &INCXR );
311 TYPE->Fcopy( &Anq, XR, &INCXR, XC, &INCXC );
312 }
313 if( Acol >= 0 && FBCAST )
314 {
316 if( mycol == Acol )
317 TYPE->Cgebs2d( ctxt,
ROW, &btop, Anp, 1, XC, Anp );
318 else
319 TYPE->Cgebr2d( ctxt,
ROW, &btop, Anp, 1, XC, Anp, myrow,
320 Acol );
321 }
322 }
323 }
324 return;
325 }
326
327
328
330 negone =
TYPE->negone; one =
TYPE->one; zero =
TYPE->zero;
331 axpy =
TYPE->Faxpy; copy =
TYPE->Fcopy; set =
TYPE->Fset;
332 gemv =
TYPE->Fgemv; trsv =
TYPE->Ftrsv;
333 send =
TYPE->Cgesd2d; recv =
TYPE->Cgerv2d;
334 bsend =
TYPE->Cgebs2d; brecv =
TYPE->Cgebr2d;
335
336 if( ( Anp > 0 ) && ( Anq > 0 ) ) A =
Mptr( A, Aii, Ajj, Ald, size );
337
339 {
340 if( ( Anq <= 0 ) || ( Ais1Row && ( ( Arow >= 0 ) && !( FBCAST ) &&
341 ( myrow != Arow ) ) ) ) return;
343 bcst = ( ( !Ais1Row ) || ( Ais1Row && ( Arow >= 0 ) && FBCAST ) );
344 AisRowRep = ( ( Arow < 0 ) || ( nprow == 1 ) );
345
347 {
348
349
350
351 nlast = ( npcol - 1 ) * Anb;
352 n1 =
MAX( nlast, Anb );
353 nlast += Ainb1;
354 n1last = n1 - Anb +
MAX( Ainb1, Anb );
356 tmp1 = N-1;
357 Alrow =
PB_Cindxg2p( tmp1, Aimb1, Amb, Arow, Arow, nprow );
358 Alcol =
PB_Cindxg2p( tmp1, Ainb1, Anb, Acol, Acol, npcol );
359 rocprev = Alcol; Anpprev = Anp; Xprev = XC; Xdprev = XR;
360 Aprev = A =
Mptr( A, 0, Anq, Ald, size );
363 tmp1 = N - ( kb =
MIN( mb1, nb1 ) );
364 n1 = ( ( Ais1Col || ( N - nb1 < nlast ) ) ? n1last : n1 );
365 tmp2 = n1 + nb1 - kb; tmp1 -= ( tmp2 =
MIN( tmp1, tmp2 ) );
366 Asrc = Arow;
367 n1p =
PB_Cnumroc( tmp2,
MAX( 0, tmp1 ), Aimb1, Amb, myrow, Asrc,
368 nprow );
369 while( N > 0 )
370 {
371 kbsize = kb * size;
372
373 if( Ais1Col || ( mycol == Alcol ) )
374 {
375 A -= Ald * kbsize;
376 Anq -= kb;
377 Xd =
Mptr( XR, 0, Anq, INCXR, size );
378 }
379 if( ( Arow < 0 ) || ( myrow == Alrow ) ) { Anp -= kb; }
380
381
382
383 if( n1pprev > 0 )
384 {
385 if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) )
386 {
387 tmp1 = ( Anpprev - n1pprev ) * size;
388 gemv(
C2F_CHAR( TRANS ), &n1pprev, &kbprev, negone,
389 Aprev+tmp1, &Ald, Xdprev, &INCXR, one, Xprev+tmp1,
390 &INCXC );
391 }
392
393
394
395 if( !( Ais1Col ) && ChangeRoc )
396 {
397 if( mycol == rocprev )
398 {
399 send( ctxt, n1pprev, 1, Xprev+(Anpprev-n1pprev)*size,
400 n1pprev, myrow, Alcol );
401 }
402 else if( mycol == Alcol )
403 {
404 recv( ctxt, n1pprev, 1, work, n1pprev, myrow, rocprev );
405 axpy( &n1pprev, one, work, &ione,
Mptr( Xprev,
406 Anpprev-n1pprev, 0, INCXC, size ), &INCXC );
407 }
408 }
409 }
410
411
412
413 if( Ais1Col || ( mycol == Alcol ) )
414 {
415 if( AisRowRep || ( myrow == Alrow ) )
416 {
418 &kb,
Mptr( A, Anp, 0, Ald, size ), &Ald,
Mptr( XC, Anp,
419 0, INCXC, size ), &INCXC );
420 copy( &kb,
Mptr( XC, Anp, 0, INCXC, size ), &INCXC,
Mptr( XR,
421 0, Anq, INCXR, size ), &INCXR );
422 }
423 if( bcst )
424 {
425 if( myrow == Alrow )
426 bsend( ctxt,
COLUMN, &btop, 1, kb,
Mptr( XR, 0, Anq, INCXR,
427 size ), INCXR );
428 else
429 brecv( ctxt,
COLUMN, &btop, 1, kb,
Mptr( XR, 0, Anq, INCXR,
430 size ), INCXR, Alrow, mycol );
431 }
432 }
433 else
434 {
435 if( !( Ais1Col ) && ( AisRowRep || ( myrow == Alrow ) ) )
436 set( &kb, zero,
Mptr( XC, Anp, 0, INCXC, size ), &ione );
437 }
438
439
440
441 if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) &&
442 ( ( tmp1 = Anpprev - n1pprev ) > 0 ) )
443 gemv(
C2F_CHAR( TRANS ), &tmp1, &kbprev, negone, Aprev, &Ald,
444 Xdprev, &INCXR, one, Xprev, &INCXC );
445
446
447
448 if( Ais1Col || ( mycol == Alcol ) ) { Xdprev = Xd; Aprev = A; }
449 if( AisRowRep || ( myrow == Alrow ) ) { Anpprev -= kb; }
450
451 n1pprev = n1p;
452 rocprev = Alcol;
453 kbprev = kb;
454 k += kb;
455 N -= kb;
456
457 mb1 -= kb;
458 if( mb1 == 0 )
459 {
460 if( !( Ais1Row ) && ( Alrow >= 0 ) )
462 mb1 = ( N > Aimb1 ? Amb : Aimb1 );
463 }
464
465 nb1 -= kb;
466 ChangeRoc = ( nb1 == 0 );
467
468 if( ChangeRoc )
469 {
470 if( !( Ais1Col ) && ( Alcol >= 0 ) )
472 nb1 = ( N > Ainb1 ? Anb : Ainb1 );
473 }
474 tmp1 = N - ( kb =
MIN( mb1, nb1 ) );
475 n1 = ( ( Ais1Col || ( N - nb1 < nlast ) ) ? n1last : n1 );
476 tmp2 = n1 + nb1 - kb; tmp1 -= ( tmp2 =
MIN( tmp1, tmp2 ) );
477 n1p =
PB_Cnumroc( tmp2,
MAX( 0, tmp1 ), Aimb1, Amb, myrow, Asrc,
478 nprow );
479 }
480 }
481 else
482 {
483
484
485
486 n1 = (
MAX( npcol, 2 ) - 1 ) * Anb;
488 Aprev = A; Xprev = XC; Xdprev = XR; Anpprev = Anp;
489 mb1 = Aimb1; nb1 = Ainb1; rocprev = Acol;
490 tmp1 = N - ( kb =
MIN( mb1, nb1 ) ); tmp2 = n1 + nb1 - kb;
491 Asrc = Arow;
492 n1p =
PB_Cnumroc(
MIN( tmp1, tmp2 ), kb, Aimb1, Amb, myrow, Asrc,
493 nprow );
494 while( kb > 0 )
495 {
496 kbsize = kb * size;
497
498
499
500 if( n1pprev > 0 )
501 {
502 if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) )
503 gemv(
C2F_CHAR( TRANS ), &n1pprev, &kbprev, negone, Aprev,
504 &Ald, Xdprev, &INCXR, one, Xprev, &INCXC );
505
506
507
508 if( !( Ais1Col ) && ChangeRoc )
509 {
510 if( mycol == rocprev )
511 {
512 send( ctxt, n1pprev, 1, Xprev, n1pprev, myrow, Acol );
513 }
514 else if( mycol == Acol )
515 {
516 recv( ctxt, n1pprev, 1, work, n1pprev, myrow, rocprev );
517 axpy( &n1pprev, one, work, &ione, Xprev, &INCXC );
518 }
519 }
520 }
521
522
523
524 if( Ais1Col || ( mycol == Acol ) )
525 {
526 if( AisRowRep || ( myrow == Arow ) )
527 {
529 &kb, A, &Ald, XC, &INCXC );
530 copy( &kb, XC, &INCXC, XR, &INCXR );
531 }
532 if( bcst )
533 {
534 if( myrow == Arow )
535 bsend( ctxt,
COLUMN, &btop, 1, kb, XR, INCXR );
536 else
537 brecv( ctxt,
COLUMN, &btop, 1, kb, XR, INCXR, Arow,
538 mycol );
539 }
540 }
541 else
542 {
543 if( !( Ais1Col ) && ( AisRowRep || ( myrow == Arow ) ) )
544 set( &kb, zero, XC, &INCXC );
545 }
546
547
548
549 if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) )
550 {
551 if( ( tmp1 = Anpprev - n1pprev ) > 0 )
552 {
553 tmp2 = n1pprev * size;
554 gemv(
C2F_CHAR( TRANS ), &tmp1, &kbprev, negone, Aprev+tmp2,
555 &Ald, Xdprev, &INCXR, one, Xprev+tmp2, &INCXC );
556 }
557 Aprev += Ald * kbprev * size;
558 }
559
560
561
562 if( Ais1Col || ( mycol == Acol ) )
563 { A += Ald*kbsize; Xdprev = Xd = XR; XR += INCXR*kbsize; }
564 if( AisRowRep || ( myrow == Arow ) )
565 {
566 Xprev = ( XC += kbsize );
567 A += kbsize;
568 Aprev += kbsize;
569 Anpprev = ( Anp -= kb );
570 }
571 n1pprev = n1p;
572 rocprev = Acol;
573 kbprev = kb;
574 k += kb;
575 N -= kb;
576
577 mb1 -= kb;
578 if( mb1 == 0 )
579 {
580 if( !( Ais1Row ) && ( Arow >= 0 ) )
583 }
584
585 nb1 -= kb;
586 ChangeRoc = ( nb1 == 0 );
587
588 if( ChangeRoc )
589 {
590 if( !( Ais1Col ) && ( Acol >= 0 ) )
593 }
594 tmp1 = N - ( kb =
MIN( mb1, nb1 ) ); tmp2 = n1 + nb1 - kb;
595 n1p =
PB_Cnumroc(
MIN( tmp2, tmp1 ), k+kb, Aimb1, Amb, myrow, Asrc,
596 nprow );
597 }
598 }
599 }
600 else
601 {
602 if( ( Anp <= 0 ) || ( Ais1Col && ( ( Acol >= 0 ) && !( FBCAST ) &&
603 ( mycol != Acol ) ) ) ) return;
605 bcst = ( ( !Ais1Col ) || ( Ais1Col && ( Acol >= 0 ) && FBCAST ) );
606 AisColRep = ( ( Acol < 0 ) || ( npcol == 1 ) );
607
609 {
610
611
612
613 n1 = (
MAX( nprow, 2 ) - 1 ) * Amb;
615 Aprev = A; Xprev = XR; Xdprev = XC; Anpprev = Anq;
616 mb1 = Aimb1; nb1 = Ainb1; rocprev = Arow;
617 tmp1 = N - ( kb =
MIN( mb1, nb1 ) ); tmp2 = n1 + mb1 - kb;
618 Asrc = Acol;
619 n1p =
PB_Cnumroc(
MIN( tmp1, tmp2 ), kb, Ainb1, Anb, mycol, Asrc,
620 npcol );
621 while( kb > 0 )
622 {
623 kbsize = kb * size;
624
625
626
627 if( n1pprev > 0 )
628 {
629 if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) )
630 gemv(
C2F_CHAR( TRANS ), &kbprev, &n1pprev, negone, Aprev,
631 &Ald, Xdprev, &INCXC, one, Xprev, &INCXR );
632
633
634
635 if( !( Ais1Row ) && ChangeRoc )
636 {
637 if( myrow == rocprev )
638 {
639 send( ctxt, 1, n1pprev, Xprev, INCXR, Arow, mycol );
640 }
641 else if( myrow == Arow )
642 {
643 recv( ctxt, 1, n1pprev, work, 1, rocprev, mycol );
644 axpy( &n1pprev, one, work, &ione, Xprev, &INCXR );
645 }
646 }
647 }
648
649
650
651 if( Ais1Row || ( myrow == Arow ) )
652 {
653 if( AisColRep || ( mycol == Acol ) )
654 {
656 &kb, A, &Ald, XR, &INCXR );
657 copy( &kb, XR, &INCXR, XC, &INCXC );
658 }
659 if( bcst )
660 {
661 if( mycol == Acol )
662 bsend( ctxt,
ROW, &btop, kb, 1, XC, kb );
663 else
664 brecv( ctxt,
ROW, &btop, kb, 1, XC, kb, myrow, Acol );
665 }
666 }
667 else
668 {
669 if( !( Ais1Row ) && ( AisColRep || ( mycol == Acol ) ) )
670 set( &kb, zero, XR, &INCXR );
671 }
672
673
674
675 if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) )
676 {
677 if( ( tmp1 = Anpprev - n1pprev ) > 0 )
678 {
679 tmp2 = n1pprev * size;
680 gemv(
C2F_CHAR( TRANS ), &kbprev, &tmp1, negone,
681 Aprev+Ald*tmp2, &Ald, Xdprev, &INCXC, one,
682 Xprev+INCXR*tmp2, &INCXR );
683 }
684 Aprev += kbprev * size;
685 }
686
687
688
689 if( Ais1Row || ( myrow == Arow ) )
690 { A += kbsize; Xdprev = Xd = XC; XC += kbsize; }
691 if( AisColRep || ( mycol == Acol ) )
692 {
693 Xprev = ( XR += INCXR * kbsize );
694 A += Ald * kbsize;
695 Anpprev = ( Anq -= kb );
696 Aprev += Ald * kbsize;
697 }
698 n1pprev = n1p;
699 rocprev = Arow;
700 kbprev = kb;
701 k += kb;
702 N -= kb;
703
704 nb1 -= kb;
705 if( nb1 == 0 )
706 {
707 if( !( Ais1Col ) && ( Acol >= 0 ) )
710 }
711
712 mb1 -= kb;
713 ChangeRoc = ( mb1 == 0 );
714
715 if( ChangeRoc )
716 {
717 if( !( Ais1Row ) && ( Arow >= 0 ) )
720 }
721 tmp1 = N - ( kb =
MIN( mb1, nb1 ) ); tmp2 = n1 + mb1 - kb;
722 n1p =
PB_Cnumroc(
MIN( tmp2, tmp1 ), k+kb, Ainb1, Anb, mycol, Asrc,
723 npcol );
724 }
725 }
726 else
727 {
728
729
730
731 nlast = ( nprow - 1 ) * Amb;
732 n1 =
MAX( nlast, Amb );
733 nlast += Aimb1;
734 n1last = n1 - Amb +
MAX( Aimb1, Amb );
736 tmp1 = N-1;
737 Alrow =
PB_Cindxg2p( tmp1, Aimb1, Amb, Arow, Arow, nprow );
738 Alcol =
PB_Cindxg2p( tmp1, Ainb1, Anb, Acol, Acol, npcol );
739 rocprev = Alrow; Anpprev = Anq; Xprev = XR; Xdprev = XC;
740 Aprev = A =
Mptr( A, Anp, 0, Ald, size );
743 tmp1 = N - ( kb =
MIN( mb1, nb1 ) );
744 n1 = ( ( Ais1Row || ( N - mb1 < nlast ) ) ? n1last : n1 );
745 tmp2 = n1 + mb1 - kb; tmp1 -= ( tmp2 =
MIN( tmp1, tmp2 ) );
746 Asrc = Acol;
747 n1p =
PB_Cnumroc( tmp2,
MAX( 0, tmp1 ), Ainb1, Anb, mycol, Asrc,
748 npcol );
749 while( N > 0 )
750 {
751 kbsize = kb * size;
752
753 if( Ais1Row || ( myrow == Alrow ) )
754 {
755 A -= kbsize;
756 Anp -= kb;
757 Xd =
Mptr( XC, Anp, 0, INCXC, size );
758 }
759 if( ( Acol < 0 ) || ( mycol == Alcol ) ) { Anq -= kb; }
760
761
762
763 if( n1pprev > 0 )
764 {
765 if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) )
766 {
767 tmp1 = ( Anpprev - n1pprev ) * size;
768 gemv(
C2F_CHAR( TRANS ), &kbprev, &n1pprev, negone,
769 Aprev+Ald*tmp1, &Ald, Xdprev, &INCXC, one,
770 Xprev+INCXR*tmp1, &INCXR );
771 }
772
773
774
775 if( !( Ais1Row ) && ChangeRoc )
776 {
777 if( myrow == rocprev )
778 {
779 send( ctxt, 1, n1pprev,
Mptr( Xprev, 0, Anpprev-n1pprev,
780 INCXR, size ), INCXR, Alrow, mycol );
781 }
782 else if( myrow == Alrow )
783 {
784 recv( ctxt, 1, n1pprev, work, 1, rocprev, mycol );
785 axpy( &n1pprev, one, work, &ione,
Mptr( Xprev, 0,
786 Anpprev-n1pprev, INCXR, size ), &INCXR );
787 }
788 }
789 }
790
791
792
793 if( Ais1Row || ( myrow == Alrow ) )
794 {
795 if( AisColRep || ( mycol == Alcol ) )
796 {
798 &kb,
Mptr( A, 0, Anq, Ald, size ), &Ald,
Mptr( XR, 0,
799 Anq, INCXR, size ), &INCXR );
800 copy( &kb,
Mptr( XR, 0, Anq, INCXR, size ), &INCXR,
Mptr( XC,
801 0, Anp, INCXC, size ), &INCXC );
802 }
803 if( bcst )
804 {
805 if( mycol == Alcol )
806 bsend( ctxt,
ROW, &btop, kb, 1,
Mptr( XC, 0, Anp, INCXC,
807 size ), kb );
808 else
809 brecv( ctxt,
ROW, &btop, kb, 1,
Mptr( XC, 0, Anp, INCXC,
810 size ), kb, myrow, Alcol );
811 }
812 }
813 else
814 {
815 if( !( Ais1Row ) && ( AisColRep || ( mycol == Alcol ) ) )
816 set( &kb, zero,
Mptr( XR, 0, Anq, INCXR, size ), &INCXR );
817 }
818
819
820
821 if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) &&
822 ( ( tmp1 = Anpprev - n1pprev ) > 0 ) )
823 gemv(
C2F_CHAR( TRANS ), &kbprev, &tmp1, negone, Aprev, &Ald,
824 Xdprev, &INCXC, one, Xprev, &INCXR );
825
826
827
828 if( Ais1Row || ( myrow == Alrow ) ) { Xdprev = Xd; Aprev = A; }
829 if( AisColRep || ( mycol == Alcol ) ) { Anpprev -= kb; }
830
831 n1pprev = n1p;
832 rocprev = Alrow;
833 kbprev = kb;
834 k += kb;
835 N -= kb;
836
837 nb1 -= kb;
838 if( nb1 == 0 )
839 {
840 if( !( Ais1Col ) && ( Alcol >= 0 ) )
842 nb1 = ( N > Ainb1 ? Anb : Ainb1 );
843 }
844
845 mb1 -= kb;
846 ChangeRoc = ( mb1 == 0 );
847
848 if( ChangeRoc )
849 {
850 if( !( Ais1Row ) && ( Alrow >= 0 ) )
852 mb1 = ( N > Aimb1 ? Amb : Aimb1 );
853 }
854 tmp1 = N - ( kb =
MIN( mb1, nb1 ) );
855 n1 = ( ( Ais1Row || ( N - mb1 < nlast ) ) ? n1last : n1 );
856 tmp2 = n1 + mb1 - kb; tmp1 -= ( tmp2 =
MIN( tmp1, tmp2 ) );
857 n1p =
PB_Cnumroc( tmp2,
MAX( 0, tmp1 ), Ainb1, Anb, mycol, Asrc,
858 npcol );
859 }
860 }
861 }
862 if( work ) free( work );
863
864
865
866}