42{
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260 char Broc, GemmTa, GemmTb, TrA, TrB, * one, * talpha, * tbeta,
261 top, * zero;
262 Int Acol, Aii, Aimb1, Ainb1, Ajj, Ald, Am, Amb, Amp, An, Anb,
263 Anq, Arow, Bbufld, BcurrocR, Bfr, Bfwd, BiD, BiR, BiiD, BiiR,
264 BinbD, BinbR, Binb1D, Binb1R, BisR, Bkk, Bld, BmyprocD,
265 BmyprocR, BnbD, BnbR, BnpD, BnpR, BnprocsD, BnprocsR, Boff,
266 BrocD, BrocR, BsrcR, Bsrc_, Cbufld, Ccol, Ccurcol, Cfr, Cfwd,
267 Cii, Cimb, Cimb1, Cinb, Cinb1, CisR, Cjj, Ckk, Cld, Cmb, Cmp,
268 Cnb, Cnq, Coff, Crow, Csrc, WBfr, WCfr, WCsum, ctxt, lcmb,
269 maxp, maxpm1, maxq, mycol, myrow, n, nb, nbb, ncpq, nota,
270 notb, npcol, npq=0, nprow, nrpq, p=0, q=0, size, tmp;
273
274
275
279 char * Aptr = NULL, * Bbuf = NULL, * Cbuf = NULL, * WB = NULL,
280 * WC = NULL;
281
282
283
284
286
291
293 gemm =
TYPE->Fgemm; gsum2d =
TYPE->Cgsum2d;
295
296
297
298 if( notb )
299 {
300 BiD = IB; BiR = JB;
302 BinbD = DESCB[
IMB_ ]; BinbR = DESCB[
INB_];
303 BnbD = DESCB[
MB_ ]; BnbR = DESCB[
NB_ ];
304 BsrcR = DESCB[Bsrc_]; Bld = DESCB[
LLD_];
305 BmyprocD = myrow; BnprocsD = nprow;
306 BmyprocR = mycol; BnprocsR = npcol;
307 PB_Cinfog2l( IB, JB, DESCB, BnprocsD, BnprocsR, BmyprocD, BmyprocR,
308 &BiiD, &BiiR, &BrocD, &BrocR );
309 }
310 else
311 {
312 BiD = JB; BiR = IB;
314 BinbR = DESCB[
IMB_ ]; BinbD = DESCB[
INB_];
315 BnbR = DESCB[
MB_ ]; BnbD = DESCB[
NB_ ];
316 BsrcR = DESCB[Bsrc_]; Bld = DESCB[
LLD_];
317 BmyprocD = mycol; BnprocsD = npcol;
318 BmyprocR = myrow; BnprocsR = nprow;
319 PB_Cinfog2l( IB, JB, DESCB, BnprocsR, BnprocsD, BmyprocR, BmyprocD,
320 &BiiR, &BiiD, &BrocR, &BrocD );
321 }
323 BnpD =
PB_Cnumroc( K, 0, Binb1D, BnbD, BmyprocD, BrocD, BnprocsD );
325
326 Cimb = DESCC[
IMB_ ]; Cinb = DESCC[
INB_];
327 Cmb = DESCC[
MB_ ]; Cnb = DESCC[
NB_ ];
329 PB_Cinfog2l( IC, JC, DESCC, nprow, npcol, myrow, mycol, &Cii, &Cjj,
330 &Crow, &Ccol );
332 Cmp =
PB_Cnumroc( M, 0, Cimb1, Cmb, myrow, Crow, nprow );
334
335
336
337
338
339 if( nota )
340 {
341 Am = M; An = K;
343 talpha = ALPHA; GemmTa =
CNOTRAN; GemmTb = ( notb ?
CTRAN : TrB );
344 }
345 else
346 {
347 Am = K; An = M;
350 {
353 }
354 else
355 {
356 talpha = ALPHA;
358 }
360 }
361
362
363
364 PB_Cdescribe( Am, An, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj,
365 &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 );
366
367 Amp =
PB_Cnumroc( Am, 0, Aimb1, Amb, myrow, Arow, nprow );
368 Anq =
PB_Cnumroc( An, 0, Ainb1, Anb, mycol, Acol, npcol );
369 if( ( Amp > 0 ) && ( Anq > 0 ) ) { Aptr =
Mptr( A, Aii, Ajj, Ald, size ); }
370
371
372
373
374 if( !( BisR = ( ( BsrcR < 0 ) || ( BnprocsR == 1 ) ) ) && !Bfwd )
375 {
376 tmp =
PB_Cindxg2p( N - 1, Binb1R, BnbR, BrocR, BrocR, BnprocsR );
377 q =
MModSub( tmp, BrocR, BnprocsR );
378 }
379
380
381
382
383 if( !( CisR = ( ( Ccol < 0 ) || ( npcol == 1 ) ) ) && !Cfwd )
384 {
385 tmp =
PB_Cindxg2p( N - 1, Cinb1, Cnb, Ccol, Ccol, npcol );
386 p =
MModSub( tmp, Ccol, npcol );
387 }
388
389
390
391
392 lcmb =
PB_Clcm( ( maxp = ( CisR ? 1 : npcol ) ) * Cnb,
393 ( maxq = ( BisR ? 1 : BnprocsR ) ) * BnbR );
394 n = N;
395 maxpm1 = maxp - 1;
396
397 while( n > 0 )
398 {
399
400
401
402 BcurrocR = ( BisR ? -1 :
MModAdd( BrocR, q, BnprocsR ) );
403 Bkk =
PB_Cg2lrem( BiR, BinbR, BnbR, BcurrocR, BsrcR, BnprocsR );
404 BnpR =
PB_Cnumroc( N, 0, Binb1R, BnbR, BcurrocR, BrocR, BnprocsR );
405
406 Ccurcol = ( CisR ? -1 :
MModAdd( Ccol, p, npcol ) );
407 Ckk =
PB_Cg2lrem( JC, Cinb, Cnb, Ccurcol, Csrc, npcol );
408 Cnq =
PB_Cnumroc( N, 0, Cinb1, Cnb, Ccurcol, Ccol, npcol );
409
410 PB_CVMinit( &VM, 0, Cnq, BnpR, Cinb1, Binb1R, Cnb, BnbR, p, q,
411 maxp, maxq, lcmb );
412
413
414
416
417 n -= npq;
418
419
420
421
422 if( npq ) nbb = npq / ( ( npq - 1 ) / nb + 1 );
423
424 while( npq )
425 {
426 nbb =
MIN( nbb, npq );
427
428
429
431
432 if( notb )
433 {
434
435
436
437
438 if( ( Bfr = ( ncpq < nbb ) ) != 0 )
439 {
440
441
442
443
444 Bbufld =
MAX( 1, BnpD );
445 if( BisR || ( BmyprocR == BcurrocR ) )
446 {
449 BnpD, one,
Mptr( B, BiiD, Bkk, Bld, size ), Bld,
450 zero, Bbuf, Bbufld );
451 }
452 }
453 else
454 {
455
456
457
458 Bbufld = Bld;
459 if( BisR || ( BmyprocR == BcurrocR ) )
460 Bbuf =
Mptr( B, BiiD, Bkk+Boff, Bld, size );
461 }
462 PB_Cdescset( DBUFB, K, nbb, Binb1D, nbb, BnbD, nbb, BrocD,
463 BcurrocR, ctxt, Bbufld );
464 }
465 else
466 {
467
468
469
470
471 if( ( Bfr = ( ncpq < nbb ) ) != 0 )
472 {
473
474
475
476
477 Bbufld = nbb;
478 if( BisR || ( BmyprocR == BcurrocR ) )
479 {
482 BnpD, one,
Mptr( B, Bkk, BiiD, Bld, size ), Bld,
483 zero, Bbuf, Bbufld );
484 }
485 }
486 else
487 {
488
489
490
491 Bbufld = Bld;
492 if( BisR || ( BmyprocR == BcurrocR ) )
493 Bbuf =
Mptr( B, Bkk+Boff, BiiD, Bld, size );
494 }
495 PB_Cdescset( DBUFB, nbb, K, nbb, Binb1D, nbb, BnbD, BcurrocR,
496 BrocD, ctxt, Bbufld );
497 }
498
499 if( nota )
500 {
501
502
503
504 PB_CInV(
TYPE,
NOCONJG,
ROW, Am, An, Ad0, nbb, Bbuf, 0, 0,
505 DBUFB, &Broc, &WB, WBd, &WBfr );
506
507
508
509 PB_COutV(
TYPE,
COLUMN,
INIT, Am, An, Ad0, nbb, &WC, WCd, &WCfr,
510 &WCsum );
511
512
513
514 if( Amp > 0 && Anq > 0 )
516 &Anq, talpha, Aptr, &Ald, WB, &WBd[
LLD_], zero,
518 if( WBfr ) free( WB );
519 if( Bfr && ( BisR || ( BmyprocR == BcurrocR ) ) )
520 if( Bbuf ) free( Bbuf );
521
522
523
524 if( WCsum )
525 {
526 WCd[
CSRC_] = Ccurcol;
527 if( Amp > 0 )
528 gsum2d( ctxt,
ROW, &top, Amp, nbb, WC, WCd[
LLD_], myrow,
530 }
531
532
533
534
535 if( ( Cfr = ( nrpq < nbb ) ) != 0 )
536 {
537
538
539
540 Cbufld =
MAX( 1, Cmp ); tbeta = zero;
541 if( CisR || ( mycol == Ccurcol ) )
543 }
544 else
545 {
546
547
548
549 Cbufld = Cld; tbeta = BETA;
550 if( CisR || ( mycol == Ccurcol ) )
551 Cbuf =
Mptr( C, Cii, Ckk+Coff, Cld, size );
552 }
553 PB_Cdescset( DBUFC, M, nbb, Cimb1, nbb, Cmb, nbb, Crow, Ccurcol,
554 ctxt, Cbufld );
555
556
557
558 PB_Cpaxpby(
TYPE,
NOCONJG, M, nbb, one, WC, 0, 0, WCd,
COLUMN,
559 tbeta, Cbuf, 0, 0, DBUFC,
COLUMN );
560
561
562
563 if( Cfr && ( CisR || ( mycol == Ccurcol ) ) )
564 {
566 BETA,
Mptr( C, Cii, Ckk, Cld, size ), Cld, one, Cbuf,
567 Cbufld );
568 if( Cbuf ) free( Cbuf );
569 }
570 if( WCfr ) free( WC );
571 }
572 else
573 {
574
575
576
577 PB_CInV(
TYPE,
NOCONJG,
COLUMN, Am, An, Ad0, nbb, Bbuf, 0, 0,
578 DBUFB, &Broc, &WB, WBd, &WBfr );
579
580
581
582 PB_COutV(
TYPE,
ROW,
INIT, Am, An, Ad0, nbb, &WC, WCd, &WCfr,
583 &WCsum );
584
585
586
587 if( Amp > 0 && Anq > 0 )
589 &Amp, talpha, WB, &WBd[
LLD_], Aptr, &Ald, zero, WC,
591 if( WBfr ) free( WB );
592 if( Bfr && ( BisR || ( BmyprocR == BcurrocR ) ) )
593 if( Bbuf ) free( Bbuf );
594
595
596
597 if( WCsum )
598 {
600 if( Anq > 0 )
601 gsum2d( ctxt,
COLUMN, &top, nbb, Anq, WC, WCd[
LLD_],
603 }
604
605
606
607
608 if( ( Cfr = ( nrpq < nbb ) ) != 0 )
609 {
610
611
612
613 Cbufld =
MAX( 1, Cmp ); tbeta = zero;
614 if( CisR || ( mycol == Ccurcol ) )
616 }
617 else
618 {
619
620
621
622 Cbufld = Cld; tbeta = BETA;
623 if( CisR || ( mycol == Ccurcol ) )
624 Cbuf =
Mptr( C, Cii, Ckk+Coff, Cld, size );
625 }
626 PB_Cdescset( DBUFC, M, nbb, Cimb1, nbb, Cmb, nbb, Crow, Ccurcol,
627 ctxt, Cbufld );
628
629
630
632 one, WC, 0, 0, WCd,
ROW, tbeta, Cbuf, 0, 0, DBUFC,
634
635
636
637 if( Cfr && ( CisR || ( mycol == Ccurcol ) ) )
638 {
640 BETA,
Mptr( C, Cii, Ckk, Cld, size ), Cld, one, Cbuf,
641 Cbufld );
642 if( Cbuf ) free( Cbuf );
643 }
644 if( WCfr ) free( WC );
645 }
646
647
648
650
651 npq -= nbb;
652 }
653
654
655
656 if( ( Cfwd && ( p == maxpm1 ) ) ||
657 ( !( Cfwd ) && ( p == 0 ) ) )
660 }
661
662 if( TrA ==
CCOTRAN ) free( talpha );
663
664
665
666}