40{
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257 char Broc, GemmTa, GemmTb, TranOp, WBroc, WCroc, conjg, * one,
258 * talpha, * tbeta, top, * zero;
259 Int Acol, Aii, Aimb1, Ainb1, Ajj, Alcmb, Ald, Alp, Alp0, Alq,
260 Alq0, Amb, Amp, An, Anb, Anq, Arow, BcurrocR, Bfwd, BiiD,
261 BiiR, Binb1D, Binb1R, BisR, Bld, BmyprocD, BmyprocR, BnD,
262 BnR, BnbD, BnbR, BnpR, BnprocsD, BnprocsR, BrocD, BrocR,
263 BsrcR, LNorRT, WBfr, WBld, WCfr, WCld, WCpbY, WCsum, ctxt,
264 l, lb, lside, ltmp, mycol, myrow, n, nb, nbb, notran, npcol,
265 nprow, p=0, size, tmp, upper;
268
269
270
272 char * Aptr = NULL, * Bptr = NULL, * WB = NULL, * WC = NULL;
273
274
275
276
277
278
279
281
286 LNorRT = ( lside && notran ) || ( !( lside ) && !( notran ) );
287
289 gemm =
TYPE->Fgemm; gsum2d =
TYPE->Cgsum2d;
291
292
293
294 if( lside )
295 {
296 BnD = An = M; BnR = N; Broc =
CCOLUMN;
297 BmyprocD = myrow; BnprocsD = nprow;
298 BmyprocR = mycol; BnprocsR = npcol;
299 BnbD = DESCB[
MB_ ]; BnbR = DESCB[
NB_ ];
301 PB_Cinfog2l( IB, JB, DESCB, BnprocsD, BnprocsR, BmyprocD, BmyprocR,
302 &BiiD, &BiiR, &BrocD, &BrocR );
305 }
306 else
307 {
308 BnD = An = N; BnR = M; Broc =
CROW;
309 BmyprocD = mycol; BnprocsD = npcol;
310 BmyprocR = myrow; BnprocsR = nprow;
311 BnbR = DESCB[
MB_ ]; BnbD = DESCB[
NB_ ];
313 PB_Cinfog2l( IB, JB, DESCB, BnprocsR, BnprocsD, BmyprocR, BmyprocD,
314 &BiiR, &BiiD, &BrocR, &BrocD );
317 }
318
319
320
321 PB_Cdescribe( An, An, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj,
322 &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 );
323
324 Amp =
PB_Cnumroc( An, 0, Aimb1, Amb, myrow, Arow, nprow );
325 Anq =
PB_Cnumroc( An, 0, Ainb1, Anb, mycol, Acol, npcol );
326 if( ( Amp > 0 ) && ( Anq > 0 ) ) Aptr =
Mptr( A, Aii, Ajj, Ald, size );
327
328
329
331 {
334 }
335 else { conjg =
CNOCONJG; talpha = ALPHA; }
336
337
338
339
340
341 if( LNorRT )
342 {
346 }
347 else
348 {
352 }
353
354
355
356
357 Alcmb = 2 * nb *
PB_Clcm( ( Arow >= 0 ? nprow : 1 ),
358 ( Acol >= 0 ? npcol : 1 ) );
359
360
361
362
363 if( !( BisR = ( ( BsrcR < 0 ) || ( BnprocsR == 1 ) ) ) && !Bfwd )
364 {
365 tmp =
PB_Cindxg2p( BnR-1, Binb1R, BnbR, BrocR, BrocR, BnprocsR );
366 p =
MModSub( tmp, BrocR, BnprocsR );
367 }
368
369
370
371
372 n = BnR;
373
374 while( n > 0 )
375 {
376
377
378
379
380 BcurrocR = ( BisR ? -1 :
MModAdd( BrocR, p, BnprocsR ) );
381 BnpR =
PB_Cnumroc( BnR, 0, Binb1R, BnbR, BcurrocR, BrocR, BnprocsR );
382
383 n -= BnpR;
384
385
386
387
388 if( BnpR ) nbb = BnpR / ( ( BnpR - 1 ) / nb + 1 );
389
390 while( BnpR )
391 {
392 nbb =
MIN( nbb, BnpR );
393
394
395
396 if( lside )
397 {
398 PB_Cdescset( DBUFB, BnD, nbb, Binb1D, nbb, BnbD, BnbR, BrocD,
399 BcurrocR, ctxt, Bld );
400 if( BisR || ( BmyprocR == BcurrocR ) )
401 Bptr =
Mptr( B, BiiD, BiiR, Bld, size );
402 }
403 else
404 {
405 PB_Cdescset( DBUFB, nbb, BnD, nbb, Binb1D, BnbR, BnbD, BcurrocR,
406 BrocD, ctxt, Bld );
407 if( BisR || ( BmyprocR == BcurrocR ) )
408 Bptr =
Mptr( B, BiiR, BiiD, Bld, size );
409 }
410
411
412
413 PB_CInV(
TYPE,
NOCONJG, &WBroc, An, An, Ad0, nbb, Bptr, 0, 0, DBUFB,
414 &Broc, &WB, WBd, &WBfr );
415
416
417
418
419 PB_CInOutV(
TYPE, &WCroc, An, An, Ad0, nbb, one, Bptr, 0, 0, DBUFB,
420 &Broc, &tbeta, &WC, WCd, &WCfr, &WCsum, &WCpbY );
421
422
423
424 if( notran )
426 Bptr, 0, 0, DBUFB );
427
428
429
430 Aimb1 = Ad0[
IMB_ ]; Ainb1 = Ad0[
INB_ ]; Amb = Ad0[
MB_]; Anb = Ad0[
NB_];
432 Amp =
PB_Cnumroc( An, 0, Aimb1, Amb, myrow, Arow, nprow );
433 Anq =
PB_Cnumroc( An, 0, Ainb1, Anb, mycol, Acol, npcol );
434
436
437 if( ( Amp > 0 ) && ( Anq > 0 ) )
438 {
440
441 if( upper )
442 {
443
444
445
446 if( LNorRT )
447 {
448 for( l = 0; l < An; l += Alcmb )
449 {
450 lb = An - l; lb =
MIN( lb, Alcmb );
451 Alp =
PB_Cnumroc( l, 0, Aimb1, Amb, myrow, Arow, nprow );
452 Alq =
PB_Cnumroc( l, 0, Ainb1, Anb, mycol, Acol, npcol );
453 if( Alp > 0 )
454 {
455 Alq0 =
PB_Cnumroc( lb, l, Ainb1, Anb, mycol, Acol,
456 npcol );
458 &nbb, &Alq0, talpha,
Mptr( Aptr, 0, Alq, Ald,
459 size ), &Ald,
Mptr( WB, 0, Alq, WBld, size ),
460 &WBld, one, WC, &WCld );
461 }
463 talpha, Aptr, l, l, Ad0,
Mptr( WB, 0, Alq, WBld,
464 size ), WBld,
Mptr( WC, Alp, 0, WCld, size ),
466 }
467 }
468 else
469 {
470 for( l = 0; l < An; l += Alcmb )
471 {
472 lb = An - l; lb =
MIN( lb, Alcmb );
473 Alp =
PB_Cnumroc( l, 0, Aimb1, Amb, myrow, Arow, nprow );
474 Alq =
PB_Cnumroc( l, 0, Ainb1, Anb, mycol, Acol, npcol );
475 Alq0 =
PB_Cnumroc( lb, l, Ainb1, Anb, mycol, Acol, npcol );
476 if( Alq0 > 0 )
478 &Alq0, &Alp, talpha, WB, &WBld,
Mptr( Aptr, 0,
479 Alq, Ald, size ), &Ald, one,
Mptr( WC, 0, Alq,
480 WCld, size ), &WCld );
482 talpha, Aptr, l, l, Ad0,
Mptr( WB, Alp, 0, WBld,
483 size ), WBld,
Mptr( WC, 0, Alq, WCld, size ),
485 }
486 }
487 }
488 else
489 {
490
491
492
493 if( LNorRT )
494 {
495 for( l = 0; l < An; l += Alcmb )
496 {
497 lb = An - l; ltmp = l + ( lb =
MIN( lb, Alcmb ) );
498 Alp =
PB_Cnumroc( l, 0, Aimb1, Amb, myrow, Arow, nprow );
499 Alq =
PB_Cnumroc( l, 0, Ainb1, Anb, mycol, Acol, npcol );
501 talpha, Aptr, l, l, Ad0,
Mptr( WB, 0, Alq, WBld,
502 size ), WBld,
Mptr( WC, Alp, 0, WCld, size ),
504 Alp =
PB_Cnumroc( ltmp, 0, Aimb1, Amb, myrow, Arow,
505 nprow );
506 Alp0 = Amp - Alp;
507 Alq0 =
PB_Cnumroc( lb, l, Ainb1, Anb, mycol, Acol,
508 npcol );
509 if( Alp0 > 0 )
511 &nbb, &Alq0, talpha,
Mptr( Aptr, Alp, Alq, Ald,
512 size ), &Ald,
Mptr( WB, 0, Alq, WBld, size ),
513 &WBld, one,
Mptr( WC, Alp, 0, WCld, size ),
514 &WCld );
515 }
516 }
517 else
518 {
519 for( l = 0; l < An; l += Alcmb )
520 {
521 lb = An - l; ltmp = l + ( lb =
MIN( lb, Alcmb ) );
522 Alp =
PB_Cnumroc( l, 0, Aimb1, Amb, myrow, Arow, nprow );
523 Alq =
PB_Cnumroc( l, 0, Ainb1, Anb, mycol, Acol, npcol );
525 talpha, Aptr, l, l, Ad0,
Mptr( WB, Alp, 0, WBld,
526 size ), WBld,
Mptr( WC, 0, Alq, WCld, size ),
528 Alp =
PB_Cnumroc( ltmp, 0, Aimb1, Amb, myrow, Arow,
529 nprow );
530 Alp0 = Amp - Alp;
531 Alq0 =
PB_Cnumroc( lb, l, Ainb1, Anb, mycol, Acol,
532 npcol );
533 if( Alq0 > 0 )
535 &Alq0, &Alp0, talpha,
Mptr( WB, Alp, 0, WBld,
536 size ), &WBld,
Mptr( Aptr, Alp, Alq, Ald, size ),
537 &Ald, one,
Mptr( WC, 0, Alq, WCld, size ),
538 &WCld );
539 }
540 }
541 }
542 }
543 if( WBfr ) free( WB );
544
545 if( LNorRT )
546 {
547
548
549
550 if( WCsum && ( Amp > 0 ) )
551 gsum2d( ctxt,
ROW, &top, Amp, nbb, WC, WCld, myrow, WCd[
CSRC_] );
552
553
554
555 if( WCpbY )
556 PB_Cpaxpby(
TYPE, &conjg, An, nbb, one, WC, 0, 0, WCd, &WCroc,
557 zero, Bptr, 0, 0, DBUFB, &Broc );
558 }
559 else
560 {
561
562
563
564 if( WCsum && ( Anq > 0 ) )
565 gsum2d( ctxt,
COLUMN, &top, nbb, Anq, WC, WCld, WCd[
RSRC_],
566 mycol );
567
568
569
570 if( WCpbY )
571 PB_Cpaxpby(
TYPE, &conjg, nbb, An, one, WC, 0, 0, WCd, &WCroc,
572 zero, Bptr, 0, 0, DBUFB, &Broc );
573 }
574 if( WCfr ) free( WC );
575
576
577
578 BnpR -= nbb;
579
580 if( BisR || ( BmyprocR == BcurrocR ) ) BiiR += nbb;
581 }
582
583
584
585 if( !BisR )
587 }
588
589 if( TranOp ==
CCOTRAN ) free( talpha );
590
591
592
593}