40{
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274 char GatherDir, ScatterDir, * one, top, tran, * zero;
275 Int Acol, Acurcol, Acurimb1, Acurinb1, Acurrow, Afr, Aii, Aimb,
276 Aimb1, Ainb, Ainb1, Ajj, Ald, Am, Amb, Amp, Amp0, An, Anb,
277 Anq, Anq0, Arow, Ccsrc, Cimb, Cinb, Cmb, Cnb, Crsrc, WAfr,
278 WCfr, WCsum, conjg, ctxt, fwd, k, kb, kbb, kend, kstart,
279 kstep, ktmp, mycol, myrow, notran, npcol, nprow, size, upper;
282
283
284
286 char * Aptr = NULL, * Aptr0 = NULL, * WA = NULL, * WC = NULL;
287
288
289
290
291
292
293
295
296
297
299
305
307 gsum2d =
TYPE->Cgsum2d; gemm =
TYPE->Fgemm;
308
309
310
312 if( fwd )
313 {
314 kstart = 0; kend = ( ( N - 1 ) / kb + 1 ) * kb; kstep = kb;
316 }
317 else
318 {
319 kstart = ( ( N - 1 ) / kb ) * kb; kend = kstep = -kb;
321 }
322
323
324
325 if( notran ) { Am = N; An = K; } else { Am = K; An = N; }
326 PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj,
327 &Arow, &Acol );
328 Aimb = DESCA[
IMB_]; Ainb = DESCA[
INB_];
329 Amb = DESCA[
MB_ ]; Anb = DESCA[
NB_ ]; Ald = DESCA[
LLD_];
330
332 Amp0 =
PB_Cnumroc( Am, 0, Aimb1, Amb, myrow, Arow, nprow );
334 Anq0 =
PB_Cnumroc( An, 0, Ainb1, Anb, mycol, Acol, npcol );
335 if( ( Amp0 > 0 ) && ( Anq0 > 0 ) ) Aptr0 =
Mptr( A, Aii, Ajj, Ald, size );
336
337 if( notran )
338 {
340 Cinb = DESCC[
INB_]; Cnb = DESCC[
NB_]; Ccsrc = DESCC[
CSRC_];
341
342 if( upper )
343 {
344 for( k = kstart; k != kend; k += kstep )
345 {
346 kbb = N - k; kbb =
MIN( kbb, kb ); ktmp = k + kbb;
347
348
349
351 ROW, &Aptr, DBUFA, &Afr );
352
353
354
355 PB_Cdescset( Ad0, ktmp, An, Aimb1, Ainb1, Amb, Anb, Arow, Acol,
356 ctxt, Ald );
357 PB_CInV(
TYPE,
NOCONJG,
ROW, ktmp, An, Ad0, kbb, Aptr, 0, 0, DBUFA,
358 ROW, &WA, WAd, &WAfr );
359
360
361
362 PB_COutV(
TYPE,
COLUMN,
INIT, ktmp, An, Ad0, kbb, &WC, WCd, &WCfr,
363 &WCsum );
364 Amp =
PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow );
365 if( ( Amp > 0 ) && ( Anq0 > 0 ) )
367 ALPHA, Aptr0, &Ald, WA, &WAd[
LLD_], zero, WC, &WCd[
LLD_] );
368 if( WAfr ) free( WA );
369 if( Afr ) free( Aptr );
370
371 if( WCsum )
372 {
374 Cnb, Ccsrc, Ccsrc, npcol );
375 if( Amp > 0 )
376 gsum2d( ctxt,
ROW, &top, Amp, kbb, WC, WCd[
LLD_], myrow,
378 }
379
380
381
382 if( conjg )
384 k, 0, WCd );
385 else if( kbb > 1 )
387 k+1, 0, WCd );
388
389
390
392 one, C, IC, JC+k, DESCC,
COLUMN );
393 if( WCfr ) free( WC );
394 }
395 }
396 else
397 {
398 for( k = kstart; k != kend; k += kstep )
399 {
400 ktmp = N - k; kbb =
MIN( ktmp, kb );
401
402
403
405 ROW, &Aptr, DBUFA, &Afr );
406
407
408
410 Acurrow =
PB_Cindxg2p( k, Aimb1, Amb, Arow, Arow, nprow );
411 PB_Cdescset( Ad0, ktmp, An, Acurimb1, Ainb1, Amb, Anb, Acurrow,
412 Acol, ctxt, Ald );
413 PB_CInV(
TYPE,
NOCONJG,
ROW, ktmp, An, Ad0, kbb, Aptr, 0, 0, DBUFA,
414 ROW, &WA, WAd, &WAfr );
415
416
417
418 PB_COutV(
TYPE,
COLUMN,
INIT, ktmp, An, Ad0, kbb, &WC, WCd, &WCfr,
419 &WCsum );
420 Amp =
PB_Cnumroc( ktmp, k, Aimb1, Amb, myrow, Arow, nprow );
421 if( ( Amp > 0 ) && ( Anq0 > 0 ) )
423 ALPHA,
Mptr( Aptr0, Amp0-Amp, 0, Ald, size ), &Ald, WA,
425 if( WAfr ) free( WA );
426 if( Afr ) free( Aptr );
427
428 if( WCsum )
429 {
431 Cnb, Ccsrc, Ccsrc, npcol );
432 if( Amp > 0 )
433 gsum2d( ctxt,
ROW, &top, Amp, kbb, WC, WCd[
LLD_], myrow,
435 }
436
437
438
439 if( conjg )
441 0, 0, WCd );
442 else if( kbb > 1 )
444 0, 1, WCd );
445
446
447
449 one, C, IC+k, JC+k, DESCC,
COLUMN );
450 if( WCfr ) free( WC );
451 }
452 }
453 }
454 else
455 {
457 Cimb = DESCC[
IMB_]; Cmb = DESCC[
MB_]; Crsrc = DESCC[
RSRC_];
458
459 if( upper )
460 {
461 for( k = kstart; k != kend; k += kstep )
462 {
463 ktmp = N - k; kbb =
MIN( ktmp, kb );
464
465
466
468 COLUMN, &Aptr, DBUFA, &Afr );
469
470
471
473 Acurcol =
PB_Cindxg2p( k, Ainb1, Anb, Acol, Acol, npcol );
474 PB_Cdescset( Ad0, Am, ktmp, Aimb1, Acurinb1, Amb, Anb, Arow,
475 Acurcol, ctxt, Ald );
476 PB_CInV(
TYPE,
NOCONJG,
COLUMN, Am, ktmp, Ad0, kbb, Aptr, 0, 0,
477 DBUFA,
COLUMN, &WA, WAd, &WAfr );
478
479
480
481 PB_COutV(
TYPE,
ROW,
INIT, Am, ktmp, Ad0, kbb, &WC, WCd, &WCfr,
482 &WCsum );
483 Anq =
PB_Cnumroc( ktmp, k, Ainb1, Anb, mycol, Acol, npcol );
484 if( ( Anq > 0 ) && ( Amp0 > 0 ) )
486 ALPHA, WA, &WAd[
LLD_],
Mptr( Aptr0, 0, Anq0-Anq, Ald,
487 size ), &Ald, zero, WC, &WCd[
LLD_] );
488 if( WAfr ) free( WA );
489 if( Afr ) free( Aptr );
490
491 if( WCsum )
492 {
494 Cmb, Crsrc, Crsrc, nprow );
495 if( Anq > 0 )
496 gsum2d( ctxt,
COLUMN, &top, kbb, Anq, WC, WCd[
LLD_],
498 }
499
500
501
502 if( conjg )
504 0, 0, WCd );
505 else if( kbb > 1 )
507 1, 0, WCd );
508
509
510
511 PB_CScatterV(
TYPE, &ScatterDir, kbb, ktmp, WC, 0, 0, WCd,
ROW, one,
512 C, IC+k, JC+k, DESCC,
ROW );
513 if( WCfr ) free( WC );
514 }
515 }
516 else
517 {
518 for( k = kstart; k != kend; k += kstep )
519 {
520 kbb = N - k; kbb =
MIN( kbb, kb ); ktmp = k + kbb;
521
522
523
525 COLUMN, &Aptr, DBUFA, &Afr );
526
527
528
529 PB_Cdescset( Ad0, Am, ktmp, Aimb1, Ainb1, Amb, Anb, Arow, Acol,
530 ctxt, Ald );
531 PB_CInV(
TYPE,
NOCONJG,
COLUMN, Am, ktmp, Ad0, kbb, Aptr, 0, 0,
532 DBUFA,
COLUMN, &WA, WAd, &WAfr );
533
534
535
536 PB_COutV(
TYPE,
ROW,
INIT, Am, ktmp, Ad0, kbb, &WC, WCd, &WCfr,
537 &WCsum );
538 Anq =
PB_Cnumroc( ktmp, 0, Ainb1, Anb, mycol, Acol, npcol );
539 if( ( Anq > 0 ) && ( Amp0 > 0 ) )
541 ALPHA, WA, &WAd[
LLD_], Aptr0, &Ald, zero, WC, &WCd[
LLD_] );
542 if( WAfr ) free( WA );
543 if( Afr ) free( Aptr );
544
545 if( WCsum )
546 {
548 Cmb, Crsrc, Crsrc, nprow );
549 if( Anq > 0 )
550 gsum2d( ctxt,
COLUMN, &top, kbb, Anq, WC, WCd[
LLD_],
552 }
553
554
555
556 if( conjg )
558 0, k, WCd );
559 else if( kbb > 1 )
561 0, k+1, WCd );
562
563
564
565 PB_CScatterV(
TYPE, &ScatterDir, kbb, ktmp, WC, 0, 0, WCd,
ROW, one,
566 C, IC+k, JC, DESCC,
ROW );
567 if( WCfr ) free( WC );
568 }
569 }
570 }
571
572
573
574}