40{
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274 char * one;
275 Int AcurrocR, Afwd, AiD, AiR, AiiD, AiiR, AinbD, AinbR, Ainb1D,
276 Ainb1R, AisR, Ald, AmyprocD, AmyprocR, AnbD, AnbR, AnpR,
277 AnprocsD, AnprocsR, ArocD, ArocR, Arocs, AsrcR, Ccol, Cii,
278 Cimb1, Cinb1, Cjj, Clcmb, Cld, Clp, Clq, Cnq0, Cmb, Cmp,
279 Cmp0, Cnb, Cnq, Crow, WACfr, WACld, WACsum, WARfr, WARld,
280 WARsum, Wkbb=0, ctxt, k, kb, kbb, l, lb, ltmp, maxp, mycol,
281 myrow, notran, npcol, nprow, p=0, size, tmp, upper;
284
285
286
288 char * Aptr = NULL, * Cptr = NULL, * WAC = NULL, * WAR = NULL;
289
290
291
292
293
294
295
297
298
299
301
304
305
306
307 PB_Cdescribe( N, N, IC, JC, DESCC, nprow, npcol, myrow, mycol, &Cii, &Cjj,
308 &Cld, &Cimb1, &Cinb1, &Cmb, &Cnb, &Crow, &Ccol, Cd0 );
309
310 Cmp =
PB_Cnumroc( N, 0, Cimb1, Cmb, myrow, Crow, nprow );
311 Cnq =
PB_Cnumroc( N, 0, Cinb1, Cnb, mycol, Ccol, npcol );
312
313 if( ( Cmp > 0 ) && ( Cnq > 0 ) )
314 {
315 Cptr =
Mptr( C, Cii, Cjj, Cld, size );
318
319
320
321
322 Clcmb = 2 * kb *
PB_Clcm( ( Crow >= 0 ? nprow : 1 ),
323 ( Ccol >= 0 ? npcol : 1 ) );
324 }
325
326
327
329 {
330 AiR = JA; AnprocsR = npcol; AinbR = DESCA[
INB_]; AnbR = DESCA[
NB_];
331 AsrcR = DESCA[
CSRC_];
332 }
333 else
334 {
335 AiR = IA; AnprocsR = nprow; AinbR = DESCA[
IMB_]; AnbR = DESCA[
MB_];
336 AsrcR = DESCA[
RSRC_];
337 }
338
339
340
341
342 if( !(
PB_Cspan( K, AiR, AinbR, AnbR, AsrcR, AnprocsR ) ) )
343 {
344
345
346
347 if( notran )
348 {
349 PB_CInV(
TYPE,
NOCONJG,
COLUMN, N, N, Cd0, K, A, IA, JA, DESCA,
350 COLUMN, &WAC, WACd0, &WACfr );
351 PB_CInV(
TYPE, CONJUG,
ROW, N, N, Cd0, K, WAC, 0, 0, WACd0,
352 COLUMN, &WAR, WARd0, &WARfr );
353 }
354 else
355 {
356 PB_CInV(
TYPE,
NOCONJG,
ROW, N, N, Cd0, K, A, IA, JA, DESCA,
357 ROW, &WAR, WARd0, &WARfr );
358 PB_CInV(
TYPE, CONJUG,
COLUMN, N, N, Cd0, K, WAR, 0, 0, WARd0,
359 ROW, &WAC, WACd0, &WACfr );
360 }
361
362
363
364 if( ( Cmp > 0 ) && ( Cnq > 0 ) )
365 {
366 WACld = WACd0[
LLD_]; WARld = WARd0[
LLD_];
367
369 {
370 for( l = 0; l < N; l += Clcmb )
371 {
372 lb = N - l; lb =
MIN( lb, Clcmb );
373 Clp =
PB_Cnumroc( l, 0, Cimb1, Cmb, myrow, Crow, nprow );
374 Clq =
PB_Cnumroc( l, 0, Cinb1, Cnb, mycol, Ccol, npcol );
375 Cnq0 =
PB_Cnumroc( lb, l, Cinb1, Cnb, mycol, Ccol, npcol );
376 if( Clp > 0 && Cnq0 > 0 )
378 ALPHA, WAC, &WACld,
Mptr( WAR, 0, Clq, WARld, size ),
379 &WARld, one,
Mptr( Cptr, 0, Clq, Cld, size ), &Cld );
381 size ), WACld,
Mptr( WAR, 0, Clq, WARld, size ), WARld,
382 Cptr, l, l, Cd0, tzsyrk );
383 }
384 }
385 else
386 {
387 for( l = 0; l < N; l += Clcmb )
388 {
389 lb = N - l; ltmp = l + ( lb =
MIN( lb, Clcmb ) );
390 Clp =
PB_Cnumroc( l, 0, Cimb1, Cmb, myrow, Crow, nprow );
391 Clq =
PB_Cnumroc( l, 0, Cinb1, Cnb, mycol, Ccol, npcol );
393 size ), WACld,
Mptr( WAR, 0, Clq, WARld, size ), WARld,
394 Cptr, l, l, Cd0, tzsyrk );
395 Clp =
PB_Cnumroc( ltmp, 0, Cimb1, Cmb, myrow, Crow, nprow );
396 Cmp0 = Cmp - Clp;
397 Cnq0 =
PB_Cnumroc( lb, l, Cinb1, Cnb, mycol, Ccol, npcol );
398 if( Cmp0 > 0 && Cnq0 > 0 )
400 &K, ALPHA,
Mptr( WAC, Clp, 0, WACld, size ), &WACld,
401 Mptr( WAR, 0, Clq, WARld, size ), &WARld, one,
402 Mptr( Cptr, Clp, Clq, Cld, size ), &Cld );
403 }
404 }
405 }
406
407 if( WACfr ) free( WAC );
408 if( WARfr ) free( WAR );
409
410 return;
411 }
412
413
414
417
418 if( notran )
419 {
420 AiD = IA; AinbD = DESCA[
IMB_]; AnbD = DESCA[
MB_]; Ald = DESCA[
LLD_];
421 AmyprocD = myrow; AmyprocR = mycol; AnprocsD = nprow;
422 PB_Cinfog2l( IA, JA, DESCA, AnprocsD, AnprocsR, AmyprocD, AmyprocR,
423 &AiiD, &AiiR, &ArocD, &ArocR );
424 }
425 else
426 {
427 AiD = JA; AinbD = DESCA[
INB_]; AnbD = DESCA[
NB_]; Ald = DESCA[
LLD_];
428 AmyprocD = mycol; AmyprocR = myrow; AnprocsD = npcol;
429 PB_Cinfog2l( IA, JA, DESCA, AnprocsR, AnprocsD, AmyprocR, AmyprocD,
430 &AiiR, &AiiD, &ArocR, &ArocD );
431 }
434 AisR = ( ( AsrcR < 0 ) || ( AnprocsR == 1 ) );
435
436
437
438
439 if( !( AisR ) && !( Afwd ) )
440 {
441 tmp =
PB_Cindxg2p( K - 1, Ainb1R, AnbR, ArocR, ArocR, AnprocsR );
442 p =
MModSub( tmp, ArocR, AnprocsR );
443 }
444
445
446
447 PB_COutV(
TYPE,
COLUMN,
NOINIT, N, N, Cd0, kb, &WAC, WACd0, &WACfr,
448 &WACsum );
449 PB_COutV(
TYPE,
ROW,
NOINIT, N, N, Cd0, kb, &WAR, WARd0, &WARfr,
450 &WARsum );
451
452
453
454 maxp = ( AisR ? 1 : AnprocsR );
455 AcurrocR = ( AisR ? -1 :
MModAdd( ArocR, p, AnprocsR ) );
456 AnpR =
PB_Cnumroc( K, 0, Ainb1R, AnbR, AcurrocR, ArocR, AnprocsR );
457
458 for( k = 0; k < K; k += kb )
459 {
460 kbb = K - k; kbb =
MIN( kbb, kb );
461
462 while( Wkbb != kbb )
463 {
464
465
466
467
468 while( AnpR == 0 )
469 {
471 AcurrocR = ( AisR ? -1 :
MModAdd( ArocR, p, AnprocsR ) );
472 AnpR =
PB_Cnumroc( K, 0, Ainb1R, AnbR, AcurrocR, ArocR,
473 AnprocsR );
474 }
475
476
477
478
479 if( Wkbb == 0 ) { Arocs = ( AnpR < kbb ? AnpR : kbb ); }
480 else { Arocs = kbb - Wkbb; Arocs =
MIN( Arocs, AnpR ); }
481
482
483
484
485 if( notran )
486 {
487 if( AisR || ( AmyprocR == AcurrocR ) )
488 { Aptr =
Mptr( A, AiiD, AiiR, Ald, size ); AiiR += Arocs; }
489 PB_Cdescset( DBUFA, N, Arocs, Ainb1D, Arocs, AnbD, Arocs,
490 ArocD, AcurrocR, ctxt, Ald );
491
492
493
494 PB_CInV2(
TYPE,
NOCONJG,
COLUMN, N, N, Cd0, Arocs, Aptr, 0, 0,
495 DBUFA,
COLUMN, WAC, Wkbb, WACd0 );
496 }
497 else
498 {
499 if( AisR || ( AmyprocR == AcurrocR ) )
500 { Aptr =
Mptr( A, AiiR, AiiD, Ald, size ); AiiR += Arocs; }
501 PB_Cdescset( DBUFA, Arocs, N, Arocs, Ainb1D, Arocs, AnbD,
502 AcurrocR, ArocD, ctxt, Ald );
503
504
505
506 PB_CInV2(
TYPE,
NOCONJG,
ROW, N, N, Cd0, Arocs, Aptr, 0, 0,
507 DBUFA,
ROW, WAR, Wkbb, WARd0 );
508 }
509
510
511
512
513
514 AnpR -= Arocs;
515 Wkbb += Arocs;
516 }
517
518 if( notran )
519 {
520
521
522
523 PB_CInV2(
TYPE, CONJUG,
ROW, N, N, Cd0, kbb, WAC, 0, 0, WACd0,
525 }
526 else
527 {
528
529
530
531 PB_CInV2(
TYPE, CONJUG,
COLUMN, N, N, Cd0, kbb, WAR, 0, 0, WARd0,
532 ROW, WAC, 0, WACd0 );
533 }
534
535
536
537 if( ( Cmp > 0 ) && ( Cnq > 0 ) )
538 {
539 WACld = WACd0[
LLD_]; WARld = WARd0[
LLD_];
540
541 if( upper )
542 {
543 for( l = 0; l < N; l += Clcmb )
544 {
545 lb = N - l; lb =
MIN( lb, Clcmb );
546 Clp =
PB_Cnumroc( l, 0, Cimb1, Cmb, myrow, Crow, nprow );
547 Clq =
PB_Cnumroc( l, 0, Cinb1, Cnb, mycol, Ccol, npcol );
548 Cnq0 =
PB_Cnumroc( lb, l, Cinb1, Cnb, mycol, Ccol, npcol );
549 if( Clp > 0 && Cnq0 > 0 )
551 &kbb, ALPHA, WAC, &WACld,
Mptr( WAR, 0, Clq, WARld,
552 size ), &WARld, one,
Mptr( Cptr, 0, Clq, Cld, size ),
553 &Cld );
555 size ), WACld,
Mptr( WAR, 0, Clq, WARld, size ), WARld,
556 Cptr, l, l, Cd0, tzsyrk );
557 }
558 }
559 else
560 {
561 for( l = 0; l < N; l += Clcmb )
562 {
563 lb = N - l; ltmp = l + ( lb =
MIN( lb, Clcmb ) );
564 Clp =
PB_Cnumroc( l, 0, Cimb1, Cmb, myrow, Crow, nprow );
565 Clq =
PB_Cnumroc( l, 0, Cinb1, Cnb, mycol, Ccol, npcol );
567 size ), WACld,
Mptr( WAR, 0, Clq, WARld, size ), WARld,
568 Cptr, l, l, Cd0, tzsyrk );
569 Clp =
PB_Cnumroc( ltmp, 0, Cimb1, Cmb, myrow, Crow, nprow );
570 Cmp0 = Cmp - Clp;
571 Cnq0 =
PB_Cnumroc( lb, l, Cinb1, Cnb, mycol, Ccol, npcol );
572 if( Cmp0 > 0 && Cnq0 > 0 )
574 &kbb, ALPHA,
Mptr( WAC, Clp, 0, WACld, size ), &WACld,
575 Mptr( WAR, 0, Clq, WARld, size ), &WARld, one,
576 Mptr( Cptr, Clp, Clq, Cld, size ), &Cld );
577 }
578 }
579 }
580
581 Wkbb = 0;
582 }
583
584 if( WACfr ) free( WAC );
585 if( WARfr ) free( WAR );
586
587
588
589}