40{
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216 char ACroc, * one, * talpha, * tbeta, * zero;
217 Int ACmyprocD, ACmyprocR, ACnD, ACnR, ACnprocsD, ACnprocsR,
218 Abufld, AcurrocR, Afr, Afwd, AiD, AiR, AiiD, AiiR, AinbD,
219 AinbR, Ainb1D, Ainb1R, AisR, Akk, Ald, AnbD, AnbR, AnpD,
220 AnpR, Aoff, ArocD, ArocR, AsrcR, Cbufld, CcurrocR, Cfr,
221 Cfwd, CiD, CiR, CiiD, CiiR, CinbD, CinbR, Cinb1D, Cinb1R,
222 CisR, Ckk, Cld, CnbD, CnbR, CnpD, CnpR, Coff, CrocD, CrocR,
223 CsrcR, ctxt, k, kb, kbb, lcmb, maxp, maxpm1, maxpq, maxq,
224 mycol, myrow, npcol, npq, nprow, ncpq, nrpq, p=0, q=0,
225 row2row, size, tmp;
227
228
229
231 char * Abuf = NULL, * Cbuf = NULL;
232
233
234
235
236
237
238
240
241
242
243
244 row2row = ( ( M <= N ) || ( npcol == 1 ) || ( DESCA[
CSRC_] == -1 ) );
245
246 if( row2row )
247 {
248 AinbR = DESCA[
IMB_]; AnbR = DESCA[
MB_]; AsrcR = DESCA[
RSRC_];
249 CinbR = DESCC[
IMB_]; CnbR = DESCC[
MB_]; CsrcR = DESCC[
RSRC_];
250
251
252
253
254 if( !(
PB_Cspan( M, IA, AinbR, AnbR, AsrcR, nprow ) ) &&
255 !(
PB_Cspan( M, IC, CinbR, CnbR, CsrcR, nprow ) ) )
256 {
257 PB_Cpaxpby(
TYPE, CONJUG, M, N, ALPHA, A, IA, JA, DESCA,
ROW, BETA,
258 C, IC, JC, DESCC,
ROW );
259 return;
260 }
261
262
263
264 ACnR = M; ACnD = N;
265 ACmyprocR = myrow; ACnprocsR = nprow;
266 ACmyprocD = mycol; ACnprocsD = npcol; ACroc =
CROW;
267 AiR = IA; AiD = JA;
268 AinbD = DESCA[
INB_]; AnbD = DESCA[
NB_]; Ald = DESCA[
LLD_];
269 PB_Cinfog2l( IA, JA, DESCA, ACnprocsR, ACnprocsD, ACmyprocR, ACmyprocD,
270 &AiiR, &AiiD, &ArocR, &ArocD );
271 CiR = IC; CiD = JC;
272 CinbD = DESCC[
INB_]; CnbD = DESCC[
NB_]; Cld = DESCC[
LLD_];
273 PB_Cinfog2l( IC, JC, DESCC, ACnprocsR, ACnprocsD, ACmyprocR, ACmyprocD,
274 &CiiR, &CiiD, &CrocR, &CrocD );
275 }
276 else
277 {
278 AinbR = DESCA[
INB_]; AnbR = DESCA[
NB_]; AsrcR = DESCA[
CSRC_];
279 CinbR = DESCC[
INB_]; CnbR = DESCC[
NB_]; CsrcR = DESCC[
CSRC_];
280
281
282
283
284 if( !(
PB_Cspan( N, JA, AinbR, AnbR, AsrcR, npcol ) ) &&
285 !(
PB_Cspan( N, JC, CinbR, CnbR, CsrcR, npcol ) ) )
286 {
287 PB_Cpaxpby(
TYPE, CONJUG, M, N, ALPHA, A, IA, JA, DESCA,
COLUMN, BETA,
288 C, IC, JC, DESCC,
COLUMN );
289 return;
290 }
291
292
293
294 ACnR = N; ACnD = M;
295 ACmyprocR = mycol; ACnprocsR = npcol;
296 ACmyprocD = myrow; ACnprocsD = nprow; ACroc =
CCOLUMN;
297 AiR = JA; AiD = IA;
298 AinbD = DESCA[
IMB_]; AnbD = DESCA[
MB_]; Ald = DESCA[
LLD_];
299 PB_Cinfog2l( IA, JA, DESCA, ACnprocsD, ACnprocsR, ACmyprocD, ACmyprocR,
300 &AiiD, &AiiR, &ArocD, &ArocR );
301 CiR = JC; CiD = IC;
302 CinbD = DESCC[
IMB_]; CnbD = DESCC[
MB_]; Cld = DESCC[
LLD_];
303 PB_Cinfog2l( IC, JC, DESCC, ACnprocsD, ACnprocsR, ACmyprocD, ACmyprocR,
304 &CiiD, &CiiR, &CrocD, &CrocR );
305 }
306
309
311 AnpD =
PB_Cnumroc( ACnD, 0, Ainb1D, AnbD, ACmyprocD, ArocD, ACnprocsD );
313 AisR = ( ( AsrcR < 0 ) || ( ACnprocsR == 1 ) );
314
316 CnpD =
PB_Cnumroc( ACnD, 0, Cinb1D, CnbD, ACmyprocD, CrocD, ACnprocsD );
318 CisR = ( ( CsrcR < 0 ) || ( ACnprocsR == 1 ) );
319
320 lcmb =
PB_Clcm( ( maxp = ( CisR ? 1 : ACnprocsR ) ) * CnbR,
321 ( maxq = ( AisR ? 1 : ACnprocsR ) ) * AnbR );
322
325
326
327
328
329 if( !( AisR ) && !( Afwd ) )
330 {
331 tmp =
PB_Cindxg2p( ACnR-1, Ainb1R, AnbR, ArocR, ArocR, ACnprocsR );
332 q =
MModSub( tmp, ArocR, ACnprocsR );
333 }
334
335
336
337
338 if( !( CisR ) && !( Cfwd ) )
339 {
340 tmp =
PB_Cindxg2p( ACnR-1, Cinb1R, CnbR, CrocR, CrocR, ACnprocsR );
341 p =
MModSub( tmp, CrocR, ACnprocsR );
342 }
343
344
345
346 maxpm1 = maxp - 1; maxpq = maxp * maxq;
347
348 for( k = 0; k < maxpq; k++ )
349 {
350 AcurrocR = ( AisR ? -1 :
MModAdd( ArocR, q, ACnprocsR ) );
351 CcurrocR = ( CisR ? -1 :
MModAdd( CrocR, p, ACnprocsR ) );
352
353 if( ( AisR || ( ACmyprocR == AcurrocR ) ) ||
354 ( CisR || ( ACmyprocR == CcurrocR ) ) )
355 {
356 Ckk = CiiR; Akk = AiiR;
357
358
359
360 AnpR =
PB_Cnumroc( ACnR, 0, Ainb1R, AnbR, AcurrocR, ArocR, ACnprocsR );
361 CnpR =
PB_Cnumroc( ACnR, 0, Cinb1R, CnbR, CcurrocR, CrocR, ACnprocsR );
362 PB_CVMinit( &VM, 0, CnpR, AnpR, Cinb1R, Ainb1R, CnbR, AnbR, p, q,
363 maxp, maxq, lcmb );
364
365
366
368
369
370
371
372 if( npq ) kbb = npq / ( ( npq - 1 ) / kb + 1 );
373
374 if( row2row )
375 {
376 while( npq )
377 {
378 kbb =
MIN( kbb, npq );
379
380
381
383
384
385
386
387 if( ( Afr = ( ncpq < kbb ) ) != 0 )
388 {
389
390
391
392
393 Abufld = kbb;
394 if( AisR || ( ACmyprocR == AcurrocR ) )
395 {
398 kbb, AnpD, one,
Mptr( A, Akk, AiiD, Ald,
399 size ), Ald, zero, Abuf, Abufld );
400 }
401 }
402 else
403 {
404
405
406
407 Abufld = Ald;
408 if( AisR || ( ACmyprocR == AcurrocR ) )
409 Abuf =
Mptr( A, Akk+Aoff, AiiD, Ald, size );
410 }
411 PB_Cdescset( DBUFA, kbb, ACnD, kbb, Ainb1D, kbb, AnbD, AcurrocR,
412 ArocD, ctxt, Abufld );
413
414
415
416
417 if( ( Cfr = ( nrpq < kbb ) ) != 0 )
418 {
419
420
421
422 Cbufld = kbb; talpha = one; tbeta = zero;
423 if( CisR || ( ACmyprocR == CcurrocR ) )
425 }
426 else
427 {
428
429
430
431 Cbufld = Cld; talpha = ALPHA; tbeta = BETA;
432 if( CisR || ( ACmyprocR == CcurrocR ) )
433 Cbuf =
Mptr( C, Ckk+Coff, CiiD, Cld, size );
434 }
435 PB_Cdescset( DBUFC, kbb, ACnD, kbb, Cinb1D, kbb, CnbD, CcurrocR,
436 CrocD, ctxt, Cbufld );
437
438
439
441 &ACroc, tbeta, Cbuf, 0, 0, DBUFC, &ACroc );
442
443
444
445 if( Afr && ( AisR || ( ACmyprocR == AcurrocR ) ) )
446 if( Abuf ) free( Abuf );
447
448
449
450 if( Cfr && ( CisR || ( ACmyprocR == CcurrocR ) ) )
451 {
453 CnpD, BETA,
Mptr( C, Ckk, CiiD, Cld, size ), Cld,
454 ALPHA, Cbuf, Cbufld );
455 if( Cbuf ) free( Cbuf );
456 }
457
458
459
461 npq -= kbb;
462 }
463 }
464 else
465 {
466 while( npq )
467 {
468 kbb =
MIN( kbb, npq );
469
470
471
473
474
475
476
477 if( ( Afr = ( ncpq < kbb ) ) != 0 )
478 {
479
480
481
482
483 Abufld =
MAX( 1, AnpD );
484 if( AisR || ( ACmyprocR == AcurrocR ) )
485 {
488 kbb, AnpD, one,
Mptr( A, AiiD, Akk, Ald,
489 size ), Ald, zero, Abuf, Abufld );
490 }
491 }
492 else
493 {
494
495
496
497 Abufld = Ald;
498 if( AisR || ( ACmyprocR == AcurrocR ) )
499 Abuf =
Mptr( A, AiiD, Akk+Aoff, Ald, size );
500 }
501 PB_Cdescset( DBUFA, ACnD, kbb, Ainb1D, kbb, AnbD, kbb, ArocD,
502 AcurrocR, ctxt, Abufld );
503
504
505
506
507 if( ( Cfr = ( nrpq < kbb ) ) != 0 )
508 {
509
510
511
512 Cbufld =
MAX( 1, CnpD ); talpha = one; tbeta = zero;
513 if( CisR || ( ACmyprocR == CcurrocR ) )
515 }
516 else
517 {
518 Cbufld = Cld; talpha = ALPHA; tbeta = BETA;
519 if( CisR || ( ACmyprocR == CcurrocR ) )
520 Cbuf =
Mptr( C, CiiD, Ckk+Coff, Cld, size );
521 }
522 PB_Cdescset( DBUFC, ACnD, kbb, Cinb1D, kbb, CnbD, kbb, CrocD,
523 CcurrocR, ctxt, Cbufld );
524
525
526
528 &ACroc, tbeta, Cbuf, 0, 0, DBUFC, &ACroc );
529
530
531
532 if( Afr && ( AisR || ( ACmyprocR == AcurrocR ) ) )
533 if( Abuf ) free( Abuf );
534
535
536
537 if( Cfr && ( CisR || ( ACmyprocR == CcurrocR ) ) )
538 {
540 CnpD, BETA,
Mptr( C, CiiD, Ckk, Cld, size ), Cld,
541 ALPHA, Cbuf, Cbufld );
542 if( Cbuf ) free( Cbuf );
543 }
544
545
546
548 npq -= kbb;
549 }
550 }
551 }
552
553
554
555 if( ( Cfwd && ( p == maxpm1 ) ) || ( !( Cfwd ) && ( p == 0 ) ) )
558 }
559
560
561
562}