39{
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204 char Aroc, Croc, * one, * talpha, * tbeta, * zero;
205 Int ACnD, ACnR, Abufld, AcurrocR, Afr, AiD, AiR, AiiD, AiiR,
206 AinbD, AinbR, Ainb1D, Ainb1R, AisR, Akk, Ald, AmyprocD,
207 AmyprocR, AnbD, AnbR, AnpD, AnpR, AnprocsD, AnprocsR, Aoff,
208 ArocD, ArocR, AsrcR, Cbufld, CcurrocR, Cfr, CiD, CiR, CiiD,
209 CiiR, CinbD, CinbR, Cinb1D, Cinb1R, CisR, Ckk, Cld, CmyprocD,
210 CmyprocR, CnbD, CnbR, CnpD, CnpR, CnprocsD, CnprocsR, Coff,
211 CrocD, CrocR, CsrcR, ctxt, col2row, gcdPQ, k, kb, kbb, l,
212 lcmPQ, lcmb, maxp, maxq, mycol, myrow, ncpq, npcol, npq,
213 nprow, nrpq, p, q, size;
215
216
217
219 char * Abuf = NULL, * Cbuf = NULL;
220
221
222
223
224
225
226
228
229
230
231
232 col2row = ( ( M <= N ) || ( nprow == 1 ) || ( DESCA[
RSRC_] == -1 ) );
233
234 if( col2row )
235 {
236 AinbR = DESCA[
INB_]; AnbR = DESCA[
NB_]; AsrcR = DESCA[
CSRC_];
237 CinbR = DESCC[
IMB_]; CnbR = DESCC[
MB_]; CsrcR = DESCC[
RSRC_];
238
239
240
241
242 if( !(
PB_Cspan( M, JA, AinbR, AnbR, AsrcR, npcol ) ) &&
243 !(
PB_Cspan( M, IC, CinbR, CnbR, CsrcR, nprow ) ) )
244 {
245 PB_Cpaxpby(
TYPE, CONJUG, N, M, ALPHA, A, IA, JA, DESCA,
COLUMN, BETA,
246 C, IC, JC, DESCC,
ROW );
247 return;
248 }
249
250
251
252 ACnR = M; ACnD = N;
253 AmyprocD = CmyprocR = myrow; AnprocsD = CnprocsR = nprow;
254 AmyprocR = CmyprocD = mycol; CnprocsD = AnprocsR = npcol;
255 AiD = IA; AiR = JA; Aroc =
CCOLUMN;
256 AinbD = DESCA[
IMB_]; AnbD = DESCA[
MB_]; Ald = DESCA[
LLD_];
257 PB_Cinfog2l( IA, JA, DESCA, AnprocsD, AnprocsR, AmyprocD, AmyprocR,
258 &AiiD, &AiiR, &ArocD, &ArocR );
259 CiD = JC; CiR = IC; Croc =
CROW;
260 CinbD = DESCC[
INB_]; CnbD = DESCC[
NB_]; Cld = DESCC[
LLD_];
261 PB_Cinfog2l( IC, JC, DESCC, CnprocsR, CnprocsD, CmyprocR, CmyprocD,
262 &CiiR, &CiiD, &CrocR, &CrocD );
263 }
264 else
265 {
266 AinbR = DESCA[
IMB_]; AnbR = DESCA[
MB_]; AsrcR = DESCA[
RSRC_];
267 CinbR = DESCC[
INB_]; CnbR = DESCC[
NB_]; CsrcR = DESCC[
CSRC_];
268
269
270
271
272 if( !(
PB_Cspan( N, IA, AinbR, AnbR, AsrcR, nprow ) ) &&
273 !(
PB_Cspan( N, JC, CinbR, CnbR, CsrcR, npcol ) ) )
274 {
275 PB_Cpaxpby(
TYPE, CONJUG, N, M, ALPHA, A, IA, JA, DESCA,
ROW, BETA, C,
277 return;
278 }
279
280
281
282 ACnD = M; ACnR = N;
283 AmyprocR = CmyprocD = myrow; AnprocsR = CnprocsD = nprow;
284 AmyprocD = CmyprocR = mycol; AnprocsD = CnprocsR = npcol;
285
286 AiD = JA; AiR = IA; Aroc =
CROW;
287 AinbD = DESCA[
INB_]; AnbD = DESCA[
NB_]; Ald = DESCA[
LLD_];
288 PB_Cinfog2l( IA, JA, DESCA, AnprocsR, AnprocsD, AmyprocR, AmyprocD,
289 &AiiR, &AiiD, &ArocR, &ArocD );
290 CiD = IC; CiR = JC; Croc =
CCOLUMN;
291 CinbD = DESCC[
IMB_]; CnbD = DESCC[
MB_]; Cld = DESCC[
LLD_];
292 PB_Cinfog2l( IC, JC, DESCC, CnprocsD, CnprocsR, CmyprocD, CmyprocR,
293 &CiiD, &CiiR, &CrocD, &CrocR );
294 }
295
298
300 AnpD =
PB_Cnumroc( ACnD, 0, Ainb1D, AnbD, AmyprocD, ArocD, AnprocsD );
302 AisR = ( ( AsrcR < 0 ) || ( AnprocsR == 1 ) );
303
305 CnpD =
PB_Cnumroc( ACnD, 0, Cinb1D, CnbD, CmyprocD, CrocD, CnprocsD );
307 CisR = ( ( CsrcR < 0 ) || ( CnprocsR == 1 ) );
308
309 lcmb =
PB_Clcm( ( maxp = ( CisR ? 1 : CnprocsR ) ) * CnbR,
310 ( maxq = ( AisR ? 1 : AnprocsR ) ) * AnbR );
312 lcmPQ = ( maxp / gcdPQ ) * maxq;
313
314
315
316 for( k = 0; k < gcdPQ; k++ )
317 {
318 p = 0; q = k;
319
320 for( l = 0; l < lcmPQ; l++ )
321 {
322 AcurrocR = ( AisR ? -1 :
MModAdd( ArocR, q, AnprocsR ) );
323 CcurrocR = ( CisR ? -1 :
MModAdd( CrocR, p, CnprocsR ) );
324
325 if( ( AisR || ( AmyprocR == AcurrocR ) ) ||
326 ( CisR || ( CmyprocR == CcurrocR ) ) )
327 {
328 Ckk = CiiR; Akk = AiiR;
329
330
331
332 CnpR =
PB_Cnumroc( ACnR, 0, Cinb1R, CnbR, CcurrocR, CrocR,
333 CnprocsR );
334 AnpR =
PB_Cnumroc( ACnR, 0, Ainb1R, AnbR, AcurrocR, ArocR,
335 AnprocsR );
336 PB_CVMinit( &VM, 0, CnpR, AnpR, Cinb1R, Ainb1R, CnbR, AnbR, p, q,
337 maxp, maxq, lcmb );
338
339
340
342
343
344
345
346 if( npq ) kbb = npq / ( ( npq - 1 ) / kb + 1 );
347
348 if( col2row )
349 {
350 while( npq )
351 {
352 kbb =
MIN( kbb, npq );
353
354
355
357
358
359
360
361 if( ( Afr = ( ncpq < kbb ) ) != 0 )
362 {
363
364
365
366
367 Abufld =
MAX( 1, AnpD );
368 if( AisR || ( AmyprocR == AcurrocR ) )
369 {
372 kbb, AnpD, one,
Mptr( A, AiiD, Akk, Ald,
373 size ), Ald, zero, Abuf, Abufld );
374 }
375 }
376 else
377 {
378
379
380
381 Abufld = Ald;
382 if( AisR || ( AmyprocR == AcurrocR ) )
383 Abuf =
Mptr( A, AiiD, Akk+Aoff, Ald, size );
384 }
385 PB_Cdescset( DBUFA, ACnD, kbb, Ainb1D, kbb, AnbD, kbb, ArocD,
386 AcurrocR, ctxt, Abufld );
387
388
389
390
391 if( ( Cfr = ( nrpq < kbb ) ) != 0 )
392 {
393
394
395
396 Cbufld = kbb; talpha = one; tbeta = zero;
397 if( CisR || ( CmyprocR == CcurrocR ) )
399 }
400 else
401 {
402
403
404
405 Cbufld = Cld; talpha = ALPHA; tbeta = BETA;
406 if( CisR || ( CmyprocR == CcurrocR ) )
407 Cbuf =
Mptr( C, Ckk+Coff, CiiD, Cld, size );
408 }
409 PB_Cdescset( DBUFC, kbb, ACnD, kbb, Cinb1D, kbb, CnbD,
410 CcurrocR, CrocD, ctxt, Cbufld );
411
412
413
415 DBUFA, &Aroc, tbeta, Cbuf, 0, 0, DBUFC, &Croc );
416
417
418
419 if( Afr && ( AisR || ( AmyprocR == AcurrocR ) ) )
420 if( Abuf ) free( Abuf );
421
422
423
424 if( Cfr && ( CisR || ( CmyprocR == CcurrocR ) ) )
425 {
427 kbb, CnpD, BETA,
Mptr( C, Ckk, CiiD, Cld,
428 size ), Cld, ALPHA, Cbuf, Cbufld );
429 if( Cbuf ) free( Cbuf );
430 }
431
432
433
435 npq -= kbb;
436 }
437 }
438 else
439 {
440 while( npq )
441 {
442 kbb =
MIN( kbb, npq );
443
444
445
447
448
449
450
451 if( ( Afr = ( ncpq < kbb ) ) != 0 )
452 {
453
454
455
456
457 Abufld = kbb;
458 if( AisR || ( AmyprocR == AcurrocR ) )
459 {
462 kbb, AnpD, one,
Mptr( A, Akk, AiiD, Ald,
463 size ), Ald, zero, Abuf, Abufld );
464 }
465 }
466 else
467 {
468
469
470
471 Abufld = Ald;
472 if( AisR || ( AmyprocR == AcurrocR ) )
473 Abuf =
Mptr( A, Akk+Aoff, AiiD, Ald, size );
474 }
475 PB_Cdescset( DBUFA, kbb, ACnD, kbb, Ainb1D, kbb, AnbD,
476 AcurrocR, ArocD, ctxt, Abufld );
477
478
479
480
481 if( ( Cfr = ( nrpq < kbb ) ) != 0 )
482 {
483
484
485
486 Cbufld =
MAX( 1, CnpD ); talpha = one; tbeta = zero;
487 if( CisR || ( CmyprocR == CcurrocR ) )
489 }
490 else
491 {
492 Cbufld = Cld; talpha = ALPHA; tbeta = BETA;
493 if( CisR || ( CmyprocR == CcurrocR ) )
494 Cbuf =
Mptr( C, CiiD, Ckk+Coff, Cld, size );
495 }
496 PB_Cdescset( DBUFC, ACnD, kbb, Cinb1D, kbb, CnbD, kbb, CrocD,
497 CcurrocR, ctxt, Cbufld );
498
499
500
502 DBUFA, &Aroc, tbeta, Cbuf, 0, 0, DBUFC, &Croc );
503
504
505
506 if( Afr && ( AisR || ( AmyprocR == AcurrocR ) ) )
507 if( Abuf ) free( Abuf );
508
509
510
511 if( Cfr && ( CisR || ( CmyprocR == CcurrocR ) ) )
512 {
514 kbb, CnpD, BETA,
Mptr( C, CiiD, Ckk, Cld,
515 size ), Cld, ALPHA, Cbuf, Cbufld );
516 if( Cbuf ) free( Cbuf );
517 }
518
519
520
522 npq -= kbb;
523 }
524 }
525 }
528 }
529 }
530
531
532
533}