40{
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136 Int GoEast, GoSouth, ilow, imbloc, inbloc, inca, incb, iupp, kb,
137 lcmt, lcmt00, lmbloc, lnbloc, low, mb, mblkd, mblks, mbloc,
138 * m, * n, nb, nblkd, nblks, nbloc, notran, npcol, npq=0,
139 nprow, pmb, qnb, rows, size, tmp1, tmp2, upp;
140 char * aptrd;
142
143
144
145
147
148
149
150 if( ( mblks == 0 ) || ( nblks == 0 ) ) return( 0 );
151
152
153
159
161 {
162
163
164
166 {
167
168
169
170 notran = 1; add =
TYPE->Fmmadd;
171 }
173 {
174
175
176
177 notran = 1; add =
TYPE->Fmmcadd;
178 }
180 {
181
182
183
184 notran = 0; add =
TYPE->Fmmtadd;
185 }
186 else
187 {
188
189
190
191 notran = 0; add =
TYPE->Fmmtcadd;
192 }
193 }
194 else
195 {
196
197
198
200 {
201
202
203
204 notran = 1; add =
TYPE->Fmmdda;
205 }
207 {
208
209
210
211 notran = 1; add =
TYPE->Fmmddac;
212 }
214 {
215
216
217
218 notran = 0; add =
TYPE->Fmmddat;
219 }
220 else
221 {
222
223
224
225 notran = 0; add =
TYPE->Fmmddact;
226 }
227 }
228
231
233 {
234
235
236
237 if( rows )
238 {
239
240
241
242 inca = size;
243 incb = ( notran ? size : LDB * size );
244 m = &tmp2;
245 n = &K;
246 }
247 else
248 {
249
250
251
252 inca = LDA * size;
253 incb = ( notran ? LDB * size : size );
254 m = &K;
255 n = &tmp2;
256 }
257 kb = MN;
258
259
260
261
262
263 if( ( ( lcmt00 == 0 ) && ( VM->
imb1 == VM->
inb1 ) && ( mb == nb ) &&
264 ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) )
265 {
267 {
268 npq = ( ( mblks < 2 ) ? imbloc :
269 imbloc + ( mblks - 2 ) * mb + lmbloc );
270 npq =
MIN( npq, kb );
271 if( rows ) add( &npq, &K, ALPHA, A, &LDA, BETA, B, &LDB );
272 else add( &K, &npq, ALPHA, A, &LDA, BETA, B, &LDB );
273 }
274 return( npq );
275 }
276 pmb = nprow * mb;
277 qnb = npcol * nb;
278
279
280
281
282
283 GoSouth = ( lcmt00 > iupp );
284 GoEast = ( lcmt00 < ilow );
285
286 if( !( GoSouth ) && !( GoEast ) )
287 {
288
289
290
291 if( lcmt00 >= 0 )
292 {
293 tmp1 = imbloc - lcmt00; tmp1 =
MAX( 0, tmp1 );
294 tmp2 =
MIN( tmp1, inbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
295 add( m, n, ALPHA, A+lcmt00*inca, &LDA, BETA, B, &LDB );
296 }
297 else
298 {
299 tmp1 = inbloc + lcmt00; tmp1 =
MAX( 0, tmp1 );
300 tmp2 =
MIN( tmp1, imbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
301 add( m, n, ALPHA, A, &LDA, BETA, B, &LDB );
302 }
303 if( ( kb -= tmp2 ) == 0 ) return( npq );
304 B += tmp2 * incb;
305
306
307
308
309
310 GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
311 }
312
313 if( GoSouth )
314 {
315
316
317
318
319 lcmt00 -= iupp - upp + pmb; mblks--; A += imbloc * inca;
320
321
322
323
324 while( mblks && ( lcmt00 > upp ) )
325 { lcmt00 -= pmb; mblks--; A += mb * inca; }
326
327
328
329 if( mblks <= 0 ) return( npq );
330
331
332
333
334
335
336 lcmt = lcmt00; mblkd = mblks; aptrd = A;
337
338 while( mblkd && ( lcmt >= ilow ) )
339 {
340
341
342
343 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
344 if( lcmt >= 0 )
345 {
346 tmp1 = mbloc - lcmt; tmp1 =
MAX( 0, tmp1 );
347 tmp2 =
MIN( tmp1, inbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
348 add( m, n, ALPHA, aptrd+lcmt*inca, &LDA, BETA, B, &LDB );
349 }
350 else
351 {
352 tmp1 = inbloc + lcmt; tmp1 =
MAX( 0, tmp1 );
353 tmp2 =
MIN( tmp1, mbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
354 add( m, n, ALPHA, aptrd, &LDA, BETA, B, &LDB );
355 }
356 if( ( kb -= tmp2 ) == 0 ) return( npq );
357
358
359
360 lcmt -= pmb; mblkd--; aptrd += mbloc * inca; B += tmp2 * incb;
361 }
362
363
364
365 lcmt00 += low - ilow + qnb; nblks--;
366 }
367 else if( GoEast )
368 {
369
370
371
372
373 lcmt00 += low - ilow + qnb; nblks--;
374
375
376
377
378
379 while( nblks && ( lcmt00 < low ) ) { lcmt00 += qnb; nblks--; }
380
381
382
383 if( nblks <= 0 ) return( npq );
384
385
386
387
388
389 lcmt = lcmt00; nblkd = nblks;
390
391 while( nblkd && ( lcmt <= iupp ) )
392 {
393
394
395
396 nbloc = ( ( nblkd == 1 ) ? lnbloc : nb );
397 if( lcmt >= 0 )
398 {
399 tmp1 = imbloc - lcmt; tmp1 =
MAX( 0, tmp1 );
400 tmp2 =
MIN( tmp1, nbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
401 add( m, n, ALPHA, A+lcmt*inca, &LDA, BETA, B, &LDB );
402 }
403 else
404 {
405 tmp1 = nbloc + lcmt; tmp1 =
MAX( 0, tmp1 );
406 tmp2 =
MIN( tmp1, imbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
407 add( m, n, ALPHA, A, &LDA, BETA, B, &LDB );
408 }
409 if( ( kb -= tmp2 ) == 0 ) return( npq );
410
411
412
413 lcmt += qnb; nblkd--; B += tmp2 * incb;
414 }
415
416
417
418 lcmt00 -= iupp - upp + pmb; mblks--; A += imbloc * inca;
419 }
420
421
422
423 do
424 {
425
426
427
428
429 if( ( lcmt00 < low ) || ( lcmt00 > upp ) )
430 {
431 while( mblks && nblks )
432 {
433 while( mblks && ( lcmt00 > upp ) )
434 { lcmt00 -= pmb; mblks--; A += mb*inca; }
435 if( lcmt00 >= low ) break;
436 while( nblks && ( lcmt00 < low ) )
437 { lcmt00 += qnb; nblks--; }
438 if( lcmt00 <= upp ) break;
439 }
440 }
441 if( !( mblks ) || !( nblks ) ) return( npq );
442
443
444
445
446
447 nbloc = ( ( nblks == 1 ) ? lnbloc : nb );
448 lcmt = lcmt00; mblkd = mblks; aptrd = A;
449
450 while( mblkd && ( lcmt >= low ) )
451 {
452
453
454
455 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
456 if( lcmt >= 0 )
457 {
458 tmp1 = mbloc - lcmt; tmp1 =
MAX( 0, tmp1 );
459 tmp2 =
MIN( tmp1, nbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
460 add( m, n, ALPHA, aptrd+lcmt*inca, &LDA, BETA, B, &LDB );
461 }
462 else
463 {
464 tmp1 = nbloc + lcmt; tmp1 =
MAX( 0, tmp1 );
465 tmp2 =
MIN( tmp1, mbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
466 add( m, n, ALPHA, aptrd, &LDA, BETA, B, &LDB );
467 }
468 if( ( kb -= tmp2 ) == 0 ) return( npq );
469
470
471
472 lcmt -= pmb; mblkd--; aptrd += mbloc * inca; B += tmp2 * incb;
473 }
474
475
476
477 lcmt00 += qnb; nblks--;
478
479
480
481 } while( nblks > 0 );
482
483
484
485 return( npq );
486 }
487 else
488 {
489
490
491
492 if( rows )
493 {
494
495
496
497 inca = size;
498 incb = ( notran ? size : LDB * size );
499 m = &tmp2;
500 n = &K;
501 }
502 else
503 {
504
505
506
507 inca = LDA * size;
508 incb = ( notran ? LDB * size : size );
509 m = &K;
510 n = &tmp2;
511 }
512 kb = MN;
513
514
515
516
517
518 if( ( ( lcmt00 == 0 ) && ( VM->
imb1 == VM->
inb1 ) && ( mb == nb ) &&
519 ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) )
520 {
522 {
523 npq = ( ( nblks < 2 ) ? inbloc :
524 inbloc + ( nblks - 2 ) * nb + lnbloc );
525 npq =
MIN( npq, kb );
526 if( rows ) add( &npq, &K, ALPHA, A, &LDA, BETA, B, &LDB );
527 else add( &K, &npq, ALPHA, A, &LDA, BETA, B, &LDB );
528 }
529 return( npq );
530 }
531 pmb = nprow * mb;
532 qnb = npcol * nb;
533
534
535
536
537
538 GoSouth = ( lcmt00 > iupp );
539 GoEast = ( lcmt00 < ilow );
540
541 if( !( GoSouth ) && !( GoEast ) )
542 {
543
544
545
546 if( lcmt00 >= 0 )
547 {
548 tmp1 = imbloc - lcmt00; tmp1 =
MAX( 0, tmp1 );
549 tmp2 =
MIN( tmp1, inbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
550 add( m, n, ALPHA, A, &LDA, BETA, B, &LDB );
551 }
552 else
553 {
554 tmp1 = inbloc + lcmt00; tmp1 =
MAX( 0, tmp1 );
555 tmp2 =
MIN( tmp1, imbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
556 add( m, n, ALPHA, A-lcmt00*inca, &LDA, BETA, B, &LDB );
557 }
558 if( ( kb -= tmp2 ) == 0 ) return( npq );
559 B += tmp2 * incb;
560
561
562
563
564
565 GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
566 }
567
568 if( GoSouth )
569 {
570
571
572
573
574 lcmt00 -= iupp - upp + pmb; mblks--;
575
576
577
578
579 while( mblks && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; }
580
581
582
583 if( mblks <= 0 ) return( npq );
584
585
586
587
588
589
590 lcmt = lcmt00; mblkd = mblks;
591
592 while( mblkd && ( lcmt >= ilow ) )
593 {
594
595
596
597 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
598 if( lcmt >= 0 )
599 {
600 tmp1 = mbloc - lcmt; tmp1 =
MAX( 0, tmp1 );
601 tmp2 =
MIN( tmp1, inbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
602 add( m, n, ALPHA, A, &LDA, BETA, B, &LDB );
603 }
604 else
605 {
606 tmp1 = inbloc + lcmt; tmp1 =
MAX( 0, tmp1 );
607 tmp2 =
MIN( tmp1, mbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
608 add( m, n, ALPHA, A-lcmt*inca, &LDA, BETA, B, &LDB );
609 }
610 if( ( kb -= tmp2 ) == 0 ) return( npq );
611
612
613
614 lcmt -= pmb; mblkd--; B += tmp2 * incb;
615 }
616
617
618
619 lcmt00 += low - ilow + qnb; nblks--; A += inbloc * inca;
620 }
621 else if( GoEast )
622 {
623
624
625
626
627 lcmt00 += low - ilow + qnb; nblks--; A += inbloc * inca;
628
629
630
631
632
633 while( nblks && ( lcmt00 < low ) )
634 { lcmt00 += qnb; nblks--; A += nb * inca; }
635
636
637
638 if( nblks <= 0 ) return( npq );
639
640
641
642
643
644 lcmt = lcmt00; nblkd = nblks; aptrd = A;
645
646 while( nblkd && ( lcmt <= iupp ) )
647 {
648
649
650
651 nbloc = ( ( nblkd == 1 ) ? lnbloc : nb );
652 if( lcmt >= 0 )
653 {
654 tmp1 = imbloc - lcmt; tmp1 =
MAX( 0, tmp1 );
655 tmp2 =
MIN( tmp1, nbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
656 add( m, n, ALPHA, aptrd, &LDA, BETA, B, &LDB );
657 }
658 else
659 {
660 tmp1 = nbloc + lcmt; tmp1 =
MAX( 0, tmp1 );
661 tmp2 =
MIN( tmp1, imbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
662 add( m, n, ALPHA, aptrd-lcmt*inca, &LDA, BETA, B, &LDB );
663 }
664 if( ( kb -= tmp2 ) == 0 ) return( npq );
665
666
667
668 lcmt += qnb; nblkd--; aptrd += nbloc * inca; B += tmp2 * incb;
669 }
670
671
672
673 lcmt00 -= iupp - upp + pmb; mblks--;
674 }
675
676
677
678 do
679 {
680
681
682
683
684 if( ( lcmt00 < low ) || ( lcmt00 > upp ) )
685 {
686 while( mblks && nblks )
687 {
688 while( mblks && ( lcmt00 > upp ) )
689 { lcmt00 -= pmb; mblks--; }
690 if( lcmt00 >= low ) break;
691 while( nblks && ( lcmt00 < low ) )
692 { lcmt00 += qnb; nblks--; A += nb*inca; }
693 if( lcmt00 <= upp ) break;
694 }
695 }
696 if( !( mblks ) || !( nblks ) ) return( npq );
697
698
699
700
701
702 nbloc = ( ( nblks == 1 ) ? lnbloc : nb );
703 lcmt = lcmt00; mblkd = mblks;
704
705 while( mblkd && ( lcmt >= low ) )
706 {
707
708
709
710 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
711 if( lcmt >= 0 )
712 {
713 tmp1 = mbloc - lcmt; tmp1 =
MAX( 0, tmp1 );
714 tmp2 =
MIN( tmp1, nbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
715 add( m, n, ALPHA, A, &LDA, BETA, B, &LDB );
716 }
717 else
718 {
719 tmp1 = nbloc + lcmt; tmp1 =
MAX( 0, tmp1 );
720 tmp2 =
MIN( tmp1, mbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
721 add( m, n, ALPHA, A-lcmt*inca, &LDA, BETA, B, &LDB );
722 }
723 if( ( kb -= tmp2 ) == 0 ) return( npq );
724
725
726
727 lcmt -= pmb; mblkd--; B += tmp2 * incb;
728 }
729
730
731
732 lcmt00 += qnb; nblks--; A += nbloc * inca;
733
734
735
736 } while( nblks > 0 );
737
738
739
740 return( npq );
741 }
742
743
744
745}