40{
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139 Int GoEast, GoSouth, ilow, imbloc, inbloc, inca, incb, iupp, kb,
140 lcmt, lcmt00, lmbloc, lnbloc, low, mb, mblkd, mblks, mbloc,
141 * m, * n, nb, nblkd, nblks, nbloc, notran, npcol, npq=0,
142 nprow, pmb, qnb, rows, size, tmp1, tmp2, upp;
144 char * aptrd, * bptrd;
145
146
147
148
150
151
152
153 if( ( mblks == 0 ) || ( nblks == 0 ) ) return( 0 );
154
155
156
162
164 {
165
166
167
169 {
170
171
172
173 notran = 1;
175 }
177 {
178
179
180
181 notran = 1;
183 }
185 {
186
187
188
189 notran = 0;
191 }
192 else
193 {
194
195
196
197 notran = 0;
198 add =
TYPE->Fmmtcadd;
199 }
200 }
201 else
202 {
203
204
205
207 {
208
209
210
211 notran = 1;
213 }
215 {
216
217
218
219 notran = 1;
221 }
223 {
224
225
226
227 notran = 0;
229 }
230 else
231 {
232
233
234
235 notran = 0;
236 add =
TYPE->Fmmddact;
237 }
238 }
239
242
244 {
245
246
247
248 if( rows )
249 {
250
251
252
253 inca = size;
254 incb = ( notran ? size : LDB * size );
255 m = &tmp2;
256 n = &K;
257 }
258 else
259 {
260
261
262
263 inca = LDA * size;
264 incb = ( notran ? LDB * size : size );
265 m = &K;
266 n = &tmp2;
267 }
268 kb = MN;
269
270
271
272
273
274 if( ( ( lcmt00 == 0 ) && ( VM->
imb1 == VM->
inb1 ) && ( mb == nb ) &&
275 ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) )
276 {
278 {
279 npq = ( ( mblks < 2 ) ? imbloc :
280 imbloc + ( mblks - 2 ) * mb + lmbloc );
281 npq =
MIN( npq, kb );
282 if( rows ) add( &npq, &K, ALPHA, A, &LDA, BETA, B, &LDB );
283 else add( &K, &npq, ALPHA, A, &LDA, BETA, B, &LDB );
284 }
285 return( npq );
286 }
287 pmb = nprow * mb;
288 qnb = npcol * nb;
289
290
291
292
293
294 GoSouth = ( lcmt00 > iupp );
295 GoEast = ( lcmt00 < ilow );
296
297
298
299 if( !( GoSouth ) && !( GoEast ) )
300 {
301
302
303
304 if( lcmt00 >= 0 )
305 {
306 tmp1 = imbloc - lcmt00; tmp1 =
MAX( 0, tmp1 );
307 tmp2 =
MIN( tmp1, inbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
308 add( m, n, ALPHA, A+lcmt00*inca, &LDA, BETA, B, &LDB );
309 }
310 else
311 {
312 tmp1 = inbloc + lcmt00; tmp1 =
MAX( 0, tmp1 );
313 tmp2 =
MIN( tmp1, imbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
314 add( m, n, ALPHA, A, &LDA, BETA, B-lcmt00*incb, &LDB );
315 }
316 if( ( kb -= tmp2 ) == 0 ) return( npq );
317
318
319
320
321
322 GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
323 }
324
325 if( GoSouth )
326 {
327
328
329
330
331 lcmt00 -= iupp - upp + pmb; mblks--; A += imbloc * inca;
332
333
334
335
336 while( mblks && ( lcmt00 > upp ) )
337 { lcmt00 -= pmb; mblks--; A += mb * inca; }
338
339
340
341 if( mblks <= 0 ) return( npq );
342
343
344
345
346
347
348 lcmt = lcmt00; mblkd = mblks; aptrd = A;
349
350 while( mblkd && ( lcmt >= ilow ) )
351 {
352
353
354
355 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
356 if( lcmt >= 0 )
357 {
358 tmp1 = mbloc - lcmt; tmp1 =
MAX( 0, tmp1 );
359 tmp2 =
MIN( tmp1, inbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
360 add( m, n, ALPHA, aptrd+lcmt*inca, &LDA, BETA, B, &LDB );
361 }
362 else
363 {
364 tmp1 = inbloc + lcmt; tmp1 =
MAX( 0, tmp1 );
365 tmp2 =
MIN( tmp1, mbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
366 add( m, n, ALPHA, aptrd, &LDA, BETA, B-lcmt*incb, &LDB );
367 }
368 if( ( kb -= tmp2 ) == 0 ) return( npq );
369
370
371
372 lcmt -= pmb; mblkd--; aptrd += mbloc * inca;
373 }
374
375
376
377 lcmt00 += low - ilow + qnb; nblks--; B += inbloc * incb;
378 }
379 else if( GoEast )
380 {
381
382
383
384
385 lcmt00 += low - ilow + qnb; nblks--; B += inbloc * incb;
386
387
388
389
390
391 while( nblks && ( lcmt00 < low ) )
392 { lcmt00 += qnb; nblks--; B += nb * incb; }
393
394
395
396 if( nblks <= 0 ) return( npq );
397
398
399
400
401
402 lcmt = lcmt00; nblkd = nblks; bptrd = B;
403
404 while( nblkd && ( lcmt <= iupp ) )
405 {
406
407
408
409 nbloc = ( ( nblkd == 1 ) ? lnbloc : nb );
410 if( lcmt >= 0 )
411 {
412 tmp1 = imbloc - lcmt; tmp1 =
MAX( 0, tmp1 );
413 tmp2 =
MIN( tmp1, nbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
414 add( m, n, ALPHA, A+lcmt*inca, &LDA, BETA, bptrd, &LDB );
415 }
416 else
417 {
418 tmp1 = nbloc + lcmt; tmp1 =
MAX( 0, tmp1 );
419 tmp2 =
MIN( tmp1, imbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
420 add( m, n, ALPHA, A, &LDA, BETA, bptrd-lcmt*incb, &LDB );
421 }
422 if( ( kb -= tmp2 ) == 0 ) return( npq );
423
424
425
426 lcmt += qnb; nblkd--; bptrd += nbloc * incb;
427 }
428
429
430
431 lcmt00 -= iupp - upp + pmb; mblks--; A += imbloc * inca;
432 }
433
434
435
436 do
437 {
438
439
440
441
442 if( ( lcmt00 < low ) || ( lcmt00 > upp ) )
443 {
444 while( mblks && nblks )
445 {
446 while( mblks && ( lcmt00 > upp ) )
447 { lcmt00 -= pmb; mblks--; A += mb * inca; }
448 if( lcmt00 >= low ) break;
449 while( nblks && ( lcmt00 < low ) )
450 { lcmt00 += qnb; nblks--; B += nb * incb; }
451 if( lcmt00 <= upp ) break;
452 }
453 }
454 if( !( mblks ) || !( nblks ) ) return( npq );
455
456
457
458
459
460 nbloc = ( ( nblks == 1 ) ? lnbloc : nb );
461 lcmt = lcmt00; mblkd = mblks; aptrd = A;
462
463 while( mblkd && ( lcmt >= low ) )
464 {
465
466
467
468 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
469 if( lcmt >= 0 )
470 {
471 tmp1 = mbloc - lcmt; tmp1 =
MAX( 0, tmp1 );
472 tmp2 =
MIN( tmp1, nbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
473 add( m, n, ALPHA, aptrd+lcmt*inca, &LDA, BETA, B, &LDB );
474 }
475 else
476 {
477 tmp1 = nbloc + lcmt; tmp1 =
MAX( 0, tmp1 );
478 tmp2 =
MIN( tmp1, mbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
479 add( m, n, ALPHA, aptrd, &LDA, BETA, B-lcmt*incb, &LDB );
480 }
481 if( ( kb -= tmp2 ) == 0 ) return( npq );
482
483
484
485 lcmt -= pmb; mblkd--; aptrd += mbloc * inca;
486 }
487
488
489
490 lcmt00 += qnb; nblks--; B += nbloc * incb;
491
492
493
494 } while( nblks > 0 );
495
496
497
498 return( npq );
499 }
500 else
501 {
502
503
504
505 if( rows )
506 {
507
508
509
510 inca = size;
511 incb = ( notran ? size : LDB * size );
512 m = &tmp2;
513 n = &K;
514 }
515 else
516 {
517
518
519
520 inca = LDA * size;
521 incb = ( notran ? LDB * size : size );
522 m = &K;
523 n = &tmp2;
524 }
525 kb = MN;
526
527
528
529
530
531 if( ( ( lcmt00 == 0 ) && ( VM->
imb1 == VM->
inb1 ) && ( mb == nb ) &&
532 ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) )
533 {
535 {
536 npq = ( ( nblks < 2 ) ? inbloc :
537 inbloc + ( nblks - 2 ) * nb + lnbloc );
538 npq =
MIN( npq, kb );
539 if( rows ) add( &npq, &K, ALPHA, A, &LDA, BETA, B, &LDB );
540 else add( &K, &npq, ALPHA, A, &LDA, BETA, B, &LDB );
541 }
542 return( npq );
543 }
544 pmb = nprow * mb;
545 qnb = npcol * nb;
546
547
548
549
550
551 GoSouth = ( lcmt00 > iupp );
552 GoEast = ( lcmt00 < ilow );
553
554 if( !( GoSouth ) && !( GoEast ) )
555 {
556
557
558
559 if( lcmt00 >= 0 )
560 {
561 tmp1 = imbloc - lcmt00; tmp1 =
MAX( 0, tmp1 );
562 tmp2 =
MIN( tmp1, inbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
563 add( m, n, ALPHA, A, &LDA, BETA, B+lcmt00*incb, &LDB );
564 }
565 else
566 {
567 tmp1 = inbloc + lcmt00; tmp1 =
MAX( 0, tmp1 );
568 tmp2 =
MIN( tmp1, imbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
569 add( m, n, ALPHA, A-lcmt00*inca, &LDA, BETA, B, &LDB );
570 }
571 if( ( kb -= tmp2 ) == 0 ) return( npq );
572
573
574
575
576
577 GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
578 }
579
580 if( GoSouth )
581 {
582
583
584
585
586 lcmt00 -= iupp - upp + pmb; mblks--; B += imbloc * incb;
587
588
589
590
591 while( mblks && ( lcmt00 > upp ) )
592 { lcmt00 -= pmb; mblks--; B += mb * incb; }
593
594
595
596 if( mblks <= 0 ) return( npq );
597
598
599
600
601
602
603 lcmt = lcmt00; mblkd = mblks; bptrd = B;
604
605 while( mblkd && ( lcmt >= ilow ) )
606 {
607
608
609
610 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
611 if( lcmt >= 0 )
612 {
613 tmp1 = mbloc - lcmt; tmp1 =
MAX( 0, tmp1 );
614 tmp2 =
MIN( tmp1, inbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
615 add( m, n, ALPHA, A, &LDA, BETA, bptrd+lcmt*incb, &LDB );
616 }
617 else
618 {
619 tmp1 = inbloc + lcmt; tmp1 =
MAX( 0, tmp1 );
620 tmp2 =
MIN( tmp1, mbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
621 add( m, n, ALPHA, A-lcmt*inca, &LDA, BETA, bptrd, &LDB );
622 }
623 if( ( kb -= tmp2 ) == 0 ) return( npq );
624
625
626
627 lcmt -= pmb; mblkd--; bptrd += mbloc * incb;
628 }
629
630
631
632 lcmt00 += low - ilow + qnb; nblks--; A += inbloc * inca;
633 }
634 else if( GoEast )
635 {
636
637
638
639
640 lcmt00 += low - ilow + qnb; nblks--; A += inbloc * inca;
641
642
643
644
645
646 while( nblks && ( lcmt00 < low ) )
647 { lcmt00 += qnb; nblks--; A += nb * inca; }
648
649
650
651 if( nblks <= 0 ) return( npq );
652
653
654
655
656
657 lcmt = lcmt00; nblkd = nblks; aptrd = A;
658
659 while( nblkd && ( lcmt <= iupp ) )
660 {
661
662
663
664 nbloc = ( ( nblkd == 1 ) ? lnbloc : nb );
665 if( lcmt >= 0 )
666 {
667 tmp1 = imbloc - lcmt; tmp1 =
MAX( 0, tmp1 );
668 tmp2 =
MIN( tmp1, nbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
669 add( m, n, ALPHA, aptrd, &LDA, BETA, B+lcmt*incb, &LDB );
670 }
671 else
672 {
673 tmp1 = nbloc + lcmt; tmp1 =
MAX( 0, tmp1 );
674 tmp2 =
MIN( tmp1, imbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
675 add( m, n, ALPHA, aptrd-lcmt*inca, &LDA, BETA, B, &LDB );
676 }
677 if( ( kb -= tmp2 ) == 0 ) return( npq );
678
679
680
681 lcmt += qnb; nblkd--; aptrd += nbloc * inca;
682 }
683
684
685
686 lcmt00 -= iupp - upp + pmb; mblks--; B += imbloc * incb;
687 }
688
689
690
691 do
692 {
693
694
695
696
697 if( ( lcmt00 < low ) || ( lcmt00 > upp ) )
698 {
699 while( mblks && nblks )
700 {
701 while( mblks && ( lcmt00 > upp ) )
702 { lcmt00 -= pmb; mblks--; B += mb * incb; }
703 if( lcmt00 >= low ) break;
704 while( nblks && ( lcmt00 < low ) )
705 { lcmt00 += qnb; nblks--; A += nb * inca; }
706 if( lcmt00 <= upp ) break;
707 }
708 }
709 if( !( mblks ) || !( nblks ) ) return( npq );
710
711
712
713
714
715 nbloc = ( ( nblks == 1 ) ? lnbloc : nb );
716 lcmt = lcmt00; mblkd = mblks; bptrd = B;
717
718 while( mblkd && ( lcmt >= low ) )
719 {
720
721
722
723 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
724 if( lcmt >= 0 )
725 {
726 tmp1 = mbloc - lcmt; tmp1 =
MAX( 0, tmp1 );
727 tmp2 =
MIN( tmp1, nbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
728 add( m, n, ALPHA, A, &LDA, BETA, bptrd+lcmt*incb, &LDB );
729 }
730 else
731 {
732 tmp1 = nbloc + lcmt; tmp1 =
MAX( 0, tmp1 );
733 tmp2 =
MIN( tmp1, mbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
734 add( m, n, ALPHA, A-lcmt*inca, &LDA, BETA, bptrd, &LDB );
735 }
736 if( ( kb -= tmp2 ) == 0 ) return( npq );
737
738
739
740 lcmt -= pmb; mblkd--; bptrd += mbloc * incb;
741 }
742
743
744
745 lcmt00 += qnb; nblks--; A += nbloc * inca;
746
747
748
749 } while( nblks > 0 );
750
751
752
753 return( npq );
754 }
755
756
757
758}