SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
PB_CVMloc.c
Go to the documentation of this file.
1/* ---------------------------------------------------------------------
2*
3* -- PBLAS auxiliary routine (version 2.0) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* April 1, 1998
7*
8* ---------------------------------------------------------------------
9*/
10/*
11* Include files
12*/
13#include "../pblas.h"
14#include "../PBpblas.h"
15#include "../PBtools.h"
16#include "../PBblacs.h"
17#include "../PBblas.h"
18
19#ifdef __STDC__
21 char * VROCS, char * ROCS, char * UNPA, char * TRANS,
22 Int MN, Int K, char * ALPHA, char * A, Int LDA,
23 char * BETA, char * B, Int LDB )
24#else
25Int PB_CVMloc( TYPE, VM, VROCS, ROCS, UNPA, TRANS, MN, K, ALPHA, A,
26 LDA, BETA, B, LDB )
27/*
28* .. Scalar Arguments ..
29*/
30 Int K, LDA, LDB, MN;
31 char * ALPHA, * BETA;
32/*
33* .. Array Arguments ..
34*/
35 char * VROCS, * ROCS, * UNPA, * TRANS;
36 PBTYP_T * TYPE;
37 PB_VM_T * VM;
38 char * A, * B;
39#endif
40{
41/*
42* Purpose
43* =======
44*
45* PB_CVMloc packs a one-dimensional distributed array A into another
46* one-dimensional distributed array B, or unpacks a one-dimensional
47* distributed array B into a one-dimensional distributed array A. This
48* operation is triggered by a virtual distributed array.
49*
50* Arguments
51* =========
52*
53* TYPE (local input) pointer to a PBTYP_T structure
54* On entry, TYPE is a pointer to a structure of type PBTYP_T,
55* that contains type information (See pblas.h).
56*
57* VM (local input) pointer to a PB_VM_T structure
58* On entry, VM is a pointer to a structure of type PB_VM_T,
59* that contains the virtual matrix information (see pblas.h).
60*
61* VROCS (local input) pointer to CHAR
62* On entry, VROCS specifies if the rows or columns of the vir-
63* tual distributed array grid should be used for the packing or
64* unpacking operation as follows:
65* VROCS = 'R' or 'r', the rows should be used,
66* VROCS = 'C' or 'c', the columns should be used.
67*
68* ROCS (local input) pointer to CHAR
69* On entry, ROCS specifies if rows or columns should be packed
70* or unpacked as follows:
71* ROCS = 'R' or 'r', rows should be (un)packed,
72* ROCS = 'C' or 'c', columns should be (un)packed.
73*
74* UNPA (local input) pointer to CHAR
75* On entry, UNPA specifies if the data should be packed or un-
76* packed as follows:
77* UNPA = 'P' or 'p', packing (A into B),
78* UNPA = 'U' or 'u', unpacking (B into A).
79*
80* TRANS (local input) pointer to CHAR
81* On entry, TRANS specifies if conjugation, transposition or
82* conjugate transposition should occur during the (un)packing
83* operation as follows:
84* TRANS = 'N' or 'n', natural (un)packing,
85* TRANS = 'Z' or 'z', conjugated (un)packing,
86* TRANS = 'T' or 'T', transposed (un)packing,
87* TRANS = 'C' or 'c', conjugate transposed (un)packing.
88*
89* MN (local input) INTEGER
90* On entry, MN specifies the number of rows or columns to be
91* (un)packed. MN must be at least zero.
92*
93* K (local input) INTEGER
94* On entry, K specifies the length of the non-distributed di-
95* mension to be (un)packed. K must be at least zero.
96*
97* ALPHA (local input) pointer to CHAR
98* On entry, ALPHA specifies the scalar alpha.
99*
100* A (local input/local output) pointer to CHAR
101* On entry, A points to an array of dimension (LDA, Ka), where
102* Ka is K when ROCS is 'R' or 'r' and when ROCS is 'C' or 'c',
103* Ka is IMBLOC+(MBLKS-2)*MB+LMB when VROCS is 'R' or 'r' and
104* when VROCS is 'C' or 'c', Ka is INBLOC+(NBLKS-2)*NB+LNB. This
105* array contains unpacked data.
106*
107* LDA (local input) INTEGER
108* On entry, LDA specifies the leading dimension of the array A.
109* LDA must be at least MAX( 1, K ) when ROCS = 'C' or 'c' and
110* MAX( 1, IMBLOC+(MBLKS-2)*MB+LMB ) when ROCS is 'R' or 'r' and
111* VROCS is 'R' or 'r', and MAX( 1, INBLOC+(NBLKS-2)*NB+LNB )
112* when ROCS is 'R' or 'r' and VROCS is 'C' or 'c'.
113*
114* BETA (local input) pointer to CHAR
115* On entry, BETA specifies the scalar beta.
116*
117* B (local input/local output) pointer to CHAR
118* On entry, B points to an array of dimension (LDB, Kb). When
119* TRANS is 'N', 'n', 'Z' or 'z', Kb is K when ROCS is 'R'or
120* 'r', and when ROCS is 'C' or 'c', Kb is IMBLOC+(MBLKS-2)*MB+
121* LMB when VROCS is 'C' or 'c' and when VROCS is 'R', Kb is
122* INBLOC+(NBLKS-2)*NB+LNB. When TRANS is 'T', 't', 'C' or 'c',
123* Kb is K when ROCS is 'C' or 'c' and when ROCS is 'R' or 'r',
124* Kb is IMBLOC+(MBLKS-2)*MB+LMB when VROCS is 'C' or 'c' and
125* when VROCS is 'R' or 'r', Kb is INBLOC+(NBLKS-2)*NB+LNB. This
126* array contains unpacked data.
127*
128* LDB (local input) INTEGER
129* On entry, LDB specifies the leading dimension of the array B.
130*
131* -- Written on April 1, 1998 by
132* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
133*
134* ---------------------------------------------------------------------
135*/
136/*
137* .. Local Scalars ..
138*/
139 Int GoEast, GoSouth, ilow, imbloc, inbloc, inca, incb, iupp, kb,
140 lcmt, lcmt00, lmbloc, lnbloc, low, mb, mblkd, mblks, mbloc,
141 * m, * n, nb, nblkd, nblks, nbloc, notran, npcol, npq=0,
142 nprow, pmb, qnb, rows, size, tmp1, tmp2, upp;
143 MMADD_T add;
144 char * aptrd, * bptrd;
145/* ..
146* .. Executable Statements ..
147*
148*/
149 mblks = VM->mblks; nblks = VM->nblks;
150/*
151* Quick return if I don't own any blocks.
152*/
153 if( ( mblks == 0 ) || ( nblks == 0 ) ) return( 0 );
154/*
155* Retrieve the contents of VM structure fields
156*/
157 lcmt00 = VM->lcmt00;
158 imbloc = VM->imbloc; mb = VM->mb; lmbloc = VM->lmbloc; upp = VM->upp;
159 iupp = VM->iupp; nprow = VM->nprow;
160 inbloc = VM->inbloc; nb = VM->nb; lnbloc = VM->lnbloc; low = VM->low;
161 ilow = VM->ilow; npcol = VM->npcol;
162
163 if( Mupcase( UNPA[0] ) == CPACKING )
164 {
165/*
166* B is the distributed target, A is the distributed source
167*/
168 if( Mupcase( TRANS[0] ) == CNOTRAN )
169 {
170/*
171* Add A to B
172*/
173 notran = 1;
174 add = TYPE->Fmmadd;
175 }
176 else if( Mupcase( TRANS[0] ) == CCONJG )
177 {
178/*
179* Add the conjugate of A to B
180*/
181 notran = 1;
182 add = TYPE->Fmmcadd;
183 }
184 else if( Mupcase( TRANS[0] ) == CTRAN )
185 {
186/*
187* Add the tranpose of A to B
188*/
189 notran = 0;
190 add = TYPE->Fmmtadd;
191 }
192 else
193 {
194/*
195* Add the conjugate tranpose of A to B
196*/
197 notran = 0;
198 add = TYPE->Fmmtcadd;
199 }
200 }
201 else
202 {
203/*
204* A is the distributed target, B is the distributed source
205*/
206 if( Mupcase( TRANS[0] ) == CNOTRAN )
207 {
208/*
209* Add B to A
210*/
211 notran = 1;
212 add = TYPE->Fmmdda;
213 }
214 else if( Mupcase( TRANS[0] ) == CCONJG )
215 {
216/*
217* Add the conjugate of B to A
218*/
219 notran = 1;
220 add = TYPE->Fmmddac;
221 }
222 else if( Mupcase( TRANS[0] ) == CTRAN )
223 {
224/*
225* Add the tranpose of B to A
226*/
227 notran = 0;
228 add = TYPE->Fmmddat;
229 }
230 else
231 {
232/*
233* Add the conjugate tranpose of B to A
234*/
235 notran = 0;
236 add = TYPE->Fmmddact;
237 }
238 }
239
240 size = TYPE->size;
241 rows = ( Mupcase( ROCS[0] ) == CROW );
242
243 if( Mupcase( VROCS[0] ) == CROW )
244 {
245/*
246* (un)packing using rows of virtual matrix
247*/
248 if( rows )
249 {
250/*
251* (un)packing rows of mn by k array A.
252*/
253 inca = size;
254 incb = ( notran ? size : LDB * size );
255 m = &tmp2;
256 n = &K;
257 }
258 else
259 {
260/*
261* (un)packing columns of k by mn array A
262*/
263 inca = LDA * size;
264 incb = ( notran ? LDB * size : size );
265 m = &K;
266 n = &tmp2;
267 }
268 kb = MN;
269/*
270* From the (un)packing point of view the only valuable shortcut is when the
271* virtual grid and the blocks are square, and the offset is zero or the grid
272* is 1x1.
273*/
274 if( ( ( lcmt00 == 0 ) && ( VM->imb1 == VM->inb1 ) && ( mb == nb ) &&
275 ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) )
276 {
277 if( VM->prow == VM->pcol )
278 {
279 npq = ( ( mblks < 2 ) ? imbloc :
280 imbloc + ( mblks - 2 ) * mb + lmbloc );
281 npq = MIN( npq, kb );
282 if( rows ) add( &npq, &K, ALPHA, A, &LDA, BETA, B, &LDB );
283 else add( &K, &npq, ALPHA, A, &LDA, BETA, B, &LDB );
284 }
285 return( npq );
286 }
287 pmb = nprow * mb;
288 qnb = npcol * nb;
289/*
290* Handle separately the first row and/or column of the LCM table. Update the
291* LCM value of the curent block lcmt00, as well as the number of rows and
292* columns mblks and nblks remaining in the LCM table.
293*/
294 GoSouth = ( lcmt00 > iupp );
295 GoEast = ( lcmt00 < ilow );
296/*
297* Go through the table looking for blocks owning diagonal entries.
298*/
299 if( !( GoSouth ) && !( GoEast ) )
300 {
301/*
302* The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp
303*/
304 if( lcmt00 >= 0 )
305 {
306 tmp1 = imbloc - lcmt00; tmp1 = MAX( 0, tmp1 );
307 tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
308 add( m, n, ALPHA, A+lcmt00*inca, &LDA, BETA, B, &LDB );
309 }
310 else
311 {
312 tmp1 = inbloc + lcmt00; tmp1 = MAX( 0, tmp1 );
313 tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
314 add( m, n, ALPHA, A, &LDA, BETA, B-lcmt00*incb, &LDB );
315 }
316 if( ( kb -= tmp2 ) == 0 ) return( npq );
317/*
318* Decide whether one should go south or east in the table: Go east if
319* the block below the current one only owns lower entries. If this block,
320* however, owns diagonals, then go south.
321*/
322 GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
323 }
324
325 if( GoSouth )
326 {
327/*
328* Go one step south in the LCM table. Adjust the current LCM value as well as
329* the pointer to A. The pointer to B remains unchanged.
330*/
331 lcmt00 -= iupp - upp + pmb; mblks--; A += imbloc * inca;
332/*
333* While there are blocks remaining that own upper entries, keep going south.
334* Adjust the current LCM value as well as the pointer to A accordingly.
335*/
336 while( mblks && ( lcmt00 > upp ) )
337 { lcmt00 -= pmb; mblks--; A += mb * inca; }
338/*
339* Return if no more row in the LCM table.
340*/
341 if( mblks <= 0 ) return( npq );
342/*
343* lcmt00 <= upp. The current block owns either diagonals or lower entries.
344* Save the current position in the LCM table. After this column has been
345* completely taken care of, re-start from this row and the next column of
346* the LCM table.
347*/
348 lcmt = lcmt00; mblkd = mblks; aptrd = A;
349
350 while( mblkd && ( lcmt >= ilow ) )
351 {
352/*
353* A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found.
354*/
355 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
356 if( lcmt >= 0 )
357 {
358 tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 );
359 tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
360 add( m, n, ALPHA, aptrd+lcmt*inca, &LDA, BETA, B, &LDB );
361 }
362 else
363 {
364 tmp1 = inbloc + lcmt; tmp1 = MAX( 0, tmp1 );
365 tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
366 add( m, n, ALPHA, aptrd, &LDA, BETA, B-lcmt*incb, &LDB );
367 }
368 if( ( kb -= tmp2 ) == 0 ) return( npq );
369/*
370* Keep going south until there are no more blocks owning diagonals
371*/
372 lcmt -= pmb; mblkd--; aptrd += mbloc * inca;
373 }
374/*
375* I am done with the first column of the LCM table. Go to the next column.
376*/
377 lcmt00 += low - ilow + qnb; nblks--; B += inbloc * incb;
378 }
379 else if( GoEast )
380 {
381/*
382* Go one step east in the LCM table. Adjust the current LCM value as
383* well as the pointer to B. The pointer to A remains unchanged.
384*/
385 lcmt00 += low - ilow + qnb; nblks--; B += inbloc * incb;
386/*
387* While there are blocks remaining that own lower entries, keep going east
388* in the LCM table. Adjust the current LCM value as well as the pointer to
389* B accordingly.
390*/
391 while( nblks && ( lcmt00 < low ) )
392 { lcmt00 += qnb; nblks--; B += nb * incb; }
393/*
394* Return if no more column in the LCM table.
395*/
396 if( nblks <= 0 ) return( npq );
397/*
398* lcmt00 >= low. The current block owns either diagonals or upper entries. Save
399* the current position in the LCM table. After this row has been completely
400* taken care of, re-start from this column and the next row of the LCM table.
401*/
402 lcmt = lcmt00; nblkd = nblks; bptrd = B;
403
404 while( nblkd && ( lcmt <= iupp ) )
405 {
406/*
407* A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found.
408*/
409 nbloc = ( ( nblkd == 1 ) ? lnbloc : nb );
410 if( lcmt >= 0 )
411 {
412 tmp1 = imbloc - lcmt; tmp1 = MAX( 0, tmp1 );
413 tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
414 add( m, n, ALPHA, A+lcmt*inca, &LDA, BETA, bptrd, &LDB );
415 }
416 else
417 {
418 tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 );
419 tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
420 add( m, n, ALPHA, A, &LDA, BETA, bptrd-lcmt*incb, &LDB );
421 }
422 if( ( kb -= tmp2 ) == 0 ) return( npq );
423/*
424* Keep going east until there are no more blocks owning diagonals.
425*/
426 lcmt += qnb; nblkd--; bptrd += nbloc * incb;
427 }
428/*
429* I am done with the first row of the LCM table. Go to the next row.
430*/
431 lcmt00 -= iupp - upp + pmb; mblks--; A += imbloc * inca;
432 }
433/*
434* Loop over the remaining columns of the LCM table.
435*/
436 do
437 {
438/*
439* If the current block does not have diagonal elements, find the closest one in
440* the LCM table having some.
441*/
442 if( ( lcmt00 < low ) || ( lcmt00 > upp ) )
443 {
444 while( mblks && nblks )
445 {
446 while( mblks && ( lcmt00 > upp ) )
447 { lcmt00 -= pmb; mblks--; A += mb * inca; }
448 if( lcmt00 >= low ) break;
449 while( nblks && ( lcmt00 < low ) )
450 { lcmt00 += qnb; nblks--; B += nb * incb; }
451 if( lcmt00 <= upp ) break;
452 }
453 }
454 if( !( mblks ) || !( nblks ) ) return( npq );
455/*
456* The current block owns diagonals. Save the current position in the LCM table.
457* After this column has been completely taken care of, re-start from this row
458* and the next column in the LCM table.
459*/
460 nbloc = ( ( nblks == 1 ) ? lnbloc : nb );
461 lcmt = lcmt00; mblkd = mblks; aptrd = A;
462
463 while( mblkd && ( lcmt >= low ) )
464 {
465/*
466* A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found.
467*/
468 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
469 if( lcmt >= 0 )
470 {
471 tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 );
472 tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
473 add( m, n, ALPHA, aptrd+lcmt*inca, &LDA, BETA, B, &LDB );
474 }
475 else
476 {
477 tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 );
478 tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
479 add( m, n, ALPHA, aptrd, &LDA, BETA, B-lcmt*incb, &LDB );
480 }
481 if( ( kb -= tmp2 ) == 0 ) return( npq );
482/*
483* Keep going south until there are no more blocks owning diagonals
484*/
485 lcmt -= pmb; mblkd--; aptrd += mbloc * inca;
486 }
487/*
488* I am done with this column of the LCM table. Go to the next column ...
489*/
490 lcmt00 += qnb; nblks--; B += nbloc * incb;
491/*
492* ... until there are no more columns.
493*/
494 } while( nblks > 0 );
495/*
496* Return the number of diagonals found.
497*/
498 return( npq );
499 }
500 else
501 {
502/*
503* (un)packing using columns of virtual matrix
504*/
505 if( rows )
506 {
507/*
508* (un)packing rows of mn by k array A
509*/
510 inca = size;
511 incb = ( notran ? size : LDB * size );
512 m = &tmp2;
513 n = &K;
514 }
515 else
516 {
517/*
518* (un)packing columns of k by mn array A
519*/
520 inca = LDA * size;
521 incb = ( notran ? LDB * size : size );
522 m = &K;
523 n = &tmp2;
524 }
525 kb = MN;
526/*
527* From the (un)packing point of view the only valuable shortcut is when the
528* virtual grid and the blocks are square, and the offset is zero or the grid
529* is 1x1.
530*/
531 if( ( ( lcmt00 == 0 ) && ( VM->imb1 == VM->inb1 ) && ( mb == nb ) &&
532 ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) )
533 {
534 if( VM->prow == VM->pcol )
535 {
536 npq = ( ( nblks < 2 ) ? inbloc :
537 inbloc + ( nblks - 2 ) * nb + lnbloc );
538 npq = MIN( npq, kb );
539 if( rows ) add( &npq, &K, ALPHA, A, &LDA, BETA, B, &LDB );
540 else add( &K, &npq, ALPHA, A, &LDA, BETA, B, &LDB );
541 }
542 return( npq );
543 }
544 pmb = nprow * mb;
545 qnb = npcol * nb;
546/*
547* Handle separately the first row and/or column of the LCM table. Update the
548* LCM value of the curent block lcmt00, as well as the number of rows and
549* columns mblks and nblks remaining in the LCM table.
550*/
551 GoSouth = ( lcmt00 > iupp );
552 GoEast = ( lcmt00 < ilow );
553
554 if( !( GoSouth ) && !( GoEast ) )
555 {
556/*
557* The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp
558*/
559 if( lcmt00 >= 0 )
560 {
561 tmp1 = imbloc - lcmt00; tmp1 = MAX( 0, tmp1 );
562 tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
563 add( m, n, ALPHA, A, &LDA, BETA, B+lcmt00*incb, &LDB );
564 }
565 else
566 {
567 tmp1 = inbloc + lcmt00; tmp1 = MAX( 0, tmp1 );
568 tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
569 add( m, n, ALPHA, A-lcmt00*inca, &LDA, BETA, B, &LDB );
570 }
571 if( ( kb -= tmp2 ) == 0 ) return( npq );
572/*
573* Decide whether one should go south or east in the table: Go east if
574* the block below the current one only owns lower entries. If this block,
575* however, owns diagonals, then go south.
576*/
577 GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
578 }
579
580 if( GoSouth )
581 {
582/*
583* Go one step south in the LCM table. Adjust the current LCM value as well as
584* the pointer to B. The pointer to A remains unchanged.
585*/
586 lcmt00 -= iupp - upp + pmb; mblks--; B += imbloc * incb;
587/*
588* While there are blocks remaining that own upper entries, keep going south.
589* Adjust the current LCM value as well as the pointer to B accordingly.
590*/
591 while( mblks && ( lcmt00 > upp ) )
592 { lcmt00 -= pmb; mblks--; B += mb * incb; }
593/*
594* Return if no more row in the LCM table.
595*/
596 if( mblks <= 0 ) return( npq );
597/*
598* lcmt00 <= upp. The current block owns either diagonals or lower entries.
599* Save the current position in the LCM table. After this column has been
600* completely taken care of, re-start from this row and the next column of
601* the LCM table.
602*/
603 lcmt = lcmt00; mblkd = mblks; bptrd = B;
604
605 while( mblkd && ( lcmt >= ilow ) )
606 {
607/*
608* A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found.
609*/
610 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
611 if( lcmt >= 0 )
612 {
613 tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 );
614 tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
615 add( m, n, ALPHA, A, &LDA, BETA, bptrd+lcmt*incb, &LDB );
616 }
617 else
618 {
619 tmp1 = inbloc + lcmt; tmp1 = MAX( 0, tmp1 );
620 tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
621 add( m, n, ALPHA, A-lcmt*inca, &LDA, BETA, bptrd, &LDB );
622 }
623 if( ( kb -= tmp2 ) == 0 ) return( npq );
624/*
625* Keep going south until there are no more blocks owning diagonals
626*/
627 lcmt -= pmb; mblkd--; bptrd += mbloc * incb;
628 }
629/*
630* I am done with the first column of the LCM table. Go to the next column.
631*/
632 lcmt00 += low - ilow + qnb; nblks--; A += inbloc * inca;
633 }
634 else if( GoEast )
635 {
636/*
637* Go one step east in the LCM table. Adjust the current LCM value as
638* well as the pointer to A. The pointer to B remains unchanged.
639*/
640 lcmt00 += low - ilow + qnb; nblks--; A += inbloc * inca;
641/*
642* While there are blocks remaining that own lower entries, keep going east
643* in the LCM table. Adjust the current LCM value as well as the pointer to
644* A accordingly.
645*/
646 while( nblks && ( lcmt00 < low ) )
647 { lcmt00 += qnb; nblks--; A += nb * inca; }
648/*
649* Return if no more column in the LCM table.
650*/
651 if( nblks <= 0 ) return( npq );
652/*
653* lcmt00 >= low. The current block owns either diagonals or upper entries. Save
654* the current position in the LCM table. After this row has been completely
655* taken care of, re-start from this column and the next row of the LCM table.
656*/
657 lcmt = lcmt00; nblkd = nblks; aptrd = A;
658
659 while( nblkd && ( lcmt <= iupp ) )
660 {
661/*
662* A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found.
663*/
664 nbloc = ( ( nblkd == 1 ) ? lnbloc : nb );
665 if( lcmt >= 0 )
666 {
667 tmp1 = imbloc - lcmt; tmp1 = MAX( 0, tmp1 );
668 tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
669 add( m, n, ALPHA, aptrd, &LDA, BETA, B+lcmt*incb, &LDB );
670 }
671 else
672 {
673 tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 );
674 tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
675 add( m, n, ALPHA, aptrd-lcmt*inca, &LDA, BETA, B, &LDB );
676 }
677 if( ( kb -= tmp2 ) == 0 ) return( npq );
678/*
679* Keep going east until there are no more blocks owning diagonals.
680*/
681 lcmt += qnb; nblkd--; aptrd += nbloc * inca;
682 }
683/*
684* I am done with the first row of the LCM table. Go to the next row.
685*/
686 lcmt00 -= iupp - upp + pmb; mblks--; B += imbloc * incb;
687 }
688/*
689* Loop over the remaining columns of the LCM table.
690*/
691 do
692 {
693/*
694* If the current block does not have diagonal elements, find the closest one in
695* the LCM table having some.
696*/
697 if( ( lcmt00 < low ) || ( lcmt00 > upp ) )
698 {
699 while( mblks && nblks )
700 {
701 while( mblks && ( lcmt00 > upp ) )
702 { lcmt00 -= pmb; mblks--; B += mb * incb; }
703 if( lcmt00 >= low ) break;
704 while( nblks && ( lcmt00 < low ) )
705 { lcmt00 += qnb; nblks--; A += nb * inca; }
706 if( lcmt00 <= upp ) break;
707 }
708 }
709 if( !( mblks ) || !( nblks ) ) return( npq );
710/*
711* The current block owns diagonals. Save the current position in the LCM table.
712* After this column has been completely taken care of, re-start from this row
713* and the next column in the LCM table.
714*/
715 nbloc = ( ( nblks == 1 ) ? lnbloc : nb );
716 lcmt = lcmt00; mblkd = mblks; bptrd = B;
717
718 while( mblkd && ( lcmt >= low ) )
719 {
720/*
721* A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found.
722*/
723 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
724 if( lcmt >= 0 )
725 {
726 tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 );
727 tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
728 add( m, n, ALPHA, A, &LDA, BETA, bptrd+lcmt*incb, &LDB );
729 }
730 else
731 {
732 tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 );
733 tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
734 add( m, n, ALPHA, A-lcmt*inca, &LDA, BETA, bptrd, &LDB );
735 }
736 if( ( kb -= tmp2 ) == 0 ) return( npq );
737/*
738* Keep going south until there are no more blocks owning diagonals
739*/
740 lcmt -= pmb; mblkd--; bptrd += mbloc * incb;
741 }
742/*
743* I am done with this column of the LCM table. Go to the next column ...
744*/
745 lcmt00 += qnb; nblks--; A += nbloc * inca;
746/*
747* ... until there are no more columns.
748*/
749 } while( nblks > 0 );
750/*
751* Return the number of diagonals found.
752*/
753 return( npq );
754 }
755/*
756* End of PB_CVMloc
757*/
758}
#define Int
Definition Bconfig.h:22
F_VOID_FCT(* MMADD_T)()
Definition pblas.h:288
#define CROW
Definition PBblacs.h:21
#define CCONJG
Definition PBblas.h:21
#define CNOTRAN
Definition PBblas.h:18
#define CTRAN
Definition PBblas.h:20
#define MAX(a_, b_)
Definition PBtools.h:77
#define MIN(a_, b_)
Definition PBtools.h:76
#define CPACKING
Definition PBtools.h:50
#define Mupcase(C)
Definition PBtools.h:83
Int PB_CVMloc()
#define TYPE
Definition clamov.c:7
Int lnbloc
Definition pblas.h:456
Int low
Definition pblas.h:459
Int iupp
Definition pblas.h:447
Int npcol
Definition pblas.h:461
Int nprow
Definition pblas.h:450
Int lcmt00
Definition pblas.h:439
Int pcol
Definition pblas.h:460
Int inbloc
Definition pblas.h:454
Int ilow
Definition pblas.h:458
Int inb1
Definition pblas.h:453
Int nblks
Definition pblas.h:457
Int prow
Definition pblas.h:449
Int imbloc
Definition pblas.h:443
Int lmbloc
Definition pblas.h:445
Int nb
Definition pblas.h:455
Int imb1
Definition pblas.h:442
Int mblks
Definition pblas.h:446
Int mb
Definition pblas.h:444
Int upp
Definition pblas.h:448