ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
PB_CVMpack.c
Go to the documentation of this file.
1 /* ---------------------------------------------------------------------
2 *
3 * -- PBLAS auxiliary routine (version 2.0) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * April 1, 1998
7 *
8 * ---------------------------------------------------------------------
9 */
10 /*
11 * Include files
12 */
13 #include "../pblas.h"
14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
18 
19 #ifdef __STDC__
20 int PB_CVMpack( PBTYP_T * TYPE, PB_VM_T * VM, char * VROCS, char * ROCS,
21  char * UNPA, char * TRANS, int MN, int K,
22  char * ALPHA, char * A, int LDA,
23  char * BETA, char * B, int LDB )
24 #else
25 int PB_CVMpack( TYPE, VM, VROCS, ROCS, UNPA, TRANS, MN, K, ALPHA, A,
26  LDA, BETA, B, LDB )
27 /*
28 * .. Scalar Arguments ..
29 */
30  int K, LDA, LDB, MN;
31  char * ALPHA, * BETA;
32 /*
33 * .. Array Arguments ..
34 */
35  char * VROCS, * ROCS, * UNPA, * TRANS;
36  PBTYP_T * TYPE;
37  PB_VM_T * VM;
38  char * A, * B;
39 #endif
40 {
41 /*
42 * Purpose
43 * =======
44 *
45 * PB_CVMpack packs a one-dimensional distributed array A into B, or
46 * unpacks an array B into a one-dimensional distributed array A. This
47 * operation is triggered by a virtual distributed array.
48 *
49 * Arguments
50 * =========
51 *
52 * TYPE (local input) pointer to a PBTYP_T structure
53 * On entry, TYPE is a pointer to a structure of type PBTYP_T,
54 * that contains type information (see pblas.h).
55 *
56 * VM (local input) pointer to a VM structure
57 * On entry, VM is a pointer to a structure of type PB_VM_T,
58 * that contains the virtual matrix information (see pblas.h).
59 *
60 * VROCS (local input) pointer to CHAR
61 * On entry, VROCS specifies if the rows or columns of the vir-
62 * tual distributed array grid should be used for the packing or
63 * unpacking operation as follows:
64 * VROCS = 'R' or 'r', the rows should be used,
65 * VROCS = 'C' or 'c', the columns should be used.
66 *
67 * ROCS (local input) pointer to CHAR
68 * On entry, ROCS specifies if rows or columns should be used
69 * packed or unpacked as follows:
70 * ROCS = 'R' or 'r', rows should be (un)packed,
71 * ROCS = 'C' or 'c', columns should be (un)packed.
72 *
73 * UNPA (local input) pointer to CHAR
74 * On entry, UNPA specifies if the data should be packed or un-
75 * packed as follows:
76 * UNPA = 'P' or 'p', packing,
77 * UNPA = 'U' or 'u', unpacking.
78 *
79 * TRANS (local input) pointer to CHAR
80 * On entry, TRANS specifies if conjugation, transposition or
81 * conjugate transposition should occur during the (un)packing
82 * operation as follows:
83 * TRANS = 'N' or 'n', natural (un)packing,
84 * TRANS = 'Z' or 'z', conjugated (un)packing,
85 * TRANS = 'T' or 't', transposed (un)packing,
86 * TRANS = 'C' or 'c', conjugate transposed (un)packing.
87 *
88 * MN (local input) INTEGER
89 * On entry, MN specifies the length of the distributed dimen-
90 * sion to be (un)packed. MN must be at least zero.
91 *
92 * K (local input) INTEGER
93 * On entry, K specifies the length of the non-distributed di-
94 * mension to be (un)packed. K must be at least zero.
95 *
96 * ALPHA (local input) pointer to CHAR
97 * On entry, ALPHA specifies the scalar alpha.
98 *
99 * A (local input/local output) pointer to CHAR
100 * On entry, A points to an array of dimension (LDA, Ka), where
101 * Ka is K when ROCS is 'R' or 'r' and when ROCS is 'C' or 'c',
102 * Ka is IMBLOC+(MBLKS-2)*MB+LMB when VROCS is 'R' or 'r' and
103 * when VROCS is 'C' or 'c', Ka is INBLOC+(NBLKS-2)*NB+LNB. This
104 * array contains unpacked data.
105 *
106 * LDA (local input) INTEGER
107 * On entry, LDA specifies the leading dimension of the array A.
108 * LDA must be at least MAX( 1, K ) when ROCS = 'C' or 'c' and
109 * MAX( 1, IMBLOC+(MBLKS-2)*MB+LMB ) when ROCS is 'R' or 'r' and
110 * VROCS is 'R' or 'r', and MAX( 1, INBLOC+(NBLKS-2)*NB+LNB )
111 * when ROCS is 'R' or 'r' and VROCS is 'C' or 'c'.
112 *
113 * BETA (local input) pointer to CHAR
114 * On entry, BETA specifies the scalar beta.
115 *
116 * B (local input/local output) pointer to CHAR
117 * On entry, B points to an array of dimension (LDB,*). When
118 * ROCS is 'C' or 'c', and TRANS is 'N', 'n', 'Z' or 'Z', B
119 * points to an K by MN array. When TRANS is 'T', 't', 'C' or
120 * 'c', B points to an MN by K array. When ROCS is 'R' or 'r',
121 * and TRANS is 'T', 't', 'C' or 'c', B points to an K by MN ar-
122 * ray. When TRANS is 'N', 'n', 'Z' or 'z', B points to an MN by
123 * K array. This array contains the packed data.
124 *
125 * LDB (local input) INTEGER
126 * On entry, LDB specifies the leading dimension of the array B.
127 *
128 * -- Written on April 1, 1998 by
129 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
130 *
131 * ---------------------------------------------------------------------
132 */
133 /*
134 * .. Local Scalars ..
135 */
136  int GoEast, GoSouth, ilow, imbloc, inbloc, inca, incb, iupp, kb,
137  lcmt, lcmt00, lmbloc, lnbloc, low, mb, mblkd, mblks, mbloc,
138  * m, * n, nb, nblkd, nblks, nbloc, notran, npcol, npq=0,
139  nprow, pmb, qnb, rows, size, tmp1, tmp2, upp;
140  char * aptrd;
141  MMADD_T add;
142 /* ..
143 * .. Executable Statements ..
144 *
145 */
146  mblks = VM->mblks; nblks = VM->nblks;
147 /*
148 * Quick return if I don't own any blocks.
149 */
150  if( ( mblks == 0 ) || ( nblks == 0 ) ) return( 0 );
151 /*
152 * Retrieve the contents of VM structure fields
153 */
154  lcmt00 = VM->lcmt00;
155  imbloc = VM->imbloc; mb = VM->mb; lmbloc = VM->lmbloc; upp = VM->upp;
156  iupp = VM->iupp; nprow = VM->nprow;
157  inbloc = VM->inbloc; nb = VM->nb; lnbloc = VM->lnbloc; low = VM->low;
158  ilow = VM->ilow; npcol = VM->npcol;
159 
160  if( Mupcase( UNPA[0] ) == CPACKING )
161  {
162 /*
163 * B is the target packed buffer, A is the distributed source
164 */
165  if( Mupcase( TRANS[0] ) == CNOTRAN )
166  {
167 /*
168 * Add A to B
169 */
170  notran = 1; add = TYPE->Fmmadd;
171  }
172  else if( Mupcase( TRANS[0] ) == CCONJG )
173  {
174 /*
175 * Add the conjugate of A to B
176 */
177  notran = 1; add = TYPE->Fmmcadd;
178  }
179  else if( Mupcase( TRANS[0] ) == CTRAN )
180  {
181 /*
182 * Add the tranpose of A to B
183 */
184  notran = 0; add = TYPE->Fmmtadd;
185  }
186  else
187  {
188 /*
189 * Add the conjugate tranpose of A to B
190 */
191  notran = 0; add = TYPE->Fmmtcadd;
192  }
193  }
194  else
195  {
196 /*
197 * B is the source packed buffer, A is the distributed target
198 */
199  if( Mupcase( TRANS[0] ) == CNOTRAN )
200  {
201 /*
202 * Add B to A
203 */
204  notran = 1; add = TYPE->Fmmdda;
205  }
206  else if( Mupcase( TRANS[0] ) == CCONJG )
207  {
208 /*
209 * Add the conjugate of B to A
210 */
211  notran = 1; add = TYPE->Fmmddac;
212  }
213  else if( Mupcase( TRANS[0] ) == CTRAN )
214  {
215 /*
216 * Add the tranpose of B to A
217 */
218  notran = 0; add = TYPE->Fmmddat;
219  }
220  else
221  {
222 /*
223 * Add the conjugate tranpose of B to A
224 */
225  notran = 0; add = TYPE->Fmmddact;
226  }
227  }
228 
229  size = TYPE->size;
230  rows = ( Mupcase( ROCS[0] ) == CROW );
231 
232  if( Mupcase( VROCS[0] ) == CROW )
233  {
234 /*
235 * (un)packing using rows of virtual matrix
236 */
237  if( rows )
238  {
239 /*
240 * (un)packing rows of mn by k array A.
241 */
242  inca = size;
243  incb = ( notran ? size : LDB * size );
244  m = &tmp2;
245  n = &K;
246  }
247  else
248  {
249 /*
250 * (un)packing columns of k by mn array A
251 */
252  inca = LDA * size;
253  incb = ( notran ? LDB * size : size );
254  m = &K;
255  n = &tmp2;
256  }
257  kb = MN;
258 /*
259 * From the (un)packing point of view the only valuable shortcut is when the
260 * virtual grid and the blocks are square, and the offset is zero or the grid
261 * is 1x1.
262 */
263  if( ( ( lcmt00 == 0 ) && ( VM->imb1 == VM->inb1 ) && ( mb == nb ) &&
264  ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) )
265  {
266  if( VM->prow == VM->pcol )
267  {
268  npq = ( ( mblks < 2 ) ? imbloc :
269  imbloc + ( mblks - 2 ) * mb + lmbloc );
270  npq = MIN( npq, kb );
271  if( rows ) add( &npq, &K, ALPHA, A, &LDA, BETA, B, &LDB );
272  else add( &K, &npq, ALPHA, A, &LDA, BETA, B, &LDB );
273  }
274  return( npq );
275  }
276  pmb = nprow * mb;
277  qnb = npcol * nb;
278 /*
279 * Handle separately the first row and/or column of the LCM table. Update the
280 * LCM value of the curent block lcmt00, as well as the number of rows and
281 * columns mblks and nblks remaining in the LCM table.
282 */
283  GoSouth = ( lcmt00 > iupp );
284  GoEast = ( lcmt00 < ilow );
285 
286  if( !( GoSouth ) && !( GoEast ) )
287  {
288 /*
289 * The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp
290 */
291  if( lcmt00 >= 0 )
292  {
293  tmp1 = imbloc - lcmt00; tmp1 = MAX( 0, tmp1 );
294  tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
295  add( m, n, ALPHA, A+lcmt00*inca, &LDA, BETA, B, &LDB );
296  }
297  else
298  {
299  tmp1 = inbloc + lcmt00; tmp1 = MAX( 0, tmp1 );
300  tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
301  add( m, n, ALPHA, A, &LDA, BETA, B, &LDB );
302  }
303  if( ( kb -= tmp2 ) == 0 ) return( npq );
304  B += tmp2 * incb;
305 /*
306 * Decide whether one should go south or east in the table: Go east if
307 * the block below the current one only owns lower entries. If this block,
308 * however, owns diagonals, then go south.
309 */
310  GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
311  }
312 
313  if( GoSouth )
314  {
315 /*
316 * Go one step south in the LCM table. Adjust the current LCM value as well as
317 * the pointer to A. The pointer to B remains unchanged.
318 */
319  lcmt00 -= iupp - upp + pmb; mblks--; A += imbloc * inca;
320 /*
321 * While there are blocks remaining that own upper entries, keep going south.
322 * Adjust the current LCM value as well as the pointer to A accordingly.
323 */
324  while( mblks && ( lcmt00 > upp ) )
325  { lcmt00 -= pmb; mblks--; A += mb * inca; }
326 /*
327 * Return if no more row in the LCM table.
328 */
329  if( mblks <= 0 ) return( npq );
330 /*
331 * lcmt00 <= upp. The current block owns either diagonals or lower entries.
332 * Save the current position in the LCM table. After this column has been
333 * completely taken care of, re-start from this row and the next column of
334 * the LCM table.
335 */
336  lcmt = lcmt00; mblkd = mblks; aptrd = A;
337 
338  while( mblkd && ( lcmt >= ilow ) )
339  {
340 /*
341 * A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found.
342 */
343  mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
344  if( lcmt >= 0 )
345  {
346  tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 );
347  tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
348  add( m, n, ALPHA, aptrd+lcmt*inca, &LDA, BETA, B, &LDB );
349  }
350  else
351  {
352  tmp1 = inbloc + lcmt; tmp1 = MAX( 0, tmp1 );
353  tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
354  add( m, n, ALPHA, aptrd, &LDA, BETA, B, &LDB );
355  }
356  if( ( kb -= tmp2 ) == 0 ) return( npq );
357 /*
358 * Keep going south until there are no more blocks owning diagonals
359 */
360  lcmt -= pmb; mblkd--; aptrd += mbloc * inca; B += tmp2 * incb;
361  }
362 /*
363 * I am done with the first column of the LCM table. Go to the next column.
364 */
365  lcmt00 += low - ilow + qnb; nblks--;
366  }
367  else if( GoEast )
368  {
369 /*
370 * Go one step east in the LCM table. Adjust the current LCM value as
371 * well as the pointer to B. The pointer to A remains unchanged.
372 */
373  lcmt00 += low - ilow + qnb; nblks--;
374 /*
375 * While there are blocks remaining that own lower entries, keep going east
376 * in the LCM table. Adjust the current LCM value as well as the pointer to
377 * B accordingly.
378 */
379  while( nblks && ( lcmt00 < low ) ) { lcmt00 += qnb; nblks--; }
380 /*
381 * Return if no more column in the LCM table.
382 */
383  if( nblks <= 0 ) return( npq );
384 /*
385 * lcmt00 >= low. The current block owns either diagonals or upper entries. Save
386 * the current position in the LCM table. After this row has been completely
387 * taken care of, re-start from this column and the next row of the LCM table.
388 */
389  lcmt = lcmt00; nblkd = nblks;
390 
391  while( nblkd && ( lcmt <= iupp ) )
392  {
393 /*
394 * A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found.
395 */
396  nbloc = ( ( nblkd == 1 ) ? lnbloc : nb );
397  if( lcmt >= 0 )
398  {
399  tmp1 = imbloc - lcmt; tmp1 = MAX( 0, tmp1 );
400  tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
401  add( m, n, ALPHA, A+lcmt*inca, &LDA, BETA, B, &LDB );
402  }
403  else
404  {
405  tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 );
406  tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
407  add( m, n, ALPHA, A, &LDA, BETA, B, &LDB );
408  }
409  if( ( kb -= tmp2 ) == 0 ) return( npq );
410 /*
411 * Keep going east until there are no more blocks owning diagonals.
412 */
413  lcmt += qnb; nblkd--; B += tmp2 * incb;
414  }
415 /*
416 * I am done with the first row of the LCM table. Go to the next row.
417 */
418  lcmt00 -= iupp - upp + pmb; mblks--; A += imbloc * inca;
419  }
420 /*
421 * Loop over the remaining columns of the LCM table.
422 */
423  do
424  {
425 /*
426 * If the current block does not have diagonal elements, find the closest one in
427 * the LCM table having some.
428 */
429  if( ( lcmt00 < low ) || ( lcmt00 > upp ) )
430  {
431  while( mblks && nblks )
432  {
433  while( mblks && ( lcmt00 > upp ) )
434  { lcmt00 -= pmb; mblks--; A += mb*inca; }
435  if( lcmt00 >= low ) break;
436  while( nblks && ( lcmt00 < low ) )
437  { lcmt00 += qnb; nblks--; }
438  if( lcmt00 <= upp ) break;
439  }
440  }
441  if( !( mblks ) || !( nblks ) ) return( npq );
442 /*
443 * The current block owns diagonals. Save the current position in the LCM table.
444 * After this column has been completely taken care of, re-start from this row
445 * and the next column in the LCM table.
446 */
447  nbloc = ( ( nblks == 1 ) ? lnbloc : nb );
448  lcmt = lcmt00; mblkd = mblks; aptrd = A;
449 
450  while( mblkd && ( lcmt >= low ) )
451  {
452 /*
453 * A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found.
454 */
455  mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
456  if( lcmt >= 0 )
457  {
458  tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 );
459  tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
460  add( m, n, ALPHA, aptrd+lcmt*inca, &LDA, BETA, B, &LDB );
461  }
462  else
463  {
464  tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 );
465  tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
466  add( m, n, ALPHA, aptrd, &LDA, BETA, B, &LDB );
467  }
468  if( ( kb -= tmp2 ) == 0 ) return( npq );
469 /*
470 * Keep going south until there are no more blocks owning diagonals
471 */
472  lcmt -= pmb; mblkd--; aptrd += mbloc * inca; B += tmp2 * incb;
473  }
474 /*
475 * I am done with this column of the LCM table. Go to the next column ...
476 */
477  lcmt00 += qnb; nblks--;
478 /*
479 * ... until there are no more columns.
480 */
481  } while( nblks > 0 );
482 /*
483 * Return the number of diagonals found.
484 */
485  return( npq );
486  }
487  else
488  {
489 /*
490 * (un)packing using columns of virtual matrix
491 */
492  if( rows )
493  {
494 /*
495 * (un)packing rows of mn by k array A
496 */
497  inca = size;
498  incb = ( notran ? size : LDB * size );
499  m = &tmp2;
500  n = &K;
501  }
502  else
503  {
504 /*
505 * (un)packing columns of k by mn array A
506 */
507  inca = LDA * size;
508  incb = ( notran ? LDB * size : size );
509  m = &K;
510  n = &tmp2;
511  }
512  kb = MN;
513 /*
514 * From the (un)packing point of view the only valuable shortcut is when the
515 * virtual grid and the blocks are square, and the offset is zero or the grid
516 * is 1x1.
517 */
518  if( ( ( lcmt00 == 0 ) && ( VM->imb1 == VM->inb1 ) && ( mb == nb ) &&
519  ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) )
520  {
521  if( VM->prow == VM->pcol )
522  {
523  npq = ( ( nblks < 2 ) ? inbloc :
524  inbloc + ( nblks - 2 ) * nb + lnbloc );
525  npq = MIN( npq, kb );
526  if( rows ) add( &npq, &K, ALPHA, A, &LDA, BETA, B, &LDB );
527  else add( &K, &npq, ALPHA, A, &LDA, BETA, B, &LDB );
528  }
529  return( npq );
530  }
531  pmb = nprow * mb;
532  qnb = npcol * nb;
533 /*
534 * Handle separately the first row and/or column of the LCM table. Update the
535 * LCM value of the curent block lcmt00, as well as the number of rows and
536 * columns mblks and nblks remaining in the LCM table.
537 */
538  GoSouth = ( lcmt00 > iupp );
539  GoEast = ( lcmt00 < ilow );
540 
541  if( !( GoSouth ) && !( GoEast ) )
542  {
543 /*
544 * The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp
545 */
546  if( lcmt00 >= 0 )
547  {
548  tmp1 = imbloc - lcmt00; tmp1 = MAX( 0, tmp1 );
549  tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
550  add( m, n, ALPHA, A, &LDA, BETA, B, &LDB );
551  }
552  else
553  {
554  tmp1 = inbloc + lcmt00; tmp1 = MAX( 0, tmp1 );
555  tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
556  add( m, n, ALPHA, A-lcmt00*inca, &LDA, BETA, B, &LDB );
557  }
558  if( ( kb -= tmp2 ) == 0 ) return( npq );
559  B += tmp2 * incb;
560 /*
561 * Decide whether one should go south or east in the table: Go east if
562 * the block below the current one only owns lower entries. If this block,
563 * however, owns diagonals, then go south.
564 */
565  GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
566  }
567 
568  if( GoSouth )
569  {
570 /*
571 * Go one step south in the LCM table. Adjust the current LCM value as well as
572 * the pointer to B. The pointer to A remains unchanged.
573 */
574  lcmt00 -= iupp - upp + pmb; mblks--;
575 /*
576 * While there are blocks remaining that own upper entries, keep going south.
577 * Adjust the current LCM value as well as the pointer to B accordingly.
578 */
579  while( mblks && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; }
580 /*
581 * Return if no more row in the LCM table.
582 */
583  if( mblks <= 0 ) return( npq );
584 /*
585 * lcmt00 <= upp. The current block owns either diagonals or lower entries.
586 * Save the current position in the LCM table. After this column has been
587 * completely taken care of, re-start from this row and the next column of
588 * the LCM table.
589 */
590  lcmt = lcmt00; mblkd = mblks;
591 
592  while( mblkd && ( lcmt >= ilow ) )
593  {
594 /*
595 * A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found.
596 */
597  mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
598  if( lcmt >= 0 )
599  {
600  tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 );
601  tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
602  add( m, n, ALPHA, A, &LDA, BETA, B, &LDB );
603  }
604  else
605  {
606  tmp1 = inbloc + lcmt; tmp1 = MAX( 0, tmp1 );
607  tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
608  add( m, n, ALPHA, A-lcmt*inca, &LDA, BETA, B, &LDB );
609  }
610  if( ( kb -= tmp2 ) == 0 ) return( npq );
611 /*
612 * Keep going south until there are no more blocks owning diagonals
613 */
614  lcmt -= pmb; mblkd--; B += tmp2 * incb;
615  }
616 /*
617 * I am done with the first column of the LCM table. Go to the next column.
618 */
619  lcmt00 += low - ilow + qnb; nblks--; A += inbloc * inca;
620  }
621  else if( GoEast )
622  {
623 /*
624 * Go one step east in the LCM table. Adjust the current LCM value as
625 * well as the pointer to A. The pointer to B remains unchanged.
626 */
627  lcmt00 += low - ilow + qnb; nblks--; A += inbloc * inca;
628 /*
629 * While there are blocks remaining that own lower entries, keep going east
630 * in the LCM table. Adjust the current LCM value as well as the pointer to
631 * A accordingly.
632 */
633  while( nblks && ( lcmt00 < low ) )
634  { lcmt00 += qnb; nblks--; A += nb * inca; }
635 /*
636 * Return if no more column in the LCM table.
637 */
638  if( nblks <= 0 ) return( npq );
639 /*
640 * lcmt00 >= low. The current block owns either diagonals or upper entries. Save
641 * the current position in the LCM table. After this row has been completely
642 * taken care of, re-start from this column and the next row of the LCM table.
643 */
644  lcmt = lcmt00; nblkd = nblks; aptrd = A;
645 
646  while( nblkd && ( lcmt <= iupp ) )
647  {
648 /*
649 * A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found.
650 */
651  nbloc = ( ( nblkd == 1 ) ? lnbloc : nb );
652  if( lcmt >= 0 )
653  {
654  tmp1 = imbloc - lcmt; tmp1 = MAX( 0, tmp1 );
655  tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
656  add( m, n, ALPHA, aptrd, &LDA, BETA, B, &LDB );
657  }
658  else
659  {
660  tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 );
661  tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
662  add( m, n, ALPHA, aptrd-lcmt*inca, &LDA, BETA, B, &LDB );
663  }
664  if( ( kb -= tmp2 ) == 0 ) return( npq );
665 /*
666 * Keep going east until there are no more blocks owning diagonals.
667 */
668  lcmt += qnb; nblkd--; aptrd += nbloc * inca; B += tmp2 * incb;
669  }
670 /*
671 * I am done with the first row of the LCM table. Go to the next row.
672 */
673  lcmt00 -= iupp - upp + pmb; mblks--;
674  }
675 /*
676 * Loop over the remaining columns of the LCM table.
677 */
678  do
679  {
680 /*
681 * If the current block does not have diagonal elements, find the closest one in
682 * the LCM table having some.
683 */
684  if( ( lcmt00 < low ) || ( lcmt00 > upp ) )
685  {
686  while( mblks && nblks )
687  {
688  while( mblks && ( lcmt00 > upp ) )
689  { lcmt00 -= pmb; mblks--; }
690  if( lcmt00 >= low ) break;
691  while( nblks && ( lcmt00 < low ) )
692  { lcmt00 += qnb; nblks--; A += nb*inca; }
693  if( lcmt00 <= upp ) break;
694  }
695  }
696  if( !( mblks ) || !( nblks ) ) return( npq );
697 /*
698 * The current block owns diagonals. Save the current position in the LCM table.
699 * After this column has been completely taken care of, re-start from this row
700 * and the next column in the LCM table.
701 */
702  nbloc = ( ( nblks == 1 ) ? lnbloc : nb );
703  lcmt = lcmt00; mblkd = mblks;
704 
705  while( mblkd && ( lcmt >= low ) )
706  {
707 /*
708 * A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found.
709 */
710  mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
711  if( lcmt >= 0 )
712  {
713  tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 );
714  tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
715  add( m, n, ALPHA, A, &LDA, BETA, B, &LDB );
716  }
717  else
718  {
719  tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 );
720  tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
721  add( m, n, ALPHA, A-lcmt*inca, &LDA, BETA, B, &LDB );
722  }
723  if( ( kb -= tmp2 ) == 0 ) return( npq );
724 /*
725 * Keep going south until there are no more blocks owning diagonals
726 */
727  lcmt -= pmb; mblkd--; B += tmp2 * incb;
728  }
729 /*
730 * I am done with this column of the LCM table. Go to the next column ...
731 */
732  lcmt00 += qnb; nblks--; A += nbloc * inca;
733 /*
734 * ... until there are no more columns.
735 */
736  } while( nblks > 0 );
737 /*
738 * Return the number of diagonals found.
739 */
740  return( npq );
741  }
742 /*
743 * End of PB_CVMpack
744 */
745 }
CCONJG
#define CCONJG
Definition: PBblas.h:21
TYPE
#define TYPE
Definition: clamov.c:7
PB_VM_T::imb1
int imb1
Definition: pblas.h:438
PB_VM_T::pcol
int pcol
Definition: pblas.h:456
PB_VM_T::lcmt00
int lcmt00
Definition: pblas.h:435
PB_VM_T::inbloc
int inbloc
Definition: pblas.h:450
PB_VM_T::iupp
int iupp
Definition: pblas.h:443
CPACKING
#define CPACKING
Definition: PBtools.h:50
PB_VM_T::imbloc
int imbloc
Definition: pblas.h:439
PB_VM_T::prow
int prow
Definition: pblas.h:445
PB_VM_T::inb1
int inb1
Definition: pblas.h:449
PB_VM_T::low
int low
Definition: pblas.h:455
PB_VM_T::npcol
int npcol
Definition: pblas.h:457
PB_VM_T::mblks
int mblks
Definition: pblas.h:442
PB_VM_T::lmbloc
int lmbloc
Definition: pblas.h:441
CROW
#define CROW
Definition: PBblacs.h:21
PB_VM_T::upp
int upp
Definition: pblas.h:444
PB_VM_T::nprow
int nprow
Definition: pblas.h:446
MMADD_T
F_VOID_FCT(* MMADD_T)()
Definition: pblas.h:284
CNOTRAN
#define CNOTRAN
Definition: PBblas.h:18
PB_VM_T::nblks
int nblks
Definition: pblas.h:453
PB_CVMpack
int PB_CVMpack(PBTYP_T *TYPE, PB_VM_T *VM, char *VROCS, char *ROCS, char *UNPA, char *TRANS, int MN, int K, char *ALPHA, char *A, int LDA, char *BETA, char *B, int LDB)
Definition: PB_CVMpack.c:25
PB_VM_T
Definition: pblas.h:432
MIN
#define MIN(a_, b_)
Definition: PBtools.h:76
PB_VM_T::lnbloc
int lnbloc
Definition: pblas.h:452
PB_VM_T::ilow
int ilow
Definition: pblas.h:454
MAX
#define MAX(a_, b_)
Definition: PBtools.h:77
PBTYP_T
Definition: pblas.h:325
Mupcase
#define Mupcase(C)
Definition: PBtools.h:83
CTRAN
#define CTRAN
Definition: PBblas.h:20
PB_VM_T::mb
int mb
Definition: pblas.h:440
PB_VM_T::nb
int nb
Definition: pblas.h:451