ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
PB_CVMloc.c
Go to the documentation of this file.
1 /* ---------------------------------------------------------------------
2 *
3 * -- PBLAS auxiliary routine (version 2.0) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * April 1, 1998
7 *
8 * ---------------------------------------------------------------------
9 */
10 /*
11 * Include files
12 */
13 #include "../pblas.h"
14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
18 
19 #ifdef __STDC__
20 int PB_CVMloc( PBTYP_T * TYPE, PB_VM_T * VM,
21  char * VROCS, char * ROCS, char * UNPA, char * TRANS,
22  int MN, int K, char * ALPHA, char * A, int LDA,
23  char * BETA, char * B, int LDB )
24 #else
25 int PB_CVMloc( TYPE, VM, VROCS, ROCS, UNPA, TRANS, MN, K, ALPHA, A,
26  LDA, BETA, B, LDB )
27 /*
28 * .. Scalar Arguments ..
29 */
30  int K, LDA, LDB, MN;
31  char * ALPHA, * BETA;
32 /*
33 * .. Array Arguments ..
34 */
35  char * VROCS, * ROCS, * UNPA, * TRANS;
36  PBTYP_T * TYPE;
37  PB_VM_T * VM;
38  char * A, * B;
39 #endif
40 {
41 /*
42 * Purpose
43 * =======
44 *
45 * PB_CVMloc packs a one-dimensional distributed array A into another
46 * one-dimensional distributed array B, or unpacks a one-dimensional
47 * distributed array B into a one-dimensional distributed array A. This
48 * operation is triggered by a virtual distributed array.
49 *
50 * Arguments
51 * =========
52 *
53 * TYPE (local input) pointer to a PBTYP_T structure
54 * On entry, TYPE is a pointer to a structure of type PBTYP_T,
55 * that contains type information (See pblas.h).
56 *
57 * VM (local input) pointer to a PB_VM_T structure
58 * On entry, VM is a pointer to a structure of type PB_VM_T,
59 * that contains the virtual matrix information (see pblas.h).
60 *
61 * VROCS (local input) pointer to CHAR
62 * On entry, VROCS specifies if the rows or columns of the vir-
63 * tual distributed array grid should be used for the packing or
64 * unpacking operation as follows:
65 * VROCS = 'R' or 'r', the rows should be used,
66 * VROCS = 'C' or 'c', the columns should be used.
67 *
68 * ROCS (local input) pointer to CHAR
69 * On entry, ROCS specifies if rows or columns should be packed
70 * or unpacked as follows:
71 * ROCS = 'R' or 'r', rows should be (un)packed,
72 * ROCS = 'C' or 'c', columns should be (un)packed.
73 *
74 * UNPA (local input) pointer to CHAR
75 * On entry, UNPA specifies if the data should be packed or un-
76 * packed as follows:
77 * UNPA = 'P' or 'p', packing (A into B),
78 * UNPA = 'U' or 'u', unpacking (B into A).
79 *
80 * TRANS (local input) pointer to CHAR
81 * On entry, TRANS specifies if conjugation, transposition or
82 * conjugate transposition should occur during the (un)packing
83 * operation as follows:
84 * TRANS = 'N' or 'n', natural (un)packing,
85 * TRANS = 'Z' or 'z', conjugated (un)packing,
86 * TRANS = 'T' or 'T', transposed (un)packing,
87 * TRANS = 'C' or 'c', conjugate transposed (un)packing.
88 *
89 * MN (local input) INTEGER
90 * On entry, MN specifies the number of rows or columns to be
91 * (un)packed. MN must be at least zero.
92 *
93 * K (local input) INTEGER
94 * On entry, K specifies the length of the non-distributed di-
95 * mension to be (un)packed. K must be at least zero.
96 *
97 * ALPHA (local input) pointer to CHAR
98 * On entry, ALPHA specifies the scalar alpha.
99 *
100 * A (local input/local output) pointer to CHAR
101 * On entry, A points to an array of dimension (LDA, Ka), where
102 * Ka is K when ROCS is 'R' or 'r' and when ROCS is 'C' or 'c',
103 * Ka is IMBLOC+(MBLKS-2)*MB+LMB when VROCS is 'R' or 'r' and
104 * when VROCS is 'C' or 'c', Ka is INBLOC+(NBLKS-2)*NB+LNB. This
105 * array contains unpacked data.
106 *
107 * LDA (local input) INTEGER
108 * On entry, LDA specifies the leading dimension of the array A.
109 * LDA must be at least MAX( 1, K ) when ROCS = 'C' or 'c' and
110 * MAX( 1, IMBLOC+(MBLKS-2)*MB+LMB ) when ROCS is 'R' or 'r' and
111 * VROCS is 'R' or 'r', and MAX( 1, INBLOC+(NBLKS-2)*NB+LNB )
112 * when ROCS is 'R' or 'r' and VROCS is 'C' or 'c'.
113 *
114 * BETA (local input) pointer to CHAR
115 * On entry, BETA specifies the scalar beta.
116 *
117 * B (local input/local output) pointer to CHAR
118 * On entry, B points to an array of dimension (LDB, Kb). When
119 * TRANS is 'N', 'n', 'Z' or 'z', Kb is K when ROCS is 'R'or
120 * 'r', and when ROCS is 'C' or 'c', Kb is IMBLOC+(MBLKS-2)*MB+
121 * LMB when VROCS is 'C' or 'c' and when VROCS is 'R', Kb is
122 * INBLOC+(NBLKS-2)*NB+LNB. When TRANS is 'T', 't', 'C' or 'c',
123 * Kb is K when ROCS is 'C' or 'c' and when ROCS is 'R' or 'r',
124 * Kb is IMBLOC+(MBLKS-2)*MB+LMB when VROCS is 'C' or 'c' and
125 * when VROCS is 'R' or 'r', Kb is INBLOC+(NBLKS-2)*NB+LNB. This
126 * array contains unpacked data.
127 *
128 * LDB (local input) INTEGER
129 * On entry, LDB specifies the leading dimension of the array B.
130 *
131 * -- Written on April 1, 1998 by
132 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
133 *
134 * ---------------------------------------------------------------------
135 */
136 /*
137 * .. Local Scalars ..
138 */
139  int GoEast, GoSouth, ilow, imbloc, inbloc, inca, incb, iupp, kb,
140  lcmt, lcmt00, lmbloc, lnbloc, low, mb, mblkd, mblks, mbloc,
141  * m, * n, nb, nblkd, nblks, nbloc, notran, npcol, npq=0,
142  nprow, pmb, qnb, rows, size, tmp1, tmp2, upp;
143  MMADD_T add;
144  char * aptrd, * bptrd;
145 /* ..
146 * .. Executable Statements ..
147 *
148 */
149  mblks = VM->mblks; nblks = VM->nblks;
150 /*
151 * Quick return if I don't own any blocks.
152 */
153  if( ( mblks == 0 ) || ( nblks == 0 ) ) return( 0 );
154 /*
155 * Retrieve the contents of VM structure fields
156 */
157  lcmt00 = VM->lcmt00;
158  imbloc = VM->imbloc; mb = VM->mb; lmbloc = VM->lmbloc; upp = VM->upp;
159  iupp = VM->iupp; nprow = VM->nprow;
160  inbloc = VM->inbloc; nb = VM->nb; lnbloc = VM->lnbloc; low = VM->low;
161  ilow = VM->ilow; npcol = VM->npcol;
162 
163  if( Mupcase( UNPA[0] ) == CPACKING )
164  {
165 /*
166 * B is the distributed target, A is the distributed source
167 */
168  if( Mupcase( TRANS[0] ) == CNOTRAN )
169  {
170 /*
171 * Add A to B
172 */
173  notran = 1;
174  add = TYPE->Fmmadd;
175  }
176  else if( Mupcase( TRANS[0] ) == CCONJG )
177  {
178 /*
179 * Add the conjugate of A to B
180 */
181  notran = 1;
182  add = TYPE->Fmmcadd;
183  }
184  else if( Mupcase( TRANS[0] ) == CTRAN )
185  {
186 /*
187 * Add the tranpose of A to B
188 */
189  notran = 0;
190  add = TYPE->Fmmtadd;
191  }
192  else
193  {
194 /*
195 * Add the conjugate tranpose of A to B
196 */
197  notran = 0;
198  add = TYPE->Fmmtcadd;
199  }
200  }
201  else
202  {
203 /*
204 * A is the distributed target, B is the distributed source
205 */
206  if( Mupcase( TRANS[0] ) == CNOTRAN )
207  {
208 /*
209 * Add B to A
210 */
211  notran = 1;
212  add = TYPE->Fmmdda;
213  }
214  else if( Mupcase( TRANS[0] ) == CCONJG )
215  {
216 /*
217 * Add the conjugate of B to A
218 */
219  notran = 1;
220  add = TYPE->Fmmddac;
221  }
222  else if( Mupcase( TRANS[0] ) == CTRAN )
223  {
224 /*
225 * Add the tranpose of B to A
226 */
227  notran = 0;
228  add = TYPE->Fmmddat;
229  }
230  else
231  {
232 /*
233 * Add the conjugate tranpose of B to A
234 */
235  notran = 0;
236  add = TYPE->Fmmddact;
237  }
238  }
239 
240  size = TYPE->size;
241  rows = ( Mupcase( ROCS[0] ) == CROW );
242 
243  if( Mupcase( VROCS[0] ) == CROW )
244  {
245 /*
246 * (un)packing using rows of virtual matrix
247 */
248  if( rows )
249  {
250 /*
251 * (un)packing rows of mn by k array A.
252 */
253  inca = size;
254  incb = ( notran ? size : LDB * size );
255  m = &tmp2;
256  n = &K;
257  }
258  else
259  {
260 /*
261 * (un)packing columns of k by mn array A
262 */
263  inca = LDA * size;
264  incb = ( notran ? LDB * size : size );
265  m = &K;
266  n = &tmp2;
267  }
268  kb = MN;
269 /*
270 * From the (un)packing point of view the only valuable shortcut is when the
271 * virtual grid and the blocks are square, and the offset is zero or the grid
272 * is 1x1.
273 */
274  if( ( ( lcmt00 == 0 ) && ( VM->imb1 == VM->inb1 ) && ( mb == nb ) &&
275  ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) )
276  {
277  if( VM->prow == VM->pcol )
278  {
279  npq = ( ( mblks < 2 ) ? imbloc :
280  imbloc + ( mblks - 2 ) * mb + lmbloc );
281  npq = MIN( npq, kb );
282  if( rows ) add( &npq, &K, ALPHA, A, &LDA, BETA, B, &LDB );
283  else add( &K, &npq, ALPHA, A, &LDA, BETA, B, &LDB );
284  }
285  return( npq );
286  }
287  pmb = nprow * mb;
288  qnb = npcol * nb;
289 /*
290 * Handle separately the first row and/or column of the LCM table. Update the
291 * LCM value of the curent block lcmt00, as well as the number of rows and
292 * columns mblks and nblks remaining in the LCM table.
293 */
294  GoSouth = ( lcmt00 > iupp );
295  GoEast = ( lcmt00 < ilow );
296 /*
297 * Go through the table looking for blocks owning diagonal entries.
298 */
299  if( !( GoSouth ) && !( GoEast ) )
300  {
301 /*
302 * The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp
303 */
304  if( lcmt00 >= 0 )
305  {
306  tmp1 = imbloc - lcmt00; tmp1 = MAX( 0, tmp1 );
307  tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
308  add( m, n, ALPHA, A+lcmt00*inca, &LDA, BETA, B, &LDB );
309  }
310  else
311  {
312  tmp1 = inbloc + lcmt00; tmp1 = MAX( 0, tmp1 );
313  tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
314  add( m, n, ALPHA, A, &LDA, BETA, B-lcmt00*incb, &LDB );
315  }
316  if( ( kb -= tmp2 ) == 0 ) return( npq );
317 /*
318 * Decide whether one should go south or east in the table: Go east if
319 * the block below the current one only owns lower entries. If this block,
320 * however, owns diagonals, then go south.
321 */
322  GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
323  }
324 
325  if( GoSouth )
326  {
327 /*
328 * Go one step south in the LCM table. Adjust the current LCM value as well as
329 * the pointer to A. The pointer to B remains unchanged.
330 */
331  lcmt00 -= iupp - upp + pmb; mblks--; A += imbloc * inca;
332 /*
333 * While there are blocks remaining that own upper entries, keep going south.
334 * Adjust the current LCM value as well as the pointer to A accordingly.
335 */
336  while( mblks && ( lcmt00 > upp ) )
337  { lcmt00 -= pmb; mblks--; A += mb * inca; }
338 /*
339 * Return if no more row in the LCM table.
340 */
341  if( mblks <= 0 ) return( npq );
342 /*
343 * lcmt00 <= upp. The current block owns either diagonals or lower entries.
344 * Save the current position in the LCM table. After this column has been
345 * completely taken care of, re-start from this row and the next column of
346 * the LCM table.
347 */
348  lcmt = lcmt00; mblkd = mblks; aptrd = A;
349 
350  while( mblkd && ( lcmt >= ilow ) )
351  {
352 /*
353 * A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found.
354 */
355  mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
356  if( lcmt >= 0 )
357  {
358  tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 );
359  tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
360  add( m, n, ALPHA, aptrd+lcmt*inca, &LDA, BETA, B, &LDB );
361  }
362  else
363  {
364  tmp1 = inbloc + lcmt; tmp1 = MAX( 0, tmp1 );
365  tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
366  add( m, n, ALPHA, aptrd, &LDA, BETA, B-lcmt*incb, &LDB );
367  }
368  if( ( kb -= tmp2 ) == 0 ) return( npq );
369 /*
370 * Keep going south until there are no more blocks owning diagonals
371 */
372  lcmt -= pmb; mblkd--; aptrd += mbloc * inca;
373  }
374 /*
375 * I am done with the first column of the LCM table. Go to the next column.
376 */
377  lcmt00 += low - ilow + qnb; nblks--; B += inbloc * incb;
378  }
379  else if( GoEast )
380  {
381 /*
382 * Go one step east in the LCM table. Adjust the current LCM value as
383 * well as the pointer to B. The pointer to A remains unchanged.
384 */
385  lcmt00 += low - ilow + qnb; nblks--; B += inbloc * incb;
386 /*
387 * While there are blocks remaining that own lower entries, keep going east
388 * in the LCM table. Adjust the current LCM value as well as the pointer to
389 * B accordingly.
390 */
391  while( nblks && ( lcmt00 < low ) )
392  { lcmt00 += qnb; nblks--; B += nb * incb; }
393 /*
394 * Return if no more column in the LCM table.
395 */
396  if( nblks <= 0 ) return( npq );
397 /*
398 * lcmt00 >= low. The current block owns either diagonals or upper entries. Save
399 * the current position in the LCM table. After this row has been completely
400 * taken care of, re-start from this column and the next row of the LCM table.
401 */
402  lcmt = lcmt00; nblkd = nblks; bptrd = B;
403 
404  while( nblkd && ( lcmt <= iupp ) )
405  {
406 /*
407 * A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found.
408 */
409  nbloc = ( ( nblkd == 1 ) ? lnbloc : nb );
410  if( lcmt >= 0 )
411  {
412  tmp1 = imbloc - lcmt; tmp1 = MAX( 0, tmp1 );
413  tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
414  add( m, n, ALPHA, A+lcmt*inca, &LDA, BETA, bptrd, &LDB );
415  }
416  else
417  {
418  tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 );
419  tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
420  add( m, n, ALPHA, A, &LDA, BETA, bptrd-lcmt*incb, &LDB );
421  }
422  if( ( kb -= tmp2 ) == 0 ) return( npq );
423 /*
424 * Keep going east until there are no more blocks owning diagonals.
425 */
426  lcmt += qnb; nblkd--; bptrd += nbloc * incb;
427  }
428 /*
429 * I am done with the first row of the LCM table. Go to the next row.
430 */
431  lcmt00 -= iupp - upp + pmb; mblks--; A += imbloc * inca;
432  }
433 /*
434 * Loop over the remaining columns of the LCM table.
435 */
436  do
437  {
438 /*
439 * If the current block does not have diagonal elements, find the closest one in
440 * the LCM table having some.
441 */
442  if( ( lcmt00 < low ) || ( lcmt00 > upp ) )
443  {
444  while( mblks && nblks )
445  {
446  while( mblks && ( lcmt00 > upp ) )
447  { lcmt00 -= pmb; mblks--; A += mb * inca; }
448  if( lcmt00 >= low ) break;
449  while( nblks && ( lcmt00 < low ) )
450  { lcmt00 += qnb; nblks--; B += nb * incb; }
451  if( lcmt00 <= upp ) break;
452  }
453  }
454  if( !( mblks ) || !( nblks ) ) return( npq );
455 /*
456 * The current block owns diagonals. Save the current position in the LCM table.
457 * After this column has been completely taken care of, re-start from this row
458 * and the next column in the LCM table.
459 */
460  nbloc = ( ( nblks == 1 ) ? lnbloc : nb );
461  lcmt = lcmt00; mblkd = mblks; aptrd = A;
462 
463  while( mblkd && ( lcmt >= low ) )
464  {
465 /*
466 * A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found.
467 */
468  mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
469  if( lcmt >= 0 )
470  {
471  tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 );
472  tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
473  add( m, n, ALPHA, aptrd+lcmt*inca, &LDA, BETA, B, &LDB );
474  }
475  else
476  {
477  tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 );
478  tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
479  add( m, n, ALPHA, aptrd, &LDA, BETA, B-lcmt*incb, &LDB );
480  }
481  if( ( kb -= tmp2 ) == 0 ) return( npq );
482 /*
483 * Keep going south until there are no more blocks owning diagonals
484 */
485  lcmt -= pmb; mblkd--; aptrd += mbloc * inca;
486  }
487 /*
488 * I am done with this column of the LCM table. Go to the next column ...
489 */
490  lcmt00 += qnb; nblks--; B += nbloc * incb;
491 /*
492 * ... until there are no more columns.
493 */
494  } while( nblks > 0 );
495 /*
496 * Return the number of diagonals found.
497 */
498  return( npq );
499  }
500  else
501  {
502 /*
503 * (un)packing using columns of virtual matrix
504 */
505  if( rows )
506  {
507 /*
508 * (un)packing rows of mn by k array A
509 */
510  inca = size;
511  incb = ( notran ? size : LDB * size );
512  m = &tmp2;
513  n = &K;
514  }
515  else
516  {
517 /*
518 * (un)packing columns of k by mn array A
519 */
520  inca = LDA * size;
521  incb = ( notran ? LDB * size : size );
522  m = &K;
523  n = &tmp2;
524  }
525  kb = MN;
526 /*
527 * From the (un)packing point of view the only valuable shortcut is when the
528 * virtual grid and the blocks are square, and the offset is zero or the grid
529 * is 1x1.
530 */
531  if( ( ( lcmt00 == 0 ) && ( VM->imb1 == VM->inb1 ) && ( mb == nb ) &&
532  ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) )
533  {
534  if( VM->prow == VM->pcol )
535  {
536  npq = ( ( nblks < 2 ) ? inbloc :
537  inbloc + ( nblks - 2 ) * nb + lnbloc );
538  npq = MIN( npq, kb );
539  if( rows ) add( &npq, &K, ALPHA, A, &LDA, BETA, B, &LDB );
540  else add( &K, &npq, ALPHA, A, &LDA, BETA, B, &LDB );
541  }
542  return( npq );
543  }
544  pmb = nprow * mb;
545  qnb = npcol * nb;
546 /*
547 * Handle separately the first row and/or column of the LCM table. Update the
548 * LCM value of the curent block lcmt00, as well as the number of rows and
549 * columns mblks and nblks remaining in the LCM table.
550 */
551  GoSouth = ( lcmt00 > iupp );
552  GoEast = ( lcmt00 < ilow );
553 
554  if( !( GoSouth ) && !( GoEast ) )
555  {
556 /*
557 * The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp
558 */
559  if( lcmt00 >= 0 )
560  {
561  tmp1 = imbloc - lcmt00; tmp1 = MAX( 0, tmp1 );
562  tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
563  add( m, n, ALPHA, A, &LDA, BETA, B+lcmt00*incb, &LDB );
564  }
565  else
566  {
567  tmp1 = inbloc + lcmt00; tmp1 = MAX( 0, tmp1 );
568  tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
569  add( m, n, ALPHA, A-lcmt00*inca, &LDA, BETA, B, &LDB );
570  }
571  if( ( kb -= tmp2 ) == 0 ) return( npq );
572 /*
573 * Decide whether one should go south or east in the table: Go east if
574 * the block below the current one only owns lower entries. If this block,
575 * however, owns diagonals, then go south.
576 */
577  GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
578  }
579 
580  if( GoSouth )
581  {
582 /*
583 * Go one step south in the LCM table. Adjust the current LCM value as well as
584 * the pointer to B. The pointer to A remains unchanged.
585 */
586  lcmt00 -= iupp - upp + pmb; mblks--; B += imbloc * incb;
587 /*
588 * While there are blocks remaining that own upper entries, keep going south.
589 * Adjust the current LCM value as well as the pointer to B accordingly.
590 */
591  while( mblks && ( lcmt00 > upp ) )
592  { lcmt00 -= pmb; mblks--; B += mb * incb; }
593 /*
594 * Return if no more row in the LCM table.
595 */
596  if( mblks <= 0 ) return( npq );
597 /*
598 * lcmt00 <= upp. The current block owns either diagonals or lower entries.
599 * Save the current position in the LCM table. After this column has been
600 * completely taken care of, re-start from this row and the next column of
601 * the LCM table.
602 */
603  lcmt = lcmt00; mblkd = mblks; bptrd = B;
604 
605  while( mblkd && ( lcmt >= ilow ) )
606  {
607 /*
608 * A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found.
609 */
610  mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
611  if( lcmt >= 0 )
612  {
613  tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 );
614  tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
615  add( m, n, ALPHA, A, &LDA, BETA, bptrd+lcmt*incb, &LDB );
616  }
617  else
618  {
619  tmp1 = inbloc + lcmt; tmp1 = MAX( 0, tmp1 );
620  tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
621  add( m, n, ALPHA, A-lcmt*inca, &LDA, BETA, bptrd, &LDB );
622  }
623  if( ( kb -= tmp2 ) == 0 ) return( npq );
624 /*
625 * Keep going south until there are no more blocks owning diagonals
626 */
627  lcmt -= pmb; mblkd--; bptrd += mbloc * incb;
628  }
629 /*
630 * I am done with the first column of the LCM table. Go to the next column.
631 */
632  lcmt00 += low - ilow + qnb; nblks--; A += inbloc * inca;
633  }
634  else if( GoEast )
635  {
636 /*
637 * Go one step east in the LCM table. Adjust the current LCM value as
638 * well as the pointer to A. The pointer to B remains unchanged.
639 */
640  lcmt00 += low - ilow + qnb; nblks--; A += inbloc * inca;
641 /*
642 * While there are blocks remaining that own lower entries, keep going east
643 * in the LCM table. Adjust the current LCM value as well as the pointer to
644 * A accordingly.
645 */
646  while( nblks && ( lcmt00 < low ) )
647  { lcmt00 += qnb; nblks--; A += nb * inca; }
648 /*
649 * Return if no more column in the LCM table.
650 */
651  if( nblks <= 0 ) return( npq );
652 /*
653 * lcmt00 >= low. The current block owns either diagonals or upper entries. Save
654 * the current position in the LCM table. After this row has been completely
655 * taken care of, re-start from this column and the next row of the LCM table.
656 */
657  lcmt = lcmt00; nblkd = nblks; aptrd = A;
658 
659  while( nblkd && ( lcmt <= iupp ) )
660  {
661 /*
662 * A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found.
663 */
664  nbloc = ( ( nblkd == 1 ) ? lnbloc : nb );
665  if( lcmt >= 0 )
666  {
667  tmp1 = imbloc - lcmt; tmp1 = MAX( 0, tmp1 );
668  tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
669  add( m, n, ALPHA, aptrd, &LDA, BETA, B+lcmt*incb, &LDB );
670  }
671  else
672  {
673  tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 );
674  tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
675  add( m, n, ALPHA, aptrd-lcmt*inca, &LDA, BETA, B, &LDB );
676  }
677  if( ( kb -= tmp2 ) == 0 ) return( npq );
678 /*
679 * Keep going east until there are no more blocks owning diagonals.
680 */
681  lcmt += qnb; nblkd--; aptrd += nbloc * inca;
682  }
683 /*
684 * I am done with the first row of the LCM table. Go to the next row.
685 */
686  lcmt00 -= iupp - upp + pmb; mblks--; B += imbloc * incb;
687  }
688 /*
689 * Loop over the remaining columns of the LCM table.
690 */
691  do
692  {
693 /*
694 * If the current block does not have diagonal elements, find the closest one in
695 * the LCM table having some.
696 */
697  if( ( lcmt00 < low ) || ( lcmt00 > upp ) )
698  {
699  while( mblks && nblks )
700  {
701  while( mblks && ( lcmt00 > upp ) )
702  { lcmt00 -= pmb; mblks--; B += mb * incb; }
703  if( lcmt00 >= low ) break;
704  while( nblks && ( lcmt00 < low ) )
705  { lcmt00 += qnb; nblks--; A += nb * inca; }
706  if( lcmt00 <= upp ) break;
707  }
708  }
709  if( !( mblks ) || !( nblks ) ) return( npq );
710 /*
711 * The current block owns diagonals. Save the current position in the LCM table.
712 * After this column has been completely taken care of, re-start from this row
713 * and the next column in the LCM table.
714 */
715  nbloc = ( ( nblks == 1 ) ? lnbloc : nb );
716  lcmt = lcmt00; mblkd = mblks; bptrd = B;
717 
718  while( mblkd && ( lcmt >= low ) )
719  {
720 /*
721 * A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found.
722 */
723  mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
724  if( lcmt >= 0 )
725  {
726  tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 );
727  tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
728  add( m, n, ALPHA, A, &LDA, BETA, bptrd+lcmt*incb, &LDB );
729  }
730  else
731  {
732  tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 );
733  tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
734  add( m, n, ALPHA, A-lcmt*inca, &LDA, BETA, bptrd, &LDB );
735  }
736  if( ( kb -= tmp2 ) == 0 ) return( npq );
737 /*
738 * Keep going south until there are no more blocks owning diagonals
739 */
740  lcmt -= pmb; mblkd--; bptrd += mbloc * incb;
741  }
742 /*
743 * I am done with this column of the LCM table. Go to the next column ...
744 */
745  lcmt00 += qnb; nblks--; A += nbloc * inca;
746 /*
747 * ... until there are no more columns.
748 */
749  } while( nblks > 0 );
750 /*
751 * Return the number of diagonals found.
752 */
753  return( npq );
754  }
755 /*
756 * End of PB_CVMloc
757 */
758 }
CCONJG
#define CCONJG
Definition: PBblas.h:21
TYPE
#define TYPE
Definition: clamov.c:7
PB_VM_T::imb1
int imb1
Definition: pblas.h:438
PB_VM_T::pcol
int pcol
Definition: pblas.h:456
PB_VM_T::lcmt00
int lcmt00
Definition: pblas.h:435
PB_VM_T::inbloc
int inbloc
Definition: pblas.h:450
PB_VM_T::iupp
int iupp
Definition: pblas.h:443
CPACKING
#define CPACKING
Definition: PBtools.h:50
PB_VM_T::imbloc
int imbloc
Definition: pblas.h:439
PB_VM_T::prow
int prow
Definition: pblas.h:445
PB_VM_T::inb1
int inb1
Definition: pblas.h:449
PB_VM_T::low
int low
Definition: pblas.h:455
PB_VM_T::npcol
int npcol
Definition: pblas.h:457
PB_VM_T::mblks
int mblks
Definition: pblas.h:442
PB_VM_T::lmbloc
int lmbloc
Definition: pblas.h:441
CROW
#define CROW
Definition: PBblacs.h:21
PB_VM_T::upp
int upp
Definition: pblas.h:444
PB_VM_T::nprow
int nprow
Definition: pblas.h:446
MMADD_T
F_VOID_FCT(* MMADD_T)()
Definition: pblas.h:284
CNOTRAN
#define CNOTRAN
Definition: PBblas.h:18
PB_VM_T::nblks
int nblks
Definition: pblas.h:453
PB_VM_T
Definition: pblas.h:432
MIN
#define MIN(a_, b_)
Definition: PBtools.h:76
PB_VM_T::lnbloc
int lnbloc
Definition: pblas.h:452
PB_VM_T::ilow
int ilow
Definition: pblas.h:454
MAX
#define MAX(a_, b_)
Definition: PBtools.h:77
PBTYP_T
Definition: pblas.h:325
Mupcase
#define Mupcase(C)
Definition: PBtools.h:83
CTRAN
#define CTRAN
Definition: PBblas.h:20
PB_CVMloc
int PB_CVMloc(PBTYP_T *TYPE, PB_VM_T *VM, char *VROCS, char *ROCS, char *UNPA, char *TRANS, int MN, int K, char *ALPHA, char *A, int LDA, char *BETA, char *B, int LDB)
Definition: PB_CVMloc.c:25
PB_VM_T::mb
int mb
Definition: pblas.h:440
PB_VM_T::nb
int nb
Definition: pblas.h:451