/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ int PB_CVMswp( PBTYP_T * TYPE, PB_VM_T * VM, char * VROCS, char * ROCS, char * TRANS, int MN, char * X, int INCX, char * Y, int INCY ) #else int PB_CVMswp( TYPE, VM, VROCS, ROCS, TRANS, MN, X, INCX, Y, INCY ) /* * .. Scalar Arguments .. */ int INCX, INCY, MN; /* * .. Array Arguments .. */ char * VROCS, * ROCS, * TRANS; PBTYP_T * TYPE; PB_VM_T * VM; char * X, * Y; #endif { /* * Purpose * ======= * * PB_CVMswp swaps a one-dimensional distributed vector X with another * one-dimensional distributed vector Y. This operation is triggered by * a virtual distributed array. * * Arguments * ========= * * TYPE (local input) pointer to a PBTYP_T structure * On entry, TYPE is a pointer to a structure of type PBTYP_T, * that contains type information (See pblas.h). * * VM (local input) pointer to a PB_VM_T structure * On entry, VM is a pointer to a structure of type PB_VM_T, * that contains the virtual matrix information (see pblas.h). * * VROCS (local input) pointer to CHAR * On entry, VROCS specifies if the rows or columns of the vir- * tual distributed array grid should be used for the swapping * operation as follows: * VROCS = 'R' or 'r', the rows should be used, * VROCS = 'C' or 'c', the columns should be used. * * ROCS (local input) pointer to CHAR * On entry, ROCS specifies if rows or columns should be swap- * ped as follows: * ROCS = 'R' or 'r', rows should be swapped, * ROCS = 'C' or 'c', columns should be swapped. * * TRANS (local input) pointer to CHAR * On entry, TRANS specifies if transposition should occur du- * ring the swapping operation as follows: * TRANS = 'N' or 'n', natural swapping, * otherwise, transposed swapping. * * MN (local input) INTEGER * On entry, MN specifies the number of rows or columns to be * swapped. MN must be at least zero. * * X (local input/local output) pointer to CHAR * On entry, X points to an array of dimension at least * ( 1 + ( n - 1 )*abs( INCX ) ) where n is IMBLOC+(MBLKS-2)*MB+ * LMB when VROCS is 'R' or 'r', and INBLOC+(NBLKS-2)*NB+LNB * otherwise. Before entry, the incremented array X must contain * the vector x. On exit, the entries of the incremented array X * are exchanged with the entries of the incremented array Y. * * INCX (local input) INTEGER * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * Y (local input/local output) pointer to CHAR * On entry, Y points to an array of dimension at least * ( 1 + ( n - 1 )*abs( INCY ) ) where n is IMBLOC+(MBLKS-2)*MB+ * LMB when VROCS is 'C' or 'c', and INBLOC+(NBLKS-2)*NB+LNB * otherwise. Before entry, the incremented array Y must contain * the vector y. On exit, the entries of the incremented array Y * are exchanged with the entries of the incremented array X. * * INCY (local input) INTEGER * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int GoEast, GoSouth, Xinc, Yinc, ilow, imbloc, inbloc, iupp, kb, lcmt, lcmt00, lmbloc, lnbloc, low, mb, mblkd, mblks, mbloc, nb, nblkd, nblks, nbloc, notran, npcol, npq=0, nprow, pmb, qnb, rows, size, tmp1, tmp2, upp; char * Xptrd, * Yptrd; /* .. * .. Executable Statements .. * */ mblks = VM->mblks; nblks = VM->nblks; /* * Quick return if I don't own any blocks. */ if( ( mblks == 0 ) || ( nblks == 0 ) ) return( 0 ); /* * Retrieve the contents of VM structure fields */ lcmt00 = VM->lcmt00; imbloc = VM->imbloc; mb = VM->mb; lmbloc = VM->lmbloc; upp = VM->upp; iupp = VM->iupp; nprow = VM->nprow; inbloc = VM->inbloc; nb = VM->nb; lnbloc = VM->lnbloc; low = VM->low; ilow = VM->ilow; npcol = VM->npcol; notran = ( Mupcase( TRANS[0] ) == CNOTRAN ); size = TYPE->size; rows = ( Mupcase( ROCS[0] ) == CROW ); if( Mupcase( VROCS[0] ) == CROW ) { /* * (un)packing using rows of virtual matrix */ if( rows ) { /* * (un)packing rows of mn by k array A. */ Xinc = size; Yinc = ( notran ? size : INCY * size ); } else { /* * (un)packing columns of k by mn array A */ Xinc = INCX * size; Yinc = ( notran ? INCY * size : size ); } kb = MN; /* * From the (un)packing point of view the only valuable shortcut is when the * virtual grid and the blocks are square, and the offset is zero or the grid * is 1x1. */ if( ( ( lcmt00 == 0 ) && ( VM->imb1 == VM->inb1 ) && ( mb == nb ) && ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) ) { if( VM->prow == VM->pcol ) { npq = ( ( mblks < 2 ) ? imbloc : imbloc + ( mblks - 2 ) * mb + lmbloc ); npq = MIN( npq, kb ); if( rows ) TYPE->Fswap( &npq, X, &INCX, Y, &INCY ); else TYPE->Fswap( &npq, X, &INCX, Y, &INCY ); } return( npq ); } pmb = nprow * mb; qnb = npcol * nb; /* * Handle separately the first row and/or column of the LCM table. Update the * LCM value of the curent block lcmt00, as well as the number of rows and * columns mblks and nblks remaining in the LCM table. */ GoSouth = ( lcmt00 > iupp ); GoEast = ( lcmt00 < ilow ); if( !( GoSouth ) && !( GoEast ) ) { /* * The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp */ if( lcmt00 >= 0 ) { tmp1 = imbloc - lcmt00; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, X+lcmt00*Xinc, &INCX, Y, &INCY ); } else { tmp1 = inbloc + lcmt00; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, X, &INCX, Y-lcmt00*Yinc, &INCY ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); /* * Decide whether one should go south or east in the table: Go east if * the block below the current one only owns lower entries. If this block, * however, owns diagonals, then go south. */ GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) ); } if( GoSouth ) { /* * Go one step south in the LCM table. Adjust the current LCM value as well as * the pointer to X. The pointer to Y remains unchanged. */ lcmt00 -= iupp - upp + pmb; mblks--; X += imbloc * Xinc; /* * While there are blocks remaining that own upper entries, keep going south. * Adjust the current LCM value as well as the pointer to X accordingly. */ while( mblks && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; X += mb * Xinc; } /* * Return if no more row in the LCM table. */ if( mblks <= 0 ) return( npq ); /* * lcmt00 <= upp. The current block owns either diagonals or lower entries. * Save the current position in the LCM table. After this column has been * completely taken care of, re-start from this row and the next column of * the LCM table. */ lcmt = lcmt00; mblkd = mblks; Xptrd = X; while( mblkd && ( lcmt >= ilow ) ) { /* * A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found. */ mbloc = ( ( mblkd == 1 ) ? lmbloc : mb ); if( lcmt >= 0 ) { tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, Xptrd+lcmt*Xinc, &INCX, Y, &INCY ); } else { tmp1 = inbloc + lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, Xptrd, &INCX, Y-lcmt*Yinc, &INCY ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); /* * Keep going south until there are no more blocks owning diagonals */ lcmt -= pmb; mblkd--; Xptrd += mbloc * Xinc; } /* * I am done with the first column of the LCM table. Go to the next column. */ lcmt00 += low - ilow + qnb; nblks--; Y += inbloc * Yinc; } else if( GoEast ) { /* * Go one step east in the LCM table. Adjust the current LCM value as * well as the pointer to Y. The pointer to X remains unchanged. */ lcmt00 += low - ilow + qnb; nblks--; Y += inbloc * Yinc; /* * While there are blocks remaining that own lower entries, keep going east * in the LCM table. Adjust the current LCM value as well as the pointer to * Y accordingly. */ while( nblks && ( lcmt00 < low ) ) { lcmt00 += qnb; nblks--; Y += nb * Yinc; } /* * Return if no more column in the LCM table. */ if( nblks <= 0 ) return( npq ); /* * lcmt00 >= low. The current block owns either diagonals or upper entries. Save * the current position in the LCM table. After this row has been completely * taken care of, re-start from this column and the next row of the LCM table. */ lcmt = lcmt00; nblkd = nblks; Yptrd = Y; while( nblkd && ( lcmt <= iupp ) ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found. */ nbloc = ( ( nblkd == 1 ) ? lnbloc : nb ); if( lcmt >= 0 ) { tmp1 = imbloc - lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, X+lcmt*Xinc, &INCX, Yptrd, &INCY ); } else { tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, X, &INCX, Yptrd-lcmt*Yinc, &INCY ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); /* * Keep going east until there are no more blocks owning diagonals. */ lcmt += qnb; nblkd--; Yptrd += nbloc * Yinc; } /* * I am done with the first row of the LCM table. Go to the next row. */ lcmt00 -= iupp - upp + pmb; mblks--; X += imbloc * Xinc; } /* * Loop over the remaining columns of the LCM table. */ do { /* * If the current block does not have diagonal elements, find the closest one in * the LCM table having some. */ if( ( lcmt00 < low ) || ( lcmt00 > upp ) ) { while( mblks && nblks ) { while( mblks && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; X += mb * Xinc; } if( lcmt00 >= low ) break; while( nblks && ( lcmt00 < low ) ) { lcmt00 += qnb; nblks--; Y += nb * Yinc; } if( lcmt00 <= upp ) break; } } if( !mblks || !nblks ) return( npq ); /* * The current block owns diagonals. Save the current position in the LCM table. * After this column has been completely taken care of, re-start from this row * and the next column in the LCM table. */ nbloc = ( ( nblks == 1 ) ? lnbloc : nb ); lcmt = lcmt00; mblkd = mblks; Xptrd = X; while( mblkd && lcmt >= low ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found. */ mbloc = ( ( mblkd == 1 ) ? lmbloc : mb ); if( lcmt >= 0 ) { tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, Xptrd+lcmt*Xinc, &INCX, Y, &INCY ); } else { tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, Xptrd, &INCX, Y-lcmt*Yinc, &INCY ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); /* * Keep going south until there are no more blocks owning diagonals */ lcmt -= pmb; mblkd--; Xptrd += mbloc * Xinc; } /* * I am done with this column of the LCM table. Go to the next column ... */ lcmt00 += qnb; nblks--; Y += nbloc * Yinc; /* * ... until there are no more columns. */ } while( nblks > 0 ); /* * Return the number of diagonals found. */ return( npq ); } else { /* * (un)packing using columns of virtual matrix */ if( rows ) { /* * (un)packing rows of mn by k array A */ Xinc = size; Yinc = ( notran ? size : INCY * size ); } else { /* * (un)packing columns of k by mn array A */ Xinc = INCX * size; Yinc = ( notran ? INCY * size : size ); } kb = MN; /* * From the (un)packing point of view the only valuable shortcut is when the * virtual grid and the blocks are square, and the offset is zero or the grid * is 1x1. */ if( ( ( lcmt00 == 0 ) && ( VM->imb1 == VM->inb1 ) && ( mb == nb ) && ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) ) { if( VM->prow == VM->pcol ) { npq = ( ( nblks < 2 ) ? inbloc : inbloc + ( nblks - 2 ) * nb + lnbloc ); npq = MIN( npq, kb ); if( rows ) TYPE->Fswap( &npq, X, &INCX, Y, &INCY ); else TYPE->Fswap( &npq, X, &INCX, Y, &INCY ); } return( npq ); } pmb = nprow * mb; qnb = npcol * nb; /* * Handle separately the first row and/or column of the LCM table. Update the * LCM value of the curent block lcmt00, as well as the number of rows and * columns mblks and nblks remaining in the LCM table. */ GoSouth = ( lcmt00 > iupp ); GoEast = ( lcmt00 < ilow ); if( !( GoSouth ) && !( GoEast ) ) { /* * The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp */ if( lcmt00 >= 0 ) { tmp1 = imbloc - lcmt00; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, X, &INCX, Y+lcmt00*Yinc, &INCY ); } else { tmp1 = inbloc + lcmt00; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, X-lcmt00*Xinc, &INCX, Y, &INCY ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); /* * Decide whether one should go south or east in the table: Go east if * the block below the current one only owns lower entries. If this block, * however, owns diagonals, then go south. */ GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) ); } if( GoSouth ) { /* * Go one step south in the LCM table. Adjust the current LCM value as well as * the pointer to Y. The pointer to X remains unchanged. */ lcmt00 -= iupp - upp + pmb; mblks--; Y += imbloc * Yinc; /* * While there are blocks remaining that own upper entries, keep going south. * Adjust the current LCM value as well as the pointer to Y accordingly. */ while( mblks && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; Y += mb * Yinc; } /* * Return if no more row in the LCM table. */ if( mblks <= 0 ) return( npq ); /* * lcmt00 <= upp. The current block owns either diagonals or lower entries. * Save the current position in the LCM table. After this column has been * completely taken care of, re-start from this row and the next column of * the LCM table. */ lcmt = lcmt00; mblkd = mblks; Yptrd = Y; while( mblkd && ( lcmt >= ilow ) ) { /* * A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found. */ mbloc = ( ( mblkd == 1 ) ? lmbloc : mb ); if( lcmt >= 0 ) { tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, X, &INCX, Yptrd+lcmt*Yinc, &INCY ); } else { tmp1 = inbloc + lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, X-lcmt*Xinc, &INCX, Yptrd, &INCY ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); /* * Keep going south until there are no more blocks owning diagonals */ lcmt -= pmb; mblkd--; Yptrd += mbloc * Yinc; } /* * I am done with the first column of the LCM table. Go to the next column. */ lcmt00 += low - ilow + qnb; nblks--; X += inbloc * Xinc; } else if( GoEast ) { /* * Go one step east in the LCM table. Adjust the current LCM value as * well as the pointer to X. The pointer to Y remains unchanged. */ lcmt00 += low - ilow + qnb; nblks--; X += inbloc * Xinc; /* * While there are blocks remaining that own lower entries, keep going east * in the LCM table. Adjust the current LCM value as well as the pointer to * X accordingly. */ while( nblks && ( lcmt00 < low ) ) { lcmt00 += qnb; nblks--; X += nb * Xinc; } /* * Return if no more column in the LCM table. */ if( nblks <= 0 ) return( npq ); /* * lcmt00 >= low. The current block owns either diagonals or upper entries. Save * the current position in the LCM table. After this row has been completely * taken care of, re-start from this column and the next row of the LCM table. */ lcmt = lcmt00; nblkd = nblks; Xptrd = X; while( nblkd && ( lcmt <= iupp ) ) { /* * A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found. */ nbloc = ( ( nblkd == 1 ) ? lnbloc : nb ); if( lcmt >= 0 ) { tmp1 = imbloc - lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, Xptrd, &INCX, Y+lcmt*Yinc, &INCY ); } else { tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, Xptrd-lcmt*Xinc, &INCX, Y, &INCY ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); /* * Keep going east until there are no more blocks owning diagonals. */ lcmt += qnb; nblkd--; Xptrd += nbloc * Xinc; } /* * I am done with the first row of the LCM table. Go to the next row. */ lcmt00 -= iupp - upp + pmb; mblks--; Y += imbloc * Yinc; } /* * Loop over the remaining columns of the LCM table. */ do { /* * If the current block does not have diagonal elements, find the closest one in * the LCM table having some. */ if( ( lcmt00 < low ) || ( lcmt00 > upp ) ) { while( mblks && nblks ) { while( mblks && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; Y += mb * Yinc; } if( lcmt00 >= low ) break; while( nblks && ( lcmt00 < low ) ) { lcmt00 += qnb; nblks--; X += nb * Xinc; } if( lcmt00 <= upp ) break; } } if( !( mblks ) || !( nblks ) ) return( npq ); /* * The current block owns diagonals. Save the current position in the LCM table. * After this column has been completely taken care of, re-start from this row * and the next column in the LCM table. */ nbloc = ( ( nblks == 1 ) ? lnbloc : nb ); lcmt = lcmt00; mblkd = mblks; Yptrd = Y; /* * A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found. */ while( mblkd && lcmt >= low ) { mbloc = ( ( mblkd == 1 ) ? lmbloc : mb ); if( lcmt >= 0 ) { tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, X, &INCX, Yptrd+lcmt*Yinc, &INCY ); } else { tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 ); tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) ); TYPE->Fswap( &tmp2, X-lcmt*Xinc, &INCX, Yptrd, &INCY ); } if( ( kb -= tmp2 ) == 0 ) return( npq ); /* * Keep going south until there are no more blocks owning diagonals */ lcmt -= pmb; mblkd--; Yptrd += mbloc * Yinc; } /* * I am done with this column of the LCM table. Go to the next column ... */ lcmt00 += qnb; nblks--; X += nbloc * Xinc; /* * ... until there are no more columns. */ } while( nblks > 0 ); /* * Return the number of diagonals found. */ return( npq ); } /* * End of PB_CVMswp */ }