/* ---------------------------------------------------------------------
*
*  -- PBLAS auxiliary routine (version 2.0) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     April 1, 1998
*
*  ---------------------------------------------------------------------
*/
/*
*  Include files
*/
#include "../pblas.h"
#include "../PBpblas.h"
#include "../PBtools.h"
#include "../PBblacs.h"
#include "../PBblas.h"

/*
*  PB_infog2l_dim is a file-private helper that performs, for a single
*  dimension (rows or columns), the global to local index computation of
*  PB_Cinfog2l.
*
*  IG      (input)  global starting index in this dimension.
*  INB     (input)  size of the first block in this dimension
*                   (DESC[IMB_] for rows, DESC[INB_] for columns).
*  NB      (input)  size of every following block in this dimension
*                   (DESC[MB_] for rows, DESC[NB_] for columns).  NB is
*                   only used when the data is distributed over more than
*                   one process and IG lies beyond the first block.
*  SRC     (input)  coordinate of the process owning the first block
*                   (DESC[RSRC_] or DESC[CSRC_]), or -1 when the data is
*                   not distributed in this dimension.
*  NPROCS  (input)  number of processes in this dimension of the grid.
*  MYPROC  (input)  coordinate of the process whose local index is wanted.
*  IL      (output) local starting index in process MYPROC.
*  OWNER   (output) coordinate of the process owning global index IG; SRC
*                   is returned unchanged when SRC is -1, when NPROCS is
*                   one, or when IG lies in the first block.
*/
#ifdef __STDC__
static void PB_infog2l_dim( int IG, int INB, int NB, int SRC, int NPROCS,
                            int MYPROC, int * IL, int * OWNER )
#else
static void PB_infog2l_dim( IG, INB, NB, SRC, NPROCS, MYPROC, IL, OWNER )
   int            IG, * IL, INB, MYPROC, NB, NPROCS, * OWNER, SRC;
#endif
{
/*
*  .. Local Scalars ..
*/
   int            ilocblk, mydist, nblocks, offset, owner;
/* ..
*  .. Executable Statements ..
*
*/
   *OWNER = SRC;

   if( ( SRC == -1 ) || ( NPROCS == 1 ) )
   {
/*
*  The data is not distributed, or there is just one process in this
*  dimension of the grid: local and global indexes coincide.
*/
      *IL = IG;
      return;
   }

   if( IG < INB )
   {
/*
*  IG refers to an entry in the first block, which the source process owns.
*/
      *IL = ( MYPROC == SRC ? IG : 0 );
      return;
   }
/*
*  IG refers to an entry that is not in the first block.  nblocks is the
*  global coordinate of the block IG belongs to (the first block being
*  block 0, so nblocks is at least one).  The process owning that block is
*  nblocks steps away from the source process, modulo NPROCS.
*/
   offset  = IG - INB;
   nblocks = offset / NB + 1;
   owner   = SRC + nblocks;
   owner  -= ( owner / NPROCS ) * NPROCS;
   *OWNER  = owner;

   if( MYPROC == SRC )
   {
/*
*  I am the source process, i.e. my distance from the source is zero, and
*  my first local block has size INB.  With ilocblk = nblocks / NPROCS,
*  there are three possible cases:
*
*  1) nblocks is a multiple of NPROCS and I do not own IG, in which case
*     IL = INB + ( ilocblk - 1 ) * NB.
*
*  2) nblocks is a multiple of NPROCS and I own IG, in which case IG and
*     IL can respectively be written as INB + ( nblocks - 1 ) * NB + L and
*     INB + ( ilocblk - 1 ) * NB + L.  That is,
*     IL = IG + ( ilocblk - nblocks ) * NB.
*
*  3) nblocks is not a multiple of NPROCS: I own the first block and
*     ilocblk full blocks of size NB ahead of IG, and therefore
*     IL = INB + ilocblk * NB.  When nblocks < NPROCS, ilocblk is zero and
*     IL is just INB.
*/
      if( nblocks < NPROCS )
      {
         *IL = INB;
      }
      else
      {
         ilocblk = nblocks / NPROCS;
         if( ilocblk * NPROCS >= nblocks )
         {
            *IL = ( ( MYPROC == owner ) ?
                    IG  + ( ilocblk - nblocks ) * NB :
                    INB + ( ilocblk - 1       ) * NB );
         }
         else
         {
            *IL = INB + ilocblk * NB;
         }
      }
   }
   else
   {
/*
*  Compute my distance from the source process so that within this process
*  coordinate system, the source process is the process such that mydist=0.
*/
      if( ( mydist = MYPROC - SRC ) < 0 ) mydist += NPROCS;
/*
*  ilocblk is the minimum number of blocks of size NB that every process
*  has ahead of IG; it is zero when nblocks < NPROCS.
*
*  When mydist < nblocks - ilocblk * NPROCS, I own ilocblk + 1 full blocks
*  of size NB since I am not the source process, i.e.
*  IL = ( ilocblk + 1 ) * NB.
*  When mydist >= nblocks - ilocblk * NPROCS and I do not own IG, I own
*  ilocblk full blocks of size NB, i.e. IL = ilocblk * NB.  Otherwise I own
*  ilocblk blocks and IG, in which case IG can be written as
*  INB + ( nblocks - 1 ) * NB + L and
*  IL = ilocblk * NB + L = IG - INB + ( ilocblk - nblocks + 1 ) * NB.
*/
      ilocblk = ( ( nblocks < NPROCS ) ? 0 : nblocks / NPROCS );
      mydist -= nblocks - ilocblk * NPROCS;

      *IL = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * NB :
              ( ( MYPROC == owner ) ?
                ( ilocblk - nblocks + 1 ) * NB + offset : ilocblk * NB ) );
   }
/*
*  End of PB_infog2l_dim
*/
}

#ifdef __STDC__
void PB_Cinfog2l( int I, int J, int * DESC, int NPROW, int NPCOL,
                  int MYROW, int MYCOL, int * II, int * JJ,
                  int * PROW, int * PCOL )
#else
void PB_Cinfog2l( I, J, DESC, NPROW, NPCOL, MYROW, MYCOL, II, JJ,
                  PROW, PCOL )
/*
*  .. Scalar Arguments ..
*/
   int            I, * II, J, * JJ, MYCOL, MYROW, NPCOL, NPROW, * PCOL,
                  * PROW;
/*
*  .. Array Arguments ..
*/
   int            * DESC;
#endif
{
/*
*  Purpose
*  =======
*
*  PB_Cinfog2l computes the starting local index II, JJ corresponding to
*  the submatrix starting globally at the entry pointed by I, J.  This
*  routine returns the coordinates in the grid of the process owning the
*  matrix entry of global indexes I, J, namely PROW and PCOL.
*
*  Notes
*  =====
*
*  A description vector is associated with each 2D block-cyclicly dis-
*  tributed matrix.  This vector stores the information required to
*  establish the mapping between a matrix entry and its corresponding
*  process and memory location.
*
*  In the following comments, the character _ should be read as
*  "of the distributed matrix".  Let A be a generic term for any 2D
*  block cyclicly distributed matrix.  Its description vector is DESC_A:
*
*  NOTATION         STORED IN       EXPLANATION
*  ---------------- --------------- ------------------------------------
*  DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
*  CTXT_A  (global) DESCA[ CTXT_  ] The BLACS context handle, indicating
*                                   the NPROW x NPCOL BLACS process grid
*                                   A  is  distributed over. The context
*                                   itself  is  global,  but  the handle
*                                   (the integer value) may vary.
*  M_A     (global) DESCA[ M_     ] The  number of rows in the distribu-
*                                   ted matrix A, M_A >= 0.
*  N_A     (global) DESCA[ N_     ] The number of columns in the distri-
*                                   buted matrix A, N_A >= 0.
*  IMB_A   (global) DESCA[ IMB_   ] The number of rows of the upper left
*                                   block of the matrix A, IMB_A > 0.
*  INB_A   (global) DESCA[ INB_   ] The  number  of columns of the upper
*                                   left   block   of   the  matrix   A,
*                                   INB_A > 0.
*  MB_A    (global) DESCA[ MB_    ] The blocking factor used to  distri-
*                                   bute the last  M_A-IMB_A  rows of A,
*                                   MB_A > 0.
*  NB_A    (global) DESCA[ NB_    ] The blocking factor used to  distri-
*                                   bute the last  N_A-INB_A  columns of
*                                   A, NB_A > 0.
*  RSRC_A  (global) DESCA[ RSRC_  ] The process row over which the first
*                                   row of the matrix  A is distributed,
*                                   NPROW > RSRC_A >= 0.
*  CSRC_A  (global) DESCA[ CSRC_  ] The  process column  over  which the
*                                   first column of  A  is  distributed,
*                                   NPCOL > CSRC_A >= 0.
*  LLD_A   (local)  DESCA[ LLD_   ] The  leading dimension  of the local
*                                   array  storing  the  local blocks of
*                                   the distributed matrix A,
*                                   IF( Lc( 1, N_A ) > 0 )
*                                      LLD_A >= MAX( 1, Lr( 1, M_A ) )
*                                   ELSE
*                                      LLD_A >= 1.
*
*  Let K be the number of rows of a matrix A starting at the global in-
*  dex IA, i.e., A( IA:IA+K-1, : ).  Lr( IA, K ) denotes the number of
*  rows that the process of row coordinate MYROW ( 0 <= MYROW < NPROW )
*  would receive if these K rows were distributed over NPROW processes.
*  If K is the number of columns of a matrix A starting at the global
*  index JA, i.e., A( :, JA:JA+K-1 ), Lc( JA, K ) denotes the number of
*  columns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
*  these K columns were distributed over NPCOL processes.
*
*  The values of Lr() and Lc() may be determined via a call to the func-
*  tion PB_Cnumroc:
*  Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
*  Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
*
*  Arguments
*  =========
*
*  I       (global input) INTEGER
*          On entry, I  specifies the global starting row index of the
*          submatrix.  I must be at least zero.
*
*  J       (global input) INTEGER
*          On entry, J  specifies the global starting column index of
*          the submatrix.  J must be at least zero.
*
*  DESC    (global and local input) INTEGER array
*          On entry, DESC  is an integer array of dimension DLEN_. This
*          is the array descriptor of the underlying matrix.
*
*  NPROW   (global input) INTEGER
*          On entry, NPROW  specifies the total number of process rows
*          over which the matrix is distributed.  NPROW must be at least
*          one.
*
*  NPCOL   (global input) INTEGER
*          On entry, NPCOL  specifies the total number of process columns
*          over which the matrix is distributed.  NPCOL must be at least
*          one.
*
*  MYROW   (local input) INTEGER
*          On entry, MYROW  specifies the row coordinate of the process
*          whose local index II is determined.  MYROW must be at least
*          zero and strictly less than NPROW.
*
*  MYCOL   (local input) INTEGER
*          On entry, MYCOL  specifies the column coordinate of the pro-
*          cess whose local index JJ is determined.  MYCOL must be at
*          least zero and strictly less than NPCOL.
*
*  II      (local output) INTEGER
*          On exit, II  specifies the local starting row index of the
*          submatrix.  On exit, II is at least zero.
*
*  JJ      (local output) INTEGER
*          On exit, JJ  specifies the local starting column index of the
*          submatrix.  On exit, JJ is at least zero.
*
*  PROW    (global output) INTEGER
*          On exit, PROW  specifies the row coordinate of the process
*          that possesses the first row of the submatrix.  On exit, PROW
*          is -1 if DESC( RSRC_ ) is -1 on input, and, at least zero
*          and strictly less than NPROW otherwise.
*
*  PCOL    (global output) INTEGER
*          On exit, PCOL  specifies the column coordinate of the process
*          that possesses the first column of the submatrix.  On exit,
*          PCOL is -1 if DESC( CSRC_ ) is -1 on input, and, at least
*          zero and strictly less than NPCOL otherwise.
*
*  -- Written on April 1, 1998 by
*     Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
*
*  ---------------------------------------------------------------------
*/
/* ..
*  .. Executable Statements ..
*
*/
/*
*  Rows: first block of IMB rows, following blocks of MB rows, first block
*  owned by process row RSRC.
*/
   PB_infog2l_dim( I, DESC[IMB_], DESC[MB_], DESC[RSRC_], NPROW, MYROW,
                   II, PROW );
/*
*  Idem for the columns
*/
   PB_infog2l_dim( J, DESC[INB_], DESC[NB_], DESC[CSRC_], NPCOL, MYCOL,
                   JJ, PCOL );
/*
*  End of PB_Cinfog2l
*/
}