/* --------------------------------------------------------------------- * * -- PBLAS auxiliary routine (version 2.0) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * April 1, 1998 * * --------------------------------------------------------------------- */ /* * Include files */ #include "../pblas.h" #include "../PBpblas.h" #include "../PBtools.h" #include "../PBblacs.h" #include "../PBblas.h" #ifdef __STDC__ void PB_Cainfog2l( int M, int N, int I, int J, int * DESC, int NPROW, int NPCOL, int MYROW, int MYCOL, int * IMB1, int * INB1, int * MP, int * NQ, int * II, int * JJ, int * PROW, int * PCOL, int * RPROW, int * RPCOL ) #else void PB_Cainfog2l( M, N, I, J, DESC, NPROW, NPCOL, MYROW, MYCOL, IMB1, INB1, MP, NQ, II, JJ, PROW, PCOL, RPROW, RPCOL ) /* * .. Scalar Arguments .. */ int I, * II, * IMB1, * INB1, J, * JJ, M, * MP, MYCOL, MYROW, N, NPCOL, NPROW, * NQ, * PCOL, * PROW, * RPCOL, * RPROW; /* * .. Array Arguments .. */ int * DESC; #endif { /* * Purpose * ======= * * PB_Cainfog2l computes the starting local row and column indexes II, * JJ corresponding to the submatrix starting globally at the entry * pointed by I, J. This routine returns the coordinates in the grid of * the process owning the matrix entry of global indexes I, J, namely * PROW and PCOL. In addition, this routine computes the quantities MP * and NQ, which are respectively the local number of rows and columns * owned by the process of coordinate MYROW, MYCOL corresponding to the * global submatrix A(I:I+M-1,J:J+N-1). Finally, the size of the first * partial block and the relative process coordinates are also returned * respectively in IMB, INB and RPROW, RPCOL. * * Notes * ===== * * A description vector is associated with each 2D block-cyclicly dis- * tributed matrix. This vector stores the information required to * establish the mapping between a matrix entry and its corresponding * process and memory location. * * In the following comments, the character _ should be read as * "of the distributed matrix". Let A be a generic term for any 2D * block cyclicly distributed matrix. Its description vector is DESC_A: * * NOTATION STORED IN EXPLANATION * ---------------- --------------- ------------------------------------ * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating * the NPROW x NPCOL BLACS process grid * A is distributed over. The context * itself is global, but the handle * (the integer value) may vary. * M_A (global) DESCA[ M_ ] The number of rows in the distribu- * ted matrix A, M_A >= 0. * N_A (global) DESCA[ N_ ] The number of columns in the distri- * buted matrix A, N_A >= 0. * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left * block of the matrix A, IMB_A > 0. * INB_A (global) DESCA[ INB_ ] The number of columns of the upper * left block of the matrix A, * INB_A > 0. * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- * bute the last M_A-IMB_A rows of A, * MB_A > 0. * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- * bute the last N_A-INB_A columns of * A, NB_A > 0. * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first * row of the matrix A is distributed, * NPROW > RSRC_A >= 0. * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the * first column of A is distributed. * NPCOL > CSRC_A >= 0. * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local * array storing the local blocks of * the distributed matrix A, * IF( Lc( 1, N_A ) > 0 ) * LLD_A >= MAX( 1, Lr( 1, M_A ) ) * ELSE * LLD_A >= 1. * * Let K be the number of rows of a matrix A starting at the global in- * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would * receive if these K rows were distributed over NPROW processes. If K * is the number of columns of a matrix A starting at the global index * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if * these K columns were distributed over NPCOL processes. * * The values of Lr() and Lc() may be determined via a call to the func- * tion PB_Cnumroc: * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) * * Arguments * ========= * * M (global input) INTEGER * On entry, M specifies the global number of rows of the subma- * trix. M must be at least zero. * * N (global input) INTEGER * On entry, N specifies the global number of columns of the * submatrix. N must be at least zero. * * I (global input) INTEGER * On entry, I specifies the global starting row index of the * submatrix. I must at least zero. * * J (global input) INTEGER * On entry, J specifies the global starting column index of * the submatrix. J must at least zero. * * DESC (global and local input) INTEGER array * On entry, DESC is an integer array of dimension DLEN_. This * is the array descriptor of the underlying matrix. * * NPROW (global input) INTEGER * On entry, NPROW specifies the total number of process rows * over which the matrix is distributed. NPROW must be at least * one. * * NPCOL (global input) INTEGER * On entry, NPCOL specifies the total number of process columns * over which the matrix is distributed. NPCOL must be at least * one. * * MYROW (local input) INTEGER * On entry, MYROW specifies the row coordinate of the process * whose local index II is determined. MYROW must be at least * zero and strictly less than NPROW. * * MYCOL (local input) INTEGER * On entry, MYCOL specifies the column coordinate of the pro- * cess whose local index JJ is determined. MYCOL must be at * least zero and strictly less than NPCOL. * * IMB1 (global output) INTEGER * On exit, IMB1 specifies the number of rows of the upper left * block of the submatrix. On exit, IMB1 is less or equal than * M and greater or equal than MIN( 1, M ). * * INB1 (global output) INTEGER * On exit, INB1 specifies the number of columns of the upper * left block of the submatrix. On exit, INB1 is less or equal * than N and greater or equal than MIN( 1, N ). * * MP (local output) INTEGER * On exit, MP specifies the local number of rows of the subma- * trix, that the processes of row coordinate MYROW own. MP is * at least zero. * * NQ (local output) INTEGER * On exit, NQ specifies the local number of columns of the * submatrix, that the processes of column coordinate MYCOL * own. NQ is at least zero. * * II (local output) INTEGER * On exit, II specifies the local starting row index of the * submatrix. On exit, II is at least zero. * * JJ (local output) INTEGER * On exit, JJ specifies the local starting column index of * the submatrix. On exit, II is at least zero. * * PROW (global output) INTEGER * On exit, PROW specifies the row coordinate of the process * that possesses the first row of the submatrix. On exit, PROW * is -1 if DESC(RSRC_) is -1 on input, and, at least zero and * strictly less than NPROW otherwise. * * PCOL (global output) INTEGER * On exit, PCOL specifies the column coordinate of the process * that possesses the first column of the submatrix. On exit, * PCOL is -1 if DESC(CSRC_) is -1 on input, and, at least zero * and strictly less than NPCOL otherwise. * * RPROW (global output) INTEGER * On exit, RPROW specifies the relative row coordinate of the * process that possesses the first row I of the submatrix. On * exit, RPROW is -1 if DESC(RSRC_) is -1 on input, and, at * least zero and strictly less than NPROW otherwise. * * RPCOL (global output) INTEGER * On exit, RPCOL specifies the relative column coordinate of * the process that possesses the first column J of the subma- * trix. On exit, RPCOL is -1 if DESC(CSRC_) is -1 on input, * and, at least zero and strictly less than NPCOL otherwise. * * -- Written on April 1, 1998 by * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. * * --------------------------------------------------------------------- */ /* * .. Local Scalars .. */ int i1, ilocblk, j1, mb, mydist, nb, nblocks, csrc, rsrc; /* .. * .. Executable Statements .. * */ /* * Retrieve the row distribution parameters */ mb = DESC[ MB_ ]; rsrc = DESC[ RSRC_ ]; if( ( rsrc == -1 ) || ( NPROW == 1 ) ) { /* * The rows are not distributed, or there is just one process row in the grid. * Therefore, the local and global indexes are the same, as well as the local * and global number of rows. Finally, the relative row process coordinate is * zero, since every process owns all rows. Note that the size of the first * row block can be zero only if M is zero. */ *II = I; if( ( *IMB1 = DESC[IMB_] - I ) <= 0 ) *IMB1 += ( ( -(*IMB1) ) / mb + 1 ) * mb; *IMB1 = MIN( *IMB1, M ); *MP = M; *PROW = rsrc; *RPROW = 0; } else { /* * Figure out PROW, II and IMB1 first. */ *IMB1 = DESC[IMB_]; if( I < *IMB1 ) /* Is I in first block range ? */ { /* * If I is in the first block of rows, then PROW is simply rsrc, II is I in * this process and zero elsewhere, and the size of the first block is the * IMB complement. */ *PROW = rsrc; *II = ( ( MYROW == *PROW ) ? I : 0 ); *IMB1 -= I; } else { /* * The discussion goes as follows: compute my distance from the source process * so that within this process coordinate system, the source row process is the * process such that mydist=0, or equivalently MYROW == rsrc. * * Find out the global coordinate of the block of rows I belongs to (nblocks), * as well as the minimum local number of row blocks that every process has. * * when mydist < nblocks - ilocblk * NPROW, I own ilocblk + 1 full blocks, * when mydist > nblocks - ilocblk * NPROW, I own ilocblk full blocks, * when mydist = nblocks - ilocblk * NPROW, I own ilocblk full blocks * but not I, or I own ilocblk + 1 blocks and the entry I refers to. */ i1 = I - *IMB1; if( MYROW == rsrc ) { /* * I refers to an entry that is not in the first block, find out which process * has it. */ nblocks = i1 / mb + 1; *PROW = rsrc + nblocks; *PROW -= ( *PROW / NPROW ) * NPROW; /* * Since mydist = 0 and nblocks - ilocblk * NPROW >= 0, there are only three * possible cases: * * 1) When 0 = mydist = nblocks - ilocblk * NPROW = 0 and I don't own I, in * which case II = IMB + ( ilocblk - 1 ) * MB. Note that this case cannot * happen when ilocblk is zero, since nblocks is at least one. * * 2) When 0 = mydist = nblocks - ilocblk * NPROW = 0 and I own I, in which * case I and II can respectively be written as IMB + (nblocks-1)*MB + IL * and IMB+(ilocblk-1) * MB + IL. That is II = I + (ilocblk - nblocks)*MB. * Note that this case cannot happen when ilocblk is zero, since nblocks * is at least one. * * 3) mydist = 0 < nblocks - ilocblk * NPROW, the source process owns * ilocblk+1 full blocks, and therefore II = IMB + ilocblk * MB. Note * that when ilocblk is zero, II is just IMB. */ if( nblocks < NPROW ) { *II = *IMB1; } else { ilocblk = nblocks / NPROW; if( ilocblk * NPROW >= nblocks ) { *II = ( ( MYROW == *PROW ) ? I + ( ilocblk - nblocks ) * mb : *IMB1 + ( ilocblk - 1 ) * mb ); } else { *II = *IMB1 + ilocblk * mb; } } } else { /* * I is not in the first block, find out which process has it. */ nblocks = i1 / mb + 1; *PROW = rsrc + nblocks; *PROW -= ( *PROW / NPROW ) * NPROW; /* * Compute my distance from the source process so that within this process * coordinate system, the source process is the process such that mydist=0. */ if( ( mydist = MYROW - rsrc ) < 0 ) mydist += NPROW; /* * When mydist < nblocks - ilocblk * NPROW, I own ilocblk + 1 full blocks of * size MB since I am not the source process, i.e. II = ( ilocblk + 1 ) * MB. * When mydist >= nblocks - ilocblk * NPROW and I don't own I, I own ilocblk * full blocks of size MB, i.e. II = ilocblk * MB, otherwise I own ilocblk * blocks and I, in which case I can be written as IMB + (nblocks-1)*MB + IL * and II = ilocblk*MB + IL = I - IMB + ( ilocblk - nblocks + 1 )*MB. */ if( nblocks < NPROW ) { mydist -= nblocks; *II = ( ( mydist < 0 ) ? mb : ( ( MYROW == *PROW ) ? i1 + ( 1 - nblocks ) * mb : 0 ) ); } else { ilocblk = nblocks / NPROW; mydist -= nblocks - ilocblk * NPROW; *II = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * mb : ( ( MYROW == *PROW ) ? ( ilocblk - nblocks + 1 ) * mb + i1 : ilocblk * mb ) ); } } /* * Update the size of first block */ *IMB1 = nblocks * mb - i1; } /* * Now everything is just like M, I=0, IMB1, MB, PROW, NPROW. The discussion * goes as follows: compute my distance from the source process PROW so that * within this process coordinate system, the source process is the process * such that mydist = 0. Figure out MP. */ if( M <= *IMB1 ) { /* * M <= IMB1: if I am the source process, i.e. I own I (mydist = 0), MP is M * and 0 otherwise. */ *MP = ( ( MYROW == *PROW ) ? M : 0 ); } else { /* * Find out how many full blocks are globally (nblocks) and locally (ilocblk) * in those M entries */ nblocks = ( M - *IMB1 ) / mb + 1; if( MYROW == *PROW ) { /* * Since mydist = 0 and nblocks - ilocblk * NPROW >= 0, there are only two * possible cases: * * 1) When mydist = nblocks - ilocblk * NPROW = 0, that is NPROW divides * the global number of full blocks, then the source process PROW owns * one more block than the other processes; and M can be rewritten as * M = IMB1 + (nblocks-1) * NB + LNB with LNB >= 0 size of the last block. * Similarly, the local value MP corresponding to M can be written as * MP = IMB1 + (ilocblk-1) * MB + LMB = M + ( ilocblk-1 - (nblocks-1) )*MB. * Note that this case cannot happen when ilocblk is zero, since nblocks * is at least one. * * 2) mydist = 0 < nblocks - ilocblk * NPROW, the source process only owns * full blocks, and therefore MP = IMB1 + ilocblk * MB. Note that when * ilocblk is zero, MP is just IMB1. */ if( nblocks < NPROW ) { *MP = *IMB1; } else { ilocblk = nblocks / NPROW; *MP = ( ( nblocks - ilocblk * NPROW ) ? *IMB1 + ilocblk * mb : M + ( ilocblk - nblocks ) * mb ); } } else { /* * Compute my distance from the source process so that within this process * coordinate system, the source process is the process such that mydist=0. */ if( ( mydist = MYROW - *PROW ) < 0 ) mydist += NPROW; /* * When mydist < nblocks - ilocblk * NPROW, I own ilocblk + 1 full blocks of * size MB since I am not the source process, * * when mydist > nblocks - ilocblk * NPROW, I own ilocblk full blocks of * size MB since I am not the source process, * * when mydist = nblocks - ilocblk * NPROW, * either the last block is not full and I own it, in which case * M = IMB1 + (nblocks - 1)*MB + LMB with LNB the size of the last block * such that MB > LMB > 0; the local value MP corresponding to M is given * by MP = ilocblk * MB + LMB = M - IMB1 + ( ilocblk - nblocks + 1 ) * MB; * or the last block is full and I am the first process owning only ilocblk * full blocks of size MB, that is M = IMB + ( nblocks - 1 ) * MB and * MP = ilocblk * MB = M - IMB + ( ilocblk - nblocks + 1 ) * MB. */ if( nblocks < NPROW ) { mydist -= nblocks; *MP = ( ( mydist < 0 ) ? mb : ( ( mydist > 0 ) ? 0 : M - *IMB1 + mb * ( 1 - nblocks ) ) ); } else { ilocblk = nblocks / NPROW; mydist -= nblocks - ilocblk * NPROW; *MP = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * mb : ( ( mydist > 0 ) ? ilocblk * mb : M - *IMB1 + mb * ( ilocblk - nblocks + 1 ) ) ); } } } /* * Finally figure out IMB1 and RPROW. Note that IMB1 can be zero when M = 0. */ *IMB1 = MIN( *IMB1, M ); if( ( *RPROW = MYROW - *PROW ) < 0 ) *RPROW += NPROW; } /* * Idem for the columns */ nb = DESC[ NB_ ]; csrc = DESC[ CSRC_ ]; if( ( csrc == -1 ) || ( NPCOL == 1 ) ) { *JJ = J; if( ( *INB1 = DESC[INB_] - J ) <= 0 ) *INB1 += ( ( -(*INB1) ) / nb + 1 ) * nb; *INB1 = MIN( *INB1, N ); *NQ = N; *PCOL = csrc; *RPCOL = 0; } else { *INB1 = DESC[INB_]; if( J < *INB1 ) { *PCOL = csrc; *JJ = ( ( MYCOL == *PCOL ) ? J : 0 ); *INB1 -= J; } else { j1 = J - *INB1; if( MYCOL == csrc ) { nblocks = j1 / nb + 1; *PCOL = csrc + nblocks; *PCOL -= ( *PCOL / NPCOL ) * NPCOL; if( nblocks < NPCOL ) { *JJ = *INB1; } else { ilocblk = nblocks / NPCOL; if( ilocblk * NPCOL >= nblocks ) { *JJ = ( ( MYCOL == *PCOL ) ? J + ( ilocblk - nblocks ) * nb : *INB1 + ( ilocblk - 1 ) * nb ); } else { *JJ = *INB1 + ilocblk * nb; } } } else { nblocks = j1 / nb + 1; *PCOL = csrc + nblocks; *PCOL -= ( *PCOL / NPCOL ) * NPCOL; if( ( mydist = MYCOL - csrc ) < 0 ) mydist += NPCOL; if( nblocks < NPCOL ) { mydist -= nblocks; *JJ = ( ( mydist < 0 ) ? nb : ( ( MYCOL == *PCOL ) ? j1 + ( 1 - nblocks ) * nb : 0 ) ); } else { ilocblk = nblocks / NPCOL; mydist -= nblocks - ilocblk * NPCOL; *JJ = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * nb : ( ( MYCOL == *PCOL ) ? ( ilocblk - nblocks + 1 ) * nb + j1 : ilocblk * nb ) ); } } *INB1 = nblocks * nb - j1; } if( N <= *INB1 ) { *NQ = ( ( MYCOL == *PCOL ) ? N : 0 ); } else { nblocks = ( N - *INB1 ) / nb + 1; if( MYCOL == *PCOL ) { if( nblocks < NPCOL ) { *NQ = *INB1; } else { ilocblk = nblocks / NPCOL; *NQ = ( ( nblocks - ilocblk * NPCOL ) ? *INB1 + ilocblk * nb : N + ( ilocblk - nblocks ) * nb ); } } else { if( ( mydist = MYCOL - *PCOL ) < 0 ) mydist += NPCOL; if( nblocks < NPCOL ) { mydist -= nblocks; *NQ = ( ( mydist < 0 ) ? nb : ( ( mydist > 0 ) ? 0 : N - *INB1 + nb * ( 1 - nblocks ) ) ); } else { ilocblk = nblocks / NPCOL; mydist -= nblocks - ilocblk * NPCOL; *NQ = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * nb : ( ( mydist > 0 ) ? ilocblk * nb : N - *INB1 + nb * ( ilocblk - nblocks + 1 ) ) ); } } } *INB1 = MIN( *INB1, N ); if( ( *RPCOL = MYCOL - *PCOL ) < 0 ) *RPCOL += NPCOL; } /* * End of PB_Cainfog2l */ }