/* ---------------------------------------------------------------------
*
*  -- PBLAS auxiliary routine (version 2.0) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     April 1, 1998
*
*  ---------------------------------------------------------------------
*/
/*
*  Include files
*/
#include "../pblas.h"
#include "../PBpblas.h"
#include "../PBtools.h"
#include "../PBblacs.h"
#include "../PBblas.h"

#ifdef __STDC__
int PB_Cnumroc( int N, int I, int INB, int NB, int PROC, int SRCPROC,
                int NPROCS )
#else
int PB_Cnumroc( N, I, INB, NB, PROC, SRCPROC, NPROCS )
/*
*  .. Scalar Arguments ..
*/
   int            I, INB, N, NB, NPROCS, PROC, SRCPROC;
#endif
{
/*
*  Purpose
*  =======
*
*  PB_Cnumroc  returns the number of local matrix rows/columns that the
*  process of coordinate PROC receives when N rows/columns are dealt out
*  starting from the global index I.
*
*  Arguments
*  =========
*
*  N       (global input) INTEGER
*          Number of rows/columns being dealt out.  N must be at least
*          zero.
*
*  I       (global input) INTEGER
*          Global index of the first matrix entry being dealt out.  I
*          must be at least zero.
*
*  INB     (global input) INTEGER
*          Size of the first block of the global matrix.  INB must be
*          at least one.
*
*  NB      (global input) INTEGER
*          Size of the blocks used to partition the matrix.  NB must be
*          at least one.
*
*  PROC    (local input) INTEGER
*          Coordinate of the process whose local count is wanted.  PROC
*          must be at least zero and strictly less than NPROCS.
*
*  SRCPROC (global input) INTEGER
*          Coordinate of the process that holds the first row or column
*          of the matrix.  SRCPROC = -1 means that the data is not
*          distributed but replicated; otherwise SRCPROC must be at
*          least zero and strictly less than NPROCS.
*
*  NPROCS  (global input) INTEGER
*          Total number of process rows or columns over which the
*          matrix is distributed.  NPROCS must be at least one.
*
*  Return value
*  ============
*
*  The number of the N dealt rows/columns that are local to PROC.  None
*  of the argument constraints listed above is checked by this routine.
*
*  -- Written on April 1, 1998 by
*     Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
*
*  ---------------------------------------------------------------------
*/
/*
*  .. Local Scalars ..
*
*  inb1  : number of entries of the block containing I that lie at or
*          after I (the "first block" of the shifted problem)
*  src   : coordinate of the process owning global index I
*  nskip : number of block boundaries crossed to reach the block of I
*  nblk  : global block count, ( N - inb1 ) / NB + 1
*  nloc  : nblk / NPROCS, the number of complete rounds over the grid
*  dist  : distance of PROC from src, counted cyclically in [0,NPROCS)
*/
   int            dist, inb1, nblk, nloc, nskip, src;
/* ..
*  .. Executable Statements ..
*
*/
/*
*  Replicated data, or a single process in this grid dimension: every
*  one of the N entries is local.
*/
   if( SRCPROC == -1 ) return N;
   if( NPROCS  ==  1 ) return N;
/*
*  Re-base the problem at I.  When I lies beyond the first block, skip
*  to the block that contains it: advance the owner cyclically and keep
*  only the part of that block located at or after I.
*/
   inb1 = INB - I;
   src  = SRCPROC;

   if( inb1 <= 0 )
   {
      nskip = ( -inb1 ) / NB + 1;
      src   = ( SRCPROC + nskip ) % NPROCS;
      inb1 += nskip * NB;
   }
/*
*  From here on the problem is equivalent to ( N, I=0, inb1, NB, src,
*  NPROCS ).  When the N entries all fit in the first block, its owner
*  gets all of them and every other process gets none.
*/
   if( N <= inb1 )
   {
      if( PROC == src ) return N;
      return 0;
   }
/*
*  At least one entry spills past the first block.
*/
   nblk = ( N - inb1 ) / NB + 1;

   if( PROC == src )
   {
/*
*     Source process ( distance zero ).  With less than one complete
*     round of blocks it owns only the first block.
*/
      if( nblk < NPROCS ) return inb1;

      nloc = nblk / NPROCS;
/*
*     NPROCS does not divide nblk: the source owns the first block and
*     nloc further full blocks of size NB.
*/
      if( ( nblk % NPROCS ) != 0 ) return inb1 + nloc * NB;
/*
*     NPROCS divides nblk: the trailing block of size LNB >= 0 falls on
*     the source.  Writing N = inb1 + (nblk-1)*NB + LNB, the local count
*     inb1 + (nloc-1)*NB + LNB is N + ( nloc - nblk ) * NB.
*/
      return N + ( nloc - nblk ) * NB;
   }
/*
*  Any other process: measure its cyclic distance from the source.
*/
   dist = PROC - src;
   if( dist < 0 ) dist += NPROCS;

   if( nblk < NPROCS )
   {
/*
*     Less than one complete round.  Processes closer than nblk own one
*     full block, processes further away own nothing, and the process
*     at distance exactly nblk owns the trailing block ( maybe empty ).
*/
      if( dist < nblk ) return NB;
      if( dist > nblk ) return 0;
      return N - inb1 + NB * ( 1 - nblk );
   }
/*
*  General case: compare the distance with the number of blocks left
*  over once the complete rounds have been removed.
*/
   nloc  = nblk / NPROCS;
   dist -= nblk % NPROCS;
/*
*  Before the leftover boundary: nloc + 1 full blocks of size NB.
*/
   if( dist < 0 ) return ( nloc + 1 ) * NB;
/*
*  Past the leftover boundary: nloc full blocks of size NB.
*/
   if( dist > 0 ) return nloc * NB;
/*
*  Exactly on the boundary: nloc full blocks plus the trailing block of
*  size LNB with NB > LNB >= 0, i.e. nloc * NB + LNB, which equals
*  N - inb1 + ( nloc - nblk + 1 ) * NB.
*/
   return N - inb1 + NB * ( nloc - nblk + 1 );
/*
*  End of PB_Cnumroc
*/
}