|
ScaLAPACK
2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
|
00001 SUBROUTINE PCCOL2ROW( ICTXT, M, N, NB, VS, LDVS, VD, LDVD, RSRC, 00002 $ CSRC, RDEST, CDEST, WORK) 00003 * 00004 * -- ScaLAPACK tools routine (version 1.7) -- 00005 * University of Tennessee, Knoxville, Oak Ridge National Laboratory, 00006 * and University of California, Berkeley. 00007 * May 1, 1997 00008 * 00009 * .. Scalar Arguments .. 00010 INTEGER CDEST, CSRC, ICTXT, LDVD, LDVS, M, N, NB, 00011 $ RDEST, RSRC 00012 * .. 00013 * .. Array Arguments .. 00014 COMPLEX VD( LDVD, * ), VS( LDVS, * ), WORK( * ) 00015 * .. 00016 * 00017 * Purpose 00018 * ======= 00019 * 00020 * Take a block of vectors with M total rows which are distributed over 00021 * a column of processes, and distribute those rows over a row of 00022 * processes. This routine minimizes communication by sending all 00023 * information it has that a given process in the RDEST needs at once. 00024 * To do this it uses the least common multiple (LCM) concept. This is 00025 * simply the realization that if I have part of a vector split over a 00026 * process column consisting of P processes, and I want to send all of 00027 * that vector that I own to a new vector distributed over Q processes 00028 * within a process row, that after I find the process in RDEST that 00029 * owns the row of the vector I'm currently looking at, he will want 00030 * every ( (LCM(P,Q) / P ) block of my vector (the block being of size 00031 * NB x N). 00032 * 00033 * Arguments 00034 * ========= 00035 * 00036 * Rem: MP, resp. NQ, denotes the number of local rows, resp. local 00037 * ==== columns, necessary to store a global vector of dimension M 00038 * across P processes, resp. N over Q processes. 00039 * 00040 * ICTXT (global input) INTEGER 00041 * The BLACS context handle, indicating the global context of 00042 * the operation. The context itself is global. 00043 * 00044 * M (global input) INTEGER 00045 * The number of global rows each vector has. 00046 * 00047 * N (global input) INTEGER 00048 * The number of vectors in the vector block. 00049 * 00050 * NB (global input) INTEGER 00051 * The blocking factor used to divide the rows of the vector 00052 * amongst the processes of a column. 00053 * 00054 * VS (local input) COMPLEX 00055 * Array of dimension (LDVS,N), the block of vectors stored on 00056 * process column CSRC to be put into memory VD, and stored 00057 * on process row RDEST. 00058 * 00059 * LDVS (local input) INTEGER 00060 * The leading dimension of VS, LDVS >= MAX( 1, MP ). 00061 * 00062 * VD (local output) COMPLEX 00063 * Array of dimension (LDVD,N), on output, the contents of VS 00064 * stored on process row RDEST will be here. 00065 * 00066 * LDVD (local input) INTEGER 00067 * The leading dimension of VD, LDVD >= MAX( 1, MQ ). 00068 * 00069 * RSRC (global input) INTEGER 00070 * The process row the distributed block of vectors VS begins 00071 * on. 00072 * 00073 * CSRC (global input) INTEGER 00074 * The process column VS is distributed over. 00075 * 00076 * RDEST (global input) INTEGER 00077 * The process row to distribute VD over. 00078 * 00079 * CDEST (global input) INTEGER 00080 * The process column that VD begins on. 00081 * 00082 * WORK (local workspace) COMPLEX 00083 * Array of dimension (LDW), the required size of work varies: 00084 * if( nprow.eq.npcol ) then 00085 * LDW = 0; WORK not accessed. 00086 * else 00087 * lcm = least common multiple of process rows and columns. 00088 * Mp = number of rows of VS on my process. 00089 * nprow = number of process rows 00090 * CEIL = the ceiling of given operation 00091 * LDW = NB*N*CEIL( CEIL( Mp/NB )/(LCM/nprow) ) 00092 * end if 00093 * 00094 * ===================================================================== 00095 * 00096 * .. Local Scalars .. 00097 INTEGER CBLKSKIP, ICPY, ICDEST, II, IRSRC, ISTART, JB, 00098 $ JJ, K, LCM, MP, MQ, MYCOL, MYDIST, MYROW, 00099 $ NBLOCKS, NPCOL, NPROW, RBLKSKIP 00100 * .. 00101 * .. External Subroutines .. 00102 EXTERNAL BLACS_GRIDINFO, CGESD2D, CGERV2D, CLACPY 00103 * .. 00104 * .. External Functions .. 00105 INTEGER ILCM, NUMROC 00106 EXTERNAL ILCM, NUMROC 00107 * .. 00108 * .. Executable Statements .. 00109 * 00110 ICPY = 0 00111 * 00112 * Get grid parameters. 00113 * 00114 CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) 00115 * 00116 * If we are not in special case for NPROW = NPCOL where there 00117 * is no copying required 00118 * 00119 IF( NPROW.NE.NPCOL ) THEN 00120 LCM = ILCM( NPROW, NPCOL ) 00121 RBLKSKIP = LCM / NPCOL 00122 CBLKSKIP = LCM / NPROW 00123 * 00124 * If I have part of VS, the source vector(s) 00125 * 00126 IF( MYCOL.EQ.CSRC ) THEN 00127 * 00128 ISTART = 1 00129 * 00130 * Figure my distance from RSRC: the process in RDEST the same 00131 * distance from CDEST will want my first block 00132 * 00133 MYDIST = MOD( NPROW+MYROW-RSRC, NPROW ) 00134 MP = NUMROC( M, NB, MYROW, RSRC, NPROW ) 00135 ICDEST = MOD( CDEST+MYDIST, NPCOL ) 00136 * 00137 * Loop over all possible destination processes 00138 * 00139 DO 20 K = 1, CBLKSKIP 00140 JJ = 1 00141 * 00142 * If I am not destination process 00143 * 00144 IF( (MYCOL.NE.ICDEST).OR.(MYROW.NE.RDEST) ) THEN 00145 * 00146 * Pack all data I own that destination needs 00147 * 00148 DO 10 II = ISTART, MP, NB*CBLKSKIP 00149 JB = MIN(NB, MP-II+1) 00150 CALL CLACPY( 'G', JB, N, VS(II,1), LDVS, 00151 $ WORK(JJ), JB ) 00152 JJ = JJ + NB*N 00153 10 CONTINUE 00154 * 00155 * Figure how many rows are to be sent and send them if 00156 * necessary (NOTE: will send extra if NB > JB) 00157 * 00158 JJ = JJ - 1 00159 IF( JJ.GT.0 ) 00160 $ CALL CGESD2D( ICTXT, JJ, 1, WORK, JJ, RDEST, 00161 $ ICDEST ) 00162 * 00163 ELSE 00164 * 00165 * I am both source and destination, save where to start 00166 * copying from for later use. 00167 * 00168 ICPY = ISTART 00169 END IF 00170 * 00171 ISTART = ISTART + NB 00172 ICDEST = MOD(ICDEST+NPROW, NPCOL) 00173 20 CONTINUE 00174 END IF 00175 * 00176 * If I should receive info into VD 00177 * 00178 IF( MYROW.EQ.RDEST ) THEN 00179 * 00180 ISTART = 1 00181 * 00182 * Figure my distance from CDEST: the process in CSRC the same 00183 * distance from RSRC will have my first block. 00184 * 00185 MYDIST = MOD( NPCOL+MYCOL-CDEST, NPCOL ) 00186 MQ = NUMROC( M, NB, MYCOL, CDEST, NPCOL ) 00187 IRSRC = MOD( RSRC+MYDIST, NPROW ) 00188 DO 50 K = 1, RBLKSKIP 00189 * 00190 * If I don't already possess the required data 00191 * 00192 IF( (MYCOL.NE.CSRC).OR.(MYROW.NE.IRSRC) ) THEN 00193 * 00194 * Figure how many rows to receive, and receive them 00195 * NOTE: may receive to much -- NB instead of JB 00196 * 00197 NBLOCKS = (MQ - ISTART + NB) / NB 00198 JJ = ((NBLOCKS+RBLKSKIP-1) / RBLKSKIP)*NB 00199 IF( JJ.GT.0 ) 00200 $ CALL CGERV2D( ICTXT, JJ, N, WORK, JJ, IRSRC, CSRC ) 00201 * 00202 * Copy data to destination vector 00203 * 00204 JJ = 1 00205 DO 30 II = ISTART, MQ, NB*RBLKSKIP 00206 JB = MIN( NB, MQ-II+1 ) 00207 CALL CLACPY( 'G', JB, N, WORK(JJ), JB, 00208 $ VD(II,1), LDVD ) 00209 JJ = JJ + NB*N 00210 30 CONTINUE 00211 * 00212 * If I am both source and destination 00213 * 00214 ELSE 00215 JJ = ICPY 00216 DO 40 II = ISTART, MQ, NB*RBLKSKIP 00217 JB = MIN( NB, MQ-II+1 ) 00218 CALL CLACPY( 'G', JB, N, VS(JJ,1), LDVS, 00219 $ VD(II,1), LDVD ) 00220 JJ = JJ + NB*CBLKSKIP 00221 40 CONTINUE 00222 END IF 00223 ISTART = ISTART + NB 00224 IRSRC = MOD( IRSRC+NPCOL, NPROW ) 00225 50 CONTINUE 00226 END IF 00227 * 00228 * If NPROW = NPCOL, there is a one-to-one correspondance between 00229 * process rows and columns, so no work space or copying required 00230 * 00231 ELSE 00232 * 00233 IF( MYCOL.EQ.CSRC ) THEN 00234 * 00235 * Figure my distance from RSRC: the process in RDEST the same 00236 * distance from CDEST will want my piece of the vector. 00237 * 00238 MYDIST = MOD( NPROW+MYROW-RSRC, NPROW ) 00239 MP = NUMROC( M, NB, MYROW, RSRC, NPROW ) 00240 ICDEST = MOD( CDEST+MYDIST, NPCOL ) 00241 * 00242 IF( (MYCOL.NE.ICDEST).OR.(MYROW.NE.RDEST) ) THEN 00243 CALL CGESD2D( ICTXT, MP, N, VS, LDVS, RDEST, ICDEST ) 00244 ELSE 00245 CALL CLACPY( 'G', MP, N, VS, LDVS, VD, LDVD ) 00246 END IF 00247 END IF 00248 * 00249 IF( MYROW.EQ.RDEST ) THEN 00250 * 00251 * Figure my distance from CDEST: the process in CSRC the same 00252 * distance from RSRC will have my piece of the vector. 00253 * 00254 MYDIST = MOD( NPCOL+MYCOL-CDEST, NPCOL ) 00255 MQ = NUMROC( M, NB, MYCOL, CDEST, NPCOL ) 00256 IRSRC = MOD( RSRC+MYDIST, NPROW ) 00257 * 00258 IF( (MYROW.NE.IRSRC).OR.(MYCOL.NE.CSRC) ) 00259 $ CALL CGERV2D( ICTXT, MQ, N, VD, LDVD, IRSRC, CSRC ) 00260 * 00261 END IF 00262 * 00263 END IF 00264 * 00265 RETURN 00266 * 00267 * End of PCCOL2ROW 00268 * 00269 END