ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pccol2row.f
Go to the documentation of this file.
00001       SUBROUTINE PCCOL2ROW( ICTXT, M, N, NB, VS, LDVS, VD, LDVD, RSRC,
00002      $                      CSRC, RDEST, CDEST, WORK)
00003 *
00004 *  -- ScaLAPACK tools routine (version 1.7) --
00005 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
00006 *     and University of California, Berkeley.
00007 *     May 1, 1997
00008 *
00009 *     .. Scalar Arguments ..
00010       INTEGER            CDEST, CSRC, ICTXT, LDVD, LDVS, M, N, NB,
00011      $                   RDEST, RSRC
00012 *     ..
00013 *     .. Array Arguments ..
00014       COMPLEX            VD( LDVD, * ), VS( LDVS, * ), WORK( * )
00015 *     ..
00016 *
00017 *  Purpose
00018 *  =======
00019 *
00020 *  Take a block of vectors with M total rows which are distributed over
00021 *  a column of processes, and distribute those rows over a row of
00022 *  processes. This routine minimizes communication by sending all
00023 *  information it has that a given process in the RDEST needs at once.
00024 *  To do this it uses the least common multiple (LCM) concept.  This is
00025 *  simply the realization that if I have part of a vector split over a
00026 *  process column consisting of P processes, and I want to send all of
00027 *  that vector that I own to a new vector distributed over Q processes
00028 *  within a process row, that after I find the process in RDEST that
00029 *  owns the row of the vector I'm currently looking at, he will want
00030 *  every ( (LCM(P,Q) / P ) block of my vector (the block being of size
00031 *  NB x N).
00032 *
00033 *  Arguments
00034 *  =========
00035 *
00036 *  Rem:  MP, resp. NQ, denotes the number of local rows, resp. local
00037 *  ====  columns, necessary to store a global vector of dimension M
00038 *        across P processes, resp. N over Q processes.
00039 *
00040 *  ICTXT   (global input) INTEGER
00041 *          The BLACS context handle, indicating the global context of
00042 *          the operation. The context itself is global.
00043 *
00044 *  M       (global input) INTEGER
00045 *          The number of global rows each vector has.
00046 *
00047 *  N       (global input) INTEGER
00048 *          The number of vectors in the vector block.
00049 *
00050 *  NB      (global input) INTEGER
00051 *          The blocking factor used to divide the rows of the vector
00052 *          amongst the processes of a column.
00053 *
00054 *  VS      (local input) COMPLEX
00055 *          Array of dimension (LDVS,N), the block of vectors stored on
00056 *          process column CSRC to be put into memory VD, and stored
00057 *          on process row RDEST.
00058 *
00059 *  LDVS    (local input) INTEGER
00060 *          The leading dimension of VS, LDVS >= MAX( 1, MP ).
00061 *
00062 *  VD      (local output) COMPLEX
00063 *          Array of dimension (LDVD,N), on output, the contents of VS
00064 *          stored on process row RDEST will be here.
00065 *
00066 *  LDVD    (local input) INTEGER
00067 *          The leading dimension of VD, LDVD >= MAX( 1, MQ ).
00068 *
00069 *  RSRC    (global input) INTEGER
00070 *          The process row the distributed block of vectors VS begins
00071 *          on.
00072 *
00073 *  CSRC    (global input) INTEGER
00074 *          The process column VS is distributed over.
00075 *
00076 *  RDEST   (global input) INTEGER
00077 *          The process row to distribute VD over.
00078 *
00079 *  CDEST   (global input) INTEGER
00080 *          The process column that VD begins on.
00081 *
00082 *  WORK    (local workspace) COMPLEX
00083 *          Array of dimension (LDW), the required size of work varies:
00084 *          if( nprow.eq.npcol ) then
00085 *             LDW = 0; WORK not accessed.
00086 *          else
00087 *             lcm = least common multiple of process rows and columns.
00088 *             Mp  = number of rows of VS on my process.
00089 *             nprow = number of process rows
00090 *             CEIL = the ceiling of given operation
00091 *             LDW = NB*N*CEIL( CEIL( Mp/NB )/(LCM/nprow) )
00092 *          end if
00093 *
00094 *  =====================================================================
00095 *
00096 *     .. Local Scalars ..
00097       INTEGER            CBLKSKIP, ICPY, ICDEST, II, IRSRC, ISTART, JB,
00098      $                   JJ, K, LCM, MP, MQ, MYCOL, MYDIST, MYROW,
00099      $                   NBLOCKS, NPCOL, NPROW, RBLKSKIP
00100 *     ..
00101 *     .. External Subroutines ..
00102       EXTERNAL           BLACS_GRIDINFO, CGESD2D, CGERV2D, CLACPY
00103 *     ..
00104 *     .. External Functions ..
00105       INTEGER            ILCM, NUMROC
00106       EXTERNAL           ILCM, NUMROC
00107 *     ..
00108 *     .. Executable Statements ..
00109 *
00110         ICPY = 0
00111 *
00112 *     Get grid parameters.
00113 *       
00114       CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
00115 *
00116 *     If we are not in special case for NPROW = NPCOL where there
00117 *     is no copying required
00118 *
00119       IF( NPROW.NE.NPCOL ) THEN
00120          LCM = ILCM( NPROW, NPCOL )
00121          RBLKSKIP = LCM / NPCOL
00122          CBLKSKIP = LCM / NPROW
00123 *
00124 *        If I have part of VS, the source vector(s)
00125 *
00126          IF( MYCOL.EQ.CSRC ) THEN
00127 *
00128             ISTART = 1
00129 *
00130 *           Figure my distance from RSRC: the process in RDEST the same
00131 *           distance from CDEST will want my first block
00132 *
00133             MYDIST = MOD( NPROW+MYROW-RSRC, NPROW )
00134             MP = NUMROC( M, NB, MYROW, RSRC, NPROW )
00135             ICDEST = MOD( CDEST+MYDIST, NPCOL )
00136 *
00137 *           Loop over all possible destination processes
00138 *
00139             DO 20 K = 1, CBLKSKIP
00140                JJ = 1
00141 *
00142 *              If I am not destination process
00143 *
00144                IF( (MYCOL.NE.ICDEST).OR.(MYROW.NE.RDEST) ) THEN
00145 *
00146 *                 Pack all data I own that destination needs
00147 *
00148                   DO 10 II = ISTART, MP, NB*CBLKSKIP
00149                      JB = MIN(NB, MP-II+1)
00150                      CALL CLACPY( 'G', JB, N, VS(II,1), LDVS,
00151      $                            WORK(JJ), JB )
00152                      JJ = JJ + NB*N
00153    10             CONTINUE
00154 *
00155 *                 Figure how many rows are to be sent and send them if
00156 *                 necessary (NOTE: will send extra if NB > JB)
00157 *
00158                   JJ = JJ - 1
00159                   IF( JJ.GT.0 )
00160      $               CALL CGESD2D( ICTXT, JJ, 1, WORK, JJ, RDEST,
00161      $                             ICDEST )
00162 *
00163                ELSE
00164 *
00165 *                 I am both source and destination, save where to start
00166 *                 copying from for later use.
00167 *
00168                   ICPY = ISTART
00169                END IF
00170 *
00171                ISTART = ISTART + NB
00172                ICDEST = MOD(ICDEST+NPROW, NPCOL)
00173    20       CONTINUE
00174          END IF
00175 *
00176 *        If I should receive info into VD
00177 *
00178          IF( MYROW.EQ.RDEST ) THEN
00179 *
00180             ISTART = 1
00181 *
00182 *           Figure my distance from CDEST: the process in CSRC the same
00183 *           distance from RSRC will have my first block.
00184 *
00185             MYDIST = MOD( NPCOL+MYCOL-CDEST, NPCOL )
00186             MQ = NUMROC( M, NB, MYCOL, CDEST, NPCOL )
00187             IRSRC = MOD( RSRC+MYDIST, NPROW )
00188             DO 50 K = 1, RBLKSKIP
00189 *
00190 *              If I don't already possess the required data
00191 *
00192                IF( (MYCOL.NE.CSRC).OR.(MYROW.NE.IRSRC) ) THEN
00193 *
00194 *                 Figure how many rows to receive, and receive them
00195 *                 NOTE: may receive to much -- NB instead of JB
00196 *
00197                   NBLOCKS = (MQ - ISTART + NB) / NB
00198                   JJ = ((NBLOCKS+RBLKSKIP-1) / RBLKSKIP)*NB
00199                   IF( JJ.GT.0 )
00200      $               CALL CGERV2D( ICTXT, JJ, N, WORK, JJ, IRSRC, CSRC )
00201 *
00202 *                 Copy data to destination vector
00203 *
00204                   JJ = 1
00205                   DO 30 II = ISTART, MQ, NB*RBLKSKIP
00206                      JB = MIN( NB, MQ-II+1 )
00207                      CALL CLACPY( 'G', JB, N, WORK(JJ), JB,
00208      $                            VD(II,1), LDVD )
00209                      JJ = JJ + NB*N
00210    30             CONTINUE
00211 *
00212 *                 If I am both source and destination
00213 *
00214                ELSE
00215                   JJ = ICPY
00216                   DO 40 II = ISTART, MQ, NB*RBLKSKIP
00217                      JB = MIN( NB, MQ-II+1 )
00218                      CALL CLACPY( 'G', JB, N, VS(JJ,1), LDVS,
00219      $                            VD(II,1), LDVD )
00220                      JJ = JJ + NB*CBLKSKIP
00221    40             CONTINUE
00222                END IF
00223                ISTART = ISTART + NB
00224                IRSRC = MOD( IRSRC+NPCOL, NPROW )
00225    50       CONTINUE
00226          END IF
00227 *
00228 *     If NPROW = NPCOL, there is a one-to-one correspondance between
00229 *     process rows and columns, so no work space or copying required
00230 *
00231       ELSE
00232 *
00233          IF( MYCOL.EQ.CSRC ) THEN
00234 *
00235 *           Figure my distance from RSRC: the process in RDEST the same
00236 *           distance from CDEST will want my piece of the vector.
00237 *
00238             MYDIST = MOD( NPROW+MYROW-RSRC, NPROW )
00239             MP = NUMROC( M, NB, MYROW, RSRC, NPROW )
00240             ICDEST = MOD( CDEST+MYDIST, NPCOL )
00241 *
00242             IF( (MYCOL.NE.ICDEST).OR.(MYROW.NE.RDEST) ) THEN
00243                CALL CGESD2D( ICTXT, MP, N, VS, LDVS, RDEST, ICDEST )
00244             ELSE
00245                CALL CLACPY( 'G', MP, N, VS, LDVS, VD, LDVD )
00246             END IF
00247          END IF
00248 *
00249          IF( MYROW.EQ.RDEST ) THEN
00250 *
00251 *           Figure my distance from CDEST: the process in CSRC the same
00252 *           distance from RSRC will have my piece of the vector.
00253 *
00254             MYDIST = MOD( NPCOL+MYCOL-CDEST, NPCOL )
00255             MQ = NUMROC( M, NB, MYCOL, CDEST, NPCOL )
00256             IRSRC = MOD( RSRC+MYDIST, NPROW )
00257 *
00258             IF( (MYROW.NE.IRSRC).OR.(MYCOL.NE.CSRC) )
00259      $         CALL CGERV2D( ICTXT, MQ, N, VD, LDVD, IRSRC, CSRC )
00260 *
00261           END IF
00262 *
00263       END IF
00264 *
00265       RETURN
00266 *
00267 *     End of PCCOL2ROW
00268 *
00269       END