ScaLAPACK 2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pirow2col.f
      SUBROUTINE PIROW2COL( ICTXT, M, N, NB, VS, LDVS, VD, LDVD,
     $                      RSRC, CSRC, RDEST, CDEST, WORK )
*
*  -- ScaLAPACK tools routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     May 1, 1997
*
*     .. Scalar Arguments ..
      INTEGER            CDEST, CSRC, ICTXT, LDVD, LDVS, M, N, NB,
     $                   RDEST, RSRC
*     ..
*     .. Array Arguments ..
      INTEGER            VD( LDVD, * ), VS( LDVS, * ), WORK( * )
*     ..
*
16 *
17 * Purpose
18 * =======
19 *
20 * Take a block of vectors with M total rows which are distributed over
21 * a row of processes, and distribute those rows over a column of
22 * processes. This routine minimizes communication by sending all
23 * information it has that a given process in the CDEST needs at once.
24 * To do this it uses the least common multiple (LCM) concept. This is
25 * simply the realization that if I have part of a vector split over a
26 * process row consisting of Q processes, and I want to send all of that
27 * vector that I own to a new vector distributed over P processes within
28 * a process column, that after I find the process in RDEST that owns
29 * the row of the vector I'm currently looking at, he will want every
30 * ( (LCM(P,Q)/Q ) block of my vector (the block being of size NB x N).
31 *
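*  As a worked instance (the grid sizes here are assumed, added for
*  illustration): with Q = 3 source process columns and P = 2
*  destination process rows, LCM(P,Q) = 6, so each source process
*  sends every ( LCM(P,Q)/Q ) = 2nd of its NB x N blocks to a given
*  destination process, while each destination fills every
*  ( LCM(P,Q)/P ) = 3rd of its local block slots from a given source.
*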
*  Arguments
*  =========
*
*  Rem:  MP, resp. NQ, denotes the number of local rows, resp. local
*  ====  columns, necessary to store a global vector of dimension M
*        across P processes, resp. N over Q processes.
*
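*        For instance, using the NUMROC function referenced below
*        (values assumed here for illustration): NUMROC( 10, 2, 0, 0,
*        2 ) = 6 and NUMROC( 10, 2, 1, 0, 2 ) = 4, i.e. with M = 10,
*        NB = 2 and P = 2 the five 2-row blocks are dealt round-robin,
*        so MP is 6 on the first process row and 4 on the second.
*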
*  ICTXT    (global input) INTEGER
*           The BLACS context handle, indicating the global context of
*           the operation. The context itself is global.
*
*  M        (global input) INTEGER
*           The number of global rows each vector has.
*
*  N        (global input) INTEGER
*           The number of vectors in the vector block.
*
*  NB       (global input) INTEGER
*           The blocking factor used to divide the rows of the vector
*           amongst the processes of a row.
*
*  VS       (local input) INTEGER
*           Array of dimension (LDVS,N), the block of vectors stored on
*           process row RSRC to be put into memory VD, and stored on
*           process column CDEST.
*
*  LDVS     (local input) INTEGER
*           The leading dimension of VS.
*
*  VD       (local output) INTEGER
*           Array of dimension (LDVD,N). On output, the contents of VS
*           distributed over process column CDEST will be here.
*
*  LDVD     (local input) INTEGER
*           The leading dimension of VD.
*
*  RSRC     (global input) INTEGER
*           The process row VS is distributed over.
*
*  CSRC     (global input) INTEGER
*           The process column the distributed block of vectors VS
*           begins on.
*
*  RDEST    (global input) INTEGER
*           The process row that VD begins on.
*
*  CDEST    (global input) INTEGER
*           The process column to distribute VD over.
*
*  WORK     (local workspace) INTEGER
*           Array, dimension (LDW). The required size of WORK varies:
*           if( nprow.eq.npcol ) then
*              LDW = 0; WORK not accessed.
*           else
*              lcm = least common multiple of the numbers of process
*                    rows and columns.
*              Mq  = number of rows of VS on my process.
*              npcol = number of process columns.
*              CEIL  = the ceiling of the given operation.
*              LDW = NB*N*CEIL( CEIL( Mq/NB )/(LCM/npcol) )
*           end if
*
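*           A worked size, under values assumed here for illustration:
*           for NB = 2, N = 1 and Mq = 5 on a 2 x 3 process grid,
*           LCM = 6 and LCM/npcol = 2, so CEIL( Mq/NB ) = 3 and
*           LDW = 2*1*CEIL( 3/2 ) = 2*1*2 = 4.
*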
*  =====================================================================
*
*     .. Local Scalars ..
      INTEGER            CBLKSKIP, ICPY, II, ISTART, ICSRC, IRDEST, JB,
     $                   JJ, K, LCM, MP, MQ, MYCOL, MYDIST, MYROW,
     $                   NBLOCKS, NPCOL, NPROW, RBLKSKIP
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, IGESD2D, IGERV2D, ILACPY
*     ..
*     .. External Functions ..
      INTEGER            ILCM, NUMROC
      EXTERNAL           ILCM, NUMROC
*     ..
*     .. Executable Statements ..
*
*     .. Initialize Variables ..
*
      ICPY = 0
*
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*
*     If we are not in the special case NPROW = NPCOL, where no
*     copying is required
*
      IF( NPROW.NE.NPCOL ) THEN
         LCM = ILCM( NPROW, NPCOL )
         RBLKSKIP = LCM / NPCOL
         CBLKSKIP = LCM / NPROW
*
*        If I have part of VS, the source vector(s)
*
         IF( MYROW.EQ.RSRC ) THEN
*
            ISTART = 1
*
*           Figure my distance from CSRC: the process in CDEST the
*           same distance from RDEST will want my first block
*
            MYDIST = MOD( NPCOL+MYCOL-CSRC, NPCOL )
            MQ = NUMROC( M, NB, MYCOL, CSRC, NPCOL )
            IRDEST = MOD( RDEST+MYDIST, NPROW )
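*
*           (A hypothetical instance, added for illustration: on a
*           2 x 3 grid with CSRC = 0 and RDEST = 0, the process in
*           column MYCOL = 2 has MYDIST = 2, so its first block goes
*           to process row IRDEST = MOD( 0+2, 2 ) = 0 of column CDEST.)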
*
*           Loop over all possible destination processes
*
            DO 20 K = 1, RBLKSKIP
               JJ = 1
*
*              If I am not the destination process
*
               IF( (MYROW.NE.IRDEST).OR.(MYCOL.NE.CDEST) ) THEN
*
*                 Pack all data I own that the destination needs
*
                  DO 10 II = ISTART, MQ, NB*RBLKSKIP
                     JB = MIN( NB, MQ-II+1 )
                     CALL ILACPY( 'G', JB, N, VS( II, 1 ), LDVS,
     $                            WORK( JJ ), JB )
                     JJ = JJ + NB*N
   10             CONTINUE
*
*                 Figure how many rows are to be sent and send them if
*                 necessary. NOTE: will send extra if NB > JB
*
                  JJ = JJ - 1
                  IF( JJ.GT.0 )
     $               CALL IGESD2D( ICTXT, JJ, 1, WORK, JJ, IRDEST,
     $                             CDEST )
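*
*                 (A hypothetical count, added for illustration: with
*                 ISTART = 1, MQ = 5, NB = 2, N = 1 and RBLKSKIP = 2,
*                 the loop above packs the blocks starting at rows 1
*                 and 5, so JJ = 2*NB*N = 4 elements go to this
*                 destination; the last element is padding because the
*                 final block has only JB = 1 row.)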
*
*              I am both source and destination, save where to start
*              copying from for later use
*
               ELSE
                  ICPY = ISTART
               END IF
*
               ISTART = ISTART + NB
               IRDEST = MOD( IRDEST+NPCOL, NPROW )
   20       CONTINUE
         END IF
*
*        If I should receive info into VD
*
         IF( MYCOL.EQ.CDEST ) THEN
*
            ISTART = 1
*
*           Figure my distance from RDEST: the process in RSRC the
*           same distance from CSRC will have my first block
*
            MYDIST = MOD( NPROW+MYROW-RDEST, NPROW )
            MP = NUMROC( M, NB, MYROW, RDEST, NPROW )
            ICSRC = MOD( CSRC+MYDIST, NPCOL )
*
*           Loop over all sending processes
*
            DO 50 K = 1, CBLKSKIP
*
*              If I don't already possess the required data
*
               IF( (MYROW.NE.RSRC).OR.(MYCOL.NE.ICSRC) ) THEN
*
*                 Figure how many rows to receive, and receive them.
*                 NOTE: may receive too much -- NB instead of JB
*
                  NBLOCKS = ( MP - ISTART + NB ) / NB
                  JJ = ( (NBLOCKS+CBLKSKIP-1) / CBLKSKIP )*NB
                  IF( JJ.GT.0 )
     $               CALL IGERV2D( ICTXT, JJ, N, WORK, JJ, RSRC,
     $                             ICSRC )
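*
*                 (A hypothetical count, added for illustration: with
*                 MP = 6, ISTART = 1, NB = 2 and CBLKSKIP = 3, NBLOCKS
*                 = 3 and JJ = 1*NB = 2 rows, i.e. exactly one NB-row
*                 block arrives from each of the three senders.)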
*
*                 Copy data to destination vector
*
                  JJ = 1
                  DO 30 II = ISTART, MP, NB*CBLKSKIP
                     JB = MIN( NB, MP-II+1 )
                     CALL ILACPY( 'G', JB, N, WORK( JJ ), JB,
     $                            VD( II, 1 ), LDVD )
                     JJ = JJ + NB*N
   30             CONTINUE
*
*              If I am both source and destination
*
               ELSE
                  JJ = ICPY
                  DO 40 II = ISTART, MP, NB*CBLKSKIP
                     JB = MIN( NB, MP-II+1 )
                     CALL ILACPY( 'G', JB, N, VS( JJ, 1 ), LDVS,
     $                            VD( II, 1 ), LDVD )
                     JJ = JJ + NB*RBLKSKIP
   40             CONTINUE
               END IF
               ISTART = ISTART + NB
               ICSRC = MOD( ICSRC+NPROW, NPCOL )
   50       CONTINUE
         END IF
*
*     If NPROW = NPCOL, there is a one-to-one correspondence between
*     process rows and columns, so no workspace or copying is required
*
      ELSE
*
         IF( MYROW.EQ.RSRC ) THEN
*
*           Figure my distance from CSRC: the process in CDEST the
*           same distance from RDEST will want my piece of the vector
*
            MYDIST = MOD( NPCOL+MYCOL-CSRC, NPCOL )
            MQ = NUMROC( M, NB, MYCOL, CSRC, NPCOL )
            IRDEST = MOD( RDEST+MYDIST, NPROW )
            IF( (MYROW.NE.IRDEST).OR.(MYCOL.NE.CDEST) ) THEN
               CALL IGESD2D( ICTXT, MQ, N, VS, LDVS, IRDEST, CDEST )
            ELSE
               CALL ILACPY( 'G', MQ, N, VS, LDVS, VD, LDVD )
            END IF
         END IF
         IF( MYCOL.EQ.CDEST ) THEN
*
*           Figure my distance from RDEST: the process in RSRC the
*           same distance from CSRC will have my piece of the vector
*
            MYDIST = MOD( NPROW+MYROW-RDEST, NPROW )
            MP = NUMROC( M, NB, MYROW, RDEST, NPROW )
            ICSRC = MOD( CSRC+MYDIST, NPCOL )
            IF( (MYCOL.NE.ICSRC).OR.(MYROW.NE.RSRC) )
     $         CALL IGERV2D( ICTXT, MP, N, VD, LDVD, RSRC, ICSRC )
         END IF
      END IF
*
      RETURN
*
*     End of PIROW2COL
*
      END
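
A minimal caller sketch (not part of the original file), assuming an
already-initialized BLACS process grid reachable through ICTXT; the
subroutine name CALLR2C, the grid shape, and the values of M, N, NB and
the source/destination coordinates are all illustrative:

      SUBROUTINE CALLR2C( ICTXT )
*     Sketch only: redistribute M x N INTEGER vectors held on process
*     row 0 (rows blocked by NB over the process columns, starting at
*     column 0) into VD on process column 0 (rows blocked by NB over
*     the process rows, starting at row 0).
      INTEGER            ICTXT
      INTEGER            M, N, NB
      PARAMETER          ( M = 10, N = 2, NB = 2 )
*     ( M+NB )*N integers is a safe overestimate of the LDW formula
*     given in the header above.
      INTEGER            VS( M, N ), VD( M, N ), WORK( ( M+NB )*N )
      EXTERNAL           PIROW2COL
*     ... on process row 0, fill the local rows of VS here ...
      CALL PIROW2COL( ICTXT, M, N, NB, VS, M, VD, M, 0, 0, 0, 0,
     $                WORK )
      RETURN
      END

On a 2 x 3 grid this call exercises the LCM packing path above; on a
square grid the routine reduces to a single send/receive or local copy
per process, and WORK is not accessed.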