SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pscol2row.f
Go to the documentation of this file.
1 SUBROUTINE pscol2row( ICTXT, M, N, NB, VS, LDVS, VD, LDVD, RSRC,
2 $ CSRC, RDEST, CDEST, WORK)
3*
4* -- ScaLAPACK tools routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* May 1, 1997
8*
9* .. Scalar Arguments ..
10 INTEGER CDEST, CSRC, ICTXT, LDVD, LDVS, M, N, NB,
11 $ rdest, rsrc
12* ..
13* .. Array Arguments ..
14 REAL VD( LDVD, * ), VS( LDVS, * ), WORK( * )
15* ..
16*
17* Purpose
18* =======
19*
20* Take a block of vectors with M total rows which are distributed over
21* a column of processes, and distribute those rows over a row of
22* processes. This routine minimizes communication by sending all
23* information it has that a given process in the RDEST needs at once.
24* To do this it uses the least common multiple (LCM) concept. This is
25* simply the realization that if I have part of a vector split over a
26* process column consisting of P processes, and I want to send all of
27* that vector that I own to a new vector distributed over Q processes
28* within a process row, that after I find the process in RDEST that
29* owns the row of the vector I'm currently looking at, he will want
30* every ( (LCM(P,Q) / P ) block of my vector (the block being of size
31* NB x N).
32*
33* Arguments
34* =========
35*
36* Rem: MP, resp. NQ, denotes the number of local rows, resp. local
37* ==== columns, necessary to store a global vector of dimension M
38* across P processes, resp. N over Q processes.
39*
40* ICTXT (global input) INTEGER
41* The BLACS context handle, indicating the global context of
42* the operation. The context itself is global.
43*
44* M (global input) INTEGER
45* The number of global rows each vector has.
46*
47* N (global input) INTEGER
48* The number of vectors in the vector block.
49*
50* NB (global input) INTEGER
51* The blocking factor used to divide the rows of the vector
52* amongst the processes of a column.
53*
54* VS (local input) REAL
55* Array of dimension (LDVS,N), the block of vectors stored on
56* process column CSRC to be put into memory VD, and stored
57* on process row RDEST.
58*
59* LDVS (local input) INTEGER
60* The leading dimension of VS, LDVS >= MAX( 1, MP ).
61*
62* VD (local output) REAL
63* Array of dimension (LDVD,N), on output, the contents of VS
64* stored on process row RDEST will be here.
65*
66* LDVD (local input) INTEGER
67* The leading dimension of VD, LDVD >= MAX( 1, MQ ).
68*
69* RSRC (global input) INTEGER
70* The process row the distributed block of vectors VS begins
71* on.
72*
73* CSRC (global input) INTEGER
74* The process column VS is distributed over.
75*
76* RDEST (global input) INTEGER
77* The process row to distribute VD over.
78*
79* CDEST (global input) INTEGER
80* The process column that VD begins on.
81*
82* WORK (local workspace) REAL
83* Array of dimension (LDW), the required size of work varies:
84* if( nprow.eq.npcol ) then
85* LDW = 0; WORK not accessed.
86* else
87* lcm = least common multiple of process rows and columns.
88* Mp = number of rows of VS on my process.
89* nprow = number of process rows
90* CEIL = the ceiling of given operation
91* LDW = NB*N*CEIL( CEIL( Mp/NB )/(LCM/nprow) )
92* end if
93*
94* =====================================================================
95*
96* .. Local Scalars ..
97 INTEGER CBLKSKIP, ICPY, ICDEST, II, IRSRC, ISTART, JB,
98 $ jj, k, lcm, mp, mq, mycol, mydist, myrow,
99 $ nblocks, npcol, nprow, rblkskip
100* ..
101* .. External Subroutines ..
102 EXTERNAL blacs_gridinfo, sgesd2d, sgerv2d, slacpy
103* ..
104* .. External Functions ..
105 INTEGER ILCM, NUMROC
106 EXTERNAL ilcm, numroc
107* ..
108* .. Executable Statements ..
109*
110*
111* .. Initialize Variables ..
112*
113 icpy = 0
114*
115* Get grid parameters.
116*
117 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
118*
119* If we are not in special case for NPROW = NPCOL where there
120* is no copying required
121*
122 IF( nprow.NE.npcol ) THEN
123 lcm = ilcm( nprow, npcol )
124 rblkskip = lcm / npcol
125 cblkskip = lcm / nprow
126*
127* If I have part of VS, the source vector(s)
128*
129 IF( mycol.EQ.csrc ) THEN
130*
131 istart = 1
132*
133* Figure my distance from RSRC: the process in RDEST the same
134* distance from CDEST will want my first block
135*
136 mydist = mod( nprow+myrow-rsrc, nprow )
137 mp = numroc( m, nb, myrow, rsrc, nprow )
138 icdest = mod( cdest+mydist, npcol )
139*
140* Loop over all possible destination processes
141*
142 DO 20 k = 1, cblkskip
143 jj = 1
144*
145* If I am not destination process
146*
147 IF( (mycol.NE.icdest).OR.(myrow.NE.rdest) ) THEN
148*
149* Pack all data I own that destination needs
150*
151 DO 10 ii = istart, mp, nb*cblkskip
152 jb = min(nb, mp-ii+1)
153 CALL slacpy( 'G', jb, n, vs(ii,1), ldvs,
154 $ work(jj), jb )
155 jj = jj + nb*n
156 10 CONTINUE
157*
158* Figure how many rows are to be sent and send them if
159* necessary (NOTE: will send extra if NB > JB)
160*
161 jj = jj - 1
162 IF( jj.GT.0 )
163 $ CALL sgesd2d( ictxt, jj, 1, work, jj, rdest,
164 $ icdest )
165*
166 ELSE
167*
168* I am both source and destination, save where to start
169* copying from for later use.
170*
171 icpy = istart
172 END IF
173*
174 istart = istart + nb
175 icdest = mod(icdest+nprow, npcol)
176 20 CONTINUE
177 END IF
178*
179* If I should receive info into VD
180*
181 IF( myrow.EQ.rdest ) THEN
182*
183 istart = 1
184*
185* Figure my distance from CDEST: the process in CSRC the same
186* distance from RSRC will have my first block.
187*
188 mydist = mod( npcol+mycol-cdest, npcol )
189 mq = numroc( m, nb, mycol, cdest, npcol )
190 irsrc = mod( rsrc+mydist, nprow )
191 DO 50 k = 1, rblkskip
192*
193* If I don't already possess the required data
194*
195 IF( (mycol.NE.csrc).OR.(myrow.NE.irsrc) ) THEN
196*
197* Figure how many rows to receive, and receive them
198* NOTE: may receive to much -- NB instead of JB
199*
200 nblocks = (mq - istart + nb) / nb
201 jj = ((nblocks+rblkskip-1) / rblkskip)*nb
202 IF( jj.GT.0 )
203 $ CALL sgerv2d( ictxt, jj, n, work, jj, irsrc, csrc )
204*
205* Copy data to destination vector
206*
207 jj = 1
208 DO 30 ii = istart, mq, nb*rblkskip
209 jb = min( nb, mq-ii+1 )
210 CALL slacpy( 'G', jb, n, work(jj), jb,
211 $ vd(ii,1), ldvd )
212 jj = jj + nb*n
213 30 CONTINUE
214*
215* If I am both source and destination
216*
217 ELSE
218 jj = icpy
219 DO 40 ii = istart, mq, nb*rblkskip
220 jb = min( nb, mq-ii+1 )
221 CALL slacpy( 'G', jb, n, vs(jj,1), ldvs,
222 $ vd(ii,1), ldvd )
223 jj = jj + nb*cblkskip
224 40 CONTINUE
225 END IF
226 istart = istart + nb
227 irsrc = mod( irsrc+npcol, nprow )
228 50 CONTINUE
229 END IF
230*
231* If NPROW = NPCOL, there is a one-to-one correspondance between
232* process rows and columns, so no work space or copying required
233*
234 ELSE
235*
236 IF( mycol.EQ.csrc ) THEN
237*
238* Figure my distance from RSRC: the process in RDEST the same
239* distance from CDEST will want my piece of the vector.
240*
241 mydist = mod( nprow+myrow-rsrc, nprow )
242 mp = numroc( m, nb, myrow, rsrc, nprow )
243 icdest = mod( cdest+mydist, npcol )
244*
245 IF( (mycol.NE.icdest).OR.(myrow.NE.rdest) ) THEN
246 CALL sgesd2d( ictxt, mp, n, vs, ldvs, rdest, icdest )
247 ELSE
248 CALL slacpy( 'G', mp, n, vs, ldvs, vd, ldvd )
249 END IF
250 END IF
251*
252 IF( myrow.EQ.rdest ) THEN
253*
254* Figure my distance from CDEST: the process in CSRC the same
255* distance from RSRC will have my piece of the vector.
256*
257 mydist = mod( npcol+mycol-cdest, npcol )
258 mq = numroc( m, nb, mycol, cdest, npcol )
259 irsrc = mod( rsrc+mydist, nprow )
260*
261 IF( (myrow.NE.irsrc).OR.(mycol.NE.csrc) )
262 $ CALL sgerv2d( ictxt, mq, n, vd, ldvd, irsrc, csrc )
263*
264 END IF
265*
266 END IF
267*
268 RETURN
269*
270* End of PSCOL2ROW
271*
272 END
#define min(A, B)
Definition pcgemr.c:181
subroutine pscol2row(ictxt, m, n, nb, vs, ldvs, vd, ldvd, rsrc, csrc, rdest, cdest, work)
Definition pscol2row.f:3