SCALAPACK 2.2.2

◆ pcrow2col()

subroutine pcrow2col ( integer  ictxt,
integer  m,
integer  n,
integer  nb,
complex, dimension( ldvs, * )  vs,
integer  ldvs,
complex, dimension( ldvd, * )  vd,
integer  ldvd,
integer  rsrc,
integer  csrc,
integer  rdest,
integer  cdest,
complex, dimension( * )  work 
)

Definition at line 1 of file pcrow2col.f.

      SUBROUTINE PCROW2COL( ICTXT, M, N, NB, VS, LDVS, VD, LDVD, RSRC,
     $                      CSRC, RDEST, CDEST, WORK )
*
*  -- ScaLAPACK tools routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     May 1, 1997
*
*     .. Scalar Arguments ..
      INTEGER            CDEST, CSRC, ICTXT, LDVD, LDVS, M, N, NB,
     $                   RDEST, RSRC
*     ..
*     .. Array Arguments ..
      COMPLEX            VD( LDVD, * ), VS( LDVS, * ), WORK( * )
*     ..
*
*  Purpose
*  =======
*
*  Take a block of vectors with M total rows which are distributed over
*  a row of processes, and distribute those rows over a column of
*  processes.  This routine minimizes communication by sending all
*  information it has that a given process in process column CDEST
*  needs at once.  To do this it uses the least common multiple (LCM)
*  concept: if I own part of a vector split over a process row of Q
*  processes, and I want to send everything I own to a new vector
*  distributed over P processes within a process column, then once I
*  find which process, relative to RDEST, owns the row of the vector I
*  am currently looking at, that process will want every
*  ( LCM(P,Q)/Q )-th block of my vector (each block being of size
*  NB x N).
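*
*  For example (illustrative values): with P = 2 process rows and
*  Q = 3 process columns, LCM(P,Q) = 6, so a given destination process
*  wants every ( 6/3 ) = 2nd block of my local piece, and fills in
*  every ( 6/2 ) = 3rd block of its own local piece from me.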
*
*  Arguments
*  =========
*
*  Rem:  MP, resp. NQ, denotes the number of local rows, resp. local
*  ====  columns, necessary to store a global vector of dimension M
*        across P processes, resp. N over Q processes.
*
*  ICTXT   (global input) INTEGER
*          The BLACS context handle, indicating the global context of
*          the operation.  The context itself is global.
*
*  M       (global input) INTEGER
*          The number of global rows each vector has.
*
*  N       (global input) INTEGER
*          The number of vectors in the vector block.
*
*  NB      (global input) INTEGER
*          The blocking factor used to divide the rows of the vector
*          amongst the processes of a row.
*
*  VS      (local input) COMPLEX
*          Array of dimension (LDVS,N), the block of vectors stored on
*          process row RSRC that is to be put into VD and distributed
*          over process column CDEST.
*
*  LDVS    (local input) INTEGER
*          The leading dimension of VS.
*
*  VD      (local output) COMPLEX
*          Array of dimension (LDVD,N).  On output, the contents of VD
*          stored on process column CDEST will be here.
*
*  LDVD    (local input) INTEGER
*          The leading dimension of VD.
*
*  RSRC    (global input) INTEGER
*          The process row VS is distributed over.
*
*  CSRC    (global input) INTEGER
*          The process column the distributed block of vectors VS
*          begins on.
*
*  RDEST   (global input) INTEGER
*          The process row that VD begins on.
*
*  CDEST   (global input) INTEGER
*          The process column to distribute VD over.
*
*  WORK    (local workspace) COMPLEX
*          Array, dimension (LDW).  The required size of WORK varies:
*          if( nprow.eq.npcol ) then
*            LDW = 0; WORK not accessed.
*          else
*            lcm = least common multiple of process rows and columns.
*            Mq  = number of rows of VS on my process.
*            npcol = number of process columns.
*            CEIL = the ceiling of given operation.
*            LDW = NB*N*CEIL( CEIL( Mq/NB )/( LCM/npcol ) )
*          end if
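*          For example (illustrative values): with Mq = 40, NB = 8,
*          N = 2, nprow = 2 and npcol = 3, we have LCM = 6 and
*          LDW = 8*2*CEIL( CEIL( 40/8 )/( 6/3 ) )
*              = 16*CEIL( 5/2 ) = 16*3 = 48.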
*
*  =====================================================================
*
*     .. Local Scalars ..
      INTEGER            CBLKSKIP, ICPY, II, ISTART, ICSRC, IRDEST, JB,
     $                   JJ, K, LCM, MP, MQ, MYCOL, MYDIST, MYROW,
     $                   NBLOCKS, NPCOL, NPROW, RBLKSKIP
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, CGESD2D, CGERV2D, CLACPY
*     ..
*     .. External Functions ..
      INTEGER            ILCM, NUMROC
      EXTERNAL           ILCM, NUMROC
*     ..
*     .. Executable Statements ..
*
      ICPY = 0
*
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*
*     If we are not in the special case NPROW = NPCOL, where no
*     copying is required
*
      IF( NPROW.NE.NPCOL ) THEN
         LCM = ILCM( NPROW, NPCOL )
         RBLKSKIP = LCM / NPCOL
         CBLKSKIP = LCM / NPROW
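*
*        RBLKSKIP is the stride, in NB-row blocks, between consecutive
*        blocks of VS bound for one destination process; CBLKSKIP is
*        the corresponding stride between blocks of VD received from
*        one source process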
*
*        If I have part of VS, the source vector(s)
*
         IF( MYROW.EQ.RSRC ) THEN
*
            ISTART = 1
*
*           Figure my distance from CSRC: the process in CDEST at the
*           same distance from RDEST will want my first block
*
            MYDIST = MOD( NPCOL+MYCOL-CSRC, NPCOL )
            MQ = NUMROC( M, NB, MYCOL, CSRC, NPCOL )
            IRDEST = MOD( RDEST+MYDIST, NPROW )
*
*           Loop over all possible destination processes
*
            DO 20 K = 1, RBLKSKIP
               JJ = 1
*
*              If I am not the destination process
*
               IF( ( MYROW.NE.IRDEST ).OR.( MYCOL.NE.CDEST ) ) THEN
*
*                 Pack all data I own that the destination needs
*
                  DO 10 II = ISTART, MQ, NB*RBLKSKIP
                     JB = MIN( NB, MQ-II+1 )
                     CALL CLACPY( 'G', JB, N, VS( II, 1 ), LDVS,
     $                            WORK( JJ ), JB )
                     JJ = JJ + NB*N
   10             CONTINUE
*
*                 Figure how many rows are to be sent and send them if
*                 necessary.  NOTE: extra rows will be sent if NB > JB
*
                  JJ = JJ - 1
                  IF( JJ.GT.0 )
     $               CALL CGESD2D( ICTXT, JJ, 1, WORK, JJ, IRDEST,
     $                             CDEST )
*
*              I am both source and destination: save where to start
*              copying from for later use
*
               ELSE
                  ICPY = ISTART
               END IF
*
               ISTART = ISTART + NB
               IRDEST = MOD( IRDEST+NPCOL, NPROW )
   20       CONTINUE
         END IF
*
*        If I should receive info into VD
*
         IF( MYCOL.EQ.CDEST ) THEN
*
            ISTART = 1
*
*           Figure my distance from RDEST: the process in row RSRC at
*           the same distance from CSRC will have my first block
*
            MYDIST = MOD( NPROW+MYROW-RDEST, NPROW )
            MP = NUMROC( M, NB, MYROW, RDEST, NPROW )
            ICSRC = MOD( CSRC+MYDIST, NPCOL )
*
*           Loop over all sending processes
*
            DO 50 K = 1, CBLKSKIP
*
*              If I do not already possess the required data
*
               IF( ( MYROW.NE.RSRC ).OR.( MYCOL.NE.ICSRC ) ) THEN
*
*                 Figure how many rows to receive, and receive them.
*                 NOTE: may receive too much -- NB instead of JB
*
                  NBLOCKS = ( MP-ISTART+NB ) / NB
                  JJ = ( ( NBLOCKS+CBLKSKIP-1 ) / CBLKSKIP )*NB
                  IF( JJ.GT.0 )
     $               CALL CGERV2D( ICTXT, JJ, N, WORK, JJ, RSRC,
     $                             ICSRC )
*
*                 Copy data to destination vector
*
                  JJ = 1
                  DO 30 II = ISTART, MP, NB*CBLKSKIP
                     JB = MIN( NB, MP-II+1 )
                     CALL CLACPY( 'G', JB, N, WORK( JJ ), JB,
     $                            VD( II, 1 ), LDVD )
                     JJ = JJ + NB*N
   30             CONTINUE
*
*              If I am both source and destination
*
               ELSE
                  JJ = ICPY
                  DO 40 II = ISTART, MP, NB*CBLKSKIP
                     JB = MIN( NB, MP-II+1 )
                     CALL CLACPY( 'G', JB, N, VS( JJ, 1 ), LDVS,
     $                            VD( II, 1 ), LDVD )
                     JJ = JJ + NB*RBLKSKIP
   40             CONTINUE
               END IF
               ISTART = ISTART + NB
               ICSRC = MOD( ICSRC+NPROW, NPCOL )
   50       CONTINUE
         END IF
*
*     If NPROW = NPCOL, there is a one-to-one correspondence between
*     process rows and columns, so no workspace or copying is required
*
      ELSE
*
         IF( MYROW.EQ.RSRC ) THEN
*
*           Figure my distance from CSRC: the process in CDEST at the
*           same distance from RDEST will want my piece of the vector
*
            MYDIST = MOD( NPCOL+MYCOL-CSRC, NPCOL )
            MQ = NUMROC( M, NB, MYCOL, CSRC, NPCOL )
            IRDEST = MOD( RDEST+MYDIST, NPROW )
            IF( ( MYROW.NE.IRDEST ).OR.( MYCOL.NE.CDEST ) ) THEN
               CALL CGESD2D( ICTXT, MQ, N, VS, LDVS, IRDEST, CDEST )
            ELSE
               CALL CLACPY( 'G', MQ, N, VS, LDVS, VD, LDVD )
            END IF
         END IF
         IF( MYCOL.EQ.CDEST ) THEN
*
*           Figure my distance from RDEST: the process in RSRC at the
*           same distance from CSRC will have my piece of the vector
*
            MYDIST = MOD( NPROW+MYROW-RDEST, NPROW )
            MP = NUMROC( M, NB, MYROW, RDEST, NPROW )
            ICSRC = MOD( CSRC+MYDIST, NPCOL )
            IF( ( MYCOL.NE.ICSRC ).OR.( MYROW.NE.RSRC ) )
     $         CALL CGERV2D( ICTXT, MP, N, VD, LDVD, RSRC, ICSRC )
         END IF
      END IF
*
      RETURN
*
*     End of PCROW2COL
*
      END
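
The following is a minimal calling sketch, not part of the library source. The 2 x 3 grid shape, problem sizes, program name EX2COL, and the workspace size LDW are assumptions chosen for illustration; the local arrays are simply over-allocated with M rows so that any process can hold its piece.

      PROGRAM EX2COL
*
*     Illustrative driver (assumes 6 processes forming a 2 x 3 grid):
*     redistribute N vectors of global length M from process row 0
*     to process column 0 via PCROW2COL.
*
      INTEGER            M, N, NB, LDW
      PARAMETER          ( M = 100, N = 2, NB = 8, LDW = 256 )
      INTEGER            IAM, NPROCS, ICTXT, NPROW, NPCOL, MYROW,
     $                   MYCOL, MQ, I, J
      COMPLEX            VS( M, N ), VD( M, N ), WORK( LDW )
      INTEGER            NUMROC
      EXTERNAL           NUMROC, BLACS_PINFO, BLACS_GET,
     $                   BLACS_GRIDINIT, BLACS_GRIDINFO,
     $                   BLACS_GRIDEXIT, BLACS_EXIT, PCROW2COL
*
*     Set up the process grid
*
      CALL BLACS_PINFO( IAM, NPROCS )
      CALL BLACS_GET( -1, 0, ICTXT )
      CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 2, 3 )
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*
*     Processes in source row 0 fill their local pieces of VS
*
      IF( MYROW.EQ.0 ) THEN
         MQ = NUMROC( M, NB, MYCOL, 0, NPCOL )
         DO 20 J = 1, N
            DO 10 I = 1, MQ
               VS( I, J ) = CMPLX( REAL( I ), REAL( J ) )
   10       CONTINUE
   20    CONTINUE
      END IF
*
*     RSRC = 0, CSRC = 0, RDEST = 0, CDEST = 0
*
      CALL PCROW2COL( ICTXT, M, N, NB, VS, M, VD, M, 0, 0, 0, 0,
     $                WORK )
*
      CALL BLACS_GRIDEXIT( ICTXT )
      CALL BLACS_EXIT( 0 )
      END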