SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pslaevswp.f
Go to the documentation of this file.
1*
2*
3 SUBROUTINE pslaevswp( N, ZIN, LDZI, Z, IZ, JZ, DESCZ, NVS, KEY,
4 $ WORK, LWORK )
5*
6* -- ScaLAPACK routine (version 1.7) --
7* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8* and University of California, Berkeley.
9* April 15, 1997
10*
11* .. Scalar Arguments ..
12 INTEGER IZ, JZ, LDZI, LWORK, N
13* ..
14* .. Array Arguments ..
15 INTEGER DESCZ( * ), KEY( * ), NVS( * )
16 REAL WORK( * ), Z( * ), ZIN( LDZI, * )
17* ..
18*
19* Purpose
20* =======
21*
22* PSLAEVSWP moves the eigenvectors (potentially unsorted) from
23* where they are computed, to a ScaLAPACK standard block cyclic
24* array, sorted so that the corresponding eigenvalues are sorted.
25*
26* Notes
27* =====
28*
29*
30* Each global data object is described by an associated description
31* vector. This vector stores the information required to establish
32* the mapping between an object element and its corresponding process
33* and memory location.
34*
35* Let A be a generic term for any 2D block cyclicly distributed array.
36* Such a global array has an associated description vector DESCA.
37* In the following comments, the character _ should be read as
38* "of the global array".
39*
40* NOTATION STORED IN EXPLANATION
41* --------------- -------------- --------------------------------------
42* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
43* DTYPE_A = 1.
44* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
45* the BLACS process grid A is distribu-
46* ted over. The context itself is glo-
47* bal, but the handle (the integer
48* value) may vary.
49* M_A (global) DESCA( M_ ) The number of rows in the global
50* array A.
51* N_A (global) DESCA( N_ ) The number of columns in the global
52* array A.
53* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
54* the rows of the array.
55* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
56* the columns of the array.
57* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
58* row of the array A is distributed.
59* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
60* first column of the array A is
61* distributed.
62* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
63* array. LLD_A >= MAX(1,LOCr(M_A)).
64*
65* Let K be the number of rows or columns of a distributed matrix,
66* and assume that its process grid has dimension p x q.
67* LOCr( K ) denotes the number of elements of K that a process
68* would receive if K were distributed over the p processes of its
69* process column.
70* Similarly, LOCc( K ) denotes the number of elements of K that a
71* process would receive if K were distributed over the q processes of
72* its process row.
73* The values of LOCr() and LOCc() may be determined via a call to the
74* ScaLAPACK tool function, NUMROC:
75* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
76* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
77* An upper bound for these quantities may be computed by:
78* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
79* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
80*
81*
82* Arguments
83* =========
84*
85* NP = the number of rows local to a given process.
86* NQ = the number of columns local to a given process.
87*
88* N (global input) INTEGER
89* The order of the matrix A. N >= 0.
90*
91* ZIN (local input) REAL array,
92* dimension ( LDZI, NVS(iam) )
93* The eigenvectors on input. Each eigenvector resides entirely
94* in one process. Each process holds a contiguous set of
95* NVS(iam) eigenvectors. The first eigenvector which the
96* process holds is: sum for i=[0,iam-1) of NVS(i)
97*
98* LDZI (locl input) INTEGER
99* leading dimension of the ZIN array
100*
101* Z (local output) REAL array
102* global dimension (N, N), local dimension (DESCZ(DLEN_), NQ)
103* The eigenvectors on output. The eigenvectors are distributed
104* in a block cyclic manner in both dimensions, with a
105* block size of NB.
106*
107* IZ (global input) INTEGER
108* Z's global row index, which points to the beginning of the
109* submatrix which is to be operated on.
110*
111* JZ (global input) INTEGER
112* Z's global column index, which points to the beginning of
113* the submatrix which is to be operated on.
114*
115* DESCZ (global and local input) INTEGER array of dimension DLEN_.
116* The array descriptor for the distributed matrix Z.
117*
118* NVS (global input) INTEGER array, dimension( nprocs+1 )
119* nvs(i) = number of processes
120* number of eigenvectors held by processes [0,i-1)
121* nvs(1) = number of eigen vectors held by [0,1-1) == 0
122* nvs(nprocs+1) = number of eigen vectors held by [0,nprocs) ==
123* total number of eigenvectors
124*
125* KEY (global input) INTEGER array, dimension( N )
126* Indicates the actual index (after sorting) for each of the
127* eigenvectors.
128*
129* WORK (local workspace) REAL array, dimension (LWORK)
130*
131* LWORK (local input) INTEGER dimension of WORK
132* .. Parameters ..
133 INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
134 $ mb_, nb_, rsrc_, csrc_, lld_
135 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
136 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
137 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
138* ..
139* .. Local Scalars ..
140 INTEGER CYCLIC_I, CYCLIC_J, DIST, I, IAM, II, INCII, J,
141 $ maxi, maxii, mini, minii, mycol, myrow, nb,
142 $ nbufsize, npcol, nprocs, nprow, pcol, recvcol,
143 $ recvfrom, recvrow, sendcol, sendrow, sendto
144* ..
145* .. External Functions ..
146 INTEGER INDXG2L, INDXG2P
147 EXTERNAL indxg2l, indxg2p
148* ..
149* .. External Subroutines ..
150 EXTERNAL blacs_gridinfo, sgerv2d, sgesd2d
151* ..
152* .. Intrinsic Functions ..
153 INTRINSIC max, min, mod
154* ..
155* .. Executable Statements ..
156* This is just to keep ftnchek happy
157 IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
158 $ rsrc_.LT.0 )RETURN
159 CALL blacs_gridinfo( descz( ctxt_ ), nprow, npcol, myrow, mycol )
160 iam = myrow + mycol*nprow
161 iam = myrow*npcol + mycol
162*
163 nb = descz( mb_ )
164*
165 nprocs = nprow*npcol
166*
167* If PxSTEIN operates on a sub-matrix of a global matrix, the
168* key [] that contains the indicies of the eigenvectors is refe-
169* renced to the dimensions of the sub-matrix and not the global
170* distrubited matrix. Because of this, PxLAEVSWP will incorrectly
171* map the eigenvectors to the global eigenvector matrix, Z, unless
172* the key[] elements are shifted as below.
173*
174 DO 10 j = descz( n_ ), 1, -1
175 key( j ) = key( j-jz+1 ) + ( jz-1 )
176 10 CONTINUE
177*
178 DO 110 dist = 0, nprocs - 1
179*
180 sendto = mod( iam+dist, nprocs )
181 recvfrom = mod( nprocs+iam-dist, nprocs )
182*
183 sendrow = mod( sendto, nprow )
184 sendcol = sendto / nprow
185 recvrow = mod( recvfrom, nprow )
186 recvcol = recvfrom / nprow
187*
188 sendrow = sendto / npcol
189 sendcol = mod( sendto, npcol )
190 recvrow = recvfrom / npcol
191 recvcol = mod( recvfrom, npcol )
192*
193* Figure out what I have that process "sendto" wants
194*
195 nbufsize = 0
196*
197* We are looping through the eigenvectors that I presently own.
198*
199 DO 40 j = nvs( 1+iam ) + jz, nvs( 1+iam+1 ) + jz - 1
200 pcol = indxg2p( key( j ), descz( nb_ ), -1, descz( csrc_ ),
201 $ npcol )
202 IF( sendcol.EQ.pcol ) THEN
203 minii = mod( sendrow+descz( rsrc_ ), nprow )*
204 $ descz( mb_ ) + 1
205 maxii = descz( m_ )
206 incii = descz( mb_ )*nprow
207 DO 30 ii = minii, maxii, incii
208 mini = max( ii, iz )
209 maxi = min( ii+descz( mb_ )-1, n+iz-1 )
210 DO 20 i = mini, maxi, 1
211 nbufsize = nbufsize + 1
212 work( nbufsize ) = zin( i+1-iz,
213 $ j-nvs( 1+iam )+1-jz )
214 20 CONTINUE
215 30 CONTINUE
216 END IF
217 40 CONTINUE
218*
219*
220 IF( myrow.NE.sendrow .OR. mycol.NE.sendcol )
221 $ CALL sgesd2d( descz( ctxt_ ), nbufsize, 1, work, nbufsize,
222 $ sendrow, sendcol )
223*
224*
225* Figure out what process "recvfrom" has that I want
226*
227 nbufsize = 0
228 DO 70 j = nvs( 1+recvfrom ) + jz,
229 $ nvs( 1+recvfrom+1 ) + jz - 1, 1
230 pcol = indxg2p( key( j ), descz( nb_ ), -1, descz( csrc_ ),
231 $ npcol )
232 IF( mycol.EQ.pcol ) THEN
233 minii = mod( myrow+descz( rsrc_ ), nprow )*descz( mb_ ) +
234 $ 1
235 maxii = descz( m_ )
236 incii = descz( mb_ )*nprow
237 DO 60 ii = minii, maxii, incii
238 mini = max( ii, iz )
239 maxi = min( ii+nb-1, n+iz-1 )
240 DO 50 i = mini, maxi, 1
241 nbufsize = nbufsize + 1
242 50 CONTINUE
243 60 CONTINUE
244 END IF
245 70 CONTINUE
246*
247*
248*
249 IF( myrow.NE.recvrow .OR. mycol.NE.recvcol )
250 $ CALL sgerv2d( descz( ctxt_ ), 1, nbufsize, work, 1, recvrow,
251 $ recvcol )
252*
253 nbufsize = 0
254 DO 100 j = nvs( 1+recvfrom ) + jz,
255 $ nvs( 1+recvfrom+1 ) + jz - 1, 1
256 pcol = indxg2p( key( j ), descz( nb_ ), -1, descz( csrc_ ),
257 $ npcol )
258 IF( mycol.EQ.pcol ) THEN
259 cyclic_j = indxg2l( key( j ), descz( mb_ ), -1, -1,
260 $ npcol )
261 cyclic_i = 1
262 minii = mod( myrow+descz( rsrc_ ), nprow )*descz( mb_ ) +
263 $ 1
264 maxii = descz( m_ )
265 incii = descz( mb_ )*nprow
266 DO 90 ii = minii, maxii, incii
267 mini = max( ii, iz )
268 cyclic_i = indxg2l( mini, descz( mb_ ), -1, -1,
269 $ nprow )
270 maxi = min( ii+nb-1, n+iz-1 )
271 DO 80 i = mini, maxi, 1
272 nbufsize = nbufsize + 1
273 z( cyclic_i+( cyclic_j-1 )*descz( lld_ ) )
274 $ = work( nbufsize )
275 cyclic_i = cyclic_i + 1
276 80 CONTINUE
277 90 CONTINUE
278 END IF
279 100 CONTINUE
280*
281 110 CONTINUE
282 RETURN
283*
284* End of PSLAEVSWP
285*
286 END
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pslaevswp(n, zin, ldzi, z, iz, jz, descz, nvs, key, work, lwork)
Definition pslaevswp.f:5