ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pclaevswp.f
Go to the documentation of this file.
1 *
2 *
3  SUBROUTINE pclaevswp( N, ZIN, LDZI, Z, IZ, JZ, DESCZ, NVS, KEY,
4  $ RWORK, LRWORK )
5 *
6 * -- ScaLAPACK routine (version 1.7) --
7 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8 * and University of California, Berkeley.
9 * April 15, 1997
10 *
11 * .. Scalar Arguments ..
12  INTEGER IZ, JZ, LDZI, LRWORK, N
13 * ..
14 * .. Array Arguments ..
15  INTEGER DESCZ( * ), KEY( * ), NVS( * )
16  REAL RWORK( * ), ZIN( LDZI, * )
17  COMPLEX Z( * )
18 * ..
19 *
20 * Purpose
21 * =======
22 *
23 * PCLAEVSWP moves the eigenvectors (potentially unsorted) from
24 * where they are computed, to a ScaLAPACK standard block cyclic
25 * array, sorted so that the corresponding eigenvalues are sorted.
26 *
27 * Notes
28 * =====
29 *
30 *
31 * Each global data object is described by an associated description
32 * vector. This vector stores the information required to establish
33 * the mapping between an object element and its corresponding process
34 * and memory location.
35 *
36 * Let A be a generic term for any 2D block cyclicly distributed array.
37 * Such a global array has an associated description vector DESCA.
38 * In the following comments, the character _ should be read as
39 * "of the global array".
40 *
41 * NOTATION STORED IN EXPLANATION
42 * --------------- -------------- --------------------------------------
43 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
44 * DTYPE_A = 1.
45 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
46 * the BLACS process grid A is distribu-
47 * ted over. The context itself is glo-
48 * bal, but the handle (the integer
49 * value) may vary.
50 * M_A (global) DESCA( M_ ) The number of rows in the global
51 * array A.
52 * N_A (global) DESCA( N_ ) The number of columns in the global
53 * array A.
54 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
55 * the rows of the array.
56 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
57 * the columns of the array.
58 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
59 * row of the array A is distributed.
60 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
61 * first column of the array A is
62 * distributed.
63 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
64 * array. LLD_A >= MAX(1,LOCr(M_A)).
65 *
66 * Let K be the number of rows or columns of a distributed matrix,
67 * and assume that its process grid has dimension p x q.
68 * LOCr( K ) denotes the number of elements of K that a process
69 * would receive if K were distributed over the p processes of its
70 * process column.
71 * Similarly, LOCc( K ) denotes the number of elements of K that a
72 * process would receive if K were distributed over the q processes of
73 * its process row.
74 * The values of LOCr() and LOCc() may be determined via a call to the
75 * ScaLAPACK tool function, NUMROC:
76 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
77 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
78 * An upper bound for these quantities may be computed by:
79 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
80 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
81 *
82 *
83 * Arguments
84 * =========
85 *
86 * NP = the number of rows local to a given process.
87 * NQ = the number of columns local to a given process.
88 *
89 * N (global input) INTEGER
90 * The order of the matrix A. N >= 0.
91 *
92 * ZIN (local input) REAL array,
93 * dimension ( LDZI, NVS(iam) )
94 * The eigenvectors on input. Each eigenvector resides entirely
95 * in one process. Each process holds a contiguous set of
96 * NVS(iam) eigenvectors. The first eigenvector which the
97 * process holds is: sum for i=[0,iam-1) of NVS(i)
98 *
99 * LDZI (locl input) INTEGER
100 * leading dimension of the ZIN array
101 *
102 * Z (local output) COMPLEX array
103 * global dimension (N, N), local dimension (DESCZ(DLEN_), NQ)
104 * The eigenvectors on output. The eigenvectors are distributed
105 * in a block cyclic manner in both dimensions, with a
106 * block size of NB.
107 *
108 * IZ (global input) INTEGER
109 * Z's global row index, which points to the beginning of the
110 * submatrix which is to be operated on.
111 *
112 * JZ (global input) INTEGER
113 * Z's global column index, which points to the beginning of
114 * the submatrix which is to be operated on.
115 *
116 * DESCZ (global and local input) INTEGER array of dimension DLEN_.
117 * The array descriptor for the distributed matrix Z.
118 *
119 * NVS (global input) INTEGER array, dimension( nprocs+1 )
120 * nvs(i) = number of processes
121 * number of eigenvectors held by processes [0,i-1)
122 * nvs(1) = number of eigen vectors held by [0,1-1) == 0
123 * nvs(nprocs+1) = number of eigen vectors held by [0,nprocs) ==
124 * total number of eigenvectors
125 *
126 * KEY (global input) INTEGER array, dimension( N )
127 * Indicates the actual index (after sorting) for each of the
128 * eigenvectors.
129 *
130 * RWORK (local workspace) REAL array, dimension (LRWORK)
131 *
132 * LRWORK (local input) INTEGER dimension of RWORK
133 * .. Parameters ..
134  INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
135  $ mb_, nb_, rsrc_, csrc_, lld_
136  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
137  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
138  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
139 * ..
140 * .. Local Scalars ..
141  INTEGER CYCLIC_I, CYCLIC_J, DIST, I, IAM, II, INCII, J,
142  $ maxi, maxii, mini, minii, mycol, myrow, nb,
143  $ nbufsize, npcol, nprocs, nprow, pcol, recvcol,
144  $ recvfrom, recvrow, sendcol, sendrow, sendto
145 * ..
146 * .. External Functions ..
147  INTEGER INDXG2L, INDXG2P
148  EXTERNAL indxg2l, indxg2p
149 * ..
150 * .. External Subroutines ..
151  EXTERNAL blacs_gridinfo, sgerv2d, sgesd2d
152 * ..
153 * .. Intrinsic Functions ..
154  INTRINSIC cmplx, max, min, mod
155 * ..
156 * .. Executable Statements ..
157 * This is just to keep ftnchek happy
158  IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
159  $ rsrc_.LT.0 )RETURN
160  CALL blacs_gridinfo( descz( ctxt_ ), nprow, npcol, myrow, mycol )
161  iam = myrow + mycol*nprow
162  iam = myrow*npcol + mycol
163 *
164  nb = descz( mb_ )
165 *
166  nprocs = nprow*npcol
167 *
168 * If PxSTEIN operates on a sub-matrix of a global matrix, the
169 * key [] that contains the indicies of the eigenvectors is refe-
170 * renced to the dimensions of the sub-matrix and not the global
171 * distrubited matrix. Because of this, PxLAEVSWP will incorrectly
172 * map the eigenvectors to the global eigenvector matrix, Z, unless
173 * the key[] elements are shifted as below.
174 *
175  DO 10 j = descz( n_ ), 1, -1
176  key( j ) = key( j-jz+1 ) + ( jz-1 )
177  10 CONTINUE
178 *
179  DO 110 dist = 0, nprocs - 1
180 *
181  sendto = mod( iam+dist, nprocs )
182  recvfrom = mod( nprocs+iam-dist, nprocs )
183 *
184  sendrow = mod( sendto, nprow )
185  sendcol = sendto / nprow
186  recvrow = mod( recvfrom, nprow )
187  recvcol = recvfrom / nprow
188 *
189  sendrow = sendto / npcol
190  sendcol = mod( sendto, npcol )
191  recvrow = recvfrom / npcol
192  recvcol = mod( recvfrom, npcol )
193 *
194 * Figure out what I have that process "sendto" wants
195 *
196  nbufsize = 0
197 *
198 * We are looping through the eigenvectors that I presently own.
199 *
200  DO 40 j = nvs( 1+iam ) + jz, nvs( 1+iam+1 ) + jz - 1
201  pcol = indxg2p( key( j ), descz( nb_ ), -1, descz( csrc_ ),
202  $ npcol )
203  IF( sendcol.EQ.pcol ) THEN
204  minii = mod( sendrow+descz( rsrc_ ), nprow )*
205  $ descz( mb_ ) + 1
206  maxii = descz( m_ )
207  incii = descz( mb_ )*nprow
208  DO 30 ii = minii, maxii, incii
209  mini = max( ii, iz )
210  maxi = min( ii+descz( mb_ )-1, n+iz-1 )
211  DO 20 i = mini, maxi, 1
212  nbufsize = nbufsize + 1
213  rwork( nbufsize ) = zin( i+1-iz,
214  $ j-nvs( 1+iam )+1-jz )
215  20 CONTINUE
216  30 CONTINUE
217  END IF
218  40 CONTINUE
219 *
220 *
221  IF( myrow.NE.sendrow .OR. mycol.NE.sendcol )
222  $ CALL sgesd2d( descz( ctxt_ ), nbufsize, 1, rwork, nbufsize,
223  $ sendrow, sendcol )
224 *
225 *
226 * Figure out what process "recvfrom" has that I want
227 *
228  nbufsize = 0
229  DO 70 j = nvs( 1+recvfrom ) + jz,
230  $ nvs( 1+recvfrom+1 ) + jz - 1, 1
231  pcol = indxg2p( key( j ), descz( nb_ ), -1, descz( csrc_ ),
232  $ npcol )
233  IF( mycol.EQ.pcol ) THEN
234  minii = mod( myrow+descz( rsrc_ ), nprow )*descz( mb_ ) +
235  $ 1
236  maxii = descz( m_ )
237  incii = descz( mb_ )*nprow
238  DO 60 ii = minii, maxii, incii
239  mini = max( ii, iz )
240  maxi = min( ii+nb-1, n+iz-1 )
241  DO 50 i = mini, maxi, 1
242  nbufsize = nbufsize + 1
243  50 CONTINUE
244  60 CONTINUE
245  END IF
246  70 CONTINUE
247 *
248 *
249 *
250  IF( myrow.NE.recvrow .OR. mycol.NE.recvcol )
251  $ CALL sgerv2d( descz( ctxt_ ), 1, nbufsize, rwork, 1,
252  $ recvrow, recvcol )
253 *
254  nbufsize = 0
255  DO 100 j = nvs( 1+recvfrom ) + jz,
256  $ nvs( 1+recvfrom+1 ) + jz - 1, 1
257  pcol = indxg2p( key( j ), descz( nb_ ), -1, descz( csrc_ ),
258  $ npcol )
259  IF( mycol.EQ.pcol ) THEN
260  cyclic_j = indxg2l( key( j ), descz( mb_ ), -1, -1,
261  $ npcol )
262  cyclic_i = 1
263  minii = mod( myrow+descz( rsrc_ ), nprow )*descz( mb_ ) +
264  $ 1
265  maxii = descz( m_ )
266  incii = descz( mb_ )*nprow
267  DO 90 ii = minii, maxii, incii
268  mini = max( ii, iz )
269  cyclic_i = indxg2l( mini, descz( mb_ ), -1, -1,
270  $ nprow )
271  maxi = min( ii+nb-1, n+iz-1 )
272  DO 80 i = mini, maxi, 1
273  nbufsize = nbufsize + 1
274  z( cyclic_i+( cyclic_j-1 )*descz( lld_ ) )
275  $ = cmplx( rwork( nbufsize ) )
276  cyclic_i = cyclic_i + 1
277  80 CONTINUE
278  90 CONTINUE
279  END IF
280  100 CONTINUE
281 *
282  110 CONTINUE
283  RETURN
284 *
285 * End of PCLAEVSWP
286 *
287  END
cmplx
float cmplx[2]
Definition: pblas.h:132
max
#define max(A, B)
Definition: pcgemr.c:180
pclaevswp
subroutine pclaevswp(N, ZIN, LDZI, Z, IZ, JZ, DESCZ, NVS, KEY, RWORK, LRWORK)
Definition: pclaevswp.f:5
min
#define min(A, B)
Definition: pcgemr.c:181