ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdsyevx.f
Go to the documentation of this file.
1  SUBROUTINE pdsyevx( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL,
2  $ VU, IL, IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ,
3  $ JZ, DESCZ, WORK, LWORK, IWORK, LIWORK, IFAIL,
4  $ ICLUSTR, GAP, INFO )
5 *
6 * -- ScaLAPACK routine (version 1.7) --
7 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8 * and University of California, Berkeley.
9 * May 25, 2001
10 *
11 * .. Scalar Arguments ..
12  CHARACTER JOBZ, RANGE, UPLO
13  INTEGER IA, IL, INFO, IU, IZ, JA, JZ, LIWORK, LWORK, M,
14  $ N, NZ
15  DOUBLE PRECISION ABSTOL, ORFAC, VL, VU
16 * ..
17 * .. Array Arguments ..
18  INTEGER DESCA( * ), DESCZ( * ), ICLUSTR( * ),
19  $ IFAIL( * ), IWORK( * )
20  DOUBLE PRECISION A( * ), GAP( * ), W( * ), WORK( * ), Z( * )
21 * ..
22 *
23 * Purpose
24 * =======
25 *
26 * PDSYEVX computes selected eigenvalues and, optionally, eigenvectors
27 * of a real symmetric matrix A by calling the recommended sequence
28 * of ScaLAPACK routines. Eigenvalues/vectors can be selected by
29 * specifying a range of values or a range of indices for the desired
30 * eigenvalues.
31 *
32 * Notes
33 * =====
34 *
35 * Each global data object is described by an associated description
36 * vector. This vector stores the information required to establish
37 * the mapping between an object element and its corresponding process
38 * and memory location.
39 *
40 * Let A be a generic term for any 2D block cyclicly distributed array.
41 * Such a global array has an associated description vector DESCA.
42 * In the following comments, the character _ should be read as
43 * "of the global array".
44 *
45 * NOTATION STORED IN EXPLANATION
46 * --------------- -------------- --------------------------------------
47 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
48 * DTYPE_A = 1.
49 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
50 * the BLACS process grid A is distribu-
51 * ted over. The context itself is glo-
52 * bal, but the handle (the integer
53 * value) may vary.
54 * M_A (global) DESCA( M_ ) The number of rows in the global
55 * array A.
56 * N_A (global) DESCA( N_ ) The number of columns in the global
57 * array A.
58 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
59 * the rows of the array.
60 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
61 * the columns of the array.
62 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
63 * row of the array A is distributed.
64 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
65 * first column of the array A is
66 * distributed.
67 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
68 * array. LLD_A >= MAX(1,LOCr(M_A)).
69 *
70 * Let K be the number of rows or columns of a distributed matrix,
71 * and assume that its process grid has dimension p x q.
72 * LOCr( K ) denotes the number of elements of K that a process
73 * would receive if K were distributed over the p processes of its
74 * process column.
75 * Similarly, LOCc( K ) denotes the number of elements of K that a
76 * process would receive if K were distributed over the q processes of
77 * its process row.
78 * The values of LOCr() and LOCc() may be determined via a call to the
79 * ScaLAPACK tool function, NUMROC:
80 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
81 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
82 * An upper bound for these quantities may be computed by:
83 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
84 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
85 *
86 * PDSYEVX assumes IEEE 754 standard compliant arithmetic. To port
87 * to a system which does not have IEEE 754 arithmetic, modify
88 * the appropriate SLmake.inc file to include the compiler switch
89 * -DNO_IEEE. This switch only affects the compilation of pdlaiect.c.
90 *
91 * Arguments
92 * =========
93 *
94 * NP = the number of rows local to a given process.
95 * NQ = the number of columns local to a given process.
96 *
97 * JOBZ (global input) CHARACTER*1
98 * Specifies whether or not to compute the eigenvectors:
99 * = 'N': Compute eigenvalues only.
100 * = 'V': Compute eigenvalues and eigenvectors.
101 *
102 * RANGE (global input) CHARACTER*1
103 * = 'A': all eigenvalues will be found.
104 * = 'V': all eigenvalues in the interval [VL,VU] will be found.
105 * = 'I': the IL-th through IU-th eigenvalues will be found.
106 *
107 * UPLO (global input) CHARACTER*1
108 * Specifies whether the upper or lower triangular part of the
109 * symmetric matrix A is stored:
110 * = 'U': Upper triangular
111 * = 'L': Lower triangular
112 *
113 * N (global input) INTEGER
114 * The number of rows and columns of the matrix A. N >= 0.
115 *
116 * A (local input/workspace) block cyclic DOUBLE PRECISION array,
117 * global dimension (N, N),
118 * local dimension ( LLD_A, LOCc(JA+N-1) )
119 *
120 * On entry, the symmetric matrix A. If UPLO = 'U', only the
121 * upper triangular part of A is used to define the elements of
122 * the symmetric matrix. If UPLO = 'L', only the lower
123 * triangular part of A is used to define the elements of the
124 * symmetric matrix.
125 *
126 * On exit, the lower triangle (if UPLO='L') or the upper
127 * triangle (if UPLO='U') of A, including the diagonal, is
128 * destroyed.
129 *
130 * IA (global input) INTEGER
131 * A's global row index, which points to the beginning of the
132 * submatrix which is to be operated on.
133 *
134 * JA (global input) INTEGER
135 * A's global column index, which points to the beginning of
136 * the submatrix which is to be operated on.
137 *
138 * DESCA (global and local input) INTEGER array of dimension DLEN_.
139 * The array descriptor for the distributed matrix A.
140 * If DESCA( CTXT_ ) is incorrect, PDSYEVX cannot guarantee
141 * correct error reporting.
142 *
143 * VL (global input) DOUBLE PRECISION
144 * If RANGE='V', the lower bound of the interval to be searched
145 * for eigenvalues. Not referenced if RANGE = 'A' or 'I'.
146 *
147 * VU (global input) DOUBLE PRECISION
148 * If RANGE='V', the upper bound of the interval to be searched
149 * for eigenvalues. Not referenced if RANGE = 'A' or 'I'.
150 *
151 * IL (global input) INTEGER
152 * If RANGE='I', the index (from smallest to largest) of the
153 * smallest eigenvalue to be returned. IL >= 1.
154 * Not referenced if RANGE = 'A' or 'V'.
155 *
156 * IU (global input) INTEGER
157 * If RANGE='I', the index (from smallest to largest) of the
158 * largest eigenvalue to be returned. min(IL,N) <= IU <= N.
159 * Not referenced if RANGE = 'A' or 'V'.
160 *
161 * ABSTOL (global input) DOUBLE PRECISION
162 * If JOBZ='V', setting ABSTOL to PDLAMCH( CONTEXT, 'U') yields
163 * the most orthogonal eigenvectors.
164 *
165 * The absolute error tolerance for the eigenvalues.
166 * An approximate eigenvalue is accepted as converged
167 * when it is determined to lie in an interval [a,b]
168 * of width less than or equal to
169 *
170 * ABSTOL + EPS * max( |a|,|b| ) ,
171 *
172 * where EPS is the machine precision. If ABSTOL is less than
173 * or equal to zero, then EPS*norm(T) will be used in its place,
174 * where norm(T) is the 1-norm of the tridiagonal matrix
175 * obtained by reducing A to tridiagonal form.
176 *
177 * Eigenvalues will be computed most accurately when ABSTOL is
178 * set to twice the underflow threshold 2*PDLAMCH('S') not zero.
179 * If this routine returns with ((MOD(INFO,2).NE.0) .OR.
180 * (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or
181 * eigenvectors did not converge, try setting ABSTOL to
182 * 2*PDLAMCH('S').
183 *
184 * See "Computing Small Singular Values of Bidiagonal Matrices
185 * with Guaranteed High Relative Accuracy," by Demmel and
186 * Kahan, LAPACK Working Note #3.
187 *
188 * See "On the correctness of Parallel Bisection in Floating
189 * Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70
190 *
191 * M (global output) INTEGER
192 * Total number of eigenvalues found. 0 <= M <= N.
193 *
194 * NZ (global output) INTEGER
195 * Total number of eigenvectors computed. 0 <= NZ <= M.
196 * The number of columns of Z that are filled.
197 * If JOBZ .NE. 'V', NZ is not referenced.
198 * If JOBZ .EQ. 'V', NZ = M unless the user supplies
199 * insufficient space and PDSYEVX is not able to detect this
200 * before beginning computation. To get all the eigenvectors
201 * requested, the user must supply both sufficient
202 * space to hold the eigenvectors in Z (M .LE. DESCZ(N_))
203 * and sufficient workspace to compute them. (See LWORK below.)
204 * PDSYEVX is always able to detect insufficient space without
205 * computation unless RANGE .EQ. 'V'.
206 *
207 * W (global output) DOUBLE PRECISION array, dimension (N)
208 * On normal exit, the first M entries contain the selected
209 * eigenvalues in ascending order.
210 *
211 * ORFAC (global input) DOUBLE PRECISION
212 * Specifies which eigenvectors should be reorthogonalized.
213 * Eigenvectors that correspond to eigenvalues which are within
214 * tol=ORFAC*norm(A) of each other are to be reorthogonalized.
215 * However, if the workspace is insufficient (see LWORK),
216 * tol may be decreased until all eigenvectors to be
217 * reorthogonalized can be stored in one process.
218 * No reorthogonalization will be done if ORFAC equals zero.
219 * A default value of 10^-3 is used if ORFAC is negative.
220 * ORFAC should be identical on all processes.
221 *
222 * Z (local output) DOUBLE PRECISION array,
223 * global dimension (N, N),
224 * local dimension ( LLD_Z, LOCc(JZ+N-1) )
225 * If JOBZ = 'V', then on normal exit the first M columns of Z
226 * contain the orthonormal eigenvectors of the matrix
227 * corresponding to the selected eigenvalues. If an eigenvector
228 * fails to converge, then that column of Z contains the latest
229 * approximation to the eigenvector, and the index of the
230 * eigenvector is returned in IFAIL.
231 * If JOBZ = 'N', then Z is not referenced.
232 *
233 * IZ (global input) INTEGER
234 * Z's global row index, which points to the beginning of the
235 * submatrix which is to be operated on.
236 *
237 * JZ (global input) INTEGER
238 * Z's global column index, which points to the beginning of
239 * the submatrix which is to be operated on.
240 *
241 * DESCZ (global and local input) INTEGER array of dimension DLEN_.
242 * The array descriptor for the distributed matrix Z.
243 * DESCZ( CTXT_ ) must equal DESCA( CTXT_ )
244 *
245 * WORK (local workspace/output) DOUBLE PRECISION array,
246 * dimension max(3,LWORK)
247 * On return, WORK(1) contains the optimal amount of
248 * workspace required for efficient execution.
249 * if JOBZ='N' WORK(1) = optimal amount of workspace
250 * required to compute eigenvalues efficiently
251 * if JOBZ='V' WORK(1) = optimal amount of workspace
252 * required to compute eigenvalues and eigenvectors
253 * efficiently with no guarantee on orthogonality.
254 * If RANGE='V', it is assumed that all eigenvectors
255 * may be required.
256 *
257 * LWORK (local input) INTEGER
258 * Size of WORK
259 * See below for definitions of variables used to define LWORK.
260 * If no eigenvectors are requested (JOBZ = 'N') then
261 * LWORK >= 5 * N + MAX( 5 * NN, NB * ( NP0 + 1 ) )
262 * If eigenvectors are requested (JOBZ = 'V' ) then
263 * the amount of workspace required to guarantee that all
264 * eigenvectors are computed is:
265 * LWORK >= 5*N + MAX( 5*NN, NP0 * MQ0 + 2 * NB * NB ) +
266 * ICEIL( NEIG, NPROW*NPCOL)*NN
267 *
268 * The computed eigenvectors may not be orthogonal if the
269 * minimal workspace is supplied and ORFAC is too small.
270 * If you want to guarantee orthogonality (at the cost
271 * of potentially poor performance) you should add
272 * the following to LWORK:
273 * (CLUSTERSIZE-1)*N
274 * where CLUSTERSIZE is the number of eigenvalues in the
275 * largest cluster, where a cluster is defined as a set of
276 * close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) |
277 * W(J+1) <= W(J) + ORFAC*2*norm(A) }
278 * Variable definitions:
279 * NEIG = number of eigenvectors requested
280 * NB = DESCA( MB_ ) = DESCA( NB_ ) =
281 * DESCZ( MB_ ) = DESCZ( NB_ )
282 * NN = MAX( N, NB, 2 )
283 * DESCA( RSRC_ ) = DESCA( NB_ ) = DESCZ( RSRC_ ) =
284 * DESCZ( CSRC_ ) = 0
285 * NP0 = NUMROC( NN, NB, 0, 0, NPROW )
286 * MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL )
287 * ICEIL( X, Y ) is a ScaLAPACK function returning
288 * ceiling(X/Y)
289 *
290 * When LWORK is too small:
291 * If LWORK is too small to guarantee orthogonality,
292 * PDSYEVX attempts to maintain orthogonality in
293 * the clusters with the smallest
294 * spacing between the eigenvalues.
295 * If LWORK is too small to compute all the eigenvectors
296 * requested, no computation is performed and INFO=-23
297 * is returned. Note that when RANGE='V', PDSYEVX does
298 * not know how many eigenvectors are requested until
299 * the eigenvalues are computed. Therefore, when RANGE='V'
300 * and as long as LWORK is large enough to allow PDSYEVX to
301 * compute the eigenvalues, PDSYEVX will compute the
302 * eigenvalues and as many eigenvectors as it can.
303 *
304 * Relationship between workspace, orthogonality & performance:
305 * Greater performance can be achieved if adequate workspace
306 * is provided. On the other hand, in some situations,
307 * performance can decrease as the workspace provided
308 * increases above the workspace amount shown below:
309 *
310 * For optimal performance, greater workspace may be
311 * needed, i.e.
312 * LWORK >= MAX( LWORK, 5*N + NSYTRD_LWOPT )
313 * Where:
314 * LWORK, as defined previously, depends upon the number
315 * of eigenvectors requested, and
316 * NSYTRD_LWOPT = N + 2*( ANB+1 )*( 4*NPS+2 ) +
317 * ( NPS + 3 ) * NPS
318 *
319 * ANB = PJLAENV( DESCA( CTXT_), 3, 'PDSYTTRD', 'L',
320 * 0, 0, 0, 0)
321 * SQNPC = INT( SQRT( DBLE( NPROW * NPCOL ) ) )
322 * NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB )
323 *
324 * NUMROC is a ScaLAPACK tool functions;
325 * PJLAENV is a ScaLAPACK envionmental inquiry function
326 * MYROW, MYCOL, NPROW and NPCOL can be determined by
327 * calling the subroutine BLACS_GRIDINFO.
328 *
329 * For large N, no extra workspace is needed, however the
330 * biggest boost in performance comes for small N, so it
331 * is wise to provide the extra workspace (typically less
332 * than a Megabyte per process).
333 *
334 * If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing
335 * enough space to compute all the eigenvectors
336 * orthogonally will cause serious degradation in
337 * performance. In the limit (i.e. CLUSTERSIZE = N-1)
338 * PDSTEIN will perform no better than DSTEIN on 1
339 * processor.
340 * For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing
341 * all eigenvectors will increase the total execution time
342 * by a factor of 2 or more.
343 * For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will
344 * grow as the square of the cluster size, all other factors
345 * remaining equal and assuming enough workspace. Less
346 * workspace means less reorthogonalization but faster
347 * execution.
348 *
349 * If LWORK = -1, then LWORK is global input and a workspace
350 * query is assumed; the routine only calculates the size
351 * required for optimal performance for all work arrays. Each of
352 * these values is returned in the first entry of the
353 * corresponding work arrays, and no error message is issued by
354 * PXERBLA.
355 *
356 * IWORK (local workspace) INTEGER array
357 * On return, IWORK(1) contains the amount of integer workspace
358 * required.
359 *
360 * LIWORK (local input) INTEGER
361 * size of IWORK
362 * LIWORK >= 6 * NNP
363 * Where:
364 * NNP = MAX( N, NPROW*NPCOL + 1, 4 )
365 * If LIWORK = -1, then LIWORK is global input and a workspace
366 * query is assumed; the routine only calculates the minimum
367 * and optimal size for all work arrays. Each of these
368 * values is returned in the first entry of the corresponding
369 * work array, and no error message is issued by PXERBLA.
370 *
371 * IFAIL (global output) INTEGER array, dimension (N)
372 * If JOBZ = 'V', then on normal exit, the first M elements of
373 * IFAIL are zero. If (MOD(INFO,2).NE.0) on exit, then
374 * IFAIL contains the
375 * indices of the eigenvectors that failed to converge.
376 * If JOBZ = 'N', then IFAIL is not referenced.
377 *
378 * ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL)
379 * This array contains indices of eigenvectors corresponding to
380 * a cluster of eigenvalues that could not be reorthogonalized
381 * due to insufficient workspace (see LWORK, ORFAC and INFO).
382 * Eigenvectors corresponding to clusters of eigenvalues indexed
383 * ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be
384 * reorthogonalized due to lack of workspace. Hence the
385 * eigenvectors corresponding to these clusters may not be
386 * orthogonal. ICLUSTR() is a zero terminated array.
387 * (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if
388 * K is the number of clusters
389 * ICLUSTR is not referenced if JOBZ = 'N'
390 *
391 * GAP (global output) DOUBLE PRECISION array,
392 * dimension (NPROW*NPCOL)
393 * This array contains the gap between eigenvalues whose
394 * eigenvectors could not be reorthogonalized. The output
395 * values in this array correspond to the clusters indicated
396 * by the array ICLUSTR. As a result, the dot product between
397 * eigenvectors correspoding to the I^th cluster may be as high
398 * as ( C * n ) / GAP(I) where C is a small constant.
399 *
400 * INFO (global output) INTEGER
401 * = 0: successful exit
402 * < 0: If the i-th argument is an array and the j-entry had
403 * an illegal value, then INFO = -(i*100+j), if the i-th
404 * argument is a scalar and had an illegal value, then
405 * INFO = -i.
406 * > 0: if (MOD(INFO,2).NE.0), then one or more eigenvectors
407 * failed to converge. Their indices are stored
408 * in IFAIL. Ensure ABSTOL=2.0*PDLAMCH( 'U' )
409 * Send e-mail to scalapack@cs.utk.edu
410 * if (MOD(INFO/2,2).NE.0),then eigenvectors corresponding
411 * to one or more clusters of eigenvalues could not be
412 * reorthogonalized because of insufficient workspace.
413 * The indices of the clusters are stored in the array
414 * ICLUSTR.
415 * if (MOD(INFO/4,2).NE.0), then space limit prevented
416 * PDSYEVX from computing all of the eigenvectors
417 * between VL and VU. The number of eigenvectors
418 * computed is returned in NZ.
419 * if (MOD(INFO/8,2).NE.0), then PDSTEBZ failed to compute
420 * eigenvalues. Ensure ABSTOL=2.0*PDLAMCH( 'U' )
421 * Send e-mail to scalapack@cs.utk.edu
422 *
423 * Alignment requirements
424 * ======================
425 *
426 * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1)
427 * must verify some alignment properties, namely the following
428 * expressions should be true:
429 *
430 * ( MB_A.EQ.NB_A.EQ.MB_Z .AND. IROFFA.EQ.IROFFZ .AND. IROFFA.EQ.0 .AND.
431 * IAROW.EQ.IZROW )
432 * where
433 * IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ).
434 *
435 * =====================================================================
436 *
437 * Differences between PDSYEVX and DSYEVX
438 * ======================================
439 *
440 * A, LDA -> A, IA, JA, DESCA
441 * Z, LDZ -> Z, IZ, JZ, DESCZ
442 * WORKSPACE needs are larger for PDSYEVX.
443 * LIWORK parameter added
444 *
445 * ORFAC, ICLUSTER() and GAP() parameters added
446 * meaning of INFO is changed
447 *
448 * Functional differences:
449 * PDSYEVX does not promise orthogonality for eigenvectors associated
450 * with tighly clustered eigenvalues.
451 * PDSYEVX does not reorthogonalize eigenvectors
452 * that are on different processes. The extent of reorthogonalization
453 * is controlled by the input parameter LWORK.
454 *
455 * Version 1.4 limitations:
456 * DESCA(MB_) = DESCA(NB_)
457 * DESCA(M_) = DESCZ(M_)
458 * DESCA(N_) = DESCZ(N_)
459 * DESCA(MB_) = DESCZ(MB_)
460 * DESCA(NB_) = DESCZ(NB_)
461 * DESCA(RSRC_) = DESCZ(RSRC_)
462 *
463 * .. Parameters ..
464  INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
465  $ MB_, NB_, RSRC_, CSRC_, LLD_
466  PARAMETER ( BLOCK_CYCLIC_2D = 1, dlen_ = 9, dtype_ = 1,
467  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
468  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
469  DOUBLE PRECISION ZERO, ONE, TEN, FIVE
470  PARAMETER ( ZERO = 0.0d+0, one = 1.0d+0, ten = 10.0d+0,
471  $ five = 5.0d+0 )
472  INTEGER IERREIN, IERRCLS, IERRSPC, IERREBZ
473  parameter( ierrein = 1, ierrcls = 2, ierrspc = 4,
474  $ ierrebz = 8 )
475 * ..
476 * .. Local Scalars ..
477  LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, QUICKRETURN,
478  $ VALEIG, WANTZ
479  CHARACTER ORDER
480  INTEGER ANB, CSRC_A, I, IAROW, ICOFFA, ICTXT, IINFO,
481  $ INDD, INDD2, INDE, INDE2, INDIBL, INDISP,
482  $ indtau, indwork, iroffa, iroffz, iscale,
483  $ isizestebz, isizestein, izrow, lallwork,
484  $ liwmin, llwork, lwmin, lwopt, maxeigs, mb_a,
485  $ mq0, mycol, myrow, nb, nb_a, neig, nn, nnp,
486  $ np0, npcol, nprocs, nprow, nps, nsplit,
487  $ nsytrd_lwopt, nzz, offset, rsrc_a, rsrc_z,
488  $ sizeormtr, sizestein, sizesyevx, sqnpc
489  DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN,
490  $ SIGMA, SMLNUM, VLL, VUU
491 * ..
492 * .. Local Arrays ..
493  INTEGER IDUM1( 4 ), IDUM2( 4 )
494 * ..
495 * .. External Functions ..
496  LOGICAL LSAME
497  INTEGER ICEIL, INDXG2P, NUMROC, PJLAENV
498  DOUBLE PRECISION PDLAMCH, PDLANSY
499  EXTERNAL lsame, iceil, indxg2p, numroc, pjlaenv,
500  $ pdlamch, pdlansy
501 * ..
502 * .. External Subroutines ..
503  EXTERNAL blacs_gridinfo, chk1mat, dgebr2d, dgebs2d,
504  $ dlasrt, dscal, igamn2d, pchk1mat, pchk2mat,
507 * ..
508 * .. Intrinsic Functions ..
509  INTRINSIC abs, dble, ichar, int, max, min, mod, sqrt
510 * ..
511 * .. Executable Statements ..
512 * This is just to keep ftnchek and toolpack/1 happy
513  IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
514  $ rsrc_.LT.0 )RETURN
515 *
516  quickreturn = ( n.EQ.0 )
517 *
518 * Test the input arguments.
519 *
520  ictxt = desca( ctxt_ )
521  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
522  info = 0
523 *
524  wantz = lsame( jobz, 'V' )
525  IF( nprow.EQ.-1 ) THEN
526  info = -( 800+ctxt_ )
527  ELSE IF( wantz ) THEN
528  IF( ictxt.NE.descz( ctxt_ ) ) THEN
529  info = -( 2100+ctxt_ )
530  END IF
531  END IF
532  IF( info.EQ.0 ) THEN
533  CALL chk1mat( n, 4, n, 4, ia, ja, desca, 8, info )
534  IF( wantz )
535  $ CALL chk1mat( n, 4, n, 4, iz, jz, descz, 21, info )
536 *
537  IF( info.EQ.0 ) THEN
538 *
539 * Get machine constants.
540 *
541  safmin = pdlamch( ictxt, 'Safe minimum' )
542  eps = pdlamch( ictxt, 'Precision' )
543  smlnum = safmin / eps
544  bignum = one / smlnum
545  rmin = sqrt( smlnum )
546  rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) )
547 *
548  nprocs = nprow*npcol
549  lower = lsame( uplo, 'L' )
550  alleig = lsame( range, 'A' )
551  valeig = lsame( range, 'V' )
552  indeig = lsame( range, 'I' )
553 *
554 * Set up pointers into the WORK array
555 *
556  indtau = 1
557  inde = indtau + n
558  indd = inde + n
559  indd2 = indd + n
560  inde2 = indd2 + n
561  indwork = inde2 + n
562  llwork = lwork - indwork + 1
563 *
564 * Set up pointers into the IWORK array
565 *
566  isizestein = 3*n + nprocs + 1
567  isizestebz = max( 4*n, 14, nprocs )
568  indibl = ( max( isizestein, isizestebz ) ) + 1
569  indisp = indibl + n
570 *
571 * Compute the total amount of space needed
572 *
573  lquery = .false.
574  IF( lwork.EQ.-1 .OR. liwork.EQ.-1 )
575  $ lquery = .true.
576 *
577  nnp = max( n, nprocs+1, 4 )
578  liwmin = 6*nnp
579 *
580  nprocs = nprow*npcol
581  nb_a = desca( nb_ )
582  mb_a = desca( mb_ )
583  nb = nb_a
584  nn = max( n, nb, 2 )
585 *
586  rsrc_a = desca( rsrc_ )
587  csrc_a = desca( csrc_ )
588  iroffa = mod( ia-1, mb_a )
589  icoffa = mod( ja-1, nb_a )
590  iarow = indxg2p( 1, nb_a, myrow, rsrc_a, nprow )
591  np0 = numroc( n+iroffa, nb, 0, 0, nprow )
592  mq0 = numroc( n+icoffa, nb, 0, 0, npcol )
593  IF( wantz ) THEN
594  rsrc_z = descz( rsrc_ )
595  iroffz = mod( iz-1, mb_a )
596  izrow = indxg2p( 1, nb_a, myrow, rsrc_z, nprow )
597  ELSE
598  iroffz = 0
599  izrow = 0
600  END IF
601 *
602  IF( ( .NOT.wantz ) .OR. ( valeig .AND. ( .NOT.lquery ) ) )
603  $ THEN
604  lwmin = 5*n + max( 5*nn, nb*( np0+1 ) )
605  IF( wantz ) THEN
606  mq0 = numroc( max( n, nb, 2 ), nb, 0, 0, npcol )
607  lwopt = 5*n + max( 5*nn, np0*mq0+2*nb*nb )
608  ELSE
609  lwopt = lwmin
610  END IF
611  neig = 0
612  ELSE
613  IF( alleig .OR. valeig ) THEN
614  neig = n
615  ELSE IF( indeig ) THEN
616  neig = iu - il + 1
617  END IF
618  mq0 = numroc( max( neig, nb, 2 ), nb, 0, 0, npcol )
619  lwmin = 5*n + max( 5*nn, np0*mq0+2*nb*nb ) +
620  $ iceil( neig, nprow*npcol )*nn
621  lwopt = lwmin
622 *
623  END IF
624 *
625 * Compute how much workspace is needed to use the
626 * new TRD code
627 *
628  anb = pjlaenv( ictxt, 3, 'PDSYTTRD', 'L', 0, 0, 0, 0 )
629  sqnpc = int( sqrt( dble( nprow*npcol ) ) )
630  nps = max( numroc( n, 1, 0, 0, sqnpc ), 2*anb )
631  nsytrd_lwopt = 2*( anb+1 )*( 4*nps+2 ) + ( nps+4 )*nps
632  lwopt = max( lwopt, 5*n+nsytrd_lwopt )
633 *
634  END IF
635  IF( info.EQ.0 ) THEN
636  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
637  work( 1 ) = abstol
638  IF( valeig ) THEN
639  work( 2 ) = vl
640  work( 3 ) = vu
641  ELSE
642  work( 2 ) = zero
643  work( 3 ) = zero
644  END IF
645  CALL dgebs2d( ictxt, 'ALL', ' ', 3, 1, work, 3 )
646  ELSE
647  CALL dgebr2d( ictxt, 'ALL', ' ', 3, 1, work, 3, 0, 0 )
648  END IF
649  IF( .NOT.( wantz .OR. lsame( jobz, 'N' ) ) ) THEN
650  info = -1
651  ELSE IF( .NOT.( alleig .OR. valeig .OR. indeig ) ) THEN
652  info = -2
653  ELSE IF( .NOT.( lower .OR. lsame( uplo, 'U' ) ) ) THEN
654  info = -3
655  ELSE IF( valeig .AND. n.GT.0 .AND. vu.LE.vl ) THEN
656  info = -10
657  ELSE IF( indeig .AND. ( il.LT.1 .OR. il.GT.max( 1, n ) ) )
658  $ THEN
659  info = -11
660  ELSE IF( indeig .AND. ( iu.LT.min( n, il ) .OR. iu.GT.n ) )
661  $ THEN
662  info = -12
663  ELSE IF( lwork.LT.lwmin .AND. lwork.NE.-1 ) THEN
664  info = -23
665  ELSE IF( liwork.LT.liwmin .AND. liwork.NE.-1 ) THEN
666  info = -25
667  ELSE IF( valeig .AND. ( abs( work( 2 )-vl ).GT.five*eps*
668  $ abs( vl ) ) ) THEN
669  info = -9
670  ELSE IF( valeig .AND. ( abs( work( 3 )-vu ).GT.five*eps*
671  $ abs( vu ) ) ) THEN
672  info = -10
673  ELSE IF( abs( work( 1 )-abstol ).GT.five*eps*abs( abstol ) )
674  $ THEN
675  info = -13
676  ELSE IF( iroffa.NE.0 ) THEN
677  info = -6
678  ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
679  info = -( 800+nb_ )
680  END IF
681  IF( wantz ) THEN
682  IF( iroffa.NE.iroffz ) THEN
683  info = -19
684  ELSE IF( iarow.NE.izrow ) THEN
685  info = -19
686  ELSE IF( desca( m_ ).NE.descz( m_ ) ) THEN
687  info = -( 2100+m_ )
688  ELSE IF( desca( n_ ).NE.descz( n_ ) ) THEN
689  info = -( 2100+n_ )
690  ELSE IF( desca( mb_ ).NE.descz( mb_ ) ) THEN
691  info = -( 2100+mb_ )
692  ELSE IF( desca( nb_ ).NE.descz( nb_ ) ) THEN
693  info = -( 2100+nb_ )
694  ELSE IF( desca( rsrc_ ).NE.descz( rsrc_ ) ) THEN
695  info = -( 2100+rsrc_ )
696  ELSE IF( desca( csrc_ ).NE.descz( csrc_ ) ) THEN
697  info = -( 2100+csrc_ )
698  ELSE IF( ictxt.NE.descz( ctxt_ ) ) THEN
699  info = -( 2100+ctxt_ )
700  END IF
701  END IF
702  END IF
703  IF( wantz ) THEN
704  idum1( 1 ) = ichar( 'V' )
705  ELSE
706  idum1( 1 ) = ichar( 'N' )
707  END IF
708  idum2( 1 ) = 1
709  IF( lower ) THEN
710  idum1( 2 ) = ichar( 'L' )
711  ELSE
712  idum1( 2 ) = ichar( 'U' )
713  END IF
714  idum2( 2 ) = 2
715  IF( alleig ) THEN
716  idum1( 3 ) = ichar( 'A' )
717  ELSE IF( indeig ) THEN
718  idum1( 3 ) = ichar( 'I' )
719  ELSE
720  idum1( 3 ) = ichar( 'V' )
721  END IF
722  idum2( 3 ) = 3
723  IF( lquery ) THEN
724  idum1( 4 ) = -1
725  ELSE
726  idum1( 4 ) = 1
727  END IF
728  idum2( 4 ) = 4
729  IF( wantz ) THEN
730  CALL pchk2mat( n, 4, n, 4, ia, ja, desca, 8, n, 4, n, 4, iz,
731  $ jz, descz, 21, 4, idum1, idum2, info )
732  ELSE
733  CALL pchk1mat( n, 4, n, 4, ia, ja, desca, 8, 4, idum1,
734  $ idum2, info )
735  END IF
736  work( 1 ) = dble( lwopt )
737  iwork( 1 ) = liwmin
738  END IF
739 *
740  IF( info.NE.0 ) THEN
741  CALL pxerbla( ictxt, 'PDSYEVX', -info )
742  RETURN
743  ELSE IF( lquery ) THEN
744  RETURN
745  END IF
746 *
747 * Quick return if possible
748 *
749  IF( quickreturn ) THEN
750  IF( wantz ) THEN
751  nz = 0
752  iclustr( 1 ) = 0
753  END IF
754  m = 0
755  work( 1 ) = dble( lwopt )
756  iwork( 1 ) = liwmin
757  RETURN
758  END IF
759 *
760 * Scale matrix to allowable range, if necessary.
761 *
762  abstll = abstol
763  iscale = 0
764  IF( valeig ) THEN
765  vll = vl
766  vuu = vu
767  ELSE
768  vll = zero
769  vuu = zero
770  END IF
771 *
772  anrm = pdlansy( 'M', uplo, n, a, ia, ja, desca, work( indwork ) )
773 *
774  IF( anrm.GT.zero .AND. anrm.LT.rmin ) THEN
775  iscale = 1
776  sigma = rmin / anrm
777  anrm = anrm*sigma
778  ELSE IF( anrm.GT.rmax ) THEN
779  iscale = 1
780  sigma = rmax / anrm
781  anrm = anrm*sigma
782  END IF
783 *
784  IF( iscale.EQ.1 ) THEN
785  CALL pdlascl( uplo, one, sigma, n, n, a, ia, ja, desca, iinfo )
786  IF( abstol.GT.0 )
787  $ abstll = abstol*sigma
788  IF( valeig ) THEN
789  vll = vl*sigma
790  vuu = vu*sigma
791  IF( vuu.EQ.vll ) THEN
792  vuu = vuu + 2*max( abs( vuu )*eps, safmin )
793  END IF
794  END IF
795  END IF
796 *
797 * Call PDSYNTRD to reduce symmetric matrix to tridiagonal form.
798 *
799  lallwork = llwork
800 *
801  CALL pdsyntrd( uplo, n, a, ia, ja, desca, work( indd ),
802  $ work( inde ), work( indtau ), work( indwork ),
803  $ llwork, iinfo )
804 *
805 *
806 * Copy the values of D, E to all processes
807 *
808 * Here PxLARED1D is used to redistribute the tridiagonal matrix.
809 * PxLARED1D, however, doesn't yet work with arbritary matrix
810 * distributions so we have PxELGET as a backup.
811 *
812  offset = 0
813  IF( ia.EQ.1 .AND. ja.EQ.1 .AND. rsrc_a.EQ.0 .AND. csrc_a.EQ.0 )
814  $ THEN
815  CALL pdlared1d( n, ia, ja, desca, work( indd ), work( indd2 ),
816  $ work( indwork ), llwork )
817 *
818  CALL pdlared1d( n, ia, ja, desca, work( inde ), work( inde2 ),
819  $ work( indwork ), llwork )
820  IF( .NOT.lower )
821  $ offset = 1
822  ELSE
823  DO 10 i = 1, n
824  CALL pdelget( 'A', ' ', work( indd2+i-1 ), a, i+ia-1,
825  $ i+ja-1, desca )
826  10 CONTINUE
827  IF( lsame( uplo, 'U' ) ) THEN
828  DO 20 i = 1, n - 1
829  CALL pdelget( 'A', ' ', work( inde2+i-1 ), a, i+ia-1,
830  $ i+ja, desca )
831  20 CONTINUE
832  ELSE
833  DO 30 i = 1, n - 1
834  CALL pdelget( 'A', ' ', work( inde2+i-1 ), a, i+ia,
835  $ i+ja-1, desca )
836  30 CONTINUE
837  END IF
838  END IF
839 *
840 * Call PDSTEBZ and, if eigenvectors are desired, PDSTEIN.
841 *
842  IF( wantz ) THEN
843  order = 'B'
844  ELSE
845  order = 'E'
846  END IF
847 *
848  CALL pdstebz( ictxt, range, order, n, vll, vuu, il, iu, abstll,
849  $ work( indd2 ), work( inde2+offset ), m, nsplit, w,
850  $ iwork( indibl ), iwork( indisp ), work( indwork ),
851  $ llwork, iwork( 1 ), isizestebz, iinfo )
852 *
853 *
854 * IF PDSTEBZ fails, the error propogates to INFO, but
855 * we do not propogate the eigenvalue(s) which failed because:
856 * 1) This should never happen if the user specifies
857 * ABSTOL = 2 * PDLAMCH( 'U' )
858 * 2) PDSTEIN will confirm/deny whether the eigenvalues are
859 * close enough.
860 *
861  IF( iinfo.NE.0 ) THEN
862  info = info + ierrebz
863  DO 40 i = 1, m
864  iwork( indibl+i-1 ) = abs( iwork( indibl+i-1 ) )
865  40 CONTINUE
866  END IF
867  IF( wantz ) THEN
868 *
869  IF( valeig ) THEN
870 *
871 * Compute the maximum number of eigenvalues that we can
872 * compute in the
873 * workspace that we have, and that we can store in Z.
874 *
875 * Loop through the possibilities looking for the largest
876 * NZ that we can feed to PDSTEIN and PDORMTR
877 *
878 * Since all processes must end up with the same value
879 * of NZ, we first compute the minimum of LALLWORK
880 *
881  CALL igamn2d( ictxt, 'A', ' ', 1, 1, lallwork, 1, 1, 1, -1,
882  $ -1, -1 )
883 *
884  maxeigs = descz( n_ )
885 *
886  DO 50 nz = min( maxeigs, m ), 0, -1
887  mq0 = numroc( nz, nb, 0, 0, npcol )
888  sizestein = iceil( nz, nprocs )*n + max( 5*n, np0*mq0 )
889  sizeormtr = max( ( nb*( nb-1 ) ) / 2, ( mq0+np0 )*nb ) +
890  $ nb*nb
891 *
892  sizesyevx = max( sizestein, sizeormtr )
893  IF( sizesyevx.LE.lallwork )
894  $ GO TO 60
895  50 CONTINUE
896  60 CONTINUE
897  ELSE
898  nz = m
899  END IF
900  nz = max( nz, 0 )
901  IF( nz.NE.m ) THEN
902  info = info + ierrspc
903 *
904  DO 70 i = 1, m
905  ifail( i ) = 0
906  70 CONTINUE
907 *
908 * The following code handles a rare special case
909 * - NZ .NE. M means that we don't have enough room to store
910 * all the vectors.
911 * - NSPLIT .GT. 1 means that the matrix split
912 * In this case, we cannot simply take the first NZ eigenvalues
913 * because PDSTEBZ sorts the eigenvalues by block when
914 * a split occurs. So, we have to make another call to
915 * PDSTEBZ with a new upper limit - VUU.
916 *
917  IF( nsplit.GT.1 ) THEN
918  CALL dlasrt( 'I', m, w, iinfo )
919  nzz = 0
920  IF( nz.GT.0 ) THEN
921 *
922  vuu = w( nz ) - ten*( eps*anrm+safmin )
923  IF( vll.GE.vuu ) THEN
924  nzz = 0
925  ELSE
926  CALL pdstebz( ictxt, range, order, n, vll, vuu, il,
927  $ iu, abstll, work( indd2 ),
928  $ work( inde2+offset ), nzz, nsplit, w,
929  $ iwork( indibl ), iwork( indisp ),
930  $ work( indwork ), llwork, iwork( 1 ),
931  $ isizestebz, iinfo )
932  END IF
933 *
934  IF( mod( info / ierrebz, 1 ).EQ.0 ) THEN
935  IF( nzz.GT.nz .OR. iinfo.NE.0 ) THEN
936  info = info + ierrebz
937  END IF
938  END IF
939  END IF
940  nz = min( nz, nzz )
941 *
942  END IF
943  END IF
944  CALL pdstein( n, work( indd2 ), work( inde2+offset ), nz, w,
945  $ iwork( indibl ), iwork( indisp ), orfac, z, iz,
946  $ jz, descz, work( indwork ), lallwork, iwork( 1 ),
947  $ isizestein, ifail, iclustr, gap, iinfo )
948 *
949  IF( iinfo.GE.nz+1 )
950  $ info = info + ierrcls
951  IF( mod( iinfo, nz+1 ).NE.0 )
952  $ info = info + ierrein
953 *
954 * Z = Q * Z
955 *
956 *
957  IF( nz.GT.0 ) THEN
958  CALL pdormtr( 'L', uplo, 'N', n, nz, a, ia, ja, desca,
959  $ work( indtau ), z, iz, jz, descz,
960  $ work( indwork ), llwork, iinfo )
961  END IF
962 *
963  END IF
964 *
965 * If matrix was scaled, then rescale eigenvalues appropriately.
966 *
967  IF( iscale.EQ.1 ) THEN
968  CALL dscal( m, one / sigma, w, 1 )
969  END IF
970 *
971  work( 1 ) = dble( lwopt )
972  iwork( 1 ) = liwmin
973 *
974  RETURN
975 *
976 * End of PDSYEVX
977 *
978  END
pdsyntrd
subroutine pdsyntrd(UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, LWORK, INFO)
Definition: pdsyntrd.f:3
max
#define max(A, B)
Definition: pcgemr.c:180
pdstein
subroutine pdstein(N, D, E, M, W, IBLOCK, ISPLIT, ORFAC, Z, IZ, JZ, DESCZ, WORK, LWORK, IWORK, LIWORK, IFAIL, ICLUSTR, GAP, INFO)
Definition: pdstein.f:4
pdelget
subroutine pdelget(SCOPE, TOP, ALPHA, A, IA, JA, DESCA)
Definition: pdelget.f:2
pchk1mat
subroutine pchk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, NEXTRA, EX, EXPOS, INFO)
Definition: pchkxmat.f:3
pchk2mat
subroutine pchk2mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, MB, MBPOS0, NB, NBPOS0, IB, JB, DESCB, DESCBPOS0, NEXTRA, EX, EXPOS, INFO)
Definition: pchkxmat.f:175
pdlascl
subroutine pdlascl(TYPE, CFROM, CTO, M, N, A, IA, JA, DESCA, INFO)
Definition: pdlascl.f:3
chk1mat
subroutine chk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, INFO)
Definition: chk1mat.f:3
pdsyevx
subroutine pdsyevx(JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, VU, IL, IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ, WORK, LWORK, IWORK, LIWORK, IFAIL, ICLUSTR, GAP, INFO)
Definition: pdsyevx.f:5
pdstebz
subroutine pdstebz(ICTXT, RANGE, ORDER, N, VL, VU, IL, IU, ABSTOL, D, E, M, NSPLIT, W, IBLOCK, ISPLIT, WORK, LWORK, IWORK, LIWORK, INFO)
Definition: pdstebz.f:4
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
pdlared1d
subroutine pdlared1d(N, IA, JA, DESC, BYCOL, BYALL, WORK, LWORK)
Definition: pdlared1d.f:2
pdormtr
subroutine pdormtr(SIDE, UPLO, TRANS, M, N, A, IA, JA, DESCA, TAU, C, IC, JC, DESCC, WORK, LWORK, INFO)
Definition: pdormtr.f:3
min
#define min(A, B)
Definition: pcgemr.c:181