ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pssyevx.f
Go to the documentation of this file.
1  SUBROUTINE pssyevx( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL,
2  $ VU, IL, IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ,
3  $ JZ, DESCZ, WORK, LWORK, IWORK, LIWORK, IFAIL,
4  $ ICLUSTR, GAP, INFO )
5 *
6 * -- ScaLAPACK routine (version 1.7) --
7 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8 * and University of California, Berkeley.
9 * May 25, 2001
10 *
11 * .. Scalar Arguments ..
12  CHARACTER JOBZ, RANGE, UPLO
13  INTEGER IA, IL, INFO, IU, IZ, JA, JZ, LIWORK, LWORK, M,
14  $ N, NZ
15  REAL ABSTOL, ORFAC, VL, VU
16 * ..
17 * .. Array Arguments ..
18  INTEGER DESCA( * ), DESCZ( * ), ICLUSTR( * ),
19  $ IFAIL( * ), IWORK( * )
20  REAL A( * ), GAP( * ), W( * ), WORK( * ), Z( * )
21 * ..
22 *
23 * Purpose
24 * =======
25 *
26 * PSSYEVX computes selected eigenvalues and, optionally, eigenvectors
27 * of a real symmetric matrix A by calling the recommended sequence
28 * of ScaLAPACK routines. Eigenvalues/vectors can be selected by
29 * specifying a range of values or a range of indices for the desired
30 * eigenvalues.
31 *
32 * Notes
33 * =====
34 *
35 * Each global data object is described by an associated description
36 * vector. This vector stores the information required to establish
37 * the mapping between an object element and its corresponding process
38 * and memory location.
39 *
40 * Let A be a generic term for any 2D block cyclicly distributed array.
41 * Such a global array has an associated description vector DESCA.
42 * In the following comments, the character _ should be read as
43 * "of the global array".
44 *
45 * NOTATION STORED IN EXPLANATION
46 * --------------- -------------- --------------------------------------
47 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
48 * DTYPE_A = 1.
49 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
50 * the BLACS process grid A is distribu-
51 * ted over. The context itself is glo-
52 * bal, but the handle (the integer
53 * value) may vary.
54 * M_A (global) DESCA( M_ ) The number of rows in the global
55 * array A.
56 * N_A (global) DESCA( N_ ) The number of columns in the global
57 * array A.
58 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
59 * the rows of the array.
60 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
61 * the columns of the array.
62 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
63 * row of the array A is distributed.
64 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
65 * first column of the array A is
66 * distributed.
67 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
68 * array. LLD_A >= MAX(1,LOCr(M_A)).
69 *
70 * Let K be the number of rows or columns of a distributed matrix,
71 * and assume that its process grid has dimension p x q.
72 * LOCr( K ) denotes the number of elements of K that a process
73 * would receive if K were distributed over the p processes of its
74 * process column.
75 * Similarly, LOCc( K ) denotes the number of elements of K that a
76 * process would receive if K were distributed over the q processes of
77 * its process row.
78 * The values of LOCr() and LOCc() may be determined via a call to the
79 * ScaLAPACK tool function, NUMROC:
80 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
81 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
82 * An upper bound for these quantities may be computed by:
83 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
84 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
85 *
86 * PSSYEVX assumes IEEE 754 standard compliant arithmetic. To port
87 * to a system which does not have IEEE 754 arithmetic, modify
88 * the appropriate SLmake.inc file to include the compiler switch
89 * -DNO_IEEE. This switch only affects the compilation of pslaiect.c.
90 *
91 * Arguments
92 * =========
93 *
94 * NP = the number of rows local to a given process.
95 * NQ = the number of columns local to a given process.
96 *
97 * JOBZ (global input) CHARACTER*1
98 * Specifies whether or not to compute the eigenvectors:
99 * = 'N': Compute eigenvalues only.
100 * = 'V': Compute eigenvalues and eigenvectors.
101 *
102 * RANGE (global input) CHARACTER*1
103 * = 'A': all eigenvalues will be found.
104 * = 'V': all eigenvalues in the interval [VL,VU] will be found.
105 * = 'I': the IL-th through IU-th eigenvalues will be found.
106 *
107 * UPLO (global input) CHARACTER*1
108 * Specifies whether the upper or lower triangular part of the
109 * symmetric matrix A is stored:
110 * = 'U': Upper triangular
111 * = 'L': Lower triangular
112 *
113 * N (global input) INTEGER
114 * The number of rows and columns of the matrix A. N >= 0.
115 *
116 * A (local input/workspace) block cyclic REAL array,
117 * global dimension (N, N),
118 * local dimension ( LLD_A, LOCc(JA+N-1) )
119 *
120 * On entry, the symmetric matrix A. If UPLO = 'U', only the
121 * upper triangular part of A is used to define the elements of
122 * the symmetric matrix. If UPLO = 'L', only the lower
123 * triangular part of A is used to define the elements of the
124 * symmetric matrix.
125 *
126 * On exit, the lower triangle (if UPLO='L') or the upper
127 * triangle (if UPLO='U') of A, including the diagonal, is
128 * destroyed.
129 *
130 * IA (global input) INTEGER
131 * A's global row index, which points to the beginning of the
132 * submatrix which is to be operated on.
133 *
134 * JA (global input) INTEGER
135 * A's global column index, which points to the beginning of
136 * the submatrix which is to be operated on.
137 *
138 * DESCA (global and local input) INTEGER array of dimension DLEN_.
139 * The array descriptor for the distributed matrix A.
140 * If DESCA( CTXT_ ) is incorrect, PSSYEVX cannot guarantee
141 * correct error reporting.
142 *
143 * VL (global input) REAL
144 * If RANGE='V', the lower bound of the interval to be searched
145 * for eigenvalues. Not referenced if RANGE = 'A' or 'I'.
146 *
147 * VU (global input) REAL
148 * If RANGE='V', the upper bound of the interval to be searched
149 * for eigenvalues. Not referenced if RANGE = 'A' or 'I'.
150 *
151 * IL (global input) INTEGER
152 * If RANGE='I', the index (from smallest to largest) of the
153 * smallest eigenvalue to be returned. IL >= 1.
154 * Not referenced if RANGE = 'A' or 'V'.
155 *
156 * IU (global input) INTEGER
157 * If RANGE='I', the index (from smallest to largest) of the
158 * largest eigenvalue to be returned. min(IL,N) <= IU <= N.
159 * Not referenced if RANGE = 'A' or 'V'.
160 *
161 * ABSTOL (global input) REAL
162 * If JOBZ='V', setting ABSTOL to PSLAMCH( CONTEXT, 'U') yields
163 * the most orthogonal eigenvectors.
164 *
165 * The absolute error tolerance for the eigenvalues.
166 * An approximate eigenvalue is accepted as converged
167 * when it is determined to lie in an interval [a,b]
168 * of width less than or equal to
169 *
170 * ABSTOL + EPS * max( |a|,|b| ) ,
171 *
172 * where EPS is the machine precision. If ABSTOL is less than
173 * or equal to zero, then EPS*norm(T) will be used in its place,
174 * where norm(T) is the 1-norm of the tridiagonal matrix
175 * obtained by reducing A to tridiagonal form.
176 *
177 * Eigenvalues will be computed most accurately when ABSTOL is
178 * set to twice the underflow threshold 2*PSLAMCH('S') not zero.
179 * If this routine returns with ((MOD(INFO,2).NE.0) .OR.
180 * (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or
181 * eigenvectors did not converge, try setting ABSTOL to
182 * 2*PSLAMCH('S').
183 *
184 * See "Computing Small Singular Values of Bidiagonal Matrices
185 * with Guaranteed High Relative Accuracy," by Demmel and
186 * Kahan, LAPACK Working Note #3.
187 *
188 * See "On the correctness of Parallel Bisection in Floating
189 * Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70
190 *
191 * M (global output) INTEGER
192 * Total number of eigenvalues found. 0 <= M <= N.
193 *
194 * NZ (global output) INTEGER
195 * Total number of eigenvectors computed. 0 <= NZ <= M.
196 * The number of columns of Z that are filled.
197 * If JOBZ .NE. 'V', NZ is not referenced.
198 * If JOBZ .EQ. 'V', NZ = M unless the user supplies
199 * insufficient space and PSSYEVX is not able to detect this
200 * before beginning computation. To get all the eigenvectors
201 * requested, the user must supply both sufficient
202 * space to hold the eigenvectors in Z (M .LE. DESCZ(N_))
203 * and sufficient workspace to compute them. (See LWORK below.)
204 * PSSYEVX is always able to detect insufficient space without
205 * computation unless RANGE .EQ. 'V'.
206 *
207 * W (global output) REAL array, dimension (N)
208 * On normal exit, the first M entries contain the selected
209 * eigenvalues in ascending order.
210 *
211 * ORFAC (global input) REAL
212 * Specifies which eigenvectors should be reorthogonalized.
213 * Eigenvectors that correspond to eigenvalues which are within
214 * tol=ORFAC*norm(A) of each other are to be reorthogonalized.
215 * However, if the workspace is insufficient (see LWORK),
216 * tol may be decreased until all eigenvectors to be
217 * reorthogonalized can be stored in one process.
218 * No reorthogonalization will be done if ORFAC equals zero.
219 * A default value of 10^-3 is used if ORFAC is negative.
220 * ORFAC should be identical on all processes.
221 *
222 * Z (local output) REAL array,
223 * global dimension (N, N),
224 * local dimension ( LLD_Z, LOCc(JZ+N-1) )
225 * If JOBZ = 'V', then on normal exit the first M columns of Z
226 * contain the orthonormal eigenvectors of the matrix
227 * corresponding to the selected eigenvalues. If an eigenvector
228 * fails to converge, then that column of Z contains the latest
229 * approximation to the eigenvector, and the index of the
230 * eigenvector is returned in IFAIL.
231 * If JOBZ = 'N', then Z is not referenced.
232 *
233 * IZ (global input) INTEGER
234 * Z's global row index, which points to the beginning of the
235 * submatrix which is to be operated on.
236 *
237 * JZ (global input) INTEGER
238 * Z's global column index, which points to the beginning of
239 * the submatrix which is to be operated on.
240 *
241 * DESCZ (global and local input) INTEGER array of dimension DLEN_.
242 * The array descriptor for the distributed matrix Z.
243 * DESCZ( CTXT_ ) must equal DESCA( CTXT_ )
244 *
245 * WORK (local workspace/output) REAL array,
246 * dimension max(3,LWORK)
247 * On return, WORK(1) contains the optimal amount of
248 * workspace required for efficient execution.
249 * if JOBZ='N' WORK(1) = optimal amount of workspace
250 * required to compute eigenvalues efficiently
251 * if JOBZ='V' WORK(1) = optimal amount of workspace
252 * required to compute eigenvalues and eigenvectors
253 * efficiently with no guarantee on orthogonality.
254 * If RANGE='V', it is assumed that all eigenvectors
255 * may be required.
256 *
257 * LWORK (local input) INTEGER
258 * Size of WORK
259 * See below for definitions of variables used to define LWORK.
260 * If no eigenvectors are requested (JOBZ = 'N') then
261 * LWORK >= 5 * N + MAX( 5 * NN, NB * ( NP0 + 1 ) )
262 * If eigenvectors are requested (JOBZ = 'V' ) then
263 * the amount of workspace required to guarantee that all
264 * eigenvectors are computed is:
265 * LWORK >= 5*N + MAX( 5*NN, NP0 * MQ0 + 2 * NB * NB ) +
266 * ICEIL( NEIG, NPROW*NPCOL)*NN
267 *
268 * The computed eigenvectors may not be orthogonal if the
269 * minimal workspace is supplied and ORFAC is too small.
270 * If you want to guarantee orthogonality (at the cost
271 * of potentially poor performance) you should add
272 * the following to LWORK:
273 * (CLUSTERSIZE-1)*N
274 * where CLUSTERSIZE is the number of eigenvalues in the
275 * largest cluster, where a cluster is defined as a set of
276 * close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) |
277 * W(J+1) <= W(J) + ORFAC*2*norm(A) }
278 * Variable definitions:
279 * NEIG = number of eigenvectors requested
280 * NB = DESCA( MB_ ) = DESCA( NB_ ) =
281 * DESCZ( MB_ ) = DESCZ( NB_ )
282 * NN = MAX( N, NB, 2 )
283 * DESCA( RSRC_ ) = DESCA( NB_ ) = DESCZ( RSRC_ ) =
284 * DESCZ( CSRC_ ) = 0
285 * NP0 = NUMROC( NN, NB, 0, 0, NPROW )
286 * MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL )
287 * ICEIL( X, Y ) is a ScaLAPACK function returning
288 * ceiling(X/Y)
289 *
290 * When LWORK is too small:
291 * If LWORK is too small to guarantee orthogonality,
292 * PSSYEVX attempts to maintain orthogonality in
293 * the clusters with the smallest
294 * spacing between the eigenvalues.
295 * If LWORK is too small to compute all the eigenvectors
296 * requested, no computation is performed and INFO=-23
297 * is returned. Note that when RANGE='V', PSSYEVX does
298 * not know how many eigenvectors are requested until
299 * the eigenvalues are computed. Therefore, when RANGE='V'
300 * and as long as LWORK is large enough to allow PSSYEVX to
301 * compute the eigenvalues, PSSYEVX will compute the
302 * eigenvalues and as many eigenvectors as it can.
303 *
304 * Relationship between workspace, orthogonality & performance:
305 * Greater performance can be achieved if adequate workspace
306 * is provided. On the other hand, in some situations,
307 * performance can decrease as the workspace provided
308 * increases above the workspace amount shown below:
309 *
310 * For optimal performance, greater workspace may be
311 * needed, i.e.
312 * LWORK >= MAX( LWORK, 5*N + NSYTRD_LWOPT )
313 * Where:
314 * LWORK, as defined previously, depends upon the number
315 * of eigenvectors requested, and
316 * NSYTRD_LWOPT = N + 2*( ANB+1 )*( 4*NPS+2 ) +
317 * ( NPS + 3 ) * NPS
318 *
319 * ANB = PJLAENV( DESCA( CTXT_), 3, 'PSSYTTRD', 'L',
320 * 0, 0, 0, 0)
321 * SQNPC = INT( SQRT( DBLE( NPROW * NPCOL ) ) )
322 * NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB )
323 *
324 * NUMROC is a ScaLAPACK tool functions;
325 * PJLAENV is a ScaLAPACK envionmental inquiry function
326 * MYROW, MYCOL, NPROW and NPCOL can be determined by
327 * calling the subroutine BLACS_GRIDINFO.
328 *
329 * For large N, no extra workspace is needed, however the
330 * biggest boost in performance comes for small N, so it
331 * is wise to provide the extra workspace (typically less
332 * than a Megabyte per process).
333 *
334 * If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing
335 * enough space to compute all the eigenvectors
336 * orthogonally will cause serious degradation in
337 * performance. In the limit (i.e. CLUSTERSIZE = N-1)
338 * PSSTEIN will perform no better than SSTEIN on 1
339 * processor.
340 * For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing
341 * all eigenvectors will increase the total execution time
342 * by a factor of 2 or more.
343 * For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will
344 * grow as the square of the cluster size, all other factors
345 * remaining equal and assuming enough workspace. Less
346 * workspace means less reorthogonalization but faster
347 * execution.
348 *
349 * If LWORK = -1, then LWORK is global input and a workspace
350 * query is assumed; the routine only calculates the size
351 * required for optimal performance for all work arrays. Each of
352 * these values is returned in the first entry of the
353 * corresponding work arrays, and no error message is issued by
354 * PXERBLA.
355 *
356 * IWORK (local workspace) INTEGER array
357 * On return, IWORK(1) contains the amount of integer workspace
358 * required.
359 *
360 * LIWORK (local input) INTEGER
361 * size of IWORK
362 * LIWORK >= 6 * NNP
363 * Where:
364 * NNP = MAX( N, NPROW*NPCOL + 1, 4 )
365 * If LIWORK = -1, then LIWORK is global input and a workspace
366 * query is assumed; the routine only calculates the minimum
367 * and optimal size for all work arrays. Each of these
368 * values is returned in the first entry of the corresponding
369 * work array, and no error message is issued by PXERBLA.
370 *
371 * IFAIL (global output) INTEGER array, dimension (N)
372 * If JOBZ = 'V', then on normal exit, the first M elements of
373 * IFAIL are zero. If (MOD(INFO,2).NE.0) on exit, then
374 * IFAIL contains the
375 * indices of the eigenvectors that failed to converge.
376 * If JOBZ = 'N', then IFAIL is not referenced.
377 *
378 * ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL)
379 * This array contains indices of eigenvectors corresponding to
380 * a cluster of eigenvalues that could not be reorthogonalized
381 * due to insufficient workspace (see LWORK, ORFAC and INFO).
382 * Eigenvectors corresponding to clusters of eigenvalues indexed
383 * ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be
384 * reorthogonalized due to lack of workspace. Hence the
385 * eigenvectors corresponding to these clusters may not be
386 * orthogonal. ICLUSTR() is a zero terminated array.
387 * (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if
388 * K is the number of clusters
389 * ICLUSTR is not referenced if JOBZ = 'N'
390 *
391 * GAP (global output) REAL array,
392 * dimension (NPROW*NPCOL)
393 * This array contains the gap between eigenvalues whose
394 * eigenvectors could not be reorthogonalized. The output
395 * values in this array correspond to the clusters indicated
396 * by the array ICLUSTR. As a result, the dot product between
397 * eigenvectors correspoding to the I^th cluster may be as high
398 * as ( C * n ) / GAP(I) where C is a small constant.
399 *
400 * INFO (global output) INTEGER
401 * = 0: successful exit
402 * < 0: If the i-th argument is an array and the j-entry had
403 * an illegal value, then INFO = -(i*100+j), if the i-th
404 * argument is a scalar and had an illegal value, then
405 * INFO = -i.
406 * > 0: if (MOD(INFO,2).NE.0), then one or more eigenvectors
407 * failed to converge. Their indices are stored
408 * in IFAIL. Ensure ABSTOL=2.0*PSLAMCH( 'U' )
409 * Send e-mail to scalapack@cs.utk.edu
410 * if (MOD(INFO/2,2).NE.0),then eigenvectors corresponding
411 * to one or more clusters of eigenvalues could not be
412 * reorthogonalized because of insufficient workspace.
413 * The indices of the clusters are stored in the array
414 * ICLUSTR.
415 * if (MOD(INFO/4,2).NE.0), then space limit prevented
416 * PSSYEVX from computing all of the eigenvectors
417 * between VL and VU. The number of eigenvectors
418 * computed is returned in NZ.
419 * if (MOD(INFO/8,2).NE.0), then PSSTEBZ failed to compute
420 * eigenvalues. Ensure ABSTOL=2.0*PSLAMCH( 'U' )
421 * Send e-mail to scalapack@cs.utk.edu
422 *
423 * Alignment requirements
424 * ======================
425 *
426 * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1)
427 * must verify some alignment properties, namely the following
428 * expressions should be true:
429 *
430 * ( MB_A.EQ.NB_A.EQ.MB_Z .AND. IROFFA.EQ.IROFFZ .AND. IROFFA.EQ.0 .AND.
431 * IAROW.EQ.IZROW )
432 * where
433 * IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ).
434 *
435 * =====================================================================
436 *
437 * Differences between PSSYEVX and SSYEVX
438 * ======================================
439 *
440 * A, LDA -> A, IA, JA, DESCA
441 * Z, LDZ -> Z, IZ, JZ, DESCZ
442 * WORKSPACE needs are larger for PSSYEVX.
443 * LIWORK parameter added
444 *
445 * ORFAC, ICLUSTER() and GAP() parameters added
446 * meaning of INFO is changed
447 *
448 * Functional differences:
449 * PSSYEVX does not promise orthogonality for eigenvectors associated
450 * with tighly clustered eigenvalues.
451 * PSSYEVX does not reorthogonalize eigenvectors
452 * that are on different processes. The extent of reorthogonalization
453 * is controlled by the input parameter LWORK.
454 *
455 * Version 1.4 limitations:
456 * DESCA(MB_) = DESCA(NB_)
457 * DESCA(M_) = DESCZ(M_)
458 * DESCA(N_) = DESCZ(N_)
459 * DESCA(MB_) = DESCZ(MB_)
460 * DESCA(NB_) = DESCZ(NB_)
461 * DESCA(RSRC_) = DESCZ(RSRC_)
462 *
463 * .. Parameters ..
464  INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
465  $ MB_, NB_, RSRC_, CSRC_, LLD_
466  PARAMETER ( BLOCK_CYCLIC_2D = 1, dlen_ = 9, dtype_ = 1,
467  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
468  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
469  REAL ZERO, ONE, TEN, FIVE
470  PARAMETER ( ZERO = 0.0e+0, one = 1.0e+0, ten = 10.0e+0,
471  $ five = 5.0e+0 )
472  INTEGER IERREIN, IERRCLS, IERRSPC, IERREBZ
473  parameter( ierrein = 1, ierrcls = 2, ierrspc = 4,
474  $ ierrebz = 8 )
475 * ..
476 * .. Local Scalars ..
477  LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, QUICKRETURN,
478  $ VALEIG, WANTZ
479  CHARACTER ORDER
480  INTEGER ANB, CSRC_A, I, IAROW, ICOFFA, ICTXT, IINFO,
481  $ INDD, INDD2, INDE, INDE2, INDIBL, INDISP,
482  $ indtau, indwork, iroffa, iroffz, iscale,
483  $ isizestebz, isizestein, izrow, lallwork,
484  $ liwmin, llwork, lwmin, lwopt, maxeigs, mb_a,
485  $ mq0, mycol, myrow, nb, nb_a, neig, nn, nnp,
486  $ np0, npcol, nprocs, nprow, nps, nsplit,
487  $ nsytrd_lwopt, nzz, offset, rsrc_a, rsrc_z,
488  $ sizeormtr, sizestein, sizesyevx, sqnpc
489  REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN,
490  $ SIGMA, SMLNUM, VLL, VUU
491 * ..
492 * .. Local Arrays ..
493  INTEGER IDUM1( 4 ), IDUM2( 4 )
494 * ..
495 * .. External Functions ..
496  LOGICAL LSAME
497  INTEGER ICEIL, INDXG2P, NUMROC, PJLAENV
498  REAL PSLAMCH, PSLANSY
499  EXTERNAL lsame, iceil, indxg2p, numroc, pjlaenv,
500  $ pslamch, pslansy
501 * ..
502 * .. External Subroutines ..
503  EXTERNAL blacs_gridinfo, chk1mat, igamn2d, pchk1mat,
505  $ psstebz, psstein, pssyntrd, pxerbla, sgebr2d,
506  $ sgebs2d, slasrt, sscal
507 * ..
508 * .. Intrinsic Functions ..
509  INTRINSIC abs, dble, ichar, int, max, min, mod, real,
510  $ sqrt
511 * ..
512 * .. Executable Statements ..
513 * This is just to keep ftnchek and toolpack/1 happy
514  IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
515  $ rsrc_.LT.0 )RETURN
516 *
517  quickreturn = ( n.EQ.0 )
518 *
519 * Test the input arguments.
520 *
521  ictxt = desca( ctxt_ )
522  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
523  info = 0
524 *
525  wantz = lsame( jobz, 'V' )
526  IF( nprow.EQ.-1 ) THEN
527  info = -( 800+ctxt_ )
528  ELSE IF( wantz ) THEN
529  IF( ictxt.NE.descz( ctxt_ ) ) THEN
530  info = -( 2100+ctxt_ )
531  END IF
532  END IF
533  IF( info.EQ.0 ) THEN
534  CALL chk1mat( n, 4, n, 4, ia, ja, desca, 8, info )
535  IF( wantz )
536  $ CALL chk1mat( n, 4, n, 4, iz, jz, descz, 21, info )
537 *
538  IF( info.EQ.0 ) THEN
539 *
540 * Get machine constants.
541 *
542  safmin = pslamch( ictxt, 'Safe minimum' )
543  eps = pslamch( ictxt, 'Precision' )
544  smlnum = safmin / eps
545  bignum = one / smlnum
546  rmin = sqrt( smlnum )
547  rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) )
548 *
549  nprocs = nprow*npcol
550  lower = lsame( uplo, 'L' )
551  alleig = lsame( range, 'A' )
552  valeig = lsame( range, 'V' )
553  indeig = lsame( range, 'I' )
554 *
555 * Set up pointers into the WORK array
556 *
557  indtau = 1
558  inde = indtau + n
559  indd = inde + n
560  indd2 = indd + n
561  inde2 = indd2 + n
562  indwork = inde2 + n
563  llwork = lwork - indwork + 1
564 *
565 * Set up pointers into the IWORK array
566 *
567  isizestein = 3*n + nprocs + 1
568  isizestebz = max( 4*n, 14, nprocs )
569  indibl = ( max( isizestein, isizestebz ) ) + 1
570  indisp = indibl + n
571 *
572 * Compute the total amount of space needed
573 *
574  lquery = .false.
575  IF( lwork.EQ.-1 .OR. liwork.EQ.-1 )
576  $ lquery = .true.
577 *
578  nnp = max( n, nprocs+1, 4 )
579  liwmin = 6*nnp
580 *
581  nprocs = nprow*npcol
582  nb_a = desca( nb_ )
583  mb_a = desca( mb_ )
584  nb = nb_a
585  nn = max( n, nb, 2 )
586 *
587  rsrc_a = desca( rsrc_ )
588  csrc_a = desca( csrc_ )
589  iroffa = mod( ia-1, mb_a )
590  icoffa = mod( ja-1, nb_a )
591  iarow = indxg2p( 1, nb_a, myrow, rsrc_a, nprow )
592  np0 = numroc( n+iroffa, nb, 0, 0, nprow )
593  mq0 = numroc( n+icoffa, nb, 0, 0, npcol )
594  IF( wantz ) THEN
595  rsrc_z = descz( rsrc_ )
596  iroffz = mod( iz-1, mb_a )
597  izrow = indxg2p( 1, nb_a, myrow, rsrc_z, nprow )
598  ELSE
599  iroffz = 0
600  izrow = 0
601  END IF
602 *
603  IF( ( .NOT.wantz ) .OR. ( valeig .AND. ( .NOT.lquery ) ) )
604  $ THEN
605  lwmin = 5*n + max( 5*nn, nb*( np0+1 ) )
606  IF( wantz ) THEN
607  mq0 = numroc( max( n, nb, 2 ), nb, 0, 0, npcol )
608  lwopt = 5*n + max( 5*nn, np0*mq0+2*nb*nb )
609  ELSE
610  lwopt = lwmin
611  END IF
612  neig = 0
613  ELSE
614  IF( alleig .OR. valeig ) THEN
615  neig = n
616  ELSE IF( indeig ) THEN
617  neig = iu - il + 1
618  END IF
619  mq0 = numroc( max( neig, nb, 2 ), nb, 0, 0, npcol )
620  lwmin = 5*n + max( 5*nn, np0*mq0+2*nb*nb ) +
621  $ iceil( neig, nprow*npcol )*nn
622  lwopt = lwmin
623 *
624  END IF
625 *
626 * Compute how much workspace is needed to use the
627 * new TRD code
628 *
629  anb = pjlaenv( ictxt, 3, 'PSSYTTRD', 'L', 0, 0, 0, 0 )
630  sqnpc = int( sqrt( dble( nprow*npcol ) ) )
631  nps = max( numroc( n, 1, 0, 0, sqnpc ), 2*anb )
632  nsytrd_lwopt = 2*( anb+1 )*( 4*nps+2 ) + ( nps+4 )*nps
633  lwopt = max( lwopt, 5*n+nsytrd_lwopt )
634 *
635  END IF
636  IF( info.EQ.0 ) THEN
637  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
638  work( 1 ) = abstol
639  IF( valeig ) THEN
640  work( 2 ) = vl
641  work( 3 ) = vu
642  ELSE
643  work( 2 ) = zero
644  work( 3 ) = zero
645  END IF
646  CALL sgebs2d( ictxt, 'ALL', ' ', 3, 1, work, 3 )
647  ELSE
648  CALL sgebr2d( ictxt, 'ALL', ' ', 3, 1, work, 3, 0, 0 )
649  END IF
650  IF( .NOT.( wantz .OR. lsame( jobz, 'N' ) ) ) THEN
651  info = -1
652  ELSE IF( .NOT.( alleig .OR. valeig .OR. indeig ) ) THEN
653  info = -2
654  ELSE IF( .NOT.( lower .OR. lsame( uplo, 'U' ) ) ) THEN
655  info = -3
656  ELSE IF( valeig .AND. n.GT.0 .AND. vu.LE.vl ) THEN
657  info = -10
658  ELSE IF( indeig .AND. ( il.LT.1 .OR. il.GT.max( 1, n ) ) )
659  $ THEN
660  info = -11
661  ELSE IF( indeig .AND. ( iu.LT.min( n, il ) .OR. iu.GT.n ) )
662  $ THEN
663  info = -12
664  ELSE IF( lwork.LT.lwmin .AND. lwork.NE.-1 ) THEN
665  info = -23
666  ELSE IF( liwork.LT.liwmin .AND. liwork.NE.-1 ) THEN
667  info = -25
668  ELSE IF( valeig .AND. ( abs( work( 2 )-vl ).GT.five*eps*
669  $ abs( vl ) ) ) THEN
670  info = -9
671  ELSE IF( valeig .AND. ( abs( work( 3 )-vu ).GT.five*eps*
672  $ abs( vu ) ) ) THEN
673  info = -10
674  ELSE IF( abs( work( 1 )-abstol ).GT.five*eps*abs( abstol ) )
675  $ THEN
676  info = -13
677  ELSE IF( iroffa.NE.0 ) THEN
678  info = -6
679  ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
680  info = -( 800+nb_ )
681  END IF
682  IF( wantz ) THEN
683  IF( iroffa.NE.iroffz ) THEN
684  info = -19
685  ELSE IF( iarow.NE.izrow ) THEN
686  info = -19
687  ELSE IF( desca( m_ ).NE.descz( m_ ) ) THEN
688  info = -( 2100+m_ )
689  ELSE IF( desca( n_ ).NE.descz( n_ ) ) THEN
690  info = -( 2100+n_ )
691  ELSE IF( desca( mb_ ).NE.descz( mb_ ) ) THEN
692  info = -( 2100+mb_ )
693  ELSE IF( desca( nb_ ).NE.descz( nb_ ) ) THEN
694  info = -( 2100+nb_ )
695  ELSE IF( desca( rsrc_ ).NE.descz( rsrc_ ) ) THEN
696  info = -( 2100+rsrc_ )
697  ELSE IF( desca( csrc_ ).NE.descz( csrc_ ) ) THEN
698  info = -( 2100+csrc_ )
699  ELSE IF( ictxt.NE.descz( ctxt_ ) ) THEN
700  info = -( 2100+ctxt_ )
701  END IF
702  END IF
703  END IF
704  IF( wantz ) THEN
705  idum1( 1 ) = ichar( 'V' )
706  ELSE
707  idum1( 1 ) = ichar( 'N' )
708  END IF
709  idum2( 1 ) = 1
710  IF( lower ) THEN
711  idum1( 2 ) = ichar( 'L' )
712  ELSE
713  idum1( 2 ) = ichar( 'U' )
714  END IF
715  idum2( 2 ) = 2
716  IF( alleig ) THEN
717  idum1( 3 ) = ichar( 'A' )
718  ELSE IF( indeig ) THEN
719  idum1( 3 ) = ichar( 'I' )
720  ELSE
721  idum1( 3 ) = ichar( 'V' )
722  END IF
723  idum2( 3 ) = 3
724  IF( lquery ) THEN
725  idum1( 4 ) = -1
726  ELSE
727  idum1( 4 ) = 1
728  END IF
729  idum2( 4 ) = 4
730  IF( wantz ) THEN
731  CALL pchk2mat( n, 4, n, 4, ia, ja, desca, 8, n, 4, n, 4, iz,
732  $ jz, descz, 21, 4, idum1, idum2, info )
733  ELSE
734  CALL pchk1mat( n, 4, n, 4, ia, ja, desca, 8, 4, idum1,
735  $ idum2, info )
736  END IF
737  work( 1 ) = real( lwopt )
738  iwork( 1 ) = liwmin
739  END IF
740 *
741  IF( info.NE.0 ) THEN
742  CALL pxerbla( ictxt, 'PSSYEVX', -info )
743  RETURN
744  ELSE IF( lquery ) THEN
745  RETURN
746  END IF
747 *
748 * Quick return if possible
749 *
750  IF( quickreturn ) THEN
751  IF( wantz ) THEN
752  nz = 0
753  iclustr( 1 ) = 0
754  END IF
755  m = 0
756  work( 1 ) = real( lwopt )
757  iwork( 1 ) = liwmin
758  RETURN
759  END IF
760 *
761 * Scale matrix to allowable range, if necessary.
762 *
763  abstll = abstol
764  iscale = 0
765  IF( valeig ) THEN
766  vll = vl
767  vuu = vu
768  ELSE
769  vll = zero
770  vuu = zero
771  END IF
772 *
773  anrm = pslansy( 'M', uplo, n, a, ia, ja, desca, work( indwork ) )
774 *
775  IF( anrm.GT.zero .AND. anrm.LT.rmin ) THEN
776  iscale = 1
777  sigma = rmin / anrm
778  anrm = anrm*sigma
779  ELSE IF( anrm.GT.rmax ) THEN
780  iscale = 1
781  sigma = rmax / anrm
782  anrm = anrm*sigma
783  END IF
784 *
785  IF( iscale.EQ.1 ) THEN
786  CALL pslascl( uplo, one, sigma, n, n, a, ia, ja, desca, iinfo )
787  IF( abstol.GT.0 )
788  $ abstll = abstol*sigma
789  IF( valeig ) THEN
790  vll = vl*sigma
791  vuu = vu*sigma
792  IF( vuu.EQ.vll ) THEN
793  vuu = vuu + 2*max( abs( vuu )*eps, safmin )
794  END IF
795  END IF
796  END IF
797 *
798 * Call PSSYNTRD to reduce symmetric matrix to tridiagonal form.
799 *
800  lallwork = llwork
801 *
802  CALL pssyntrd( uplo, n, a, ia, ja, desca, work( indd ),
803  $ work( inde ), work( indtau ), work( indwork ),
804  $ llwork, iinfo )
805 *
806 *
807 * Copy the values of D, E to all processes
808 *
809 * Here PxLARED1D is used to redistribute the tridiagonal matrix.
810 * PxLARED1D, however, doesn't yet work with arbritary matrix
811 * distributions so we have PxELGET as a backup.
812 *
813  offset = 0
814  IF( ia.EQ.1 .AND. ja.EQ.1 .AND. rsrc_a.EQ.0 .AND. csrc_a.EQ.0 )
815  $ THEN
816  CALL pslared1d( n, ia, ja, desca, work( indd ), work( indd2 ),
817  $ work( indwork ), llwork )
818 *
819  CALL pslared1d( n, ia, ja, desca, work( inde ), work( inde2 ),
820  $ work( indwork ), llwork )
821  IF( .NOT.lower )
822  $ offset = 1
823  ELSE
824  DO 10 i = 1, n
825  CALL pselget( 'A', ' ', work( indd2+i-1 ), a, i+ia-1,
826  $ i+ja-1, desca )
827  10 CONTINUE
828  IF( lsame( uplo, 'U' ) ) THEN
829  DO 20 i = 1, n - 1
830  CALL pselget( 'A', ' ', work( inde2+i-1 ), a, i+ia-1,
831  $ i+ja, desca )
832  20 CONTINUE
833  ELSE
834  DO 30 i = 1, n - 1
835  CALL pselget( 'A', ' ', work( inde2+i-1 ), a, i+ia,
836  $ i+ja-1, desca )
837  30 CONTINUE
838  END IF
839  END IF
840 *
841 * Call PSSTEBZ and, if eigenvectors are desired, PSSTEIN.
842 *
843  IF( wantz ) THEN
844  order = 'B'
845  ELSE
846  order = 'E'
847  END IF
848 *
849  CALL psstebz( ictxt, range, order, n, vll, vuu, il, iu, abstll,
850  $ work( indd2 ), work( inde2+offset ), m, nsplit, w,
851  $ iwork( indibl ), iwork( indisp ), work( indwork ),
852  $ llwork, iwork( 1 ), isizestebz, iinfo )
853 *
854 *
855 * IF PSSTEBZ fails, the error propogates to INFO, but
856 * we do not propogate the eigenvalue(s) which failed because:
857 * 1) This should never happen if the user specifies
858 * ABSTOL = 2 * PSLAMCH( 'U' )
859 * 2) PSSTEIN will confirm/deny whether the eigenvalues are
860 * close enough.
861 *
862  IF( iinfo.NE.0 ) THEN
863  info = info + ierrebz
864  DO 40 i = 1, m
865  iwork( indibl+i-1 ) = abs( iwork( indibl+i-1 ) )
866  40 CONTINUE
867  END IF
868  IF( wantz ) THEN
869 *
870  IF( valeig ) THEN
871 *
872 * Compute the maximum number of eigenvalues that we can
873 * compute in the
874 * workspace that we have, and that we can store in Z.
875 *
876 * Loop through the possibilities looking for the largest
877 * NZ that we can feed to PSSTEIN and PSORMTR
878 *
879 * Since all processes must end up with the same value
880 * of NZ, we first compute the minimum of LALLWORK
881 *
882  CALL igamn2d( ictxt, 'A', ' ', 1, 1, lallwork, 1, 1, 1, -1,
883  $ -1, -1 )
884 *
885  maxeigs = descz( n_ )
886 *
887  DO 50 nz = min( maxeigs, m ), 0, -1
888  mq0 = numroc( nz, nb, 0, 0, npcol )
889  sizestein = iceil( nz, nprocs )*n + max( 5*n, np0*mq0 )
890  sizeormtr = max( ( nb*( nb-1 ) ) / 2, ( mq0+np0 )*nb ) +
891  $ nb*nb
892 *
893  sizesyevx = max( sizestein, sizeormtr )
894  IF( sizesyevx.LE.lallwork )
895  $ GO TO 60
896  50 CONTINUE
897  60 CONTINUE
898  ELSE
899  nz = m
900  END IF
901  nz = max( nz, 0 )
902  IF( nz.NE.m ) THEN
903  info = info + ierrspc
904 *
905  DO 70 i = 1, m
906  ifail( i ) = 0
907  70 CONTINUE
908 *
909 * The following code handles a rare special case
910 * - NZ .NE. M means that we don't have enough room to store
911 * all the vectors.
912 * - NSPLIT .GT. 1 means that the matrix split
913 * In this case, we cannot simply take the first NZ eigenvalues
914 * because PSSTEBZ sorts the eigenvalues by block when
915 * a split occurs. So, we have to make another call to
916 * PSSTEBZ with a new upper limit - VUU.
917 *
918  IF( nsplit.GT.1 ) THEN
919  CALL slasrt( 'I', m, w, iinfo )
920  nzz = 0
921  IF( nz.GT.0 ) THEN
922 *
923  vuu = w( nz ) - ten*( eps*anrm+safmin )
924  IF( vll.GE.vuu ) THEN
925  nzz = 0
926  ELSE
927  CALL psstebz( ictxt, range, order, n, vll, vuu, il,
928  $ iu, abstll, work( indd2 ),
929  $ work( inde2+offset ), nzz, nsplit, w,
930  $ iwork( indibl ), iwork( indisp ),
931  $ work( indwork ), llwork, iwork( 1 ),
932  $ isizestebz, iinfo )
933  END IF
934 *
935  IF( mod( info / ierrebz, 1 ).EQ.0 ) THEN
936  IF( nzz.GT.nz .OR. iinfo.NE.0 ) THEN
937  info = info + ierrebz
938  END IF
939  END IF
940  END IF
941  nz = min( nz, nzz )
942 *
943  END IF
944  END IF
945  CALL psstein( n, work( indd2 ), work( inde2+offset ), nz, w,
946  $ iwork( indibl ), iwork( indisp ), orfac, z, iz,
947  $ jz, descz, work( indwork ), lallwork, iwork( 1 ),
948  $ isizestein, ifail, iclustr, gap, iinfo )
949 *
950  IF( iinfo.GE.nz+1 )
951  $ info = info + ierrcls
952  IF( mod( iinfo, nz+1 ).NE.0 )
953  $ info = info + ierrein
954 *
955 * Z = Q * Z
956 *
957 *
958  IF( nz.GT.0 ) THEN
959  CALL psormtr( 'L', uplo, 'N', n, nz, a, ia, ja, desca,
960  $ work( indtau ), z, iz, jz, descz,
961  $ work( indwork ), llwork, iinfo )
962  END IF
963 *
964  END IF
965 *
966 * If matrix was scaled, then rescale eigenvalues appropriately.
967 *
968  IF( iscale.EQ.1 ) THEN
969  CALL sscal( m, one / sigma, w, 1 )
970  END IF
971 *
972  work( 1 ) = real( lwopt )
973  iwork( 1 ) = liwmin
974 *
975  RETURN
976 *
977 * End of PSSYEVX
978 *
979  END
max
#define max(A, B)
Definition: pcgemr.c:180
pslascl
subroutine pslascl(TYPE, CFROM, CTO, M, N, A, IA, JA, DESCA, INFO)
Definition: pslascl.f:3
pssyevx
subroutine pssyevx(JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, VU, IL, IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ, WORK, LWORK, IWORK, LIWORK, IFAIL, ICLUSTR, GAP, INFO)
Definition: pssyevx.f:5
psstebz
subroutine psstebz(ICTXT, RANGE, ORDER, N, VL, VU, IL, IU, ABSTOL, D, E, M, NSPLIT, W, IBLOCK, ISPLIT, WORK, LWORK, IWORK, LIWORK, INFO)
Definition: psstebz.f:4
pssyntrd
subroutine pssyntrd(UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, LWORK, INFO)
Definition: pssyntrd.f:3
psstein
subroutine psstein(N, D, E, M, W, IBLOCK, ISPLIT, ORFAC, Z, IZ, JZ, DESCZ, WORK, LWORK, IWORK, LIWORK, IFAIL, ICLUSTR, GAP, INFO)
Definition: psstein.f:4
pchk1mat
subroutine pchk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, NEXTRA, EX, EXPOS, INFO)
Definition: pchkxmat.f:3
pchk2mat
subroutine pchk2mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, MB, MBPOS0, NB, NBPOS0, IB, JB, DESCB, DESCBPOS0, NEXTRA, EX, EXPOS, INFO)
Definition: pchkxmat.f:175
pslared1d
subroutine pslared1d(N, IA, JA, DESC, BYCOL, BYALL, WORK, LWORK)
Definition: pslared1d.f:2
psormtr
subroutine psormtr(SIDE, UPLO, TRANS, M, N, A, IA, JA, DESCA, TAU, C, IC, JC, DESCC, WORK, LWORK, INFO)
Definition: psormtr.f:3
pselget
subroutine pselget(SCOPE, TOP, ALPHA, A, IA, JA, DESCA)
Definition: pselget.f:2
chk1mat
subroutine chk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, INFO)
Definition: chk1mat.f:3
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
min
#define min(A, B)
Definition: pcgemr.c:181