SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pdsyevx.f
Go to the documentation of this file.
1 SUBROUTINE pdsyevx( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL,
2 $ VU, IL, IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ,
3 $ JZ, DESCZ, WORK, LWORK, IWORK, LIWORK, IFAIL,
4 $ ICLUSTR, GAP, INFO )
5*
6* -- ScaLAPACK routine (version 1.7) --
7* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8* and University of California, Berkeley.
9* May 25, 2001
10*
11* .. Scalar Arguments ..
12 CHARACTER JOBZ, RANGE, UPLO
13 INTEGER IA, IL, INFO, IU, IZ, JA, JZ, LIWORK, LWORK, M,
14 $ N, NZ
15 DOUBLE PRECISION ABSTOL, ORFAC, VL, VU
16* ..
17* .. Array Arguments ..
18 INTEGER DESCA( * ), DESCZ( * ), ICLUSTR( * ),
19 $ IFAIL( * ), IWORK( * )
20 DOUBLE PRECISION A( * ), GAP( * ), W( * ), WORK( * ), Z( * )
21* ..
22*
23* Purpose
24* =======
25*
26* PDSYEVX computes selected eigenvalues and, optionally, eigenvectors
27* of a real symmetric matrix A by calling the recommended sequence
28* of ScaLAPACK routines. Eigenvalues/vectors can be selected by
29* specifying a range of values or a range of indices for the desired
30* eigenvalues.
31*
32* Notes
33* =====
34*
35* Each global data object is described by an associated description
36* vector. This vector stores the information required to establish
37* the mapping between an object element and its corresponding process
38* and memory location.
39*
40* Let A be a generic term for any 2D block cyclicly distributed array.
41* Such a global array has an associated description vector DESCA.
42* In the following comments, the character _ should be read as
43* "of the global array".
44*
45* NOTATION STORED IN EXPLANATION
46* --------------- -------------- --------------------------------------
47* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
48* DTYPE_A = 1.
49* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
50* the BLACS process grid A is distribu-
51* ted over. The context itself is glo-
52* bal, but the handle (the integer
53* value) may vary.
54* M_A (global) DESCA( M_ ) The number of rows in the global
55* array A.
56* N_A (global) DESCA( N_ ) The number of columns in the global
57* array A.
58* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
59* the rows of the array.
60* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
61* the columns of the array.
62* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
63* row of the array A is distributed.
64* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
65* first column of the array A is
66* distributed.
67* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
68* array. LLD_A >= MAX(1,LOCr(M_A)).
69*
70* Let K be the number of rows or columns of a distributed matrix,
71* and assume that its process grid has dimension p x q.
72* LOCr( K ) denotes the number of elements of K that a process
73* would receive if K were distributed over the p processes of its
74* process column.
75* Similarly, LOCc( K ) denotes the number of elements of K that a
76* process would receive if K were distributed over the q processes of
77* its process row.
78* The values of LOCr() and LOCc() may be determined via a call to the
79* ScaLAPACK tool function, NUMROC:
80* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
81* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
82* An upper bound for these quantities may be computed by:
83* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
84* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
85*
86* PDSYEVX assumes IEEE 754 standard compliant arithmetic. To port
87* to a system which does not have IEEE 754 arithmetic, modify
88* the appropriate SLmake.inc file to include the compiler switch
89* -DNO_IEEE. This switch only affects the compilation of pdlaiect.c.
90*
91* Arguments
92* =========
93*
94* NP = the number of rows local to a given process.
95* NQ = the number of columns local to a given process.
96*
97* JOBZ (global input) CHARACTER*1
98* Specifies whether or not to compute the eigenvectors:
99* = 'N': Compute eigenvalues only.
100* = 'V': Compute eigenvalues and eigenvectors.
101*
102* RANGE (global input) CHARACTER*1
103* = 'A': all eigenvalues will be found.
104* = 'V': all eigenvalues in the interval [VL,VU] will be found.
105* = 'I': the IL-th through IU-th eigenvalues will be found.
106*
107* UPLO (global input) CHARACTER*1
108* Specifies whether the upper or lower triangular part of the
109* symmetric matrix A is stored:
110* = 'U': Upper triangular
111* = 'L': Lower triangular
112*
113* N (global input) INTEGER
114* The number of rows and columns of the matrix A. N >= 0.
115*
116* A (local input/workspace) block cyclic DOUBLE PRECISION array,
117* global dimension (N, N),
118* local dimension ( LLD_A, LOCc(JA+N-1) )
119*
120* On entry, the symmetric matrix A. If UPLO = 'U', only the
121* upper triangular part of A is used to define the elements of
122* the symmetric matrix. If UPLO = 'L', only the lower
123* triangular part of A is used to define the elements of the
124* symmetric matrix.
125*
126* On exit, the lower triangle (if UPLO='L') or the upper
127* triangle (if UPLO='U') of A, including the diagonal, is
128* destroyed.
129*
130* IA (global input) INTEGER
131* A's global row index, which points to the beginning of the
132* submatrix which is to be operated on.
133*
134* JA (global input) INTEGER
135* A's global column index, which points to the beginning of
136* the submatrix which is to be operated on.
137*
138* DESCA (global and local input) INTEGER array of dimension DLEN_.
139* The array descriptor for the distributed matrix A.
140* If DESCA( CTXT_ ) is incorrect, PDSYEVX cannot guarantee
141* correct error reporting.
142*
143* VL (global input) DOUBLE PRECISION
144* If RANGE='V', the lower bound of the interval to be searched
145* for eigenvalues. Not referenced if RANGE = 'A' or 'I'.
146*
147* VU (global input) DOUBLE PRECISION
148* If RANGE='V', the upper bound of the interval to be searched
149* for eigenvalues. Not referenced if RANGE = 'A' or 'I'.
150*
151* IL (global input) INTEGER
152* If RANGE='I', the index (from smallest to largest) of the
153* smallest eigenvalue to be returned. IL >= 1.
154* Not referenced if RANGE = 'A' or 'V'.
155*
156* IU (global input) INTEGER
157* If RANGE='I', the index (from smallest to largest) of the
158* largest eigenvalue to be returned. min(IL,N) <= IU <= N.
159* Not referenced if RANGE = 'A' or 'V'.
160*
161* ABSTOL (global input) DOUBLE PRECISION
162* If JOBZ='V', setting ABSTOL to PDLAMCH( CONTEXT, 'U') yields
163* the most orthogonal eigenvectors.
164*
165* The absolute error tolerance for the eigenvalues.
166* An approximate eigenvalue is accepted as converged
167* when it is determined to lie in an interval [a,b]
168* of width less than or equal to
169*
170* ABSTOL + EPS * max( |a|,|b| ) ,
171*
172* where EPS is the machine precision. If ABSTOL is less than
173* or equal to zero, then EPS*norm(T) will be used in its place,
174* where norm(T) is the 1-norm of the tridiagonal matrix
175* obtained by reducing A to tridiagonal form.
176*
177* Eigenvalues will be computed most accurately when ABSTOL is
178* set to twice the underflow threshold 2*PDLAMCH('S') not zero.
179* If this routine returns with ((MOD(INFO,2).NE.0) .OR.
180* (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or
181* eigenvectors did not converge, try setting ABSTOL to
182* 2*PDLAMCH('S').
183*
184* See "Computing Small Singular Values of Bidiagonal Matrices
185* with Guaranteed High Relative Accuracy," by Demmel and
186* Kahan, LAPACK Working Note #3.
187*
188* See "On the correctness of Parallel Bisection in Floating
189* Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70
190*
191* M (global output) INTEGER
192* Total number of eigenvalues found. 0 <= M <= N.
193*
194* NZ (global output) INTEGER
195* Total number of eigenvectors computed. 0 <= NZ <= M.
196* The number of columns of Z that are filled.
197* If JOBZ .NE. 'V', NZ is not referenced.
198* If JOBZ .EQ. 'V', NZ = M unless the user supplies
199* insufficient space and PDSYEVX is not able to detect this
200* before beginning computation. To get all the eigenvectors
201* requested, the user must supply both sufficient
202* space to hold the eigenvectors in Z (M .LE. DESCZ(N_))
203* and sufficient workspace to compute them. (See LWORK below.)
204* PDSYEVX is always able to detect insufficient space without
205* computation unless RANGE .EQ. 'V'.
206*
207* W (global output) DOUBLE PRECISION array, dimension (N)
208* On normal exit, the first M entries contain the selected
209* eigenvalues in ascending order.
210*
211* ORFAC (global input) DOUBLE PRECISION
212* Specifies which eigenvectors should be reorthogonalized.
213* Eigenvectors that correspond to eigenvalues which are within
214* tol=ORFAC*norm(A) of each other are to be reorthogonalized.
215* However, if the workspace is insufficient (see LWORK),
216* tol may be decreased until all eigenvectors to be
217* reorthogonalized can be stored in one process.
218* No reorthogonalization will be done if ORFAC equals zero.
219* A default value of 10^-3 is used if ORFAC is negative.
220* ORFAC should be identical on all processes.
221*
222* Z (local output) DOUBLE PRECISION array,
223* global dimension (N, N),
224* local dimension ( LLD_Z, LOCc(JZ+N-1) )
225* If JOBZ = 'V', then on normal exit the first M columns of Z
226* contain the orthonormal eigenvectors of the matrix
227* corresponding to the selected eigenvalues. If an eigenvector
228* fails to converge, then that column of Z contains the latest
229* approximation to the eigenvector, and the index of the
230* eigenvector is returned in IFAIL.
231* If JOBZ = 'N', then Z is not referenced.
232*
233* IZ (global input) INTEGER
234* Z's global row index, which points to the beginning of the
235* submatrix which is to be operated on.
236*
237* JZ (global input) INTEGER
238* Z's global column index, which points to the beginning of
239* the submatrix which is to be operated on.
240*
241* DESCZ (global and local input) INTEGER array of dimension DLEN_.
242* The array descriptor for the distributed matrix Z.
243* DESCZ( CTXT_ ) must equal DESCA( CTXT_ )
244*
245* WORK (local workspace/output) DOUBLE PRECISION array,
246* dimension max(3,LWORK)
247* On return, WORK(1) contains the optimal amount of
248* workspace required for efficient execution.
249* if JOBZ='N' WORK(1) = optimal amount of workspace
250* required to compute eigenvalues efficiently
251* if JOBZ='V' WORK(1) = optimal amount of workspace
252* required to compute eigenvalues and eigenvectors
253* efficiently with no guarantee on orthogonality.
254* If RANGE='V', it is assumed that all eigenvectors
255* may be required.
256*
257* LWORK (local input) INTEGER
258* Size of WORK
259* See below for definitions of variables used to define LWORK.
260* If no eigenvectors are requested (JOBZ = 'N') then
261* LWORK >= 5 * N + MAX( 5 * NN, NB * ( NP0 + 1 ) )
262* If eigenvectors are requested (JOBZ = 'V' ) then
263* the amount of workspace required to guarantee that all
264* eigenvectors are computed is:
265* LWORK >= 5*N + MAX( 5*NN, NP0 * MQ0 + 2 * NB * NB ) +
266* ICEIL( NEIG, NPROW*NPCOL)*NN
267*
268* The computed eigenvectors may not be orthogonal if the
269* minimal workspace is supplied and ORFAC is too small.
270* If you want to guarantee orthogonality (at the cost
271* of potentially poor performance) you should add
272* the following to LWORK:
273* (CLUSTERSIZE-1)*N
274* where CLUSTERSIZE is the number of eigenvalues in the
275* largest cluster, where a cluster is defined as a set of
276* close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) |
277* W(J+1) <= W(J) + ORFAC*2*norm(A) }
278* Variable definitions:
279* NEIG = number of eigenvectors requested
280* NB = DESCA( MB_ ) = DESCA( NB_ ) =
281* DESCZ( MB_ ) = DESCZ( NB_ )
282* NN = MAX( N, NB, 2 )
283* DESCA( RSRC_ ) = DESCA( NB_ ) = DESCZ( RSRC_ ) =
284* DESCZ( CSRC_ ) = 0
285* NP0 = NUMROC( NN, NB, 0, 0, NPROW )
286* MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL )
287* ICEIL( X, Y ) is a ScaLAPACK function returning
288* ceiling(X/Y)
289*
290* When LWORK is too small:
291* If LWORK is too small to guarantee orthogonality,
292* PDSYEVX attempts to maintain orthogonality in
293* the clusters with the smallest
294* spacing between the eigenvalues.
295* If LWORK is too small to compute all the eigenvectors
296* requested, no computation is performed and INFO=-23
297* is returned. Note that when RANGE='V', PDSYEVX does
298* not know how many eigenvectors are requested until
299* the eigenvalues are computed. Therefore, when RANGE='V'
300* and as long as LWORK is large enough to allow PDSYEVX to
301* compute the eigenvalues, PDSYEVX will compute the
302* eigenvalues and as many eigenvectors as it can.
303*
304* Relationship between workspace, orthogonality & performance:
305* Greater performance can be achieved if adequate workspace
306* is provided. On the other hand, in some situations,
307* performance can decrease as the workspace provided
308* increases above the workspace amount shown below:
309*
310* For optimal performance, greater workspace may be
311* needed, i.e.
312* LWORK >= MAX( LWORK, 5*N + NSYTRD_LWOPT )
313* Where:
314* LWORK, as defined previously, depends upon the number
315* of eigenvectors requested, and
316* NSYTRD_LWOPT = N + 2*( ANB+1 )*( 4*NPS+2 ) +
317* ( NPS + 3 ) * NPS
318*
319* ANB = PJLAENV( DESCA( CTXT_), 3, 'PDSYTTRD', 'L',
320* 0, 0, 0, 0)
321* SQNPC = INT( SQRT( DBLE( NPROW * NPCOL ) ) )
322* NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB )
323*
324* NUMROC is a ScaLAPACK tool functions;
325* PJLAENV is a ScaLAPACK envionmental inquiry function
326* MYROW, MYCOL, NPROW and NPCOL can be determined by
327* calling the subroutine BLACS_GRIDINFO.
328*
329* For large N, no extra workspace is needed, however the
330* biggest boost in performance comes for small N, so it
331* is wise to provide the extra workspace (typically less
332* than a Megabyte per process).
333*
334* If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing
335* enough space to compute all the eigenvectors
336* orthogonally will cause serious degradation in
337* performance. In the limit (i.e. CLUSTERSIZE = N-1)
338* PDSTEIN will perform no better than DSTEIN on 1
339* processor.
340* For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing
341* all eigenvectors will increase the total execution time
342* by a factor of 2 or more.
343* For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will
344* grow as the square of the cluster size, all other factors
345* remaining equal and assuming enough workspace. Less
346* workspace means less reorthogonalization but faster
347* execution.
348*
349* If LWORK = -1, then LWORK is global input and a workspace
350* query is assumed; the routine only calculates the size
351* required for optimal performance for all work arrays. Each of
352* these values is returned in the first entry of the
353* corresponding work arrays, and no error message is issued by
354* PXERBLA.
355*
356* IWORK (local workspace) INTEGER array
357* On return, IWORK(1) contains the amount of integer workspace
358* required.
359*
360* LIWORK (local input) INTEGER
361* size of IWORK
362* LIWORK >= 6 * NNP
363* Where:
364* NNP = MAX( N, NPROW*NPCOL + 1, 4 )
365* If LIWORK = -1, then LIWORK is global input and a workspace
366* query is assumed; the routine only calculates the minimum
367* and optimal size for all work arrays. Each of these
368* values is returned in the first entry of the corresponding
369* work array, and no error message is issued by PXERBLA.
370*
371* IFAIL (global output) INTEGER array, dimension (N)
372* If JOBZ = 'V', then on normal exit, the first M elements of
373* IFAIL are zero. If (MOD(INFO,2).NE.0) on exit, then
374* IFAIL contains the
375* indices of the eigenvectors that failed to converge.
376* If JOBZ = 'N', then IFAIL is not referenced.
377*
378* ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL)
379* This array contains indices of eigenvectors corresponding to
380* a cluster of eigenvalues that could not be reorthogonalized
381* due to insufficient workspace (see LWORK, ORFAC and INFO).
382* Eigenvectors corresponding to clusters of eigenvalues indexed
383* ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be
384* reorthogonalized due to lack of workspace. Hence the
385* eigenvectors corresponding to these clusters may not be
386* orthogonal. ICLUSTR() is a zero terminated array.
387* (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if
388* K is the number of clusters
389* ICLUSTR is not referenced if JOBZ = 'N'
390*
391* GAP (global output) DOUBLE PRECISION array,
392* dimension (NPROW*NPCOL)
393* This array contains the gap between eigenvalues whose
394* eigenvectors could not be reorthogonalized. The output
395* values in this array correspond to the clusters indicated
396* by the array ICLUSTR. As a result, the dot product between
397* eigenvectors correspoding to the I^th cluster may be as high
398* as ( C * n ) / GAP(I) where C is a small constant.
399*
400* INFO (global output) INTEGER
401* = 0: successful exit
402* < 0: If the i-th argument is an array and the j-entry had
403* an illegal value, then INFO = -(i*100+j), if the i-th
404* argument is a scalar and had an illegal value, then
405* INFO = -i.
406* > 0: if (MOD(INFO,2).NE.0), then one or more eigenvectors
407* failed to converge. Their indices are stored
408* in IFAIL. Ensure ABSTOL=2.0*PDLAMCH( 'U' )
409* Send e-mail to scalapack@cs.utk.edu
410* if (MOD(INFO/2,2).NE.0),then eigenvectors corresponding
411* to one or more clusters of eigenvalues could not be
412* reorthogonalized because of insufficient workspace.
413* The indices of the clusters are stored in the array
414* ICLUSTR.
415* if (MOD(INFO/4,2).NE.0), then space limit prevented
416* PDSYEVX from computing all of the eigenvectors
417* between VL and VU. The number of eigenvectors
418* computed is returned in NZ.
419* if (MOD(INFO/8,2).NE.0), then PDSTEBZ failed to compute
420* eigenvalues. Ensure ABSTOL=2.0*PDLAMCH( 'U' )
421* Send e-mail to scalapack@cs.utk.edu
422*
423* Alignment requirements
424* ======================
425*
426* The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1)
427* must verify some alignment properties, namely the following
428* expressions should be true:
429*
430* ( MB_A.EQ.NB_A.EQ.MB_Z .AND. IROFFA.EQ.IROFFZ .AND. IROFFA.EQ.0 .AND.
431* IAROW.EQ.IZROW )
432* where
433* IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ).
434*
435* =====================================================================
436*
437* Differences between PDSYEVX and DSYEVX
438* ======================================
439*
440* A, LDA -> A, IA, JA, DESCA
441* Z, LDZ -> Z, IZ, JZ, DESCZ
442* WORKSPACE needs are larger for PDSYEVX.
443* LIWORK parameter added
444*
445* ORFAC, ICLUSTER() and GAP() parameters added
446* meaning of INFO is changed
447*
448* Functional differences:
449* PDSYEVX does not promise orthogonality for eigenvectors associated
450* with tighly clustered eigenvalues.
451* PDSYEVX does not reorthogonalize eigenvectors
452* that are on different processes. The extent of reorthogonalization
453* is controlled by the input parameter LWORK.
454*
455* Version 1.4 limitations:
456* DESCA(MB_) = DESCA(NB_)
457* DESCA(M_) = DESCZ(M_)
458* DESCA(N_) = DESCZ(N_)
459* DESCA(MB_) = DESCZ(MB_)
460* DESCA(NB_) = DESCZ(NB_)
461* DESCA(RSRC_) = DESCZ(RSRC_)
462*
463* .. Parameters ..
464 INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
465 $ MB_, NB_, RSRC_, CSRC_, LLD_
466 PARAMETER ( BLOCK_CYCLIC_2D = 1, dlen_ = 9, dtype_ = 1,
467 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
468 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
469 DOUBLE PRECISION ZERO, ONE, TEN, FIVE
470 PARAMETER ( ZERO = 0.0d+0, one = 1.0d+0, ten = 10.0d+0,
471 $ five = 5.0d+0 )
472 INTEGER IERREIN, IERRCLS, IERRSPC, IERREBZ
473 parameter( ierrein = 1, ierrcls = 2, ierrspc = 4,
474 $ ierrebz = 8 )
475* ..
476* .. Local Scalars ..
477 LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, QUICKRETURN,
478 $ VALEIG, WANTZ
479 CHARACTER ORDER
480 INTEGER ANB, CSRC_A, I, IAROW, ICOFFA, ICTXT, IINFO,
481 $ INDD, INDD2, INDE, INDE2, INDIBL, INDISP,
482 $ indtau, indwork, iroffa, iroffz, iscale,
483 $ isizestebz, isizestein, izrow, lallwork,
484 $ liwmin, llwork, lwmin, lwopt, maxeigs, mb_a,
485 $ mq0, mycol, myrow, nb, nb_a, neig, nn, nnp,
486 $ np0, npcol, nprocs, nprow, nps, nsplit,
487 $ nsytrd_lwopt, nzz, offset, rsrc_a, rsrc_z,
488 $ sizeormtr, sizestein, sizesyevx, sqnpc
489 DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN,
490 $ SIGMA, SMLNUM, VLL, VUU
491* ..
492* .. Local Arrays ..
493 INTEGER IDUM1( 4 ), IDUM2( 4 )
494* ..
495* .. External Functions ..
496 LOGICAL LSAME
497 INTEGER ICEIL, INDXG2P, NUMROC, PJLAENV
498 DOUBLE PRECISION PDLAMCH, PDLANSY
499 EXTERNAL lsame, iceil, indxg2p, numroc, pjlaenv,
500 $ pdlamch, pdlansy
501* ..
502* .. External Subroutines ..
503 EXTERNAL blacs_gridinfo, chk1mat, dgebr2d, dgebs2d,
504 $ dlasrt, dscal, igamn2d, pchk1mat, pchk2mat,
507* ..
508* .. Intrinsic Functions ..
509 INTRINSIC abs, dble, ichar, int, max, min, mod, sqrt
510* ..
511* .. Executable Statements ..
512* This is just to keep ftnchek and toolpack/1 happy
513 IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
514 $ rsrc_.LT.0 )RETURN
515*
516 quickreturn = ( n.EQ.0 )
517*
518* Test the input arguments.
519*
520 ictxt = desca( ctxt_ )
521 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
522 info = 0
523*
524 wantz = lsame( jobz, 'V' )
525 IF( nprow.EQ.-1 ) THEN
526 info = -( 800+ctxt_ )
527 ELSE IF( wantz ) THEN
528 IF( ictxt.NE.descz( ctxt_ ) ) THEN
529 info = -( 2100+ctxt_ )
530 END IF
531 END IF
532 IF( info.EQ.0 ) THEN
533 CALL chk1mat( n, 4, n, 4, ia, ja, desca, 8, info )
534 IF( wantz )
535 $ CALL chk1mat( n, 4, n, 4, iz, jz, descz, 21, info )
536*
537 IF( info.EQ.0 ) THEN
538*
539* Get machine constants.
540*
541 safmin = pdlamch( ictxt, 'Safe minimum' )
542 eps = pdlamch( ictxt, 'Precision' )
543 smlnum = safmin / eps
544 bignum = one / smlnum
545 rmin = sqrt( smlnum )
546 rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) )
547*
548 nprocs = nprow*npcol
549 lower = lsame( uplo, 'L' )
550 alleig = lsame( range, 'A' )
551 valeig = lsame( range, 'V' )
552 indeig = lsame( range, 'I' )
553*
554* Set up pointers into the WORK array
555*
556 indtau = 1
557 inde = indtau + n
558 indd = inde + n
559 indd2 = indd + n
560 inde2 = indd2 + n
561 indwork = inde2 + n
562 llwork = lwork - indwork + 1
563*
564* Set up pointers into the IWORK array
565*
566 isizestein = 3*n + nprocs + 1
567 isizestebz = max( 4*n, 14, nprocs )
568 indibl = ( max( isizestein, isizestebz ) ) + 1
569 indisp = indibl + n
570*
571* Compute the total amount of space needed
572*
573 lquery = .false.
574 IF( lwork.EQ.-1 .OR. liwork.EQ.-1 )
575 $ lquery = .true.
576*
577 nnp = max( n, nprocs+1, 4 )
578 liwmin = 6*nnp
579*
580 nprocs = nprow*npcol
581 nb_a = desca( nb_ )
582 mb_a = desca( mb_ )
583 nb = nb_a
584 nn = max( n, nb, 2 )
585*
586 rsrc_a = desca( rsrc_ )
587 csrc_a = desca( csrc_ )
588 iroffa = mod( ia-1, mb_a )
589 icoffa = mod( ja-1, nb_a )
590 iarow = indxg2p( 1, nb_a, myrow, rsrc_a, nprow )
591 np0 = numroc( n+iroffa, nb, 0, 0, nprow )
592 mq0 = numroc( n+icoffa, nb, 0, 0, npcol )
593 IF( wantz ) THEN
594 rsrc_z = descz( rsrc_ )
595 iroffz = mod( iz-1, mb_a )
596 izrow = indxg2p( 1, nb_a, myrow, rsrc_z, nprow )
597 ELSE
598 iroffz = 0
599 izrow = 0
600 END IF
601*
602 IF( ( .NOT.wantz ) .OR. ( valeig .AND. ( .NOT.lquery ) ) )
603 $ THEN
604 lwmin = 5*n + max( 5*nn, nb*( np0+1 ) )
605 IF( wantz ) THEN
606 mq0 = numroc( max( n, nb, 2 ), nb, 0, 0, npcol )
607 lwopt = 5*n + max( 5*nn, np0*mq0+2*nb*nb )
608 ELSE
609 lwopt = lwmin
610 END IF
611 neig = 0
612 ELSE
613 IF( alleig .OR. valeig ) THEN
614 neig = n
615 ELSE IF( indeig ) THEN
616 neig = iu - il + 1
617 END IF
618 mq0 = numroc( max( neig, nb, 2 ), nb, 0, 0, npcol )
619 lwmin = 5*n + max( 5*nn, np0*mq0+2*nb*nb ) +
620 $ iceil( neig, nprow*npcol )*nn
621 lwopt = lwmin
622*
623 END IF
624*
625* Compute how much workspace is needed to use the
626* new TRD code
627*
628 anb = pjlaenv( ictxt, 3, 'PDSYTTRD', 'L', 0, 0, 0, 0 )
629 sqnpc = int( sqrt( dble( nprow*npcol ) ) )
630 nps = max( numroc( n, 1, 0, 0, sqnpc ), 2*anb )
631 nsytrd_lwopt = 2*( anb+1 )*( 4*nps+2 ) + ( nps+4 )*nps
632 lwopt = max( lwopt, 5*n+nsytrd_lwopt )
633*
634 END IF
635 IF( info.EQ.0 ) THEN
636 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
637 work( 1 ) = abstol
638 IF( valeig ) THEN
639 work( 2 ) = vl
640 work( 3 ) = vu
641 ELSE
642 work( 2 ) = zero
643 work( 3 ) = zero
644 END IF
645 CALL dgebs2d( ictxt, 'ALL', ' ', 3, 1, work, 3 )
646 ELSE
647 CALL dgebr2d( ictxt, 'ALL', ' ', 3, 1, work, 3, 0, 0 )
648 END IF
649 IF( .NOT.( wantz .OR. lsame( jobz, 'N' ) ) ) THEN
650 info = -1
651 ELSE IF( .NOT.( alleig .OR. valeig .OR. indeig ) ) THEN
652 info = -2
653 ELSE IF( .NOT.( lower .OR. lsame( uplo, 'U' ) ) ) THEN
654 info = -3
655 ELSE IF( valeig .AND. n.GT.0 .AND. vu.LE.vl ) THEN
656 info = -10
657 ELSE IF( indeig .AND. ( il.LT.1 .OR. il.GT.max( 1, n ) ) )
658 $ THEN
659 info = -11
660 ELSE IF( indeig .AND. ( iu.LT.min( n, il ) .OR. iu.GT.n ) )
661 $ THEN
662 info = -12
663 ELSE IF( lwork.LT.lwmin .AND. lwork.NE.-1 ) THEN
664 info = -23
665 ELSE IF( liwork.LT.liwmin .AND. liwork.NE.-1 ) THEN
666 info = -25
667 ELSE IF( valeig .AND. ( abs( work( 2 )-vl ).GT.five*eps*
668 $ abs( vl ) ) ) THEN
669 info = -9
670 ELSE IF( valeig .AND. ( abs( work( 3 )-vu ).GT.five*eps*
671 $ abs( vu ) ) ) THEN
672 info = -10
673 ELSE IF( abs( work( 1 )-abstol ).GT.five*eps*abs( abstol ) )
674 $ THEN
675 info = -13
676 ELSE IF( iroffa.NE.0 ) THEN
677 info = -6
678 ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
679 info = -( 800+nb_ )
680 END IF
681 IF( wantz ) THEN
682 IF( iroffa.NE.iroffz ) THEN
683 info = -19
684 ELSE IF( iarow.NE.izrow ) THEN
685 info = -19
686 ELSE IF( desca( m_ ).NE.descz( m_ ) ) THEN
687 info = -( 2100+m_ )
688 ELSE IF( desca( n_ ).NE.descz( n_ ) ) THEN
689 info = -( 2100+n_ )
690 ELSE IF( desca( mb_ ).NE.descz( mb_ ) ) THEN
691 info = -( 2100+mb_ )
692 ELSE IF( desca( nb_ ).NE.descz( nb_ ) ) THEN
693 info = -( 2100+nb_ )
694 ELSE IF( desca( rsrc_ ).NE.descz( rsrc_ ) ) THEN
695 info = -( 2100+rsrc_ )
696 ELSE IF( desca( csrc_ ).NE.descz( csrc_ ) ) THEN
697 info = -( 2100+csrc_ )
698 ELSE IF( ictxt.NE.descz( ctxt_ ) ) THEN
699 info = -( 2100+ctxt_ )
700 END IF
701 END IF
702 END IF
703 IF( wantz ) THEN
704 idum1( 1 ) = ichar( 'V' )
705 ELSE
706 idum1( 1 ) = ichar( 'N' )
707 END IF
708 idum2( 1 ) = 1
709 IF( lower ) THEN
710 idum1( 2 ) = ichar( 'L' )
711 ELSE
712 idum1( 2 ) = ichar( 'U' )
713 END IF
714 idum2( 2 ) = 2
715 IF( alleig ) THEN
716 idum1( 3 ) = ichar( 'A' )
717 ELSE IF( indeig ) THEN
718 idum1( 3 ) = ichar( 'I' )
719 ELSE
720 idum1( 3 ) = ichar( 'V' )
721 END IF
722 idum2( 3 ) = 3
723 IF( lquery ) THEN
724 idum1( 4 ) = -1
725 ELSE
726 idum1( 4 ) = 1
727 END IF
728 idum2( 4 ) = 4
729 IF( wantz ) THEN
730 CALL pchk2mat( n, 4, n, 4, ia, ja, desca, 8, n, 4, n, 4, iz,
731 $ jz, descz, 21, 4, idum1, idum2, info )
732 ELSE
733 CALL pchk1mat( n, 4, n, 4, ia, ja, desca, 8, 4, idum1,
734 $ idum2, info )
735 END IF
736 work( 1 ) = dble( lwopt )
737 iwork( 1 ) = liwmin
738 END IF
739*
740 IF( info.NE.0 ) THEN
741 CALL pxerbla( ictxt, 'PDSYEVX', -info )
742 RETURN
743 ELSE IF( lquery ) THEN
744 RETURN
745 END IF
746*
747* Quick return if possible
748*
749 IF( quickreturn ) THEN
750 IF( wantz ) THEN
751 nz = 0
752 iclustr( 1 ) = 0
753 END IF
754 m = 0
755 work( 1 ) = dble( lwopt )
756 iwork( 1 ) = liwmin
757 RETURN
758 END IF
759*
760* Scale matrix to allowable range, if necessary.
761*
762 abstll = abstol
763 iscale = 0
764 IF( valeig ) THEN
765 vll = vl
766 vuu = vu
767 ELSE
768 vll = zero
769 vuu = zero
770 END IF
771*
772 anrm = pdlansy( 'M', uplo, n, a, ia, ja, desca, work( indwork ) )
773*
774 IF( anrm.GT.zero .AND. anrm.LT.rmin ) THEN
775 iscale = 1
776 sigma = rmin / anrm
777 anrm = anrm*sigma
778 ELSE IF( anrm.GT.rmax ) THEN
779 iscale = 1
780 sigma = rmax / anrm
781 anrm = anrm*sigma
782 END IF
783*
784 IF( iscale.EQ.1 ) THEN
785 CALL pdlascl( uplo, one, sigma, n, n, a, ia, ja, desca, iinfo )
786 IF( abstol.GT.0 )
787 $ abstll = abstol*sigma
788 IF( valeig ) THEN
789 vll = vl*sigma
790 vuu = vu*sigma
791 IF( vuu.EQ.vll ) THEN
792 vuu = vuu + 2*max( abs( vuu )*eps, safmin )
793 END IF
794 END IF
795 END IF
796*
797* Call PDSYNTRD to reduce symmetric matrix to tridiagonal form.
798*
799 lallwork = llwork
800*
801 CALL pdsyntrd( uplo, n, a, ia, ja, desca, work( indd ),
802 $ work( inde ), work( indtau ), work( indwork ),
803 $ llwork, iinfo )
804*
805*
806* Copy the values of D, E to all processes
807*
808* Here PxLARED1D is used to redistribute the tridiagonal matrix.
809* PxLARED1D, however, doesn't yet work with arbritary matrix
810* distributions so we have PxELGET as a backup.
811*
812 offset = 0
813 IF( ia.EQ.1 .AND. ja.EQ.1 .AND. rsrc_a.EQ.0 .AND. csrc_a.EQ.0 )
814 $ THEN
815 CALL pdlared1d( n, ia, ja, desca, work( indd ), work( indd2 ),
816 $ work( indwork ), llwork )
817*
818 CALL pdlared1d( n, ia, ja, desca, work( inde ), work( inde2 ),
819 $ work( indwork ), llwork )
820 IF( .NOT.lower )
821 $ offset = 1
822 ELSE
823 DO 10 i = 1, n
824 CALL pdelget( 'A', ' ', work( indd2+i-1 ), a, i+ia-1,
825 $ i+ja-1, desca )
826 10 CONTINUE
827 IF( lsame( uplo, 'U' ) ) THEN
828 DO 20 i = 1, n - 1
829 CALL pdelget( 'A', ' ', work( inde2+i-1 ), a, i+ia-1,
830 $ i+ja, desca )
831 20 CONTINUE
832 ELSE
833 DO 30 i = 1, n - 1
834 CALL pdelget( 'A', ' ', work( inde2+i-1 ), a, i+ia,
835 $ i+ja-1, desca )
836 30 CONTINUE
837 END IF
838 END IF
839*
840* Call PDSTEBZ and, if eigenvectors are desired, PDSTEIN.
841*
842 IF( wantz ) THEN
843 order = 'B'
844 ELSE
845 order = 'E'
846 END IF
847*
848 CALL pdstebz( ictxt, range, order, n, vll, vuu, il, iu, abstll,
849 $ work( indd2 ), work( inde2+offset ), m, nsplit, w,
850 $ iwork( indibl ), iwork( indisp ), work( indwork ),
851 $ llwork, iwork( 1 ), isizestebz, iinfo )
852*
853*
854* IF PDSTEBZ fails, the error propogates to INFO, but
855* we do not propogate the eigenvalue(s) which failed because:
856* 1) This should never happen if the user specifies
857* ABSTOL = 2 * PDLAMCH( 'U' )
858* 2) PDSTEIN will confirm/deny whether the eigenvalues are
859* close enough.
860*
861 IF( iinfo.NE.0 ) THEN
862 info = info + ierrebz
863 DO 40 i = 1, m
864 iwork( indibl+i-1 ) = abs( iwork( indibl+i-1 ) )
865 40 CONTINUE
866 END IF
867 IF( wantz ) THEN
868*
869 IF( valeig ) THEN
870*
871* Compute the maximum number of eigenvalues that we can
872* compute in the
873* workspace that we have, and that we can store in Z.
874*
875* Loop through the possibilities looking for the largest
876* NZ that we can feed to PDSTEIN and PDORMTR
877*
878* Since all processes must end up with the same value
879* of NZ, we first compute the minimum of LALLWORK
880*
881 CALL igamn2d( ictxt, 'A', ' ', 1, 1, lallwork, 1, 1, 1, -1,
882 $ -1, -1 )
883*
884 maxeigs = descz( n_ )
885*
886 DO 50 nz = min( maxeigs, m ), 0, -1
887 mq0 = numroc( nz, nb, 0, 0, npcol )
888 sizestein = iceil( nz, nprocs )*n + max( 5*n, np0*mq0 )
889 sizeormtr = max( ( nb*( nb-1 ) ) / 2, ( mq0+np0 )*nb ) +
890 $ nb*nb
891*
892 sizesyevx = max( sizestein, sizeormtr )
893 IF( sizesyevx.LE.lallwork )
894 $ GO TO 60
895 50 CONTINUE
896 60 CONTINUE
897 ELSE
898 nz = m
899 END IF
900 nz = max( nz, 0 )
901 IF( nz.NE.m ) THEN
902 info = info + ierrspc
903*
904 DO 70 i = 1, m
905 ifail( i ) = 0
906 70 CONTINUE
907*
908* The following code handles a rare special case
909* - NZ .NE. M means that we don't have enough room to store
910* all the vectors.
911* - NSPLIT .GT. 1 means that the matrix split
912* In this case, we cannot simply take the first NZ eigenvalues
913* because PDSTEBZ sorts the eigenvalues by block when
914* a split occurs. So, we have to make another call to
915* PDSTEBZ with a new upper limit - VUU.
916*
917 IF( nsplit.GT.1 ) THEN
918 CALL dlasrt( 'I', m, w, iinfo )
919 nzz = 0
920 IF( nz.GT.0 ) THEN
921*
922 vuu = w( nz ) - ten*( eps*anrm+safmin )
923 IF( vll.GE.vuu ) THEN
924 nzz = 0
925 ELSE
926 CALL pdstebz( ictxt, range, order, n, vll, vuu, il,
927 $ iu, abstll, work( indd2 ),
928 $ work( inde2+offset ), nzz, nsplit, w,
929 $ iwork( indibl ), iwork( indisp ),
930 $ work( indwork ), llwork, iwork( 1 ),
931 $ isizestebz, iinfo )
932 END IF
933*
934 IF( mod( info / ierrebz, 1 ).EQ.0 ) THEN
935 IF( nzz.GT.nz .OR. iinfo.NE.0 ) THEN
936 info = info + ierrebz
937 END IF
938 END IF
939 END IF
940 nz = min( nz, nzz )
941*
942 END IF
943 END IF
944 CALL pdstein( n, work( indd2 ), work( inde2+offset ), nz, w,
945 $ iwork( indibl ), iwork( indisp ), orfac, z, iz,
946 $ jz, descz, work( indwork ), lallwork, iwork( 1 ),
947 $ isizestein, ifail, iclustr, gap, iinfo )
948*
949 IF( iinfo.GE.nz+1 )
950 $ info = info + ierrcls
951 IF( mod( iinfo, nz+1 ).NE.0 )
952 $ info = info + ierrein
953*
954* Z = Q * Z
955*
956*
957 IF( nz.GT.0 ) THEN
958 CALL pdormtr( 'L', uplo, 'N', n, nz, a, ia, ja, desca,
959 $ work( indtau ), z, iz, jz, descz,
960 $ work( indwork ), llwork, iinfo )
961 END IF
962*
963 END IF
964*
965* If matrix was scaled, then rescale eigenvalues appropriately.
966*
967 IF( iscale.EQ.1 ) THEN
968 CALL dscal( m, one / sigma, w, 1 )
969 END IF
970*
971 work( 1 ) = dble( lwopt )
972 iwork( 1 ) = liwmin
973*
974 RETURN
975*
976* End of PDSYEVX
977*
978 END
subroutine chk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, info)
Definition chk1mat.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pchk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, nextra, ex, expos, info)
Definition pchkxmat.f:3
subroutine pchk2mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, mb, mbpos0, nb, nbpos0, ib, jb, descb, descbpos0, nextra, ex, expos, info)
Definition pchkxmat.f:175
subroutine pdelget(scope, top, alpha, a, ia, ja, desca)
Definition pdelget.f:2
subroutine pdlared1d(n, ia, ja, desc, bycol, byall, work, lwork)
Definition pdlared1d.f:2
subroutine pdlascl(type, cfrom, cto, m, n, a, ia, ja, desca, info)
Definition pdlascl.f:3
subroutine pdormtr(side, uplo, trans, m, n, a, ia, ja, desca, tau, c, ic, jc, descc, work, lwork, info)
Definition pdormtr.f:3
subroutine pdstebz(ictxt, range, order, n, vl, vu, il, iu, abstol, d, e, m, nsplit, w, iblock, isplit, work, lwork, iwork, liwork, info)
Definition pdstebz.f:4
subroutine pdstein(n, d, e, m, w, iblock, isplit, orfac, z, iz, jz, descz, work, lwork, iwork, liwork, ifail, iclustr, gap, info)
Definition pdstein.f:4
subroutine pdsyevx(jobz, range, uplo, n, a, ia, ja, desca, vl, vu, il, iu, abstol, m, nz, w, orfac, z, iz, jz, descz, work, lwork, iwork, liwork, ifail, iclustr, gap, info)
Definition pdsyevx.f:5
subroutine pdsyntrd(uplo, n, a, ia, ja, desca, d, e, tau, work, lwork, info)
Definition pdsyntrd.f:3
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2