SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ pdsyevx()

subroutine pdsyevx ( character  jobz,
character  range,
character  uplo,
integer  n,
double precision, dimension( * )  a,
integer  ia,
integer  ja,
integer, dimension( * )  desca,
double precision  vl,
double precision  vu,
integer  il,
integer  iu,
double precision  abstol,
integer  m,
integer  nz,
double precision, dimension( * )  w,
double precision  orfac,
double precision, dimension( * )  z,
integer  iz,
integer  jz,
integer, dimension( * )  descz,
double precision, dimension( * )  work,
integer  lwork,
integer, dimension( * )  iwork,
integer  liwork,
integer, dimension( * )  ifail,
integer, dimension( * )  iclustr,
double precision, dimension( * )  gap,
integer  info 
)

Definition at line 1 of file pdsyevx.f.

5*
6* -- ScaLAPACK routine (version 1.7) --
7* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8* and University of California, Berkeley.
9* May 25, 2001
10*
11* .. Scalar Arguments ..
12 CHARACTER JOBZ, RANGE, UPLO
13 INTEGER IA, IL, INFO, IU, IZ, JA, JZ, LIWORK, LWORK, M,
14 $ N, NZ
15 DOUBLE PRECISION ABSTOL, ORFAC, VL, VU
16* ..
17* .. Array Arguments ..
18 INTEGER DESCA( * ), DESCZ( * ), ICLUSTR( * ),
19 $ IFAIL( * ), IWORK( * )
20 DOUBLE PRECISION A( * ), GAP( * ), W( * ), WORK( * ), Z( * )
21* ..
22*
23* Purpose
24* =======
25*
26* PDSYEVX computes selected eigenvalues and, optionally, eigenvectors
27* of a real symmetric matrix A by calling the recommended sequence
28* of ScaLAPACK routines. Eigenvalues/vectors can be selected by
29* specifying a range of values or a range of indices for the desired
30* eigenvalues.
31*
32* Notes
33* =====
34*
35* Each global data object is described by an associated description
36* vector. This vector stores the information required to establish
37* the mapping between an object element and its corresponding process
38* and memory location.
39*
40* Let A be a generic term for any 2D block cyclicly distributed array.
41* Such a global array has an associated description vector DESCA.
42* In the following comments, the character _ should be read as
43* "of the global array".
44*
45* NOTATION STORED IN EXPLANATION
46* --------------- -------------- --------------------------------------
47* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
48* DTYPE_A = 1.
49* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
50* the BLACS process grid A is distribu-
51* ted over. The context itself is glo-
52* bal, but the handle (the integer
53* value) may vary.
54* M_A (global) DESCA( M_ ) The number of rows in the global
55* array A.
56* N_A (global) DESCA( N_ ) The number of columns in the global
57* array A.
58* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
59* the rows of the array.
60* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
61* the columns of the array.
62* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
63* row of the array A is distributed.
64* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
65* first column of the array A is
66* distributed.
67* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
68* array. LLD_A >= MAX(1,LOCr(M_A)).
69*
70* Let K be the number of rows or columns of a distributed matrix,
71* and assume that its process grid has dimension p x q.
72* LOCr( K ) denotes the number of elements of K that a process
73* would receive if K were distributed over the p processes of its
74* process column.
75* Similarly, LOCc( K ) denotes the number of elements of K that a
76* process would receive if K were distributed over the q processes of
77* its process row.
78* The values of LOCr() and LOCc() may be determined via a call to the
79* ScaLAPACK tool function, NUMROC:
80* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
81* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
82* An upper bound for these quantities may be computed by:
83* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
84* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
85*
86* PDSYEVX assumes IEEE 754 standard compliant arithmetic. To port
87* to a system which does not have IEEE 754 arithmetic, modify
88* the appropriate SLmake.inc file to include the compiler switch
89* -DNO_IEEE. This switch only affects the compilation of pdlaiect.c.
90*
91* Arguments
92* =========
93*
94* NP = the number of rows local to a given process.
95* NQ = the number of columns local to a given process.
96*
97* JOBZ (global input) CHARACTER*1
98* Specifies whether or not to compute the eigenvectors:
99* = 'N': Compute eigenvalues only.
100* = 'V': Compute eigenvalues and eigenvectors.
101*
102* RANGE (global input) CHARACTER*1
103* = 'A': all eigenvalues will be found.
104* = 'V': all eigenvalues in the interval [VL,VU] will be found.
105* = 'I': the IL-th through IU-th eigenvalues will be found.
106*
107* UPLO (global input) CHARACTER*1
108* Specifies whether the upper or lower triangular part of the
109* symmetric matrix A is stored:
110* = 'U': Upper triangular
111* = 'L': Lower triangular
112*
113* N (global input) INTEGER
114* The number of rows and columns of the matrix A. N >= 0.
115*
116* A (local input/workspace) block cyclic DOUBLE PRECISION array,
117* global dimension (N, N),
118* local dimension ( LLD_A, LOCc(JA+N-1) )
119*
120* On entry, the symmetric matrix A. If UPLO = 'U', only the
121* upper triangular part of A is used to define the elements of
122* the symmetric matrix. If UPLO = 'L', only the lower
123* triangular part of A is used to define the elements of the
124* symmetric matrix.
125*
126* On exit, the lower triangle (if UPLO='L') or the upper
127* triangle (if UPLO='U') of A, including the diagonal, is
128* destroyed.
129*
130* IA (global input) INTEGER
131* A's global row index, which points to the beginning of the
132* submatrix which is to be operated on.
133*
134* JA (global input) INTEGER
135* A's global column index, which points to the beginning of
136* the submatrix which is to be operated on.
137*
138* DESCA (global and local input) INTEGER array of dimension DLEN_.
139* The array descriptor for the distributed matrix A.
140* If DESCA( CTXT_ ) is incorrect, PDSYEVX cannot guarantee
141* correct error reporting.
142*
143* VL (global input) DOUBLE PRECISION
144* If RANGE='V', the lower bound of the interval to be searched
145* for eigenvalues. Not referenced if RANGE = 'A' or 'I'.
146*
147* VU (global input) DOUBLE PRECISION
148* If RANGE='V', the upper bound of the interval to be searched
149* for eigenvalues. Not referenced if RANGE = 'A' or 'I'.
150*
151* IL (global input) INTEGER
152* If RANGE='I', the index (from smallest to largest) of the
153* smallest eigenvalue to be returned. IL >= 1.
154* Not referenced if RANGE = 'A' or 'V'.
155*
156* IU (global input) INTEGER
157* If RANGE='I', the index (from smallest to largest) of the
158* largest eigenvalue to be returned. min(IL,N) <= IU <= N.
159* Not referenced if RANGE = 'A' or 'V'.
160*
161* ABSTOL (global input) DOUBLE PRECISION
162* If JOBZ='V', setting ABSTOL to PDLAMCH( CONTEXT, 'U') yields
163* the most orthogonal eigenvectors.
164*
165* The absolute error tolerance for the eigenvalues.
166* An approximate eigenvalue is accepted as converged
167* when it is determined to lie in an interval [a,b]
168* of width less than or equal to
169*
170* ABSTOL + EPS * max( |a|,|b| ) ,
171*
172* where EPS is the machine precision. If ABSTOL is less than
173* or equal to zero, then EPS*norm(T) will be used in its place,
174* where norm(T) is the 1-norm of the tridiagonal matrix
175* obtained by reducing A to tridiagonal form.
176*
177* Eigenvalues will be computed most accurately when ABSTOL is
178* set to twice the underflow threshold 2*PDLAMCH('S') not zero.
179* If this routine returns with ((MOD(INFO,2).NE.0) .OR.
180* (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or
181* eigenvectors did not converge, try setting ABSTOL to
182* 2*PDLAMCH('S').
183*
184* See "Computing Small Singular Values of Bidiagonal Matrices
185* with Guaranteed High Relative Accuracy," by Demmel and
186* Kahan, LAPACK Working Note #3.
187*
188* See "On the correctness of Parallel Bisection in Floating
189* Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70
190*
191* M (global output) INTEGER
192* Total number of eigenvalues found. 0 <= M <= N.
193*
194* NZ (global output) INTEGER
195* Total number of eigenvectors computed. 0 <= NZ <= M.
196* The number of columns of Z that are filled.
197* If JOBZ .NE. 'V', NZ is not referenced.
198* If JOBZ .EQ. 'V', NZ = M unless the user supplies
199* insufficient space and PDSYEVX is not able to detect this
200* before beginning computation. To get all the eigenvectors
201* requested, the user must supply both sufficient
202* space to hold the eigenvectors in Z (M .LE. DESCZ(N_))
203* and sufficient workspace to compute them. (See LWORK below.)
204* PDSYEVX is always able to detect insufficient space without
205* computation unless RANGE .EQ. 'V'.
206*
207* W (global output) DOUBLE PRECISION array, dimension (N)
208* On normal exit, the first M entries contain the selected
209* eigenvalues in ascending order.
210*
211* ORFAC (global input) DOUBLE PRECISION
212* Specifies which eigenvectors should be reorthogonalized.
213* Eigenvectors that correspond to eigenvalues which are within
214* tol=ORFAC*norm(A) of each other are to be reorthogonalized.
215* However, if the workspace is insufficient (see LWORK),
216* tol may be decreased until all eigenvectors to be
217* reorthogonalized can be stored in one process.
218* No reorthogonalization will be done if ORFAC equals zero.
219* A default value of 10^-3 is used if ORFAC is negative.
220* ORFAC should be identical on all processes.
221*
222* Z (local output) DOUBLE PRECISION array,
223* global dimension (N, N),
224* local dimension ( LLD_Z, LOCc(JZ+N-1) )
225* If JOBZ = 'V', then on normal exit the first M columns of Z
226* contain the orthonormal eigenvectors of the matrix
227* corresponding to the selected eigenvalues. If an eigenvector
228* fails to converge, then that column of Z contains the latest
229* approximation to the eigenvector, and the index of the
230* eigenvector is returned in IFAIL.
231* If JOBZ = 'N', then Z is not referenced.
232*
233* IZ (global input) INTEGER
234* Z's global row index, which points to the beginning of the
235* submatrix which is to be operated on.
236*
237* JZ (global input) INTEGER
238* Z's global column index, which points to the beginning of
239* the submatrix which is to be operated on.
240*
241* DESCZ (global and local input) INTEGER array of dimension DLEN_.
242* The array descriptor for the distributed matrix Z.
243* DESCZ( CTXT_ ) must equal DESCA( CTXT_ )
244*
245* WORK (local workspace/output) DOUBLE PRECISION array,
246* dimension max(3,LWORK)
247* On return, WORK(1) contains the optimal amount of
248* workspace required for efficient execution.
249* if JOBZ='N' WORK(1) = optimal amount of workspace
250* required to compute eigenvalues efficiently
251* if JOBZ='V' WORK(1) = optimal amount of workspace
252* required to compute eigenvalues and eigenvectors
253* efficiently with no guarantee on orthogonality.
254* If RANGE='V', it is assumed that all eigenvectors
255* may be required.
256*
257* LWORK (local input) INTEGER
258* Size of WORK
259* See below for definitions of variables used to define LWORK.
260* If no eigenvectors are requested (JOBZ = 'N') then
261* LWORK >= 5 * N + MAX( 5 * NN, NB * ( NP0 + 1 ) )
262* If eigenvectors are requested (JOBZ = 'V' ) then
263* the amount of workspace required to guarantee that all
264* eigenvectors are computed is:
265* LWORK >= 5*N + MAX( 5*NN, NP0 * MQ0 + 2 * NB * NB ) +
266* ICEIL( NEIG, NPROW*NPCOL)*NN
267*
268* The computed eigenvectors may not be orthogonal if the
269* minimal workspace is supplied and ORFAC is too small.
270* If you want to guarantee orthogonality (at the cost
271* of potentially poor performance) you should add
272* the following to LWORK:
273* (CLUSTERSIZE-1)*N
274* where CLUSTERSIZE is the number of eigenvalues in the
275* largest cluster, where a cluster is defined as a set of
276* close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) |
277* W(J+1) <= W(J) + ORFAC*2*norm(A) }
278* Variable definitions:
279* NEIG = number of eigenvectors requested
280* NB = DESCA( MB_ ) = DESCA( NB_ ) =
281* DESCZ( MB_ ) = DESCZ( NB_ )
282* NN = MAX( N, NB, 2 )
283* DESCA( RSRC_ ) = DESCA( NB_ ) = DESCZ( RSRC_ ) =
284* DESCZ( CSRC_ ) = 0
285* NP0 = NUMROC( NN, NB, 0, 0, NPROW )
286* MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL )
287* ICEIL( X, Y ) is a ScaLAPACK function returning
288* ceiling(X/Y)
289*
290* When LWORK is too small:
291* If LWORK is too small to guarantee orthogonality,
292* PDSYEVX attempts to maintain orthogonality in
293* the clusters with the smallest
294* spacing between the eigenvalues.
295* If LWORK is too small to compute all the eigenvectors
296* requested, no computation is performed and INFO=-23
297* is returned. Note that when RANGE='V', PDSYEVX does
298* not know how many eigenvectors are requested until
299* the eigenvalues are computed. Therefore, when RANGE='V'
300* and as long as LWORK is large enough to allow PDSYEVX to
301* compute the eigenvalues, PDSYEVX will compute the
302* eigenvalues and as many eigenvectors as it can.
303*
304* Relationship between workspace, orthogonality & performance:
305* Greater performance can be achieved if adequate workspace
306* is provided. On the other hand, in some situations,
307* performance can decrease as the workspace provided
308* increases above the workspace amount shown below:
309*
310* For optimal performance, greater workspace may be
311* needed, i.e.
312* LWORK >= MAX( LWORK, 5*N + NSYTRD_LWOPT )
313* Where:
314* LWORK, as defined previously, depends upon the number
315* of eigenvectors requested, and
316* NSYTRD_LWOPT = N + 2*( ANB+1 )*( 4*NPS+2 ) +
317* ( NPS + 3 ) * NPS
318*
319* ANB = PJLAENV( DESCA( CTXT_), 3, 'PDSYTTRD', 'L',
320* 0, 0, 0, 0)
321* SQNPC = INT( SQRT( DBLE( NPROW * NPCOL ) ) )
322* NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB )
323*
324* NUMROC is a ScaLAPACK tool functions;
325* PJLAENV is a ScaLAPACK envionmental inquiry function
326* MYROW, MYCOL, NPROW and NPCOL can be determined by
327* calling the subroutine BLACS_GRIDINFO.
328*
329* For large N, no extra workspace is needed, however the
330* biggest boost in performance comes for small N, so it
331* is wise to provide the extra workspace (typically less
332* than a Megabyte per process).
333*
334* If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing
335* enough space to compute all the eigenvectors
336* orthogonally will cause serious degradation in
337* performance. In the limit (i.e. CLUSTERSIZE = N-1)
338* PDSTEIN will perform no better than DSTEIN on 1
339* processor.
340* For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing
341* all eigenvectors will increase the total execution time
342* by a factor of 2 or more.
343* For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will
344* grow as the square of the cluster size, all other factors
345* remaining equal and assuming enough workspace. Less
346* workspace means less reorthogonalization but faster
347* execution.
348*
349* If LWORK = -1, then LWORK is global input and a workspace
350* query is assumed; the routine only calculates the size
351* required for optimal performance for all work arrays. Each of
352* these values is returned in the first entry of the
353* corresponding work arrays, and no error message is issued by
354* PXERBLA.
355*
356* IWORK (local workspace) INTEGER array
357* On return, IWORK(1) contains the amount of integer workspace
358* required.
359*
360* LIWORK (local input) INTEGER
361* size of IWORK
362* LIWORK >= 6 * NNP
363* Where:
364* NNP = MAX( N, NPROW*NPCOL + 1, 4 )
365* If LIWORK = -1, then LIWORK is global input and a workspace
366* query is assumed; the routine only calculates the minimum
367* and optimal size for all work arrays. Each of these
368* values is returned in the first entry of the corresponding
369* work array, and no error message is issued by PXERBLA.
370*
371* IFAIL (global output) INTEGER array, dimension (N)
372* If JOBZ = 'V', then on normal exit, the first M elements of
373* IFAIL are zero. If (MOD(INFO,2).NE.0) on exit, then
374* IFAIL contains the
375* indices of the eigenvectors that failed to converge.
376* If JOBZ = 'N', then IFAIL is not referenced.
377*
378* ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL)
379* This array contains indices of eigenvectors corresponding to
380* a cluster of eigenvalues that could not be reorthogonalized
381* due to insufficient workspace (see LWORK, ORFAC and INFO).
382* Eigenvectors corresponding to clusters of eigenvalues indexed
383* ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be
384* reorthogonalized due to lack of workspace. Hence the
385* eigenvectors corresponding to these clusters may not be
386* orthogonal. ICLUSTR() is a zero terminated array.
387* (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if
388* K is the number of clusters
389* ICLUSTR is not referenced if JOBZ = 'N'
390*
391* GAP (global output) DOUBLE PRECISION array,
392* dimension (NPROW*NPCOL)
393* This array contains the gap between eigenvalues whose
394* eigenvectors could not be reorthogonalized. The output
395* values in this array correspond to the clusters indicated
396* by the array ICLUSTR. As a result, the dot product between
397* eigenvectors correspoding to the I^th cluster may be as high
398* as ( C * n ) / GAP(I) where C is a small constant.
399*
400* INFO (global output) INTEGER
401* = 0: successful exit
402* < 0: If the i-th argument is an array and the j-entry had
403* an illegal value, then INFO = -(i*100+j), if the i-th
404* argument is a scalar and had an illegal value, then
405* INFO = -i.
406* > 0: if (MOD(INFO,2).NE.0), then one or more eigenvectors
407* failed to converge. Their indices are stored
408* in IFAIL. Ensure ABSTOL=2.0*PDLAMCH( 'U' )
409* Send e-mail to scalapack@cs.utk.edu
410* if (MOD(INFO/2,2).NE.0),then eigenvectors corresponding
411* to one or more clusters of eigenvalues could not be
412* reorthogonalized because of insufficient workspace.
413* The indices of the clusters are stored in the array
414* ICLUSTR.
415* if (MOD(INFO/4,2).NE.0), then space limit prevented
416* PDSYEVX from computing all of the eigenvectors
417* between VL and VU. The number of eigenvectors
418* computed is returned in NZ.
419* if (MOD(INFO/8,2).NE.0), then PDSTEBZ failed to compute
420* eigenvalues. Ensure ABSTOL=2.0*PDLAMCH( 'U' )
421* Send e-mail to scalapack@cs.utk.edu
422*
423* Alignment requirements
424* ======================
425*
426* The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1)
427* must verify some alignment properties, namely the following
428* expressions should be true:
429*
430* ( MB_A.EQ.NB_A.EQ.MB_Z .AND. IROFFA.EQ.IROFFZ .AND. IROFFA.EQ.0 .AND.
431* IAROW.EQ.IZROW )
432* where
433* IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ).
434*
435* =====================================================================
436*
437* Differences between PDSYEVX and DSYEVX
438* ======================================
439*
440* A, LDA -> A, IA, JA, DESCA
441* Z, LDZ -> Z, IZ, JZ, DESCZ
442* WORKSPACE needs are larger for PDSYEVX.
443* LIWORK parameter added
444*
445* ORFAC, ICLUSTER() and GAP() parameters added
446* meaning of INFO is changed
447*
448* Functional differences:
449* PDSYEVX does not promise orthogonality for eigenvectors associated
450* with tighly clustered eigenvalues.
451* PDSYEVX does not reorthogonalize eigenvectors
452* that are on different processes. The extent of reorthogonalization
453* is controlled by the input parameter LWORK.
454*
455* Version 1.4 limitations:
456* DESCA(MB_) = DESCA(NB_)
457* DESCA(M_) = DESCZ(M_)
458* DESCA(N_) = DESCZ(N_)
459* DESCA(MB_) = DESCZ(MB_)
460* DESCA(NB_) = DESCZ(NB_)
461* DESCA(RSRC_) = DESCZ(RSRC_)
462*
463* .. Parameters ..
464 INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
465 $ MB_, NB_, RSRC_, CSRC_, LLD_
466 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
467 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
468 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
469 DOUBLE PRECISION ZERO, ONE, TEN, FIVE
470 parameter( zero = 0.0d+0, one = 1.0d+0, ten = 10.0d+0,
471 $ five = 5.0d+0 )
472 INTEGER IERREIN, IERRCLS, IERRSPC, IERREBZ
473 parameter( ierrein = 1, ierrcls = 2, ierrspc = 4,
474 $ ierrebz = 8 )
475* ..
476* .. Local Scalars ..
477 LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, QUICKRETURN,
478 $ VALEIG, WANTZ
479 CHARACTER ORDER
480 INTEGER ANB, CSRC_A, I, IAROW, ICOFFA, ICTXT, IINFO,
481 $ INDD, INDD2, INDE, INDE2, INDIBL, INDISP,
482 $ INDTAU, INDWORK, IROFFA, IROFFZ, ISCALE,
483 $ ISIZESTEBZ, ISIZESTEIN, IZROW, LALLWORK,
484 $ LIWMIN, LLWORK, LWMIN, LWOPT, MAXEIGS, MB_A,
485 $ MQ0, MYCOL, MYROW, NB, NB_A, NEIG, NN, NNP,
486 $ NP0, NPCOL, NPROCS, NPROW, NPS, NSPLIT,
487 $ NSYTRD_LWOPT, NZZ, OFFSET, RSRC_A, RSRC_Z,
488 $ SIZEORMTR, SIZESTEIN, SIZESYEVX, SQNPC
489 DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN,
490 $ SIGMA, SMLNUM, VLL, VUU
491* ..
492* .. Local Arrays ..
493 INTEGER IDUM1( 4 ), IDUM2( 4 )
494* ..
495* .. External Functions ..
496 LOGICAL LSAME
497 INTEGER ICEIL, INDXG2P, NUMROC, PJLAENV
498 DOUBLE PRECISION PDLAMCH, PDLANSY
499 EXTERNAL lsame, iceil, indxg2p, numroc, pjlaenv,
501* ..
502* .. External Subroutines ..
503 EXTERNAL blacs_gridinfo, chk1mat, dgebr2d, dgebs2d,
504 $ dlasrt, dscal, igamn2d, pchk1mat, pchk2mat,
507* ..
508* .. Intrinsic Functions ..
509 INTRINSIC abs, dble, ichar, int, max, min, mod, sqrt
510* ..
511* .. Executable Statements ..
512* This is just to keep ftnchek and toolpack/1 happy
513 IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
514 $ rsrc_.LT.0 )RETURN
515*
516 quickreturn = ( n.EQ.0 )
517*
518* Test the input arguments.
519*
520 ictxt = desca( ctxt_ )
521 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
522 info = 0
523*
524 wantz = lsame( jobz, 'V' )
525 IF( nprow.EQ.-1 ) THEN
526 info = -( 800+ctxt_ )
527 ELSE IF( wantz ) THEN
528 IF( ictxt.NE.descz( ctxt_ ) ) THEN
529 info = -( 2100+ctxt_ )
530 END IF
531 END IF
532 IF( info.EQ.0 ) THEN
533 CALL chk1mat( n, 4, n, 4, ia, ja, desca, 8, info )
534 IF( wantz )
535 $ CALL chk1mat( n, 4, n, 4, iz, jz, descz, 21, info )
536*
537 IF( info.EQ.0 ) THEN
538*
539* Get machine constants.
540*
541 safmin = pdlamch( ictxt, 'Safe minimum' )
542 eps = pdlamch( ictxt, 'Precision' )
543 smlnum = safmin / eps
544 bignum = one / smlnum
545 rmin = sqrt( smlnum )
546 rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) )
547*
548 nprocs = nprow*npcol
549 lower = lsame( uplo, 'L' )
550 alleig = lsame( range, 'A' )
551 valeig = lsame( range, 'V' )
552 indeig = lsame( range, 'I' )
553*
554* Set up pointers into the WORK array
555*
556 indtau = 1
557 inde = indtau + n
558 indd = inde + n
559 indd2 = indd + n
560 inde2 = indd2 + n
561 indwork = inde2 + n
562 llwork = lwork - indwork + 1
563*
564* Set up pointers into the IWORK array
565*
566 isizestein = 3*n + nprocs + 1
567 isizestebz = max( 4*n, 14, nprocs )
568 indibl = ( max( isizestein, isizestebz ) ) + 1
569 indisp = indibl + n
570*
571* Compute the total amount of space needed
572*
573 lquery = .false.
574 IF( lwork.EQ.-1 .OR. liwork.EQ.-1 )
575 $ lquery = .true.
576*
577 nnp = max( n, nprocs+1, 4 )
578 liwmin = 6*nnp
579*
580 nprocs = nprow*npcol
581 nb_a = desca( nb_ )
582 mb_a = desca( mb_ )
583 nb = nb_a
584 nn = max( n, nb, 2 )
585*
586 rsrc_a = desca( rsrc_ )
587 csrc_a = desca( csrc_ )
588 iroffa = mod( ia-1, mb_a )
589 icoffa = mod( ja-1, nb_a )
590 iarow = indxg2p( 1, nb_a, myrow, rsrc_a, nprow )
591 np0 = numroc( n+iroffa, nb, 0, 0, nprow )
592 mq0 = numroc( n+icoffa, nb, 0, 0, npcol )
593 IF( wantz ) THEN
594 rsrc_z = descz( rsrc_ )
595 iroffz = mod( iz-1, mb_a )
596 izrow = indxg2p( 1, nb_a, myrow, rsrc_z, nprow )
597 ELSE
598 iroffz = 0
599 izrow = 0
600 END IF
601*
602 IF( ( .NOT.wantz ) .OR. ( valeig .AND. ( .NOT.lquery ) ) )
603 $ THEN
604 lwmin = 5*n + max( 5*nn, nb*( np0+1 ) )
605 IF( wantz ) THEN
606 mq0 = numroc( max( n, nb, 2 ), nb, 0, 0, npcol )
607 lwopt = 5*n + max( 5*nn, np0*mq0+2*nb*nb )
608 ELSE
609 lwopt = lwmin
610 END IF
611 neig = 0
612 ELSE
613 IF( alleig .OR. valeig ) THEN
614 neig = n
615 ELSE IF( indeig ) THEN
616 neig = iu - il + 1
617 END IF
618 mq0 = numroc( max( neig, nb, 2 ), nb, 0, 0, npcol )
619 lwmin = 5*n + max( 5*nn, np0*mq0+2*nb*nb ) +
620 $ iceil( neig, nprow*npcol )*nn
621 lwopt = lwmin
622*
623 END IF
624*
625* Compute how much workspace is needed to use the
626* new TRD code
627*
628 anb = pjlaenv( ictxt, 3, 'PDSYTTRD', 'L', 0, 0, 0, 0 )
629 sqnpc = int( sqrt( dble( nprow*npcol ) ) )
630 nps = max( numroc( n, 1, 0, 0, sqnpc ), 2*anb )
631 nsytrd_lwopt = 2*( anb+1 )*( 4*nps+2 ) + ( nps+4 )*nps
632 lwopt = max( lwopt, 5*n+nsytrd_lwopt )
633*
634 END IF
635 IF( info.EQ.0 ) THEN
636 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
637 work( 1 ) = abstol
638 IF( valeig ) THEN
639 work( 2 ) = vl
640 work( 3 ) = vu
641 ELSE
642 work( 2 ) = zero
643 work( 3 ) = zero
644 END IF
645 CALL dgebs2d( ictxt, 'ALL', ' ', 3, 1, work, 3 )
646 ELSE
647 CALL dgebr2d( ictxt, 'ALL', ' ', 3, 1, work, 3, 0, 0 )
648 END IF
649 IF( .NOT.( wantz .OR. lsame( jobz, 'N' ) ) ) THEN
650 info = -1
651 ELSE IF( .NOT.( alleig .OR. valeig .OR. indeig ) ) THEN
652 info = -2
653 ELSE IF( .NOT.( lower .OR. lsame( uplo, 'U' ) ) ) THEN
654 info = -3
655 ELSE IF( valeig .AND. n.GT.0 .AND. vu.LE.vl ) THEN
656 info = -10
657 ELSE IF( indeig .AND. ( il.LT.1 .OR. il.GT.max( 1, n ) ) )
658 $ THEN
659 info = -11
660 ELSE IF( indeig .AND. ( iu.LT.min( n, il ) .OR. iu.GT.n ) )
661 $ THEN
662 info = -12
663 ELSE IF( lwork.LT.lwmin .AND. lwork.NE.-1 ) THEN
664 info = -23
665 ELSE IF( liwork.LT.liwmin .AND. liwork.NE.-1 ) THEN
666 info = -25
667 ELSE IF( valeig .AND. ( abs( work( 2 )-vl ).GT.five*eps*
668 $ abs( vl ) ) ) THEN
669 info = -9
670 ELSE IF( valeig .AND. ( abs( work( 3 )-vu ).GT.five*eps*
671 $ abs( vu ) ) ) THEN
672 info = -10
673 ELSE IF( abs( work( 1 )-abstol ).GT.five*eps*abs( abstol ) )
674 $ THEN
675 info = -13
676 ELSE IF( iroffa.NE.0 ) THEN
677 info = -6
678 ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
679 info = -( 800+nb_ )
680 END IF
681 IF( wantz ) THEN
682 IF( iroffa.NE.iroffz ) THEN
683 info = -19
684 ELSE IF( iarow.NE.izrow ) THEN
685 info = -19
686 ELSE IF( desca( m_ ).NE.descz( m_ ) ) THEN
687 info = -( 2100+m_ )
688 ELSE IF( desca( n_ ).NE.descz( n_ ) ) THEN
689 info = -( 2100+n_ )
690 ELSE IF( desca( mb_ ).NE.descz( mb_ ) ) THEN
691 info = -( 2100+mb_ )
692 ELSE IF( desca( nb_ ).NE.descz( nb_ ) ) THEN
693 info = -( 2100+nb_ )
694 ELSE IF( desca( rsrc_ ).NE.descz( rsrc_ ) ) THEN
695 info = -( 2100+rsrc_ )
696 ELSE IF( desca( csrc_ ).NE.descz( csrc_ ) ) THEN
697 info = -( 2100+csrc_ )
698 ELSE IF( ictxt.NE.descz( ctxt_ ) ) THEN
699 info = -( 2100+ctxt_ )
700 END IF
701 END IF
702 END IF
703 IF( wantz ) THEN
704 idum1( 1 ) = ichar( 'V' )
705 ELSE
706 idum1( 1 ) = ichar( 'N' )
707 END IF
708 idum2( 1 ) = 1
709 IF( lower ) THEN
710 idum1( 2 ) = ichar( 'L' )
711 ELSE
712 idum1( 2 ) = ichar( 'U' )
713 END IF
714 idum2( 2 ) = 2
715 IF( alleig ) THEN
716 idum1( 3 ) = ichar( 'A' )
717 ELSE IF( indeig ) THEN
718 idum1( 3 ) = ichar( 'I' )
719 ELSE
720 idum1( 3 ) = ichar( 'V' )
721 END IF
722 idum2( 3 ) = 3
723 IF( lquery ) THEN
724 idum1( 4 ) = -1
725 ELSE
726 idum1( 4 ) = 1
727 END IF
728 idum2( 4 ) = 4
729 IF( wantz ) THEN
730 CALL pchk2mat( n, 4, n, 4, ia, ja, desca, 8, n, 4, n, 4, iz,
731 $ jz, descz, 21, 4, idum1, idum2, info )
732 ELSE
733 CALL pchk1mat( n, 4, n, 4, ia, ja, desca, 8, 4, idum1,
734 $ idum2, info )
735 END IF
736 work( 1 ) = dble( lwopt )
737 iwork( 1 ) = liwmin
738 END IF
739*
740 IF( info.NE.0 ) THEN
741 CALL pxerbla( ictxt, 'PDSYEVX', -info )
742 RETURN
743 ELSE IF( lquery ) THEN
744 RETURN
745 END IF
746*
747* Quick return if possible
748*
749 IF( quickreturn ) THEN
750 IF( wantz ) THEN
751 nz = 0
752 iclustr( 1 ) = 0
753 END IF
754 m = 0
755 work( 1 ) = dble( lwopt )
756 iwork( 1 ) = liwmin
757 RETURN
758 END IF
759*
760* Scale matrix to allowable range, if necessary.
761*
762 abstll = abstol
763 iscale = 0
764 IF( valeig ) THEN
765 vll = vl
766 vuu = vu
767 ELSE
768 vll = zero
769 vuu = zero
770 END IF
771*
772 anrm = pdlansy( 'M', uplo, n, a, ia, ja, desca, work( indwork ) )
773*
774 IF( anrm.GT.zero .AND. anrm.LT.rmin ) THEN
775 iscale = 1
776 sigma = rmin / anrm
777 anrm = anrm*sigma
778 ELSE IF( anrm.GT.rmax ) THEN
779 iscale = 1
780 sigma = rmax / anrm
781 anrm = anrm*sigma
782 END IF
783*
784 IF( iscale.EQ.1 ) THEN
785 CALL pdlascl( uplo, one, sigma, n, n, a, ia, ja, desca, iinfo )
786 IF( abstol.GT.0 )
787 $ abstll = abstol*sigma
788 IF( valeig ) THEN
789 vll = vl*sigma
790 vuu = vu*sigma
791 IF( vuu.EQ.vll ) THEN
792 vuu = vuu + 2*max( abs( vuu )*eps, safmin )
793 END IF
794 END IF
795 END IF
796*
797* Call PDSYNTRD to reduce symmetric matrix to tridiagonal form.
798*
799 lallwork = llwork
800*
801 CALL pdsyntrd( uplo, n, a, ia, ja, desca, work( indd ),
802 $ work( inde ), work( indtau ), work( indwork ),
803 $ llwork, iinfo )
804*
805*
806* Copy the values of D, E to all processes
807*
808* Here PxLARED1D is used to redistribute the tridiagonal matrix.
809* PxLARED1D, however, doesn't yet work with arbritary matrix
810* distributions so we have PxELGET as a backup.
811*
812 offset = 0
813 IF( ia.EQ.1 .AND. ja.EQ.1 .AND. rsrc_a.EQ.0 .AND. csrc_a.EQ.0 )
814 $ THEN
815 CALL pdlared1d( n, ia, ja, desca, work( indd ), work( indd2 ),
816 $ work( indwork ), llwork )
817*
818 CALL pdlared1d( n, ia, ja, desca, work( inde ), work( inde2 ),
819 $ work( indwork ), llwork )
820 IF( .NOT.lower )
821 $ offset = 1
822 ELSE
823 DO 10 i = 1, n
824 CALL pdelget( 'A', ' ', work( indd2+i-1 ), a, i+ia-1,
825 $ i+ja-1, desca )
826 10 CONTINUE
827 IF( lsame( uplo, 'U' ) ) THEN
828 DO 20 i = 1, n - 1
829 CALL pdelget( 'A', ' ', work( inde2+i-1 ), a, i+ia-1,
830 $ i+ja, desca )
831 20 CONTINUE
832 ELSE
833 DO 30 i = 1, n - 1
834 CALL pdelget( 'A', ' ', work( inde2+i-1 ), a, i+ia,
835 $ i+ja-1, desca )
836 30 CONTINUE
837 END IF
838 END IF
839*
840* Call PDSTEBZ and, if eigenvectors are desired, PDSTEIN.
841*
842 IF( wantz ) THEN
843 order = 'B'
844 ELSE
845 order = 'E'
846 END IF
847*
848 CALL pdstebz( ictxt, range, order, n, vll, vuu, il, iu, abstll,
849 $ work( indd2 ), work( inde2+offset ), m, nsplit, w,
850 $ iwork( indibl ), iwork( indisp ), work( indwork ),
851 $ llwork, iwork( 1 ), isizestebz, iinfo )
852*
853*
854* IF PDSTEBZ fails, the error propogates to INFO, but
855* we do not propogate the eigenvalue(s) which failed because:
856* 1) This should never happen if the user specifies
857* ABSTOL = 2 * PDLAMCH( 'U' )
858* 2) PDSTEIN will confirm/deny whether the eigenvalues are
859* close enough.
860*
861 IF( iinfo.NE.0 ) THEN
862 info = info + ierrebz
863 DO 40 i = 1, m
864 iwork( indibl+i-1 ) = abs( iwork( indibl+i-1 ) )
865 40 CONTINUE
866 END IF
867 IF( wantz ) THEN
868*
869 IF( valeig ) THEN
870*
871* Compute the maximum number of eigenvalues that we can
872* compute in the
873* workspace that we have, and that we can store in Z.
874*
875* Loop through the possibilities looking for the largest
876* NZ that we can feed to PDSTEIN and PDORMTR
877*
878* Since all processes must end up with the same value
879* of NZ, we first compute the minimum of LALLWORK
880*
881 CALL igamn2d( ictxt, 'A', ' ', 1, 1, lallwork, 1, 1, 1, -1,
882 $ -1, -1 )
883*
884 maxeigs = descz( n_ )
885*
886 DO 50 nz = min( maxeigs, m ), 0, -1
887 mq0 = numroc( nz, nb, 0, 0, npcol )
888 sizestein = iceil( nz, nprocs )*n + max( 5*n, np0*mq0 )
889 sizeormtr = max( ( nb*( nb-1 ) ) / 2, ( mq0+np0 )*nb ) +
890 $ nb*nb
891*
892 sizesyevx = max( sizestein, sizeormtr )
893 IF( sizesyevx.LE.lallwork )
894 $ GO TO 60
895 50 CONTINUE
896 60 CONTINUE
897 ELSE
898 nz = m
899 END IF
900 nz = max( nz, 0 )
901 IF( nz.NE.m ) THEN
902 info = info + ierrspc
903*
904 DO 70 i = 1, m
905 ifail( i ) = 0
906 70 CONTINUE
907*
908* The following code handles a rare special case
909* - NZ .NE. M means that we don't have enough room to store
910* all the vectors.
911* - NSPLIT .GT. 1 means that the matrix split
912* In this case, we cannot simply take the first NZ eigenvalues
913* because PDSTEBZ sorts the eigenvalues by block when
914* a split occurs. So, we have to make another call to
915* PDSTEBZ with a new upper limit - VUU.
916*
917 IF( nsplit.GT.1 ) THEN
918 CALL dlasrt( 'I', m, w, iinfo )
919 nzz = 0
920 IF( nz.GT.0 ) THEN
921*
922 vuu = w( nz ) - ten*( eps*anrm+safmin )
923 IF( vll.GE.vuu ) THEN
924 nzz = 0
925 ELSE
926 CALL pdstebz( ictxt, range, order, n, vll, vuu, il,
927 $ iu, abstll, work( indd2 ),
928 $ work( inde2+offset ), nzz, nsplit, w,
929 $ iwork( indibl ), iwork( indisp ),
930 $ work( indwork ), llwork, iwork( 1 ),
931 $ isizestebz, iinfo )
932 END IF
933*
934 IF( mod( info / ierrebz, 1 ).EQ.0 ) THEN
935 IF( nzz.GT.nz .OR. iinfo.NE.0 ) THEN
936 info = info + ierrebz
937 END IF
938 END IF
939 END IF
940 nz = min( nz, nzz )
941*
942 END IF
943 END IF
944 CALL pdstein( n, work( indd2 ), work( inde2+offset ), nz, w,
945 $ iwork( indibl ), iwork( indisp ), orfac, z, iz,
946 $ jz, descz, work( indwork ), lallwork, iwork( 1 ),
947 $ isizestein, ifail, iclustr, gap, iinfo )
948*
949 IF( iinfo.GE.nz+1 )
950 $ info = info + ierrcls
951 IF( mod( iinfo, nz+1 ).NE.0 )
952 $ info = info + ierrein
953*
954* Z = Q * Z
955*
956*
957 IF( nz.GT.0 ) THEN
958 CALL pdormtr( 'L', uplo, 'N', n, nz, a, ia, ja, desca,
959 $ work( indtau ), z, iz, jz, descz,
960 $ work( indwork ), llwork, iinfo )
961 END IF
962*
963 END IF
964*
965* If matrix was scaled, then rescale eigenvalues appropriately.
966*
967 IF( iscale.EQ.1 ) THEN
968 CALL dscal( m, one / sigma, w, 1 )
969 END IF
970*
971 work( 1 ) = dble( lwopt )
972 iwork( 1 ) = liwmin
973*
974 RETURN
975*
976* End of PDSYEVX
977*
subroutine chk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, info)
Definition chk1mat.f:3
integer function iceil(inum, idenom)
Definition iceil.f:2
integer function indxg2p(indxglob, nb, iproc, isrcproc, nprocs)
Definition indxg2p.f:2
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pchk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, nextra, ex, expos, info)
Definition pchkxmat.f:3
subroutine pchk2mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, mb, mbpos0, nb, nbpos0, ib, jb, descb, descbpos0, nextra, ex, expos, info)
Definition pchkxmat.f:175
double precision function pdlamch(ictxt, cmach)
Definition pdblastst.f:6769
subroutine pdelget(scope, top, alpha, a, ia, ja, desca)
Definition pdelget.f:2
double precision function pdlansy(norm, uplo, n, a, ia, ja, desca, work)
Definition pdlansy.f:3
subroutine pdlared1d(n, ia, ja, desc, bycol, byall, work, lwork)
Definition pdlared1d.f:2
subroutine pdlascl(type, cfrom, cto, m, n, a, ia, ja, desca, info)
Definition pdlascl.f:3
subroutine pdormtr(side, uplo, trans, m, n, a, ia, ja, desca, tau, c, ic, jc, descc, work, lwork, info)
Definition pdormtr.f:3
subroutine pdstebz(ictxt, range, order, n, vl, vu, il, iu, abstol, d, e, m, nsplit, w, iblock, isplit, work, lwork, iwork, liwork, info)
Definition pdstebz.f:4
subroutine pdstein(n, d, e, m, w, iblock, isplit, orfac, z, iz, jz, descz, work, lwork, iwork, liwork, ifail, iclustr, gap, info)
Definition pdstein.f:4
subroutine pdsyntrd(uplo, n, a, ia, ja, desca, d, e, tau, work, lwork, info)
Definition pdsyntrd.f:3
integer function pjlaenv(ictxt, ispec, name, opts, n1, n2, n3, n4)
Definition pjlaenv.f:3
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2
logical function lsame(ca, cb)
Definition tools.f:1724
Here is the call graph for this function:
Here is the caller graph for this function: