SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ pzheevx()

subroutine pzheevx ( character  jobz,
character  range,
character  uplo,
integer  n,
complex*16, dimension( * )  a,
integer  ia,
integer  ja,
integer, dimension( * )  desca,
double precision  vl,
double precision  vu,
integer  il,
integer  iu,
double precision  abstol,
integer  m,
integer  nz,
double precision, dimension( * )  w,
double precision  orfac,
complex*16, dimension( * )  z,
integer  iz,
integer  jz,
integer, dimension( * )  descz,
complex*16, dimension( * )  work,
integer  lwork,
double precision, dimension( * )  rwork,
integer  lrwork,
integer, dimension( * )  iwork,
integer  liwork,
integer, dimension( * )  ifail,
integer, dimension( * )  iclustr,
double precision, dimension( * )  gap,
integer  info 
)

Definition at line 1 of file pzheevx.f.

5*
6* -- ScaLAPACK routine (version 1.7) --
7* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8* and University of California, Berkeley.
9* May 25, 2001
10*
11* .. Scalar Arguments ..
12 CHARACTER JOBZ, RANGE, UPLO
13 INTEGER IA, IL, INFO, IU, IZ, JA, JZ, LIWORK, LRWORK,
14 $ LWORK, M, N, NZ
15 DOUBLE PRECISION ABSTOL, ORFAC, VL, VU
16* ..
17* .. Array Arguments ..
18 INTEGER DESCA( * ), DESCZ( * ), ICLUSTR( * ),
19 $ IFAIL( * ), IWORK( * )
20 DOUBLE PRECISION GAP( * ), RWORK( * ), W( * )
21 COMPLEX*16 A( * ), WORK( * ), Z( * )
22* ..
23*
24* Purpose
25* =======
26*
27* PZHEEVX computes selected eigenvalues and, optionally, eigenvectors
28* of a complex hermitian matrix A by calling the recommended sequence
29* of ScaLAPACK routines. Eigenvalues/vectors can be selected by
30* specifying a range of values or a range of indices for the desired
31* eigenvalues.
32*
33* Notes
34* =====
35*
36* Each global data object is described by an associated description
37* vector. This vector stores the information required to establish
38* the mapping between an object element and its corresponding process
39* and memory location.
40*
41* Let A be a generic term for any 2D block cyclicly distributed array.
42* Such a global array has an associated description vector DESCA.
43* In the following comments, the character _ should be read as
44* "of the global array".
45*
46* NOTATION STORED IN EXPLANATION
47* --------------- -------------- --------------------------------------
48* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
49* DTYPE_A = 1.
50* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
51* the BLACS process grid A is distribu-
52* ted over. The context itself is glo-
53* bal, but the handle (the integer
54* value) may vary.
55* M_A (global) DESCA( M_ ) The number of rows in the global
56* array A.
57* N_A (global) DESCA( N_ ) The number of columns in the global
58* array A.
59* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
60* the rows of the array.
61* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
62* the columns of the array.
63* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
64* row of the array A is distributed.
65* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
66* first column of the array A is
67* distributed.
68* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
69* array. LLD_A >= MAX(1,LOCr(M_A)).
70*
71* Let K be the number of rows or columns of a distributed matrix,
72* and assume that its process grid has dimension p x q.
73* LOCr( K ) denotes the number of elements of K that a process
74* would receive if K were distributed over the p processes of its
75* process column.
76* Similarly, LOCc( K ) denotes the number of elements of K that a
77* process would receive if K were distributed over the q processes of
78* its process row.
79* The values of LOCr() and LOCc() may be determined via a call to the
80* ScaLAPACK tool function, NUMROC:
81* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
82* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
83* An upper bound for these quantities may be computed by:
84* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
85* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
86*
87* PZHEEVX assumes IEEE 754 standard compliant arithmetic. To port
88* to a system which does not have IEEE 754 arithmetic, modify
89* the appropriate SLmake.inc file to include the compiler switch
90* -DNO_IEEE. This switch only affects the compilation of pdlaiect.c.
91*
92* Arguments
93* =========
94*
95* NP = the number of rows local to a given process.
96* NQ = the number of columns local to a given process.
97*
98* JOBZ (global input) CHARACTER*1
99* Specifies whether or not to compute the eigenvectors:
100* = 'N': Compute eigenvalues only.
101* = 'V': Compute eigenvalues and eigenvectors.
102*
103* RANGE (global input) CHARACTER*1
104* = 'A': all eigenvalues will be found.
105* = 'V': all eigenvalues in the interval [VL,VU] will be found.
106* = 'I': the IL-th through IU-th eigenvalues will be found.
107*
108* UPLO (global input) CHARACTER*1
109* Specifies whether the upper or lower triangular part of the
110* Hermitian matrix A is stored:
111* = 'U': Upper triangular
112* = 'L': Lower triangular
113*
114* N (global input) INTEGER
115* The number of rows and columns of the matrix A. N >= 0.
116*
117* A (local input/workspace) block cyclic COMPLEX*16 array,
118* global dimension (N, N),
119* local dimension ( LLD_A, LOCc(JA+N-1) )
120*
121* On entry, the Hermitian matrix A. If UPLO = 'U', only the
122* upper triangular part of A is used to define the elements of
123* the Hermitian matrix. If UPLO = 'L', only the lower
124* triangular part of A is used to define the elements of the
125* Hermitian matrix.
126*
127* On exit, the lower triangle (if UPLO='L') or the upper
128* triangle (if UPLO='U') of A, including the diagonal, is
129* destroyed.
130*
131* IA (global input) INTEGER
132* A's global row index, which points to the beginning of the
133* submatrix which is to be operated on.
134*
135* JA (global input) INTEGER
136* A's global column index, which points to the beginning of
137* the submatrix which is to be operated on.
138*
139* DESCA (global and local input) INTEGER array of dimension DLEN_.
140* The array descriptor for the distributed matrix A.
141* If DESCA( CTXT_ ) is incorrect, PZHEEVX cannot guarantee
142* correct error reporting.
143*
144* VL (global input) DOUBLE PRECISION
145* If RANGE='V', the lower bound of the interval to be searched
146* for eigenvalues. Not referenced if RANGE = 'A' or 'I'.
147*
148* VU (global input) DOUBLE PRECISION
149* If RANGE='V', the upper bound of the interval to be searched
150* for eigenvalues. Not referenced if RANGE = 'A' or 'I'.
151*
152* IL (global input) INTEGER
153* If RANGE='I', the index (from smallest to largest) of the
154* smallest eigenvalue to be returned. IL >= 1.
155* Not referenced if RANGE = 'A' or 'V'.
156*
157* IU (global input) INTEGER
158* If RANGE='I', the index (from smallest to largest) of the
159* largest eigenvalue to be returned. min(IL,N) <= IU <= N.
160* Not referenced if RANGE = 'A' or 'V'.
161*
162* ABSTOL (global input) DOUBLE PRECISION
163* If JOBZ='V', setting ABSTOL to PDLAMCH( CONTEXT, 'U') yields
164* the most orthogonal eigenvectors.
165*
166* The absolute error tolerance for the eigenvalues.
167* An approximate eigenvalue is accepted as converged
168* when it is determined to lie in an interval [a,b]
169* of width less than or equal to
170*
171* ABSTOL + EPS * max( |a|,|b| ) ,
172*
173* where EPS is the machine precision. If ABSTOL is less than
174* or equal to zero, then EPS*norm(T) will be used in its place,
175* where norm(T) is the 1-norm of the tridiagonal matrix
176* obtained by reducing A to tridiagonal form.
177*
178* Eigenvalues will be computed most accurately when ABSTOL is
179* set to twice the underflow threshold 2*PDLAMCH('S') not zero.
180* If this routine returns with ((MOD(INFO,2).NE.0) .OR.
181* (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or
182* eigenvectors did not converge, try setting ABSTOL to
183* 2*PDLAMCH('S').
184*
185* See "Computing Small Singular Values of Bidiagonal Matrices
186* with Guaranteed High Relative Accuracy," by Demmel and
187* Kahan, LAPACK Working Note #3.
188*
189* See "On the correctness of Parallel Bisection in Floating
190* Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70
191*
192* M (global output) INTEGER
193* Total number of eigenvalues found. 0 <= M <= N.
194*
195* NZ (global output) INTEGER
196* Total number of eigenvectors computed. 0 <= NZ <= M.
197* The number of columns of Z that are filled.
198* If JOBZ .NE. 'V', NZ is not referenced.
199* If JOBZ .EQ. 'V', NZ = M unless the user supplies
200* insufficient space and PZHEEVX is not able to detect this
201* before beginning computation. To get all the eigenvectors
202* requested, the user must supply both sufficient
203* space to hold the eigenvectors in Z (M .LE. DESCZ(N_))
204* and sufficient workspace to compute them. (See LWORK below.)
205* PZHEEVX is always able to detect insufficient space without
206* computation unless RANGE .EQ. 'V'.
207*
208* W (global output) DOUBLE PRECISION array, dimension (N)
209* On normal exit, the first M entries contain the selected
210* eigenvalues in ascending order.
211*
212* ORFAC (global input) DOUBLE PRECISION
213* Specifies which eigenvectors should be reorthogonalized.
214* Eigenvectors that correspond to eigenvalues which are within
215* tol=ORFAC*norm(A) of each other are to be reorthogonalized.
216* However, if the workspace is insufficient (see LWORK),
217* tol may be decreased until all eigenvectors to be
218* reorthogonalized can be stored in one process.
219* No reorthogonalization will be done if ORFAC equals zero.
220* A default value of 10^-3 is used if ORFAC is negative.
221* ORFAC should be identical on all processes.
222*
223* Z (local output) COMPLEX*16 array,
224* global dimension (N, N),
225* local dimension ( LLD_Z, LOCc(JZ+N-1) )
226* If JOBZ = 'V', then on normal exit the first M columns of Z
227* contain the orthonormal eigenvectors of the matrix
228* corresponding to the selected eigenvalues. If an eigenvector
229* fails to converge, then that column of Z contains the latest
230* approximation to the eigenvector, and the index of the
231* eigenvector is returned in IFAIL.
232* If JOBZ = 'N', then Z is not referenced.
233*
234* IZ (global input) INTEGER
235* Z's global row index, which points to the beginning of the
236* submatrix which is to be operated on.
237*
238* JZ (global input) INTEGER
239* Z's global column index, which points to the beginning of
240* the submatrix which is to be operated on.
241*
242* DESCZ (global and local input) INTEGER array of dimension DLEN_.
243* The array descriptor for the distributed matrix Z.
244* DESCZ( CTXT_ ) must equal DESCA( CTXT_ )
245*
246* WORK (local workspace/output) COMPLEX*16 array,
247* dimension (LWORK)
248* WORK(1) returns workspace adequate workspace to allow
249* optimal performance.
250*
251* LWORK (local input) INTEGER
252* Size of WORK array. If only eigenvalues are requested:
253* LWORK >= N + MAX( NB * ( NP0 + 1 ), 3 )
254* If eigenvectors are requested:
255* LWORK >= N + ( NP0 + MQ0 + NB ) * NB
256* with NQ0 = NUMROC( NN, NB, 0, 0, NPCOL ).
257*
258* For optimal performance, greater workspace is needed, i.e.
259* LWORK >= MAX( LWORK, NHETRD_LWORK )
260* Where LWORK is as defined above, and
261* NHETRD_LWORK = N + 2*( ANB+1 )*( 4*NPS+2 ) +
262* ( NPS + 1 ) * NPS
263*
264* ICTXT = DESCA( CTXT_ )
265* ANB = PJLAENV( ICTXT, 3, 'PZHETTRD', 'L', 0, 0, 0, 0 )
266* SQNPC = SQRT( DBLE( NPROW * NPCOL ) )
267* NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB )
268*
269* NUMROC is a ScaLAPACK tool functions;
270* PJLAENV is a ScaLAPACK envionmental inquiry function
271* MYROW, MYCOL, NPROW and NPCOL can be determined by calling
272* the subroutine BLACS_GRIDINFO.
273*
274* If LWORK = -1, then LWORK is global input and a workspace
275* query is assumed; the routine only calculates the
276* optimal size for all work arrays. Each of these
277* values is returned in the first entry of the corresponding
278* work array, and no error message is issued by PXERBLA.
279*
280* RWORK (local workspace/output) DOUBLE PRECISION array,
281* dimension max(3,LRWORK)
282* On return, WORK(1) contains the optimal amount of
283* workspace required for efficient execution.
284* if JOBZ='N' RWORK(1) = optimal amount of workspace
285* required to compute eigenvalues efficiently
286* if JOBZ='V' RWORK(1) = optimal amount of workspace
287* required to compute eigenvalues and eigenvectors
288* efficiently with no guarantee on orthogonality.
289* If RANGE='V', it is assumed that all eigenvectors
290* may be required.
291*
292* LRWORK (local input) INTEGER
293* Size of RWORK
294* See below for definitions of variables used to define LRWORK.
295* If no eigenvectors are requested (JOBZ = 'N') then
296* LRWORK >= 5 * NN + 4 * N
297* If eigenvectors are requested (JOBZ = 'V' ) then
298* the amount of workspace required to guarantee that all
299* eigenvectors are computed is:
300* LRWORK >= 4*N + MAX( 5*NN, NP0 * MQ0 ) +
301* ICEIL( NEIG, NPROW*NPCOL)*NN
302*
303* The computed eigenvectors may not be orthogonal if the
304* minimal workspace is supplied and ORFAC is too small.
305* If you want to guarantee orthogonality (at the cost
306* of potentially poor performance) you should add
307* the following to LRWORK:
308* (CLUSTERSIZE-1)*N
309* where CLUSTERSIZE is the number of eigenvalues in the
310* largest cluster, where a cluster is defined as a set of
311* close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) |
312* W(J+1) <= W(J) + ORFAC*2*norm(A) }
313* Variable definitions:
314* NEIG = number of eigenvectors requested
315* NB = DESCA( MB_ ) = DESCA( NB_ ) =
316* DESCZ( MB_ ) = DESCZ( NB_ )
317* NN = MAX( N, NB, 2 )
318* DESCA( RSRC_ ) = DESCA( NB_ ) = DESCZ( RSRC_ ) =
319* DESCZ( CSRC_ ) = 0
320* NP0 = NUMROC( NN, NB, 0, 0, NPROW )
321* MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL )
322* ICEIL( X, Y ) is a ScaLAPACK function returning
323* ceiling(X/Y)
324*
325* When LRWORK is too small:
326* If LRWORK is too small to guarantee orthogonality,
327* PZHEEVX attempts to maintain orthogonality in
328* the clusters with the smallest
329* spacing between the eigenvalues.
330* If LRWORK is too small to compute all the eigenvectors
331* requested, no computation is performed and INFO=-25
332* is returned. Note that when RANGE='V', PZHEEVX does
333* not know how many eigenvectors are requested until
334* the eigenvalues are computed. Therefore, when RANGE='V'
335* and as long as LRWORK is large enough to allow PZHEEVX to
336* compute the eigenvalues, PZHEEVX will compute the
337* eigenvalues and as many eigenvectors as it can.
338*
339* Relationship between workspace, orthogonality & performance:
340* If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing
341* enough space to compute all the eigenvectors
342* orthogonally will cause serious degradation in
343* performance. In the limit (i.e. CLUSTERSIZE = N-1)
344* PZSTEIN will perform no better than ZSTEIN on 1
345* processor.
346* For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing
347* all eigenvectors will increase the total execution time
348* by a factor of 2 or more.
349* For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will
350* grow as the square of the cluster size, all other factors
351* remaining equal and assuming enough workspace. Less
352* workspace means less reorthogonalization but faster
353* execution.
354*
355* If LRWORK = -1, then LRWORK is global input and a workspace
356* query is assumed; the routine only calculates the size
357* required for optimal performance for all work arrays. Each of
358* these values is returned in the first entry of the
359* corresponding work arrays, and no error message is issued by
360* PXERBLA.
361*
362* IWORK (local workspace) INTEGER array
363* On return, IWORK(1) contains the amount of integer workspace
364* required.
365*
366* LIWORK (local input) INTEGER
367* size of IWORK
368* LIWORK >= 6 * NNP
369* Where:
370* NNP = MAX( N, NPROW*NPCOL + 1, 4 )
371* If LIWORK = -1, then LIWORK is global input and a workspace
372* query is assumed; the routine only calculates the minimum
373* and optimal size for all work arrays. Each of these
374* values is returned in the first entry of the corresponding
375* work array, and no error message is issued by PXERBLA.
376*
377* IFAIL (global output) INTEGER array, dimension (N)
378* If JOBZ = 'V', then on normal exit, the first M elements of
379* IFAIL are zero. If (MOD(INFO,2).NE.0) on exit, then
380* IFAIL contains the
381* indices of the eigenvectors that failed to converge.
382* If JOBZ = 'N', then IFAIL is not referenced.
383*
384* ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL)
385* This array contains indices of eigenvectors corresponding to
386* a cluster of eigenvalues that could not be reorthogonalized
387* due to insufficient workspace (see LWORK, ORFAC and INFO).
388* Eigenvectors corresponding to clusters of eigenvalues indexed
389* ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be
390* reorthogonalized due to lack of workspace. Hence the
391* eigenvectors corresponding to these clusters may not be
392* orthogonal. ICLUSTR() is a zero terminated array.
393* (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if
394* K is the number of clusters
395* ICLUSTR is not referenced if JOBZ = 'N'
396*
397* GAP (global output) DOUBLE PRECISION array,
398* dimension (NPROW*NPCOL)
399* This array contains the gap between eigenvalues whose
400* eigenvectors could not be reorthogonalized. The output
401* values in this array correspond to the clusters indicated
402* by the array ICLUSTR. As a result, the dot product between
403* eigenvectors correspoding to the I^th cluster may be as high
404* as ( C * n ) / GAP(I) where C is a small constant.
405*
406* INFO (global output) INTEGER
407* = 0: successful exit
408* < 0: If the i-th argument is an array and the j-entry had
409* an illegal value, then INFO = -(i*100+j), if the i-th
410* argument is a scalar and had an illegal value, then
411* INFO = -i.
412* > 0: if (MOD(INFO,2).NE.0), then one or more eigenvectors
413* failed to converge. Their indices are stored
414* in IFAIL. Ensure ABSTOL=2.0*PDLAMCH( 'U' )
415* Send e-mail to scalapack@cs.utk.edu
416* if (MOD(INFO/2,2).NE.0),then eigenvectors corresponding
417* to one or more clusters of eigenvalues could not be
418* reorthogonalized because of insufficient workspace.
419* The indices of the clusters are stored in the array
420* ICLUSTR.
421* if (MOD(INFO/4,2).NE.0), then space limit prevented
422* PZHEEVX from computing all of the eigenvectors
423* between VL and VU. The number of eigenvectors
424* computed is returned in NZ.
425* if (MOD(INFO/8,2).NE.0), then PZSTEBZ failed to compute
426* eigenvalues. Ensure ABSTOL=2.0*PDLAMCH( 'U' )
427* Send e-mail to scalapack@cs.utk.edu
428*
429* Alignment requirements
430* ======================
431*
432* The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1)
433* must verify some alignment properties, namely the following
434* expressions should be true:
435*
436* ( MB_A.EQ.NB_A.EQ.MB_Z .AND. IROFFA.EQ.IROFFZ .AND. IROFFA.EQ.0 .AND.
437* IAROW.EQ.IZROW )
438* where
439* IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ).
440*
441* =====================================================================
442*
443* Differences between PZHEEVX and ZHEEVX
444* ======================================
445*
446* A, LDA -> A, IA, JA, DESCA
447* Z, LDZ -> Z, IZ, JZ, DESCZ
448* WORKSPACE needs are larger for PZHEEVX.
449* LIWORK parameter added
450*
451* ORFAC, ICLUSTER() and GAP() parameters added
452* meaning of INFO is changed
453*
454* Functional differences:
455* PZHEEVX does not promise orthogonality for eigenvectors associated
456* with tighly clustered eigenvalues.
457* PZHEEVX does not reorthogonalize eigenvectors
458* that are on different processes. The extent of reorthogonalization
459* is controlled by the input parameter LWORK.
460*
461* Version 1.4 limitations:
462* DESCA(MB_) = DESCA(NB_)
463* DESCA(M_) = DESCZ(M_)
464* DESCA(N_) = DESCZ(N_)
465* DESCA(MB_) = DESCZ(MB_)
466* DESCA(NB_) = DESCZ(NB_)
467* DESCA(RSRC_) = DESCZ(RSRC_)
468*
469* .. Parameters ..
470 INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
471 $ MB_, NB_, RSRC_, CSRC_, LLD_
472 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
473 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
474 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
475 DOUBLE PRECISION ZERO, ONE, TEN, FIVE
476 parameter( zero = 0.0d+0, one = 1.0d+0, ten = 10.0d+0,
477 $ five = 5.0d+0 )
478 INTEGER IERREIN, IERRCLS, IERRSPC, IERREBZ
479 parameter( ierrein = 1, ierrcls = 2, ierrspc = 4,
480 $ ierrebz = 8 )
481* ..
482* .. Local Scalars ..
483 LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, QUICKRETURN,
484 $ VALEIG, WANTZ
485 CHARACTER ORDER
486 INTEGER ANB, CSRC_A, I, IAROW, ICOFFA, ICTXT, IINFO,
487 $ INDD, INDD2, INDE, INDE2, INDIBL, INDISP,
488 $ INDRWORK, INDTAU, INDWORK, IROFFA, IROFFZ,
489 $ ISCALE, ISIZESTEBZ, ISIZESTEIN, IZROW,
490 $ LALLWORK, LIWMIN, LLRWORK, LLWORK, LRWMIN,
491 $ LRWOPT, LWMIN, LWOPT, MAXEIGS, MB_A, MQ0,
492 $ MYCOL, MYROW, NB, NB_A, NEIG, NHETRD_LWOPT, NN,
493 $ NNP, NP0, NPCOL, NPROCS, NPROW, NPS, NQ0,
494 $ NSPLIT, NZZ, OFFSET, RSRC_A, RSRC_Z, SIZEHEEVX,
495 $ SIZESTEIN, SQNPC
496 DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN,
497 $ SIGMA, SMLNUM, VLL, VUU
498* ..
499* .. Local Arrays ..
500 INTEGER IDUM1( 4 ), IDUM2( 4 )
501* ..
502* .. External Functions ..
503 LOGICAL LSAME
504 INTEGER ICEIL, INDXG2P, NUMROC, PJLAENV
505 DOUBLE PRECISION PDLAMCH, PZLANHE
506 EXTERNAL lsame, iceil, indxg2p, numroc, pjlaenv,
508* ..
509* .. External Subroutines ..
510 EXTERNAL blacs_gridinfo, chk1mat, dgebr2d, dgebs2d,
511 $ dlasrt, dscal, igamn2d, pchk1mat, pchk2mat,
514* ..
515* .. Intrinsic Functions ..
516 INTRINSIC abs, dble, dcmplx, ichar, int, max, min, mod,
517 $ sqrt
518* ..
519* .. Executable Statements ..
520* This is just to keep ftnchek and toolpack/1 happy
521 IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
522 $ rsrc_.LT.0 )RETURN
523*
524 quickreturn = ( n.EQ.0 )
525*
526* Test the input arguments.
527*
528 ictxt = desca( ctxt_ )
529 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
530 info = 0
531*
532 wantz = lsame( jobz, 'V' )
533 IF( nprow.EQ.-1 ) THEN
534 info = -( 800+ctxt_ )
535 ELSE IF( wantz ) THEN
536 IF( ictxt.NE.descz( ctxt_ ) ) THEN
537 info = -( 2100+ctxt_ )
538 END IF
539 END IF
540 IF( info.EQ.0 ) THEN
541 CALL chk1mat( n, 4, n, 4, ia, ja, desca, 8, info )
542 IF( wantz )
543 $ CALL chk1mat( n, 4, n, 4, iz, jz, descz, 21, info )
544*
545 IF( info.EQ.0 ) THEN
546*
547* Get machine constants.
548*
549 safmin = pdlamch( ictxt, 'Safe minimum' )
550 eps = pdlamch( ictxt, 'Precision' )
551 smlnum = safmin / eps
552 bignum = one / smlnum
553 rmin = sqrt( smlnum )
554 rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) )
555*
556 nprocs = nprow*npcol
557 lower = lsame( uplo, 'L' )
558 alleig = lsame( range, 'A' )
559 valeig = lsame( range, 'V' )
560 indeig = lsame( range, 'I' )
561*
562* Set up pointers into the WORK array
563*
564 indtau = 1
565 indwork = indtau + n
566 llwork = lwork - indwork + 1
567*
568* Set up pointers into the RWORK array
569*
570 inde = 1
571 indd = inde + n
572 indd2 = indd + n
573 inde2 = indd2 + n
574 indrwork = inde2 + n
575 llrwork = lrwork - indrwork + 1
576*
577* Set up pointers into the IWORK array
578*
579 isizestein = 3*n + nprocs + 1
580 isizestebz = max( 4*n, 14, nprocs )
581 indibl = ( max( isizestein, isizestebz ) ) + 1
582 indisp = indibl + n
583*
584* Compute the total amount of space needed
585*
586 lquery = .false.
587 IF( lwork.EQ.-1 .OR. liwork.EQ.-1 .OR. lrwork.EQ.-1 )
588 $ lquery = .true.
589*
590 nnp = max( n, nprocs+1, 4 )
591 liwmin = 6*nnp
592*
593 nprocs = nprow*npcol
594 nb_a = desca( nb_ )
595 mb_a = desca( mb_ )
596 nb = nb_a
597 nn = max( n, nb, 2 )
598*
599 rsrc_a = desca( rsrc_ )
600 csrc_a = desca( csrc_ )
601 iroffa = mod( ia-1, mb_a )
602 icoffa = mod( ja-1, nb_a )
603 iarow = indxg2p( 1, nb_a, myrow, rsrc_a, nprow )
604 np0 = numroc( n+iroffa, nb, 0, 0, nprow )
605 mq0 = numroc( n+icoffa, nb, 0, 0, npcol )
606 IF( wantz ) THEN
607 rsrc_z = descz( rsrc_ )
608 iroffz = mod( iz-1, mb_a )
609 izrow = indxg2p( 1, nb_a, myrow, rsrc_z, nprow )
610 ELSE
611 iroffz = 0
612 izrow = 0
613 END IF
614*
615 IF( ( .NOT.wantz ) .OR. ( valeig .AND. ( .NOT.lquery ) ) )
616 $ THEN
617 lwmin = n + max( nb*( np0+1 ), 3 )
618 lwopt = lwmin
619 lrwmin = 5*nn + 4*n
620 IF( wantz ) THEN
621 mq0 = numroc( max( n, nb, 2 ), nb, 0, 0, npcol )
622 lrwopt = 4*n + max( 5*nn, np0*mq0 ) +
623 $ iceil( n, nprow*npcol )*nn
624 ELSE
625 lrwopt = lrwmin
626 END IF
627 neig = 0
628 ELSE
629 IF( alleig .OR. valeig ) THEN
630 neig = n
631 ELSE IF( indeig ) THEN
632 neig = iu - il + 1
633 END IF
634 mq0 = numroc( max( neig, nb, 2 ), nb, 0, 0, npcol )
635 nq0 = numroc( nn, nb, 0, 0, npcol )
636 lwmin = n + ( np0+nq0+nb )*nb
637 lrwmin = 4*n + max( 5*nn, np0*mq0 ) +
638 $ iceil( neig, nprow*npcol )*nn
639 lrwopt = lrwmin
640 lwopt = lwmin
641*
642 END IF
643*
644* Compute how much workspace is needed to use the
645* new TRD code
646*
647 anb = pjlaenv( ictxt, 3, 'PZHETTRD', 'L', 0, 0, 0, 0 )
648 sqnpc = int( sqrt( dble( nprow*npcol ) ) )
649 nps = max( numroc( n, 1, 0, 0, sqnpc ), 2*anb )
650 nhetrd_lwopt = 2*( anb+1 )*( 4*nps+2 ) + ( nps+2 )*nps
651 lwopt = max( lwopt, n+nhetrd_lwopt )
652*
653 END IF
654 IF( info.EQ.0 ) THEN
655 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
656 rwork( 1 ) = abstol
657 IF( valeig ) THEN
658 rwork( 2 ) = vl
659 rwork( 3 ) = vu
660 ELSE
661 rwork( 2 ) = zero
662 rwork( 3 ) = zero
663 END IF
664 CALL dgebs2d( ictxt, 'ALL', ' ', 3, 1, rwork, 3 )
665 ELSE
666 CALL dgebr2d( ictxt, 'ALL', ' ', 3, 1, rwork, 3, 0, 0 )
667 END IF
668 IF( .NOT.( wantz .OR. lsame( jobz, 'N' ) ) ) THEN
669 info = -1
670 ELSE IF( .NOT.( alleig .OR. valeig .OR. indeig ) ) THEN
671 info = -2
672 ELSE IF( .NOT.( lower .OR. lsame( uplo, 'U' ) ) ) THEN
673 info = -3
674 ELSE IF( valeig .AND. n.GT.0 .AND. vu.LE.vl ) THEN
675 info = -10
676 ELSE IF( indeig .AND. ( il.LT.1 .OR. il.GT.max( 1, n ) ) )
677 $ THEN
678 info = -11
679 ELSE IF( indeig .AND. ( iu.LT.min( n, il ) .OR. iu.GT.n ) )
680 $ THEN
681 info = -12
682 ELSE IF( lwork.LT.lwmin .AND. lwork.NE.-1 ) THEN
683 info = -23
684 ELSE IF( lrwork.LT.lrwmin .AND. lrwork.NE.-1 ) THEN
685 info = -25
686 ELSE IF( liwork.LT.liwmin .AND. liwork.NE.-1 ) THEN
687 info = -27
688 ELSE IF( valeig .AND. ( abs( rwork( 2 )-vl ).GT.five*eps*
689 $ abs( vl ) ) ) THEN
690 info = -9
691 ELSE IF( valeig .AND. ( abs( rwork( 3 )-vu ).GT.five*eps*
692 $ abs( vu ) ) ) THEN
693 info = -10
694 ELSE IF( abs( rwork( 1 )-abstol ).GT.five*eps*
695 $ abs( abstol ) ) THEN
696 info = -13
697 ELSE IF( iroffa.NE.0 ) THEN
698 info = -6
699 ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
700 info = -( 800+nb_ )
701 END IF
702 IF( wantz ) THEN
703 IF( iroffa.NE.iroffz ) THEN
704 info = -19
705 ELSE IF( iarow.NE.izrow ) THEN
706 info = -19
707 ELSE IF( desca( m_ ).NE.descz( m_ ) ) THEN
708 info = -( 2100+m_ )
709 ELSE IF( desca( n_ ).NE.descz( n_ ) ) THEN
710 info = -( 2100+n_ )
711 ELSE IF( desca( mb_ ).NE.descz( mb_ ) ) THEN
712 info = -( 2100+mb_ )
713 ELSE IF( desca( nb_ ).NE.descz( nb_ ) ) THEN
714 info = -( 2100+nb_ )
715 ELSE IF( desca( rsrc_ ).NE.descz( rsrc_ ) ) THEN
716 info = -( 2100+rsrc_ )
717 ELSE IF( desca( csrc_ ).NE.descz( csrc_ ) ) THEN
718 info = -( 2100+csrc_ )
719 ELSE IF( ictxt.NE.descz( ctxt_ ) ) THEN
720 info = -( 2100+ctxt_ )
721 END IF
722 END IF
723 END IF
724 IF( wantz ) THEN
725 idum1( 1 ) = ichar( 'V' )
726 ELSE
727 idum1( 1 ) = ichar( 'N' )
728 END IF
729 idum2( 1 ) = 1
730 IF( lower ) THEN
731 idum1( 2 ) = ichar( 'L' )
732 ELSE
733 idum1( 2 ) = ichar( 'U' )
734 END IF
735 idum2( 2 ) = 2
736 IF( alleig ) THEN
737 idum1( 3 ) = ichar( 'A' )
738 ELSE IF( indeig ) THEN
739 idum1( 3 ) = ichar( 'I' )
740 ELSE
741 idum1( 3 ) = ichar( 'V' )
742 END IF
743 idum2( 3 ) = 3
744 IF( lquery ) THEN
745 idum1( 4 ) = -1
746 ELSE
747 idum1( 4 ) = 1
748 END IF
749 idum2( 4 ) = 4
750 IF( wantz ) THEN
751 CALL pchk2mat( n, 4, n, 4, ia, ja, desca, 8, n, 4, n, 4, iz,
752 $ jz, descz, 21, 4, idum1, idum2, info )
753 ELSE
754 CALL pchk1mat( n, 4, n, 4, ia, ja, desca, 8, 4, idum1,
755 $ idum2, info )
756 END IF
757 work( 1 ) = dcmplx( lwopt )
758 rwork( 1 ) = dble( lrwopt )
759 iwork( 1 ) = liwmin
760 END IF
761*
762 IF( info.NE.0 ) THEN
763 CALL pxerbla( ictxt, 'PZHEEVX', -info )
764 RETURN
765 ELSE IF( lquery ) THEN
766 RETURN
767 END IF
768*
769* Quick return if possible
770*
771 IF( quickreturn ) THEN
772 IF( wantz ) THEN
773 nz = 0
774 iclustr( 1 ) = 0
775 END IF
776 m = 0
777 work( 1 ) = dcmplx( lwopt )
778 rwork( 1 ) = dble( lrwmin )
779 iwork( 1 ) = liwmin
780 RETURN
781 END IF
782*
783* Scale matrix to allowable range, if necessary.
784*
785 abstll = abstol
786 iscale = 0
787 IF( valeig ) THEN
788 vll = vl
789 vuu = vu
790 ELSE
791 vll = zero
792 vuu = zero
793 END IF
794*
795 anrm = pzlanhe( 'M', uplo, n, a, ia, ja, desca,
796 $ rwork( indrwork ) )
797*
798 IF( anrm.GT.zero .AND. anrm.LT.rmin ) THEN
799 iscale = 1
800 sigma = rmin / anrm
801 anrm = anrm*sigma
802 ELSE IF( anrm.GT.rmax ) THEN
803 iscale = 1
804 sigma = rmax / anrm
805 anrm = anrm*sigma
806 END IF
807*
808 IF( iscale.EQ.1 ) THEN
809 CALL pzlascl( uplo, one, sigma, n, n, a, ia, ja, desca, iinfo )
810 IF( abstol.GT.0 )
811 $ abstll = abstol*sigma
812 IF( valeig ) THEN
813 vll = vl*sigma
814 vuu = vu*sigma
815 IF( vuu.EQ.vll ) THEN
816 vuu = vuu + 2*max( abs( vuu )*eps, safmin )
817 END IF
818 END IF
819 END IF
820*
821* Call PZHENTRD to reduce Hermitian matrix to tridiagonal form.
822*
823 lallwork = llrwork
824*
825 CALL pzhentrd( uplo, n, a, ia, ja, desca, rwork( indd ),
826 $ rwork( inde ), work( indtau ), work( indwork ),
827 $ llwork, rwork( indrwork ), llrwork, iinfo )
828*
829*
830* Copy the values of D, E to all processes
831*
832* Here PxLARED1D is used to redistribute the tridiagonal matrix.
833* PxLARED1D, however, doesn't yet work with arbritary matrix
834* distributions so we have PxELGET as a backup.
835*
836 offset = 0
837 IF( ia.EQ.1 .AND. ja.EQ.1 .AND. rsrc_a.EQ.0 .AND. csrc_a.EQ.0 )
838 $ THEN
839 CALL pdlared1d( n, ia, ja, desca, rwork( indd ),
840 $ rwork( indd2 ), rwork( indrwork ), llrwork )
841*
842 CALL pdlared1d( n, ia, ja, desca, rwork( inde ),
843 $ rwork( inde2 ), rwork( indrwork ), llrwork )
844 IF( .NOT.lower )
845 $ offset = 1
846 ELSE
847 DO 10 i = 1, n
848 CALL pzelget( 'A', ' ', work( indd2+i-1 ), a, i+ia-1,
849 $ i+ja-1, desca )
850 rwork( indd2+i-1 ) = dble( work( indd2+i-1 ) )
851 10 CONTINUE
852 IF( lsame( uplo, 'U' ) ) THEN
853 DO 20 i = 1, n - 1
854 CALL pzelget( 'A', ' ', work( inde2+i-1 ), a, i+ia-1,
855 $ i+ja, desca )
856 rwork( inde2+i-1 ) = dble( work( inde2+i-1 ) )
857 20 CONTINUE
858 ELSE
859 DO 30 i = 1, n - 1
860 CALL pzelget( 'A', ' ', work( inde2+i-1 ), a, i+ia,
861 $ i+ja-1, desca )
862 rwork( inde2+i-1 ) = dble( work( inde2+i-1 ) )
863 30 CONTINUE
864 END IF
865 END IF
866*
867* Call PDSTEBZ and, if eigenvectors are desired, PZSTEIN.
868*
869 IF( wantz ) THEN
870 order = 'B'
871 ELSE
872 order = 'E'
873 END IF
874*
875 CALL pdstebz( ictxt, range, order, n, vll, vuu, il, iu, abstll,
876 $ rwork( indd2 ), rwork( inde2+offset ), m, nsplit, w,
877 $ iwork( indibl ), iwork( indisp ), rwork( indrwork ),
878 $ llrwork, iwork( 1 ), isizestebz, iinfo )
879*
880*
881* IF PDSTEBZ fails, the error propogates to INFO, but
882* we do not propogate the eigenvalue(s) which failed because:
883* 1) This should never happen if the user specifies
884* ABSTOL = 2 * PDLAMCH( 'U' )
885* 2) PDSTEIN will confirm/deny whether the eigenvalues are
886* close enough.
887*
888 IF( iinfo.NE.0 ) THEN
889 info = info + ierrebz
890 DO 40 i = 1, m
891 iwork( indibl+i-1 ) = abs( iwork( indibl+i-1 ) )
892 40 CONTINUE
893 END IF
894 IF( wantz ) THEN
895*
896 IF( valeig ) THEN
897*
898* Compute the maximum number of eigenvalues that we can
899* compute in the
900* workspace that we have, and that we can store in Z.
901*
902* Loop through the possibilities looking for the largest
903* NZ that we can feed to PZSTEIN and PZUNMTR
904*
905* Since all processes must end up with the same value
906* of NZ, we first compute the minimum of LALLWORK
907*
908 CALL igamn2d( ictxt, 'A', ' ', 1, 1, lallwork, 1, 1, 1, -1,
909 $ -1, -1 )
910*
911 maxeigs = descz( n_ )
912*
913 DO 50 nz = min( maxeigs, m ), 0, -1
914 mq0 = numroc( nz, nb, 0, 0, npcol )
915 sizestein = iceil( nz, nprocs )*n + max( 5*n, np0*mq0 )
916 sizeheevx = sizestein
917 IF( sizeheevx.LE.lallwork )
918 $ GO TO 60
919 50 CONTINUE
920 60 CONTINUE
921 ELSE
922 nz = m
923 END IF
924 nz = max( nz, 0 )
925 IF( nz.NE.m ) THEN
926 info = info + ierrspc
927*
928 DO 70 i = 1, m
929 ifail( i ) = 0
930 70 CONTINUE
931*
932* The following code handles a rare special case
933* - NZ .NE. M means that we don't have enough room to store
934* all the vectors.
935* - NSPLIT .GT. 1 means that the matrix split
936* In this case, we cannot simply take the first NZ eigenvalues
937* because PDSTEBZ sorts the eigenvalues by block when
938* a split occurs. So, we have to make another call to
939* PDSTEBZ with a new upper limit - VUU.
940*
941 IF( nsplit.GT.1 ) THEN
942 CALL dlasrt( 'I', m, w, iinfo )
943 nzz = 0
944 IF( nz.GT.0 ) THEN
945*
946 vuu = w( nz ) - ten*( eps*anrm+safmin )
947 IF( vll.GE.vuu ) THEN
948 nzz = 0
949 ELSE
950 CALL pdstebz( ictxt, range, order, n, vll, vuu, il,
951 $ iu, abstll, rwork( indd2 ),
952 $ rwork( inde2+offset ), nzz, nsplit,
953 $ w, iwork( indibl ), iwork( indisp ),
954 $ rwork( indrwork ), llrwork,
955 $ iwork( 1 ), isizestebz, iinfo )
956 END IF
957*
958 IF( mod( info / ierrebz, 1 ).EQ.0 ) THEN
959 IF( nzz.GT.nz .OR. iinfo.NE.0 ) THEN
960 info = info + ierrebz
961 END IF
962 END IF
963 END IF
964 nz = min( nz, nzz )
965*
966 END IF
967 END IF
968 CALL pzstein( n, rwork( indd2 ), rwork( inde2+offset ), nz, w,
969 $ iwork( indibl ), iwork( indisp ), orfac, z, iz,
970 $ jz, descz, rwork( indrwork ), lallwork,
971 $ iwork( 1 ), isizestein, ifail, iclustr, gap,
972 $ iinfo )
973*
974 IF( iinfo.GE.nz+1 )
975 $ info = info + ierrcls
976 IF( mod( iinfo, nz+1 ).NE.0 )
977 $ info = info + ierrein
978*
979* Z = Q * Z
980*
981*
982 IF( nz.GT.0 ) THEN
983 CALL pzunmtr( 'L', uplo, 'N', n, nz, a, ia, ja, desca,
984 $ work( indtau ), z, iz, jz, descz,
985 $ work( indwork ), llwork, iinfo )
986 END IF
987*
988 END IF
989*
990* If matrix was scaled, then rescale eigenvalues appropriately.
991*
992 IF( iscale.EQ.1 ) THEN
993 CALL dscal( m, one / sigma, w, 1 )
994 END IF
995*
996 work( 1 ) = dcmplx( lwopt )
997 rwork( 1 ) = dble( lrwopt )
998 iwork( 1 ) = liwmin
999*
1000 RETURN
1001*
1002* End of PZHEEVX
1003*
subroutine chk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, info)
Definition chk1mat.f:3
integer function iceil(inum, idenom)
Definition iceil.f:2
integer function indxg2p(indxglob, nb, iproc, isrcproc, nprocs)
Definition indxg2p.f:2
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pchk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, nextra, ex, expos, info)
Definition pchkxmat.f:3
subroutine pchk2mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, mb, mbpos0, nb, nbpos0, ib, jb, descb, descbpos0, nextra, ex, expos, info)
Definition pchkxmat.f:175
double precision function pdlamch(ictxt, cmach)
Definition pdblastst.f:6769
subroutine pdlared1d(n, ia, ja, desc, bycol, byall, work, lwork)
Definition pdlared1d.f:2
subroutine pdstebz(ictxt, range, order, n, vl, vu, il, iu, abstol, d, e, m, nsplit, w, iblock, isplit, work, lwork, iwork, liwork, info)
Definition pdstebz.f:4
integer function pjlaenv(ictxt, ispec, name, opts, n1, n2, n3, n4)
Definition pjlaenv.f:3
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2
subroutine pzelget(scope, top, alpha, a, ia, ja, desca)
Definition pzelget.f:2
subroutine pzhentrd(uplo, n, a, ia, ja, desca, d, e, tau, work, lwork, rwork, lrwork, info)
Definition pzhentrd.f:3
double precision function pzlanhe(norm, uplo, n, a, ia, ja, desca, work)
Definition pzlanhe.f:3
subroutine pzlascl(type, cfrom, cto, m, n, a, ia, ja, desca, info)
Definition pzlascl.f:3
subroutine pzstein(n, d, e, m, w, iblock, isplit, orfac, z, iz, jz, descz, work, lwork, iwork, liwork, ifail, iclustr, gap, info)
Definition pzstein.f:4
subroutine pzunmtr(side, uplo, trans, m, n, a, ia, ja, desca, tau, c, ic, jc, descc, work, lwork, info)
Definition pzunmtr.f:3
logical function lsame(ca, cb)
Definition tools.f:1724
Here is the call graph for this function:
Here is the caller graph for this function: