SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ pdsygvx()

subroutine pdsygvx ( integer  ibtype,
character  jobz,
character  range,
character  uplo,
integer  n,
double precision, dimension( * )  a,
integer  ia,
integer  ja,
integer, dimension( * )  desca,
double precision, dimension( * )  b,
integer  ib,
integer  jb,
integer, dimension( * )  descb,
double precision  vl,
double precision  vu,
integer  il,
integer  iu,
double precision  abstol,
integer  m,
integer  nz,
double precision, dimension( * )  w,
double precision  orfac,
double precision, dimension( * )  z,
integer  iz,
integer  jz,
integer, dimension( * )  descz,
double precision, dimension( * )  work,
integer  lwork,
integer, dimension( * )  iwork,
integer  liwork,
integer, dimension( * )  ifail,
integer, dimension( * )  iclustr,
double precision, dimension( * )  gap,
integer  info 
)

Definition at line 1 of file pdsygvx.f.

6*
7* -- ScaLAPACK routine (version 1.7) --
8* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9* and University of California, Berkeley.
10* October 15, 1999
11*
12* .. Scalar Arguments ..
13 CHARACTER JOBZ, RANGE, UPLO
14 INTEGER IA, IB, IBTYPE, IL, INFO, IU, IZ, JA, JB, JZ,
15 $ LIWORK, LWORK, M, N, NZ
16 DOUBLE PRECISION ABSTOL, ORFAC, VL, VU
17* ..
18* .. Array Arguments ..
19*
20 INTEGER DESCA( * ), DESCB( * ), DESCZ( * ),
21 $ ICLUSTR( * ), IFAIL( * ), IWORK( * )
22 DOUBLE PRECISION A( * ), B( * ), GAP( * ), W( * ), WORK( * ),
23 $ Z( * )
24* ..
25*
26* Purpose
27*
28* =======
29*
30* PDSYGVX computes all the eigenvalues, and optionally,
31* the eigenvectors
32* of a real generalized SY-definite eigenproblem, of the form
33* sub( A )*x=(lambda)*sub( B )*x, sub( A )*sub( B )x=(lambda)*x, or
34* sub( B )*sub( A )*x=(lambda)*x.
35* Here sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ) is assumed to be
36* SY, and sub( B ) denoting B( IB:IB+N-1, JB:JB+N-1 ) is assumed
37* to be symmetric positive definite.
38*
39* Notes
40* =====
41*
42*
43* Each global data object is described by an associated description
44* vector. This vector stores the information required to establish
45* the mapping between an object element and its corresponding process
46* and memory location.
47*
48* Let A be a generic term for any 2D block cyclicly distributed array.
49* Such a global array has an associated description vector DESCA.
50* In the following comments, the character _ should be read as
51* "of the global array".
52*
53* NOTATION STORED IN EXPLANATION
54* --------------- -------------- --------------------------------------
55* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
56* DTYPE_A = 1.
57* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
58* the BLACS process grid A is distribu-
59* ted over. The context itself is glo-
60* bal, but the handle (the integer
61* value) may vary.
62* M_A (global) DESCA( M_ ) The number of rows in the global
63* array A.
64* N_A (global) DESCA( N_ ) The number of columns in the global
65* array A.
66* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
67* the rows of the array.
68* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
69* the columns of the array.
70* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
71* row of the array A is distributed.
72* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
73* first column of the array A is
74* distributed.
75* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
76* array. LLD_A >= MAX(1,LOCr(M_A)).
77*
78* Let K be the number of rows or columns of a distributed matrix,
79* and assume that its process grid has dimension p x q.
80* LOCr( K ) denotes the number of elements of K that a process
81* would receive if K were distributed over the p processes of its
82* process column.
83* Similarly, LOCc( K ) denotes the number of elements of K that a
84* process would receive if K were distributed over the q processes of
85* its process row.
86* The values of LOCr() and LOCc() may be determined via a call to the
87* ScaLAPACK tool function, NUMROC:
88* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
89* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
90* An upper bound for these quantities may be computed by:
91* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
92* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
93*
94*
95* Arguments
96* =========
97*
98* IBTYPE (global input) INTEGER
99* Specifies the problem type to be solved:
100* = 1: sub( A )*x = (lambda)*sub( B )*x
101* = 2: sub( A )*sub( B )*x = (lambda)*x
102* = 3: sub( B )*sub( A )*x = (lambda)*x
103*
104* JOBZ (global input) CHARACTER*1
105* = 'N': Compute eigenvalues only;
106* = 'V': Compute eigenvalues and eigenvectors.
107*
108* RANGE (global input) CHARACTER*1
109* = 'A': all eigenvalues will be found.
110* = 'V': all eigenvalues in the interval [VL,VU] will be found.
111* = 'I': the IL-th through IU-th eigenvalues will be found.
112*
113* UPLO (global input) CHARACTER*1
114* = 'U': Upper triangles of sub( A ) and sub( B ) are stored;
115* = 'L': Lower triangles of sub( A ) and sub( B ) are stored.
116*
117* N (global input) INTEGER
118* The order of the matrices sub( A ) and sub( B ). N >= 0.
119*
120* A (local input/local output) DOUBLE PRECISION pointer into the
121* local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
122* On entry, this array contains the local pieces of the
123* N-by-N symmetric distributed matrix sub( A ). If UPLO = 'U',
124* the leading N-by-N upper triangular part of sub( A ) contains
125* the upper triangular part of the matrix. If UPLO = 'L', the
126* leading N-by-N lower triangular part of sub( A ) contains
127* the lower triangular part of the matrix.
128*
129* On exit, if JOBZ = 'V', then if INFO = 0, sub( A ) contains
130* the distributed matrix Z of eigenvectors. The eigenvectors
131* are normalized as follows:
132* if IBTYPE = 1 or 2, Z**T*sub( B )*Z = I;
133* if IBTYPE = 3, Z**T*inv( sub( B ) )*Z = I.
134* If JOBZ = 'N', then on exit the upper triangle (if UPLO='U')
135* or the lower triangle (if UPLO='L') of sub( A ), including
136* the diagonal, is destroyed.
137*
138* IA (global input) INTEGER
139* The row index in the global array A indicating the first
140* row of sub( A ).
141*
142* JA (global input) INTEGER
143* The column index in the global array A indicating the
144* first column of sub( A ).
145*
146* DESCA (global and local input) INTEGER array of dimension DLEN_.
147* The array descriptor for the distributed matrix A.
148* If DESCA( CTXT_ ) is incorrect, PDSYGVX cannot guarantee
149* correct error reporting.
150*
151* B (local input/local output) DOUBLE PRECISION pointer into the
152* local memory to an array of dimension (LLD_B, LOCc(JB+N-1)).
153* On entry, this array contains the local pieces of the
154* N-by-N symmetric distributed matrix sub( B ). If UPLO = 'U',
155* the leading N-by-N upper triangular part of sub( B ) contains
156* the upper triangular part of the matrix. If UPLO = 'L', the
157* leading N-by-N lower triangular part of sub( B ) contains
158* the lower triangular part of the matrix.
159*
160* On exit, if INFO <= N, the part of sub( B ) containing the
161* matrix is overwritten by the triangular factor U or L from
162* the Cholesky factorization sub( B ) = U**T*U or
163* sub( B ) = L*L**T.
164*
165* IB (global input) INTEGER
166* The row index in the global array B indicating the first
167* row of sub( B ).
168*
169* JB (global input) INTEGER
170* The column index in the global array B indicating the
171* first column of sub( B ).
172*
173* DESCB (global and local input) INTEGER array of dimension DLEN_.
174* The array descriptor for the distributed matrix B.
175* DESCB( CTXT_ ) must equal DESCA( CTXT_ )
176*
177* VL (global input) DOUBLE PRECISION
178* If RANGE='V', the lower bound of the interval to be searched
179* for eigenvalues. Not referenced if RANGE = 'A' or 'I'.
180*
181* VU (global input) DOUBLE PRECISION
182* If RANGE='V', the upper bound of the interval to be searched
183* for eigenvalues. Not referenced if RANGE = 'A' or 'I'.
184*
185* IL (global input) INTEGER
186* If RANGE='I', the index (from smallest to largest) of the
187* smallest eigenvalue to be returned. IL >= 1.
188* Not referenced if RANGE = 'A' or 'V'.
189*
190* IU (global input) INTEGER
191* If RANGE='I', the index (from smallest to largest) of the
192* largest eigenvalue to be returned. min(IL,N) <= IU <= N.
193* Not referenced if RANGE = 'A' or 'V'.
194*
195* ABSTOL (global input) DOUBLE PRECISION
196* If JOBZ='V', setting ABSTOL to PDLAMCH( CONTEXT, 'U') yields
197* the most orthogonal eigenvectors.
198*
199* The absolute error tolerance for the eigenvalues.
200* An approximate eigenvalue is accepted as converged
201* when it is determined to lie in an interval [a,b]
202* of width less than or equal to
203*
204* ABSTOL + EPS * max( |a|,|b| ) ,
205*
206* where EPS is the machine precision. If ABSTOL is less than
207* or equal to zero, then EPS*norm(T) will be used in its place,
208* where norm(T) is the 1-norm of the tridiagonal matrix
209* obtained by reducing A to tridiagonal form.
210*
211* Eigenvalues will be computed most accurately when ABSTOL is
212* set to twice the underflow threshold 2*PDLAMCH('S') not zero.
213* If this routine returns with ((MOD(INFO,2).NE.0) .OR.
214* (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or
215* eigenvectors did not converge, try setting ABSTOL to
216* 2*PDLAMCH('S').
217*
218* See "Computing Small Singular Values of Bidiagonal Matrices
219* with Guaranteed High Relative Accuracy," by Demmel and
220* Kahan, LAPACK Working Note #3.
221*
222* See "On the correctness of Parallel Bisection in Floating
223* Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70
224*
225* M (global output) INTEGER
226* Total number of eigenvalues found. 0 <= M <= N.
227*
228* NZ (global output) INTEGER
229* Total number of eigenvectors computed. 0 <= NZ <= M.
230* The number of columns of Z that are filled.
231* If JOBZ .NE. 'V', NZ is not referenced.
232* If JOBZ .EQ. 'V', NZ = M unless the user supplies
233* insufficient space and PDSYGVX is not able to detect this
234* before beginning computation. To get all the eigenvectors
235* requested, the user must supply both sufficient
236* space to hold the eigenvectors in Z (M .LE. DESCZ(N_))
237* and sufficient workspace to compute them. (See LWORK below.)
238* PDSYGVX is always able to detect insufficient space without
239* computation unless RANGE .EQ. 'V'.
240*
241* W (global output) DOUBLE PRECISION array, dimension (N)
242* On normal exit, the first M entries contain the selected
243* eigenvalues in ascending order.
244*
245* ORFAC (global input) DOUBLE PRECISION
246* Specifies which eigenvectors should be reorthogonalized.
247* Eigenvectors that correspond to eigenvalues which are within
248* tol=ORFAC*norm(A) of each other are to be reorthogonalized.
249* However, if the workspace is insufficient (see LWORK),
250* tol may be decreased until all eigenvectors to be
251* reorthogonalized can be stored in one process.
252* No reorthogonalization will be done if ORFAC equals zero.
253* A default value of 10^-3 is used if ORFAC is negative.
254* ORFAC should be identical on all processes.
255*
256* Z (local output) DOUBLE PRECISION array,
257* global dimension (N, N),
258* local dimension ( LLD_Z, LOCc(JZ+N-1) )
259* If JOBZ = 'V', then on normal exit the first M columns of Z
260* contain the orthonormal eigenvectors of the matrix
261* corresponding to the selected eigenvalues. If an eigenvector
262* fails to converge, then that column of Z contains the latest
263* approximation to the eigenvector, and the index of the
264* eigenvector is returned in IFAIL.
265* If JOBZ = 'N', then Z is not referenced.
266*
267* IZ (global input) INTEGER
268* The row index in the global array Z indicating the first
269* row of sub( Z ).
270*
271* JZ (global input) INTEGER
272* The column index in the global array Z indicating the
273* first column of sub( Z ).
274*
275* DESCZ (global and local input) INTEGER array of dimension DLEN_.
276* The array descriptor for the distributed matrix Z.
277* DESCZ( CTXT_ ) must equal DESCA( CTXT_ )
278*
279* WORK (local workspace/output) DOUBLE PRECISION array,
280* dimension max(3,LWORK)
281* if JOBZ='N' WORK(1) = optimal amount of workspace
282* required to compute eigenvalues efficiently
283* if JOBZ='V' WORK(1) = optimal amount of workspace
284* required to compute eigenvalues and eigenvectors
285* efficiently with no guarantee on orthogonality.
286* If RANGE='V', it is assumed that all eigenvectors
287* may be required.
288*
289* LWORK (local input) INTEGER
290* See below for definitions of variables used to define LWORK.
291* If no eigenvectors are requested (JOBZ = 'N') then
292* LWORK >= 5 * N + MAX( 5 * NN, NB * ( NP0 + 1 ) )
293* If eigenvectors are requested (JOBZ = 'V' ) then
294* the amount of workspace required to guarantee that all
295* eigenvectors are computed is:
296* LWORK >= 5 * N + MAX( 5*NN, NP0 * MQ0 + 2 * NB * NB ) +
297* ICEIL( NEIG, NPROW*NPCOL)*NN
298*
299* The computed eigenvectors may not be orthogonal if the
300* minimal workspace is supplied and ORFAC is too small.
301* If you want to guarantee orthogonality (at the cost
302* of potentially poor performance) you should add
303* the following to LWORK:
304* (CLUSTERSIZE-1)*N
305* where CLUSTERSIZE is the number of eigenvalues in the
306* largest cluster, where a cluster is defined as a set of
307* close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) |
308* W(J+1) <= W(J) + ORFAC*2*norm(A) }
309* Variable definitions:
310* NEIG = number of eigenvectors requested
311* NB = DESCA( MB_ ) = DESCA( NB_ ) = DESCZ( MB_ ) =
312* DESCZ( NB_ )
313* NN = MAX( N, NB, 2 )
314* DESCA( RSRC_ ) = DESCA( NB_ ) = DESCZ( RSRC_ ) =
315* DESCZ( CSRC_ ) = 0
316* NP0 = NUMROC( NN, NB, 0, 0, NPROW )
317* MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL )
318* ICEIL( X, Y ) is a ScaLAPACK function returning
319* ceiling(X/Y)
320*
321* When LWORK is too small:
322* If LWORK is too small to guarantee orthogonality,
323* PDSYGVX attempts to maintain orthogonality in
324* the clusters with the smallest
325* spacing between the eigenvalues.
326* If LWORK is too small to compute all the eigenvectors
327* requested, no computation is performed and INFO=-23
328* is returned. Note that when RANGE='V', PDSYGVX does
329* not know how many eigenvectors are requested until
330* the eigenvalues are computed. Therefore, when RANGE='V'
331* and as long as LWORK is large enough to allow PDSYGVX to
332* compute the eigenvalues, PDSYGVX will compute the
333* eigenvalues and as many eigenvectors as it can.
334*
335* Relationship between workspace, orthogonality & performance:
336* Greater performance can be achieved if adequate workspace
337* is provided. On the other hand, in some situations,
338* performance can decrease as the workspace provided
339* increases above the workspace amount shown below:
340*
341* For optimal performance, greater workspace may be
342* needed, i.e.
343* LWORK >= MAX( LWORK, 5 * N + NSYTRD_LWOPT,
344* NSYGST_LWOPT )
345* Where:
346* LWORK, as defined previously, depends upon the number
347* of eigenvectors requested, and
348* NSYTRD_LWOPT = N + 2*( ANB+1 )*( 4*NPS+2 ) +
349* ( NPS + 3 ) * NPS
350* NSYGST_LWOPT = 2*NP0*NB + NQ0*NB + NB*NB
351*
352* ANB = PJLAENV( DESCA( CTXT_), 3, 'PDSYTTRD', 'L',
353* 0, 0, 0, 0)
354* SQNPC = INT( SQRT( DBLE( NPROW * NPCOL ) ) )
355* NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB )
356* NB = DESCA( MB_ )
357* NP0 = NUMROC( N, NB, 0, 0, NPROW )
358* NQ0 = NUMROC( N, NB, 0, 0, NPCOL )
359*
360* NUMROC is a ScaLAPACK tool functions;
361* PJLAENV is a ScaLAPACK envionmental inquiry function
362* MYROW, MYCOL, NPROW and NPCOL can be determined by
363* calling the subroutine BLACS_GRIDINFO.
364*
365* For large N, no extra workspace is needed, however the
366* biggest boost in performance comes for small N, so it
367* is wise to provide the extra workspace (typically less
368* than a Megabyte per process).
369*
370* If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing
371* enough space to compute all the eigenvectors
372* orthogonally will cause serious degradation in
373* performance. In the limit (i.e. CLUSTERSIZE = N-1)
374* PDSTEIN will perform no better than DSTEIN on 1 processor.
375* For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing
376* all eigenvectors will increase the total execution time
377* by a factor of 2 or more.
378* For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will
379* grow as the square of the cluster size, all other factors
380* remaining equal and assuming enough workspace. Less
381* workspace means less reorthogonalization but faster
382* execution.
383*
384* If LWORK = -1, then LWORK is global input and a workspace
385* query is assumed; the routine only calculates the size
386* required for optimal performance on all work arrays.
387* Each of these values is returned in the first entry of the
388* corresponding work array, and no error message is issued by
389* PXERBLA.
390*
391*
392* IWORK (local workspace) INTEGER array
393* On return, IWORK(1) contains the amount of integer workspace
394* required.
395*
396* LIWORK (local input) INTEGER
397* size of IWORK
398* LIWORK >= 6 * NNP
399* Where:
400* NNP = MAX( N, NPROW*NPCOL + 1, 4 )
401*
402* If LIWORK = -1, then LIWORK is global input and a workspace
403* query is assumed; the routine only calculates the minimum
404* and optimal size for all work arrays. Each of these
405* values is returned in the first entry of the corresponding
406* work array, and no error message is issued by PXERBLA.
407*
408* IFAIL (output) INTEGER array, dimension (N)
409* IFAIL provides additional information when INFO .NE. 0
410* If (MOD(INFO/16,2).NE.0) then IFAIL(1) indicates the order of
411* the smallest minor which is not positive definite.
412* If (MOD(INFO,2).NE.0) on exit, then IFAIL contains the
413* indices of the eigenvectors that failed to converge.
414*
415* If neither of the above error conditions hold and JOBZ = 'V',
416* then the first M elements of IFAIL are set to zero.
417*
418* ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL)
419* This array contains indices of eigenvectors corresponding to
420* a cluster of eigenvalues that could not be reorthogonalized
421* due to insufficient workspace (see LWORK, ORFAC and INFO).
422* Eigenvectors corresponding to clusters of eigenvalues indexed
423* ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be
424* reorthogonalized due to lack of workspace. Hence the
425* eigenvectors corresponding to these clusters may not be
426* orthogonal. ICLUSTR() is a zero terminated array.
427* (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if
428* K is the number of clusters
429* ICLUSTR is not referenced if JOBZ = 'N'
430*
431* GAP (global output) DOUBLE PRECISION array,
432* dimension (NPROW*NPCOL)
433* This array contains the gap between eigenvalues whose
434* eigenvectors could not be reorthogonalized. The output
435* values in this array correspond to the clusters indicated
436* by the array ICLUSTR. As a result, the dot product between
437* eigenvectors correspoding to the I^th cluster may be as high
438* as ( C * n ) / GAP(I) where C is a small constant.
439*
440* INFO (global output) INTEGER
441* = 0: successful exit
442* < 0: If the i-th argument is an array and the j-entry had
443* an illegal value, then INFO = -(i*100+j), if the i-th
444* argument is a scalar and had an illegal value, then
445* INFO = -i.
446* > 0: if (MOD(INFO,2).NE.0), then one or more eigenvectors
447* failed to converge. Their indices are stored
448* in IFAIL. Send e-mail to scalapack@cs.utk.edu
449* if (MOD(INFO/2,2).NE.0),then eigenvectors corresponding
450* to one or more clusters of eigenvalues could not be
451* reorthogonalized because of insufficient workspace.
452* The indices of the clusters are stored in the array
453* ICLUSTR.
454* if (MOD(INFO/4,2).NE.0), then space limit prevented
455* PDSYGVX from computing all of the eigenvectors
456* between VL and VU. The number of eigenvectors
457* computed is returned in NZ.
458* if (MOD(INFO/8,2).NE.0), then PDSTEBZ failed to
459* compute eigenvalues.
460* Send e-mail to scalapack@cs.utk.edu
461* if (MOD(INFO/16,2).NE.0), then B was not positive
462* definite. IFAIL(1) indicates the order of
463* the smallest minor which is not positive definite.
464*
465* Alignment requirements
466* ======================
467*
468* The distributed submatrices A(IA:*, JA:*), C(IC:IC+M-1,JC:JC+N-1),
469* and B( IB:IB+N-1, JB:JB+N-1 ) must verify some alignment properties,
470* namely the following expressions should be true:
471*
472* DESCA(MB_) = DESCA(NB_)
473* IA = IB = IZ
474* JA = IB = JZ
475* DESCA(M_) = DESCB(M_) =DESCZ(M_)
476* DESCA(N_) = DESCB(N_)= DESCZ(N_)
477* DESCA(MB_) = DESCB(MB_) = DESCZ(MB_)
478* DESCA(NB_) = DESCB(NB_) = DESCZ(NB_)
479* DESCA(RSRC_) = DESCB(RSRC_) = DESCZ(RSRC_)
480* DESCA(CSRC_) = DESCB(CSRC_) = DESCZ(CSRC_)
481* MOD( IA-1, DESCA( MB_ ) ) = 0
482* MOD( JA-1, DESCA( NB_ ) ) = 0
483* MOD( IB-1, DESCB( MB_ ) ) = 0
484* MOD( JB-1, DESCB( NB_ ) ) = 0
485*
486* =====================================================================
487*
488* .. Parameters ..
489 INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
490 $ MB_, NB_, RSRC_, CSRC_, LLD_
491 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
492 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
493 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
494 DOUBLE PRECISION ONE
495 parameter( one = 1.0d+0 )
496 DOUBLE PRECISION FIVE, ZERO
497 parameter( five = 5.0d+0, zero = 0.0d+0 )
498 INTEGER IERRNPD
499 parameter( ierrnpd = 16 )
500* ..
501* .. Local Scalars ..
502 LOGICAL ALLEIG, INDEIG, LQUERY, UPPER, VALEIG, WANTZ
503 CHARACTER TRANS
504 INTEGER ANB, IACOL, IAROW, IBCOL, IBROW, ICOFFA,
505 $ ICOFFB, ICTXT, IROFFA, IROFFB, LIWMIN, LWMIN,
506 $ LWOPT, MQ0, MYCOL, MYROW, NB, NEIG, NN, NP0,
507 $ NPCOL, NPROW, NPS, NQ0, NSYGST_LWOPT,
508 $ NSYTRD_LWOPT, SQNPC
509 DOUBLE PRECISION EPS, SCALE
510* ..
511* .. Local Arrays ..
512 INTEGER IDUM1( 5 ), IDUM2( 5 )
513* ..
514* .. External Functions ..
515 LOGICAL LSAME
516 INTEGER ICEIL, INDXG2P, NUMROC, PJLAENV
517 DOUBLE PRECISION PDLAMCH
519* ..
520* .. External Subroutines ..
521 EXTERNAL blacs_gridinfo, chk1mat, dgebr2d, dgebs2d,
522 $ dscal, pchk1mat, pchk2mat, pdpotrf, pdsyevx,
523 $ pdsyngst, pdtrmm, pdtrsm, pxerbla
524* ..
525* .. Intrinsic Functions ..
526 INTRINSIC abs, dble, ichar, int, max, min, mod, sqrt
527* ..
528* .. Executable Statements ..
529* This is just to keep ftnchek and toolpack/1 happy
530 IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
531 $ rsrc_.LT.0 )RETURN
532*
533* Get grid parameters
534*
535 ictxt = desca( ctxt_ )
536 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
537*
538* Test the input parameters
539*
540 info = 0
541 IF( nprow.EQ.-1 ) THEN
542 info = -( 900+ctxt_ )
543 ELSE IF( desca( ctxt_ ).NE.descb( ctxt_ ) ) THEN
544 info = -( 1300+ctxt_ )
545 ELSE IF( desca( ctxt_ ).NE.descz( ctxt_ ) ) THEN
546 info = -( 2600+ctxt_ )
547 ELSE
548*
549* Get machine constants.
550*
551 eps = pdlamch( desca( ctxt_ ), 'Precision' )
552*
553 wantz = lsame( jobz, 'V' )
554 upper = lsame( uplo, 'U' )
555 alleig = lsame( range, 'A' )
556 valeig = lsame( range, 'V' )
557 indeig = lsame( range, 'I' )
558 CALL chk1mat( n, 4, n, 4, ia, ja, desca, 9, info )
559 CALL chk1mat( n, 4, n, 4, ib, jb, descb, 13, info )
560 CALL chk1mat( n, 4, n, 4, iz, jz, descz, 26, info )
561 IF( info.EQ.0 ) THEN
562 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
563 work( 1 ) = abstol
564 IF( valeig ) THEN
565 work( 2 ) = vl
566 work( 3 ) = vu
567 ELSE
568 work( 2 ) = zero
569 work( 3 ) = zero
570 END IF
571 CALL dgebs2d( desca( ctxt_ ), 'ALL', ' ', 3, 1, work, 3 )
572 ELSE
573 CALL dgebr2d( desca( ctxt_ ), 'ALL', ' ', 3, 1, work, 3,
574 $ 0, 0 )
575 END IF
576 iarow = indxg2p( ia, desca( mb_ ), myrow, desca( rsrc_ ),
577 $ nprow )
578 ibrow = indxg2p( ib, descb( mb_ ), myrow, descb( rsrc_ ),
579 $ nprow )
580 iacol = indxg2p( ja, desca( nb_ ), mycol, desca( csrc_ ),
581 $ npcol )
582 ibcol = indxg2p( jb, descb( nb_ ), mycol, descb( csrc_ ),
583 $ npcol )
584 iroffa = mod( ia-1, desca( mb_ ) )
585 icoffa = mod( ja-1, desca( nb_ ) )
586 iroffb = mod( ib-1, descb( mb_ ) )
587 icoffb = mod( jb-1, descb( nb_ ) )
588*
589* Compute the total amount of space needed
590*
591 lquery = .false.
592 IF( lwork.EQ.-1 .OR. liwork.EQ.-1 )
593 $ lquery = .true.
594*
595 liwmin = 6*max( n, ( nprow*npcol )+1, 4 )
596*
597 nb = desca( mb_ )
598 nn = max( n, nb, 2 )
599 np0 = numroc( nn, nb, 0, 0, nprow )
600*
601 IF( ( .NOT.wantz ) .OR. ( valeig .AND. ( .NOT.lquery ) ) )
602 $ THEN
603 lwmin = 5*n + max( 5*nn, nb*( np0+1 ) )
604 IF( wantz ) THEN
605 mq0 = numroc( max( n, nb, 2 ), nb, 0, 0, npcol )
606 lwopt = 5*n + max( 5*nn, np0*mq0+2*nb*nb )
607 ELSE
608 lwopt = lwmin
609 END IF
610 neig = 0
611 ELSE
612 IF( alleig .OR. valeig ) THEN
613 neig = n
614 ELSE IF( indeig ) THEN
615 neig = iu - il + 1
616 END IF
617 mq0 = numroc( max( neig, nb, 2 ), nb, 0, 0, npcol )
618 lwmin = 5*n + max( 5*nn, np0*mq0+2*nb*nb ) +
619 $ iceil( neig, nprow*npcol )*nn
620 lwopt = lwmin
621*
622 END IF
623*
624* Compute how much workspace is needed to use the
625* new TRD and GST algorithms
626*
627 anb = pjlaenv( ictxt, 3, 'PDSYTTRD', 'L', 0, 0, 0, 0 )
628 sqnpc = int( sqrt( dble( nprow*npcol ) ) )
629 nps = max( numroc( n, 1, 0, 0, sqnpc ), 2*anb )
630 nsytrd_lwopt = 2*( anb+1 )*( 4*nps+2 ) + ( nps+4 )*nps
631 nb = desca( mb_ )
632 np0 = numroc( n, nb, 0, 0, nprow )
633 nq0 = numroc( n, nb, 0, 0, npcol )
634 nsygst_lwopt = 2*np0*nb + nq0*nb + nb*nb
635 lwopt = max( lwopt, n+nsytrd_lwopt, nsygst_lwopt )
636*
637* Version 1.0 Limitations
638*
639 IF( ibtype.LT.1 .OR. ibtype.GT.3 ) THEN
640 info = -1
641 ELSE IF( .NOT.( wantz .OR. lsame( jobz, 'N' ) ) ) THEN
642 info = -2
643 ELSE IF( .NOT.( alleig .OR. valeig .OR. indeig ) ) THEN
644 info = -3
645 ELSE IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
646 info = -4
647 ELSE IF( n.LT.0 ) THEN
648 info = -5
649 ELSE IF( iroffa.NE.0 ) THEN
650 info = -7
651 ELSE IF( icoffa.NE.0 ) THEN
652 info = -8
653 ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
654 info = -( 900+nb_ )
655 ELSE IF( desca( m_ ).NE.descb( m_ ) ) THEN
656 info = -( 1300+m_ )
657 ELSE IF( desca( n_ ).NE.descb( n_ ) ) THEN
658 info = -( 1300+n_ )
659 ELSE IF( desca( mb_ ).NE.descb( mb_ ) ) THEN
660 info = -( 1300+mb_ )
661 ELSE IF( desca( nb_ ).NE.descb( nb_ ) ) THEN
662 info = -( 1300+nb_ )
663 ELSE IF( desca( rsrc_ ).NE.descb( rsrc_ ) ) THEN
664 info = -( 1300+rsrc_ )
665 ELSE IF( desca( csrc_ ).NE.descb( csrc_ ) ) THEN
666 info = -( 1300+csrc_ )
667 ELSE IF( desca( ctxt_ ).NE.descb( ctxt_ ) ) THEN
668 info = -( 1300+ctxt_ )
669 ELSE IF( desca( m_ ).NE.descz( m_ ) ) THEN
670 info = -( 2200+m_ )
671 ELSE IF( desca( n_ ).NE.descz( n_ ) ) THEN
672 info = -( 2200+n_ )
673 ELSE IF( desca( mb_ ).NE.descz( mb_ ) ) THEN
674 info = -( 2200+mb_ )
675 ELSE IF( desca( nb_ ).NE.descz( nb_ ) ) THEN
676 info = -( 2200+nb_ )
677 ELSE IF( desca( rsrc_ ).NE.descz( rsrc_ ) ) THEN
678 info = -( 2200+rsrc_ )
679 ELSE IF( desca( csrc_ ).NE.descz( csrc_ ) ) THEN
680 info = -( 2200+csrc_ )
681 ELSE IF( desca( ctxt_ ).NE.descz( ctxt_ ) ) THEN
682 info = -( 2200+ctxt_ )
683 ELSE IF( iroffb.NE.0 .OR. ibrow.NE.iarow ) THEN
684 info = -11
685 ELSE IF( icoffb.NE.0 .OR. ibcol.NE.iacol ) THEN
686 info = -12
687 ELSE IF( valeig .AND. n.GT.0 .AND. vu.LE.vl ) THEN
688 info = -15
689 ELSE IF( indeig .AND. ( il.LT.1 .OR. il.GT.max( 1, n ) ) )
690 $ THEN
691 info = -16
692 ELSE IF( indeig .AND. ( iu.LT.min( n, il ) .OR. iu.GT.n ) )
693 $ THEN
694 info = -17
695 ELSE IF( valeig .AND. ( abs( work( 2 )-vl ).GT.five*eps*
696 $ abs( vl ) ) ) THEN
697 info = -14
698 ELSE IF( valeig .AND. ( abs( work( 3 )-vu ).GT.five*eps*
699 $ abs( vu ) ) ) THEN
700 info = -15
701 ELSE IF( abs( work( 1 )-abstol ).GT.five*eps*abs( abstol ) )
702 $ THEN
703 info = -18
704 ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
705 info = -28
706 ELSE IF( liwork.LT.liwmin .AND. .NOT.lquery ) THEN
707 info = -30
708 END IF
709 END IF
710 idum1( 1 ) = ibtype
711 idum2( 1 ) = 1
712 IF( wantz ) THEN
713 idum1( 2 ) = ichar( 'V' )
714 ELSE
715 idum1( 2 ) = ichar( 'N' )
716 END IF
717 idum2( 2 ) = 2
718 IF( upper ) THEN
719 idum1( 3 ) = ichar( 'U' )
720 ELSE
721 idum1( 3 ) = ichar( 'L' )
722 END IF
723 idum2( 3 ) = 3
724 IF( alleig ) THEN
725 idum1( 4 ) = ichar( 'A' )
726 ELSE IF( indeig ) THEN
727 idum1( 4 ) = ichar( 'I' )
728 ELSE
729 idum1( 4 ) = ichar( 'V' )
730 END IF
731 idum2( 4 ) = 4
732 IF( lquery ) THEN
733 idum1( 5 ) = -1
734 ELSE
735 idum1( 5 ) = 1
736 END IF
737 idum2( 5 ) = 5
738 CALL pchk2mat( n, 4, n, 4, ia, ja, desca, 9, n, 4, n, 4, ib,
739 $ jb, descb, 13, 5, idum1, idum2, info )
740 CALL pchk1mat( n, 4, n, 4, iz, jz, descz, 26, 0, idum1, idum2,
741 $ info )
742 END IF
743*
744 iwork( 1 ) = liwmin
745 work( 1 ) = dble( lwopt )
746*
747 IF( info.NE.0 ) THEN
748 CALL pxerbla( ictxt, 'PDSYGVX ', -info )
749 RETURN
750 ELSE IF( lquery ) THEN
751 RETURN
752 END IF
753*
754* Form a Cholesky factorization of sub( B ).
755*
756 CALL pdpotrf( uplo, n, b, ib, jb, descb, info )
757 IF( info.NE.0 ) THEN
758 iwork( 1 ) = liwmin
759 work( 1 ) = dble( lwopt )
760 ifail( 1 ) = info
761 info = ierrnpd
762 RETURN
763 END IF
764*
765* Transform problem to standard eigenvalue problem and solve.
766*
767 CALL pdsyngst( ibtype, uplo, n, a, ia, ja, desca, b, ib, jb,
768 $ descb, scale, work, lwork, info )
769 CALL pdsyevx( jobz, range, uplo, n, a, ia, ja, desca, vl, vu, il,
770 $ iu, abstol, m, nz, w, orfac, z, iz, jz, descz, work,
771 $ lwork, iwork, liwork, ifail, iclustr, gap, info )
772*
773 IF( wantz ) THEN
774*
775* Backtransform eigenvectors to the original problem.
776*
777 neig = m
778 IF( ibtype.EQ.1 .OR. ibtype.EQ.2 ) THEN
779*
780* For sub( A )*x=(lambda)*sub( B )*x and
781* sub( A )*sub( B )*x=(lambda)*x; backtransform eigenvectors:
782* x = inv(L)'*y or inv(U)*y
783*
784 IF( upper ) THEN
785 trans = 'N'
786 ELSE
787 trans = 'T'
788 END IF
789*
790 CALL pdtrsm( 'Left', uplo, trans, 'Non-unit', n, neig, one,
791 $ b, ib, jb, descb, z, iz, jz, descz )
792*
793 ELSE IF( ibtype.EQ.3 ) THEN
794*
795* For sub( B )*sub( A )*x=(lambda)*x;
796* backtransform eigenvectors: x = L*y or U'*y
797*
798 IF( upper ) THEN
799 trans = 'T'
800 ELSE
801 trans = 'N'
802 END IF
803*
804 CALL pdtrmm( 'Left', uplo, trans, 'Non-unit', n, neig, one,
805 $ b, ib, jb, descb, z, iz, jz, descz )
806 END IF
807 END IF
808*
809 IF( scale.NE.one ) THEN
810 CALL dscal( n, scale, w, 1 )
811 END IF
812*
813 iwork( 1 ) = liwmin
814 work( 1 ) = dble( lwopt )
815 RETURN
816*
817* End of PDSYGVX
818*
subroutine chk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, info)
Definition chk1mat.f:3
integer function iceil(inum, idenom)
Definition iceil.f:2
integer function indxg2p(indxglob, nb, iproc, isrcproc, nprocs)
Definition indxg2p.f:2
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pchk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, nextra, ex, expos, info)
Definition pchkxmat.f:3
subroutine pchk2mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, mb, mbpos0, nb, nbpos0, ib, jb, descb, descbpos0, nextra, ex, expos, info)
Definition pchkxmat.f:175
double precision function pdlamch(ictxt, cmach)
Definition pdblastst.f:6769
subroutine pdpotrf(uplo, n, a, ia, ja, desca, info)
Definition pdpotrf.f:2
subroutine pdsyevx(jobz, range, uplo, n, a, ia, ja, desca, vl, vu, il, iu, abstol, m, nz, w, orfac, z, iz, jz, descz, work, lwork, iwork, liwork, ifail, iclustr, gap, info)
Definition pdsyevx.f:5
subroutine pdsyngst(ibtype, uplo, n, a, ia, ja, desca, b, ib, jb, descb, scale, work, lwork, info)
Definition pdsyngst.f:3
integer function pjlaenv(ictxt, ispec, name, opts, n1, n2, n3, n4)
Definition pjlaenv.f:3
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2
logical function lsame(ca, cb)
Definition tools.f:1724
Here is the call graph for this function:
Here is the caller graph for this function: