ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdsygvx.f
Go to the documentation of this file.
1  SUBROUTINE pdsygvx( IBTYPE, JOBZ, RANGE, UPLO, N, A, IA, JA,
2  $ DESCA, B, IB, JB, DESCB, VL, VU, IL, IU,
3  $ ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ,
4  $ WORK, LWORK, IWORK, LIWORK, IFAIL, ICLUSTR,
5  $ GAP, INFO )
6 *
7 * -- ScaLAPACK routine (version 1.7) --
8 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9 * and University of California, Berkeley.
10 * October 15, 1999
11 *
12 * .. Scalar Arguments ..
13  CHARACTER JOBZ, RANGE, UPLO
14  INTEGER IA, IB, IBTYPE, IL, INFO, IU, IZ, JA, JB, JZ,
15  $ LIWORK, LWORK, M, N, NZ
16  DOUBLE PRECISION ABSTOL, ORFAC, VL, VU
17 * ..
18 * .. Array Arguments ..
19 *
20  INTEGER DESCA( * ), DESCB( * ), DESCZ( * ),
21  $ ICLUSTR( * ), IFAIL( * ), IWORK( * )
22  DOUBLE PRECISION A( * ), B( * ), GAP( * ), W( * ), WORK( * ),
23  $ Z( * )
24 * ..
25 *
26 * Purpose
27 *
28 * =======
29 *
30 * PDSYGVX computes all the eigenvalues, and optionally,
31 * the eigenvectors
32 * of a real generalized SY-definite eigenproblem, of the form
33 * sub( A )*x=(lambda)*sub( B )*x, sub( A )*sub( B )x=(lambda)*x, or
34 * sub( B )*sub( A )*x=(lambda)*x.
35 * Here sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ) is assumed to be
36 * SY, and sub( B ) denoting B( IB:IB+N-1, JB:JB+N-1 ) is assumed
37 * to be symmetric positive definite.
38 *
39 * Notes
40 * =====
41 *
42 *
43 * Each global data object is described by an associated description
44 * vector. This vector stores the information required to establish
45 * the mapping between an object element and its corresponding process
46 * and memory location.
47 *
48 * Let A be a generic term for any 2D block cyclicly distributed array.
49 * Such a global array has an associated description vector DESCA.
50 * In the following comments, the character _ should be read as
51 * "of the global array".
52 *
53 * NOTATION STORED IN EXPLANATION
54 * --------------- -------------- --------------------------------------
55 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
56 * DTYPE_A = 1.
57 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
58 * the BLACS process grid A is distribu-
59 * ted over. The context itself is glo-
60 * bal, but the handle (the integer
61 * value) may vary.
62 * M_A (global) DESCA( M_ ) The number of rows in the global
63 * array A.
64 * N_A (global) DESCA( N_ ) The number of columns in the global
65 * array A.
66 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
67 * the rows of the array.
68 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
69 * the columns of the array.
70 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
71 * row of the array A is distributed.
72 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
73 * first column of the array A is
74 * distributed.
75 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
76 * array. LLD_A >= MAX(1,LOCr(M_A)).
77 *
78 * Let K be the number of rows or columns of a distributed matrix,
79 * and assume that its process grid has dimension p x q.
80 * LOCr( K ) denotes the number of elements of K that a process
81 * would receive if K were distributed over the p processes of its
82 * process column.
83 * Similarly, LOCc( K ) denotes the number of elements of K that a
84 * process would receive if K were distributed over the q processes of
85 * its process row.
86 * The values of LOCr() and LOCc() may be determined via a call to the
87 * ScaLAPACK tool function, NUMROC:
88 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
89 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
90 * An upper bound for these quantities may be computed by:
91 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
92 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
93 *
94 *
95 * Arguments
96 * =========
97 *
98 * IBTYPE (global input) INTEGER
99 * Specifies the problem type to be solved:
100 * = 1: sub( A )*x = (lambda)*sub( B )*x
101 * = 2: sub( A )*sub( B )*x = (lambda)*x
102 * = 3: sub( B )*sub( A )*x = (lambda)*x
103 *
104 * JOBZ (global input) CHARACTER*1
105 * = 'N': Compute eigenvalues only;
106 * = 'V': Compute eigenvalues and eigenvectors.
107 *
108 * RANGE (global input) CHARACTER*1
109 * = 'A': all eigenvalues will be found.
110 * = 'V': all eigenvalues in the interval [VL,VU] will be found.
111 * = 'I': the IL-th through IU-th eigenvalues will be found.
112 *
113 * UPLO (global input) CHARACTER*1
114 * = 'U': Upper triangles of sub( A ) and sub( B ) are stored;
115 * = 'L': Lower triangles of sub( A ) and sub( B ) are stored.
116 *
117 * N (global input) INTEGER
118 * The order of the matrices sub( A ) and sub( B ). N >= 0.
119 *
120 * A (local input/local output) DOUBLE PRECISION pointer into the
121 * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
122 * On entry, this array contains the local pieces of the
123 * N-by-N symmetric distributed matrix sub( A ). If UPLO = 'U',
124 * the leading N-by-N upper triangular part of sub( A ) contains
125 * the upper triangular part of the matrix. If UPLO = 'L', the
126 * leading N-by-N lower triangular part of sub( A ) contains
127 * the lower triangular part of the matrix.
128 *
129 * On exit, if JOBZ = 'V', then if INFO = 0, sub( A ) contains
130 * the distributed matrix Z of eigenvectors. The eigenvectors
131 * are normalized as follows:
132 * if IBTYPE = 1 or 2, Z**T*sub( B )*Z = I;
133 * if IBTYPE = 3, Z**T*inv( sub( B ) )*Z = I.
134 * If JOBZ = 'N', then on exit the upper triangle (if UPLO='U')
135 * or the lower triangle (if UPLO='L') of sub( A ), including
136 * the diagonal, is destroyed.
137 *
138 * IA (global input) INTEGER
139 * The row index in the global array A indicating the first
140 * row of sub( A ).
141 *
142 * JA (global input) INTEGER
143 * The column index in the global array A indicating the
144 * first column of sub( A ).
145 *
146 * DESCA (global and local input) INTEGER array of dimension DLEN_.
147 * The array descriptor for the distributed matrix A.
148 * If DESCA( CTXT_ ) is incorrect, PDSYGVX cannot guarantee
149 * correct error reporting.
150 *
151 * B (local input/local output) DOUBLE PRECISION pointer into the
152 * local memory to an array of dimension (LLD_B, LOCc(JB+N-1)).
153 * On entry, this array contains the local pieces of the
154 * N-by-N symmetric distributed matrix sub( B ). If UPLO = 'U',
155 * the leading N-by-N upper triangular part of sub( B ) contains
156 * the upper triangular part of the matrix. If UPLO = 'L', the
157 * leading N-by-N lower triangular part of sub( B ) contains
158 * the lower triangular part of the matrix.
159 *
160 * On exit, if INFO <= N, the part of sub( B ) containing the
161 * matrix is overwritten by the triangular factor U or L from
162 * the Cholesky factorization sub( B ) = U**T*U or
163 * sub( B ) = L*L**T.
164 *
165 * IB (global input) INTEGER
166 * The row index in the global array B indicating the first
167 * row of sub( B ).
168 *
169 * JB (global input) INTEGER
170 * The column index in the global array B indicating the
171 * first column of sub( B ).
172 *
173 * DESCB (global and local input) INTEGER array of dimension DLEN_.
174 * The array descriptor for the distributed matrix B.
175 * DESCB( CTXT_ ) must equal DESCA( CTXT_ )
176 *
177 * VL (global input) DOUBLE PRECISION
178 * If RANGE='V', the lower bound of the interval to be searched
179 * for eigenvalues. Not referenced if RANGE = 'A' or 'I'.
180 *
181 * VU (global input) DOUBLE PRECISION
182 * If RANGE='V', the upper bound of the interval to be searched
183 * for eigenvalues. Not referenced if RANGE = 'A' or 'I'.
184 *
185 * IL (global input) INTEGER
186 * If RANGE='I', the index (from smallest to largest) of the
187 * smallest eigenvalue to be returned. IL >= 1.
188 * Not referenced if RANGE = 'A' or 'V'.
189 *
190 * IU (global input) INTEGER
191 * If RANGE='I', the index (from smallest to largest) of the
192 * largest eigenvalue to be returned. min(IL,N) <= IU <= N.
193 * Not referenced if RANGE = 'A' or 'V'.
194 *
195 * ABSTOL (global input) DOUBLE PRECISION
196 * If JOBZ='V', setting ABSTOL to PDLAMCH( CONTEXT, 'U') yields
197 * the most orthogonal eigenvectors.
198 *
199 * The absolute error tolerance for the eigenvalues.
200 * An approximate eigenvalue is accepted as converged
201 * when it is determined to lie in an interval [a,b]
202 * of width less than or equal to
203 *
204 * ABSTOL + EPS * max( |a|,|b| ) ,
205 *
206 * where EPS is the machine precision. If ABSTOL is less than
207 * or equal to zero, then EPS*norm(T) will be used in its place,
208 * where norm(T) is the 1-norm of the tridiagonal matrix
209 * obtained by reducing A to tridiagonal form.
210 *
211 * Eigenvalues will be computed most accurately when ABSTOL is
212 * set to twice the underflow threshold 2*PDLAMCH('S') not zero.
213 * If this routine returns with ((MOD(INFO,2).NE.0) .OR.
214 * (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or
215 * eigenvectors did not converge, try setting ABSTOL to
216 * 2*PDLAMCH('S').
217 *
218 * See "Computing Small Singular Values of Bidiagonal Matrices
219 * with Guaranteed High Relative Accuracy," by Demmel and
220 * Kahan, LAPACK Working Note #3.
221 *
222 * See "On the correctness of Parallel Bisection in Floating
223 * Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70
224 *
225 * M (global output) INTEGER
226 * Total number of eigenvalues found. 0 <= M <= N.
227 *
228 * NZ (global output) INTEGER
229 * Total number of eigenvectors computed. 0 <= NZ <= M.
230 * The number of columns of Z that are filled.
231 * If JOBZ .NE. 'V', NZ is not referenced.
232 * If JOBZ .EQ. 'V', NZ = M unless the user supplies
233 * insufficient space and PDSYGVX is not able to detect this
234 * before beginning computation. To get all the eigenvectors
235 * requested, the user must supply both sufficient
236 * space to hold the eigenvectors in Z (M .LE. DESCZ(N_))
237 * and sufficient workspace to compute them. (See LWORK below.)
238 * PDSYGVX is always able to detect insufficient space without
239 * computation unless RANGE .EQ. 'V'.
240 *
241 * W (global output) DOUBLE PRECISION array, dimension (N)
242 * On normal exit, the first M entries contain the selected
243 * eigenvalues in ascending order.
244 *
245 * ORFAC (global input) DOUBLE PRECISION
246 * Specifies which eigenvectors should be reorthogonalized.
247 * Eigenvectors that correspond to eigenvalues which are within
248 * tol=ORFAC*norm(A) of each other are to be reorthogonalized.
249 * However, if the workspace is insufficient (see LWORK),
250 * tol may be decreased until all eigenvectors to be
251 * reorthogonalized can be stored in one process.
252 * No reorthogonalization will be done if ORFAC equals zero.
253 * A default value of 10^-3 is used if ORFAC is negative.
254 * ORFAC should be identical on all processes.
255 *
256 * Z (local output) DOUBLE PRECISION array,
257 * global dimension (N, N),
258 * local dimension ( LLD_Z, LOCc(JZ+N-1) )
259 * If JOBZ = 'V', then on normal exit the first M columns of Z
260 * contain the orthonormal eigenvectors of the matrix
261 * corresponding to the selected eigenvalues. If an eigenvector
262 * fails to converge, then that column of Z contains the latest
263 * approximation to the eigenvector, and the index of the
264 * eigenvector is returned in IFAIL.
265 * If JOBZ = 'N', then Z is not referenced.
266 *
267 * IZ (global input) INTEGER
268 * The row index in the global array Z indicating the first
269 * row of sub( Z ).
270 *
271 * JZ (global input) INTEGER
272 * The column index in the global array Z indicating the
273 * first column of sub( Z ).
274 *
275 * DESCZ (global and local input) INTEGER array of dimension DLEN_.
276 * The array descriptor for the distributed matrix Z.
277 * DESCZ( CTXT_ ) must equal DESCA( CTXT_ )
278 *
279 * WORK (local workspace/output) DOUBLE PRECISION array,
280 * dimension max(3,LWORK)
281 * if JOBZ='N' WORK(1) = optimal amount of workspace
282 * required to compute eigenvalues efficiently
283 * if JOBZ='V' WORK(1) = optimal amount of workspace
284 * required to compute eigenvalues and eigenvectors
285 * efficiently with no guarantee on orthogonality.
286 * If RANGE='V', it is assumed that all eigenvectors
287 * may be required.
288 *
289 * LWORK (local input) INTEGER
290 * See below for definitions of variables used to define LWORK.
291 * If no eigenvectors are requested (JOBZ = 'N') then
292 * LWORK >= 5 * N + MAX( 5 * NN, NB * ( NP0 + 1 ) )
293 * If eigenvectors are requested (JOBZ = 'V' ) then
294 * the amount of workspace required to guarantee that all
295 * eigenvectors are computed is:
296 * LWORK >= 5 * N + MAX( 5*NN, NP0 * MQ0 + 2 * NB * NB ) +
297 * ICEIL( NEIG, NPROW*NPCOL)*NN
298 *
299 * The computed eigenvectors may not be orthogonal if the
300 * minimal workspace is supplied and ORFAC is too small.
301 * If you want to guarantee orthogonality (at the cost
302 * of potentially poor performance) you should add
303 * the following to LWORK:
304 * (CLUSTERSIZE-1)*N
305 * where CLUSTERSIZE is the number of eigenvalues in the
306 * largest cluster, where a cluster is defined as a set of
307 * close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) |
308 * W(J+1) <= W(J) + ORFAC*2*norm(A) }
309 * Variable definitions:
310 * NEIG = number of eigenvectors requested
311 * NB = DESCA( MB_ ) = DESCA( NB_ ) = DESCZ( MB_ ) =
312 * DESCZ( NB_ )
313 * NN = MAX( N, NB, 2 )
314 * DESCA( RSRC_ ) = DESCA( NB_ ) = DESCZ( RSRC_ ) =
315 * DESCZ( CSRC_ ) = 0
316 * NP0 = NUMROC( NN, NB, 0, 0, NPROW )
317 * MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL )
318 * ICEIL( X, Y ) is a ScaLAPACK function returning
319 * ceiling(X/Y)
320 *
321 * When LWORK is too small:
322 * If LWORK is too small to guarantee orthogonality,
323 * PDSYGVX attempts to maintain orthogonality in
324 * the clusters with the smallest
325 * spacing between the eigenvalues.
326 * If LWORK is too small to compute all the eigenvectors
327 * requested, no computation is performed and INFO=-23
328 * is returned. Note that when RANGE='V', PDSYGVX does
329 * not know how many eigenvectors are requested until
330 * the eigenvalues are computed. Therefore, when RANGE='V'
331 * and as long as LWORK is large enough to allow PDSYGVX to
332 * compute the eigenvalues, PDSYGVX will compute the
333 * eigenvalues and as many eigenvectors as it can.
334 *
335 * Relationship between workspace, orthogonality & performance:
336 * Greater performance can be achieved if adequate workspace
337 * is provided. On the other hand, in some situations,
338 * performance can decrease as the workspace provided
339 * increases above the workspace amount shown below:
340 *
341 * For optimal performance, greater workspace may be
342 * needed, i.e.
343 * LWORK >= MAX( LWORK, 5 * N + NSYTRD_LWOPT,
344 * NSYGST_LWOPT )
345 * Where:
346 * LWORK, as defined previously, depends upon the number
347 * of eigenvectors requested, and
348 * NSYTRD_LWOPT = N + 2*( ANB+1 )*( 4*NPS+2 ) +
349 * ( NPS + 3 ) * NPS
350 * NSYGST_LWOPT = 2*NP0*NB + NQ0*NB + NB*NB
351 *
352 * ANB = PJLAENV( DESCA( CTXT_), 3, 'PDSYTTRD', 'L',
353 * 0, 0, 0, 0)
354 * SQNPC = INT( SQRT( DBLE( NPROW * NPCOL ) ) )
355 * NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB )
356 * NB = DESCA( MB_ )
357 * NP0 = NUMROC( N, NB, 0, 0, NPROW )
358 * NQ0 = NUMROC( N, NB, 0, 0, NPCOL )
359 *
360 * NUMROC is a ScaLAPACK tool functions;
361 * PJLAENV is a ScaLAPACK envionmental inquiry function
362 * MYROW, MYCOL, NPROW and NPCOL can be determined by
363 * calling the subroutine BLACS_GRIDINFO.
364 *
365 * For large N, no extra workspace is needed, however the
366 * biggest boost in performance comes for small N, so it
367 * is wise to provide the extra workspace (typically less
368 * than a Megabyte per process).
369 *
370 * If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing
371 * enough space to compute all the eigenvectors
372 * orthogonally will cause serious degradation in
373 * performance. In the limit (i.e. CLUSTERSIZE = N-1)
374 * PDSTEIN will perform no better than DSTEIN on 1 processor.
375 * For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing
376 * all eigenvectors will increase the total execution time
377 * by a factor of 2 or more.
378 * For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will
379 * grow as the square of the cluster size, all other factors
380 * remaining equal and assuming enough workspace. Less
381 * workspace means less reorthogonalization but faster
382 * execution.
383 *
384 * If LWORK = -1, then LWORK is global input and a workspace
385 * query is assumed; the routine only calculates the size
386 * required for optimal performance on all work arrays.
387 * Each of these values is returned in the first entry of the
388 * corresponding work array, and no error message is issued by
389 * PXERBLA.
390 *
391 *
392 * IWORK (local workspace) INTEGER array
393 * On return, IWORK(1) contains the amount of integer workspace
394 * required.
395 *
396 * LIWORK (local input) INTEGER
397 * size of IWORK
398 * LIWORK >= 6 * NNP
399 * Where:
400 * NNP = MAX( N, NPROW*NPCOL + 1, 4 )
401 *
402 * If LIWORK = -1, then LIWORK is global input and a workspace
403 * query is assumed; the routine only calculates the minimum
404 * and optimal size for all work arrays. Each of these
405 * values is returned in the first entry of the corresponding
406 * work array, and no error message is issued by PXERBLA.
407 *
408 * IFAIL (output) INTEGER array, dimension (N)
409 * IFAIL provides additional information when INFO .NE. 0
410 * If (MOD(INFO/16,2).NE.0) then IFAIL(1) indicates the order of
411 * the smallest minor which is not positive definite.
412 * If (MOD(INFO,2).NE.0) on exit, then IFAIL contains the
413 * indices of the eigenvectors that failed to converge.
414 *
415 * If neither of the above error conditions hold and JOBZ = 'V',
416 * then the first M elements of IFAIL are set to zero.
417 *
418 * ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL)
419 * This array contains indices of eigenvectors corresponding to
420 * a cluster of eigenvalues that could not be reorthogonalized
421 * due to insufficient workspace (see LWORK, ORFAC and INFO).
422 * Eigenvectors corresponding to clusters of eigenvalues indexed
423 * ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be
424 * reorthogonalized due to lack of workspace. Hence the
425 * eigenvectors corresponding to these clusters may not be
426 * orthogonal. ICLUSTR() is a zero terminated array.
427 * (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if
428 * K is the number of clusters
429 * ICLUSTR is not referenced if JOBZ = 'N'
430 *
431 * GAP (global output) DOUBLE PRECISION array,
432 * dimension (NPROW*NPCOL)
433 * This array contains the gap between eigenvalues whose
434 * eigenvectors could not be reorthogonalized. The output
435 * values in this array correspond to the clusters indicated
436 * by the array ICLUSTR. As a result, the dot product between
437 * eigenvectors correspoding to the I^th cluster may be as high
438 * as ( C * n ) / GAP(I) where C is a small constant.
439 *
440 * INFO (global output) INTEGER
441 * = 0: successful exit
442 * < 0: If the i-th argument is an array and the j-entry had
443 * an illegal value, then INFO = -(i*100+j), if the i-th
444 * argument is a scalar and had an illegal value, then
445 * INFO = -i.
446 * > 0: if (MOD(INFO,2).NE.0), then one or more eigenvectors
447 * failed to converge. Their indices are stored
448 * in IFAIL. Send e-mail to scalapack@cs.utk.edu
449 * if (MOD(INFO/2,2).NE.0),then eigenvectors corresponding
450 * to one or more clusters of eigenvalues could not be
451 * reorthogonalized because of insufficient workspace.
452 * The indices of the clusters are stored in the array
453 * ICLUSTR.
454 * if (MOD(INFO/4,2).NE.0), then space limit prevented
455 * PDSYGVX from computing all of the eigenvectors
456 * between VL and VU. The number of eigenvectors
457 * computed is returned in NZ.
458 * if (MOD(INFO/8,2).NE.0), then PDSTEBZ failed to
459 * compute eigenvalues.
460 * Send e-mail to scalapack@cs.utk.edu
461 * if (MOD(INFO/16,2).NE.0), then B was not positive
462 * definite. IFAIL(1) indicates the order of
463 * the smallest minor which is not positive definite.
464 *
465 * Alignment requirements
466 * ======================
467 *
468 * The distributed submatrices A(IA:*, JA:*), C(IC:IC+M-1,JC:JC+N-1),
469 * and B( IB:IB+N-1, JB:JB+N-1 ) must verify some alignment properties,
470 * namely the following expressions should be true:
471 *
472 * DESCA(MB_) = DESCA(NB_)
473 * IA = IB = IZ
474 * JA = IB = JZ
475 * DESCA(M_) = DESCB(M_) =DESCZ(M_)
476 * DESCA(N_) = DESCB(N_)= DESCZ(N_)
477 * DESCA(MB_) = DESCB(MB_) = DESCZ(MB_)
478 * DESCA(NB_) = DESCB(NB_) = DESCZ(NB_)
479 * DESCA(RSRC_) = DESCB(RSRC_) = DESCZ(RSRC_)
480 * DESCA(CSRC_) = DESCB(CSRC_) = DESCZ(CSRC_)
481 * MOD( IA-1, DESCA( MB_ ) ) = 0
482 * MOD( JA-1, DESCA( NB_ ) ) = 0
483 * MOD( IB-1, DESCB( MB_ ) ) = 0
484 * MOD( JB-1, DESCB( NB_ ) ) = 0
485 *
486 * =====================================================================
487 *
488 * .. Parameters ..
489  INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
490  $ MB_, NB_, RSRC_, CSRC_, LLD_
491  PARAMETER ( BLOCK_CYCLIC_2D = 1, dlen_ = 9, dtype_ = 1,
492  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
493  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
494  DOUBLE PRECISION ONE
495  parameter( one = 1.0d+0 )
496  DOUBLE PRECISION FIVE, ZERO
497  PARAMETER ( FIVE = 5.0d+0, zero = 0.0d+0 )
498  INTEGER IERRNPD
499  parameter( ierrnpd = 16 )
500 * ..
501 * .. Local Scalars ..
502  LOGICAL ALLEIG, INDEIG, LQUERY, UPPER, VALEIG, WANTZ
503  CHARACTER TRANS
504  INTEGER ANB, IACOL, IAROW, IBCOL, IBROW, ICOFFA,
505  $ ICOFFB, ICTXT, IROFFA, IROFFB, LIWMIN, LWMIN,
506  $ lwopt, mq0, mycol, myrow, nb, neig, nn, np0,
507  $ npcol, nprow, nps, nq0, nsygst_lwopt,
508  $ nsytrd_lwopt, sqnpc
509  DOUBLE PRECISION EPS, SCALE
510 * ..
511 * .. Local Arrays ..
512  INTEGER IDUM1( 5 ), IDUM2( 5 )
513 * ..
514 * .. External Functions ..
515  LOGICAL LSAME
516  INTEGER ICEIL, INDXG2P, NUMROC, PJLAENV
517  DOUBLE PRECISION PDLAMCH
518  EXTERNAL LSAME, ICEIL, INDXG2P, NUMROC, PJLAENV, PDLAMCH
519 * ..
520 * .. External Subroutines ..
521  EXTERNAL blacs_gridinfo, chk1mat, dgebr2d, dgebs2d,
522  $ dscal, pchk1mat, pchk2mat, pdpotrf, pdsyevx,
523  $ pdsyngst, pdtrmm, pdtrsm, pxerbla
524 * ..
525 * .. Intrinsic Functions ..
526  INTRINSIC abs, dble, ichar, int, max, min, mod, sqrt
527 * ..
528 * .. Executable Statements ..
529 * This is just to keep ftnchek and toolpack/1 happy
530  IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
531  $ rsrc_.LT.0 )RETURN
532 *
533 * Get grid parameters
534 *
535  ictxt = desca( ctxt_ )
536  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
537 *
538 * Test the input parameters
539 *
540  info = 0
541  IF( nprow.EQ.-1 ) THEN
542  info = -( 900+ctxt_ )
543  ELSE IF( desca( ctxt_ ).NE.descb( ctxt_ ) ) THEN
544  info = -( 1300+ctxt_ )
545  ELSE IF( desca( ctxt_ ).NE.descz( ctxt_ ) ) THEN
546  info = -( 2600+ctxt_ )
547  ELSE
548 *
549 * Get machine constants.
550 *
551  eps = pdlamch( desca( ctxt_ ), 'Precision' )
552 *
553  wantz = lsame( jobz, 'V' )
554  upper = lsame( uplo, 'U' )
555  alleig = lsame( range, 'A' )
556  valeig = lsame( range, 'V' )
557  indeig = lsame( range, 'I' )
558  CALL chk1mat( n, 4, n, 4, ia, ja, desca, 9, info )
559  CALL chk1mat( n, 4, n, 4, ib, jb, descb, 13, info )
560  CALL chk1mat( n, 4, n, 4, iz, jz, descz, 26, info )
561  IF( info.EQ.0 ) THEN
562  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
563  work( 1 ) = abstol
564  IF( valeig ) THEN
565  work( 2 ) = vl
566  work( 3 ) = vu
567  ELSE
568  work( 2 ) = zero
569  work( 3 ) = zero
570  END IF
571  CALL dgebs2d( desca( ctxt_ ), 'ALL', ' ', 3, 1, work, 3 )
572  ELSE
573  CALL dgebr2d( desca( ctxt_ ), 'ALL', ' ', 3, 1, work, 3,
574  $ 0, 0 )
575  END IF
576  iarow = indxg2p( ia, desca( mb_ ), myrow, desca( rsrc_ ),
577  $ nprow )
578  ibrow = indxg2p( ib, descb( mb_ ), myrow, descb( rsrc_ ),
579  $ nprow )
580  iacol = indxg2p( ja, desca( nb_ ), mycol, desca( csrc_ ),
581  $ npcol )
582  ibcol = indxg2p( jb, descb( nb_ ), mycol, descb( csrc_ ),
583  $ npcol )
584  iroffa = mod( ia-1, desca( mb_ ) )
585  icoffa = mod( ja-1, desca( nb_ ) )
586  iroffb = mod( ib-1, descb( mb_ ) )
587  icoffb = mod( jb-1, descb( nb_ ) )
588 *
589 * Compute the total amount of space needed
590 *
591  lquery = .false.
592  IF( lwork.EQ.-1 .OR. liwork.EQ.-1 )
593  $ lquery = .true.
594 *
595  liwmin = 6*max( n, ( nprow*npcol )+1, 4 )
596 *
597  nb = desca( mb_ )
598  nn = max( n, nb, 2 )
599  np0 = numroc( nn, nb, 0, 0, nprow )
600 *
601  IF( ( .NOT.wantz ) .OR. ( valeig .AND. ( .NOT.lquery ) ) )
602  $ THEN
603  lwmin = 5*n + max( 5*nn, nb*( np0+1 ) )
604  IF( wantz ) THEN
605  mq0 = numroc( max( n, nb, 2 ), nb, 0, 0, npcol )
606  lwopt = 5*n + max( 5*nn, np0*mq0+2*nb*nb )
607  ELSE
608  lwopt = lwmin
609  END IF
610  neig = 0
611  ELSE
612  IF( alleig .OR. valeig ) THEN
613  neig = n
614  ELSE IF( indeig ) THEN
615  neig = iu - il + 1
616  END IF
617  mq0 = numroc( max( neig, nb, 2 ), nb, 0, 0, npcol )
618  lwmin = 5*n + max( 5*nn, np0*mq0+2*nb*nb ) +
619  $ iceil( neig, nprow*npcol )*nn
620  lwopt = lwmin
621 *
622  END IF
623 *
624 * Compute how much workspace is needed to use the
625 * new TRD and GST algorithms
626 *
627  anb = pjlaenv( ictxt, 3, 'PDSYTTRD', 'L', 0, 0, 0, 0 )
628  sqnpc = int( sqrt( dble( nprow*npcol ) ) )
629  nps = max( numroc( n, 1, 0, 0, sqnpc ), 2*anb )
630  nsytrd_lwopt = 2*( anb+1 )*( 4*nps+2 ) + ( nps+4 )*nps
631  nb = desca( mb_ )
632  np0 = numroc( n, nb, 0, 0, nprow )
633  nq0 = numroc( n, nb, 0, 0, npcol )
634  nsygst_lwopt = 2*np0*nb + nq0*nb + nb*nb
635  lwopt = max( lwopt, n+nsytrd_lwopt, nsygst_lwopt )
636 *
637 * Version 1.0 Limitations
638 *
639  IF( ibtype.LT.1 .OR. ibtype.GT.3 ) THEN
640  info = -1
641  ELSE IF( .NOT.( wantz .OR. lsame( jobz, 'N' ) ) ) THEN
642  info = -2
643  ELSE IF( .NOT.( alleig .OR. valeig .OR. indeig ) ) THEN
644  info = -3
645  ELSE IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
646  info = -4
647  ELSE IF( n.LT.0 ) THEN
648  info = -5
649  ELSE IF( iroffa.NE.0 ) THEN
650  info = -7
651  ELSE IF( icoffa.NE.0 ) THEN
652  info = -8
653  ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
654  info = -( 900+nb_ )
655  ELSE IF( desca( m_ ).NE.descb( m_ ) ) THEN
656  info = -( 1300+m_ )
657  ELSE IF( desca( n_ ).NE.descb( n_ ) ) THEN
658  info = -( 1300+n_ )
659  ELSE IF( desca( mb_ ).NE.descb( mb_ ) ) THEN
660  info = -( 1300+mb_ )
661  ELSE IF( desca( nb_ ).NE.descb( nb_ ) ) THEN
662  info = -( 1300+nb_ )
663  ELSE IF( desca( rsrc_ ).NE.descb( rsrc_ ) ) THEN
664  info = -( 1300+rsrc_ )
665  ELSE IF( desca( csrc_ ).NE.descb( csrc_ ) ) THEN
666  info = -( 1300+csrc_ )
667  ELSE IF( desca( ctxt_ ).NE.descb( ctxt_ ) ) THEN
668  info = -( 1300+ctxt_ )
669  ELSE IF( desca( m_ ).NE.descz( m_ ) ) THEN
670  info = -( 2200+m_ )
671  ELSE IF( desca( n_ ).NE.descz( n_ ) ) THEN
672  info = -( 2200+n_ )
673  ELSE IF( desca( mb_ ).NE.descz( mb_ ) ) THEN
674  info = -( 2200+mb_ )
675  ELSE IF( desca( nb_ ).NE.descz( nb_ ) ) THEN
676  info = -( 2200+nb_ )
677  ELSE IF( desca( rsrc_ ).NE.descz( rsrc_ ) ) THEN
678  info = -( 2200+rsrc_ )
679  ELSE IF( desca( csrc_ ).NE.descz( csrc_ ) ) THEN
680  info = -( 2200+csrc_ )
681  ELSE IF( desca( ctxt_ ).NE.descz( ctxt_ ) ) THEN
682  info = -( 2200+ctxt_ )
683  ELSE IF( iroffb.NE.0 .OR. ibrow.NE.iarow ) THEN
684  info = -11
685  ELSE IF( icoffb.NE.0 .OR. ibcol.NE.iacol ) THEN
686  info = -12
687  ELSE IF( valeig .AND. n.GT.0 .AND. vu.LE.vl ) THEN
688  info = -15
689  ELSE IF( indeig .AND. ( il.LT.1 .OR. il.GT.max( 1, n ) ) )
690  $ THEN
691  info = -16
692  ELSE IF( indeig .AND. ( iu.LT.min( n, il ) .OR. iu.GT.n ) )
693  $ THEN
694  info = -17
695  ELSE IF( valeig .AND. ( abs( work( 2 )-vl ).GT.five*eps*
696  $ abs( vl ) ) ) THEN
697  info = -14
698  ELSE IF( valeig .AND. ( abs( work( 3 )-vu ).GT.five*eps*
699  $ abs( vu ) ) ) THEN
700  info = -15
701  ELSE IF( abs( work( 1 )-abstol ).GT.five*eps*abs( abstol ) )
702  $ THEN
703  info = -18
704  ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
705  info = -28
706  ELSE IF( liwork.LT.liwmin .AND. .NOT.lquery ) THEN
707  info = -30
708  END IF
709  END IF
710  idum1( 1 ) = ibtype
711  idum2( 1 ) = 1
712  IF( wantz ) THEN
713  idum1( 2 ) = ichar( 'V' )
714  ELSE
715  idum1( 2 ) = ichar( 'N' )
716  END IF
717  idum2( 2 ) = 2
718  IF( upper ) THEN
719  idum1( 3 ) = ichar( 'U' )
720  ELSE
721  idum1( 3 ) = ichar( 'L' )
722  END IF
723  idum2( 3 ) = 3
724  IF( alleig ) THEN
725  idum1( 4 ) = ichar( 'A' )
726  ELSE IF( indeig ) THEN
727  idum1( 4 ) = ichar( 'I' )
728  ELSE
729  idum1( 4 ) = ichar( 'V' )
730  END IF
731  idum2( 4 ) = 4
732  IF( lquery ) THEN
733  idum1( 5 ) = -1
734  ELSE
735  idum1( 5 ) = 1
736  END IF
737  idum2( 5 ) = 5
738  CALL pchk2mat( n, 4, n, 4, ia, ja, desca, 9, n, 4, n, 4, ib,
739  $ jb, descb, 13, 5, idum1, idum2, info )
740  CALL pchk1mat( n, 4, n, 4, iz, jz, descz, 26, 0, idum1, idum2,
741  $ info )
742  END IF
743 *
744  iwork( 1 ) = liwmin
745  work( 1 ) = dble( lwopt )
746 *
747  IF( info.NE.0 ) THEN
748  CALL pxerbla( ictxt, 'PDSYGVX ', -info )
749  RETURN
750  ELSE IF( lquery ) THEN
751  RETURN
752  END IF
753 *
754 * Form a Cholesky factorization of sub( B ).
755 *
756  CALL pdpotrf( uplo, n, b, ib, jb, descb, info )
757  IF( info.NE.0 ) THEN
758  iwork( 1 ) = liwmin
759  work( 1 ) = dble( lwopt )
760  ifail( 1 ) = info
761  info = ierrnpd
762  RETURN
763  END IF
764 *
765 * Transform problem to standard eigenvalue problem and solve.
766 *
767  CALL pdsyngst( ibtype, uplo, n, a, ia, ja, desca, b, ib, jb,
768  $ descb, scale, work, lwork, info )
769  CALL pdsyevx( jobz, range, uplo, n, a, ia, ja, desca, vl, vu, il,
770  $ iu, abstol, m, nz, w, orfac, z, iz, jz, descz, work,
771  $ lwork, iwork, liwork, ifail, iclustr, gap, info )
772 *
773  IF( wantz ) THEN
774 *
775 * Backtransform eigenvectors to the original problem.
776 *
777  neig = m
778  IF( ibtype.EQ.1 .OR. ibtype.EQ.2 ) THEN
779 *
780 * For sub( A )*x=(lambda)*sub( B )*x and
781 * sub( A )*sub( B )*x=(lambda)*x; backtransform eigenvectors:
782 * x = inv(L)'*y or inv(U)*y
783 *
784  IF( upper ) THEN
785  trans = 'N'
786  ELSE
787  trans = 'T'
788  END IF
789 *
790  CALL pdtrsm( 'Left', uplo, trans, 'Non-unit', n, neig, one,
791  $ b, ib, jb, descb, z, iz, jz, descz )
792 *
793  ELSE IF( ibtype.EQ.3 ) THEN
794 *
795 * For sub( B )*sub( A )*x=(lambda)*x;
796 * backtransform eigenvectors: x = L*y or U'*y
797 *
798  IF( upper ) THEN
799  trans = 'T'
800  ELSE
801  trans = 'N'
802  END IF
803 *
804  CALL pdtrmm( 'Left', uplo, trans, 'Non-unit', n, neig, one,
805  $ b, ib, jb, descb, z, iz, jz, descz )
806  END IF
807  END IF
808 *
809  IF( scale.NE.one ) THEN
810  CALL dscal( n, scale, w, 1 )
811  END IF
812 *
813  iwork( 1 ) = liwmin
814  work( 1 ) = dble( lwopt )
815  RETURN
816 *
817 * End of PDSYGVX
818 *
819  END
max
#define max(A, B)
Definition: pcgemr.c:180
pchk1mat
subroutine pchk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, NEXTRA, EX, EXPOS, INFO)
Definition: pchkxmat.f:3
pchk2mat
subroutine pchk2mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, MB, MBPOS0, NB, NBPOS0, IB, JB, DESCB, DESCBPOS0, NEXTRA, EX, EXPOS, INFO)
Definition: pchkxmat.f:175
pdsyngst
subroutine pdsyngst(IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, DESCB, SCALE, WORK, LWORK, INFO)
Definition: pdsyngst.f:3
chk1mat
subroutine chk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, INFO)
Definition: chk1mat.f:3
pdsyevx
subroutine pdsyevx(JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, VU, IL, IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ, WORK, LWORK, IWORK, LIWORK, IFAIL, ICLUSTR, GAP, INFO)
Definition: pdsyevx.f:5
pdpotrf
subroutine pdpotrf(UPLO, N, A, IA, JA, DESCA, INFO)
Definition: pdpotrf.f:2
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
pdsygvx
subroutine pdsygvx(IBTYPE, JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, DESCB, VL, VU, IL, IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ, WORK, LWORK, IWORK, LIWORK, IFAIL, ICLUSTR, GAP, INFO)
Definition: pdsygvx.f:6
min
#define min(A, B)
Definition: pcgemr.c:181