ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pssygvx.f
Go to the documentation of this file.
*  PSSYGVX -- driver for the real symmetric-definite generalized
*  eigenproblem on a 2D block-cyclic process grid (see Purpose).
      SUBROUTINE PSSYGVX( IBTYPE, JOBZ, RANGE, UPLO, N, A, IA, JA,
     $                    DESCA, B, IB, JB, DESCB, VL, VU, IL, IU,
     $                    ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ,
     $                    WORK, LWORK, IWORK, LIWORK, IFAIL, ICLUSTR,
     $                    GAP, INFO )
*
*  -- ScaLAPACK routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National
*     Laboratory, and University of California, Berkeley.
*     October 15, 1999
*
*     .. Scalar Arguments ..
      CHARACTER          JOBZ, RANGE, UPLO
      INTEGER            IA, IB, IBTYPE, IL, INFO, IU, IZ, JA, JB,
     $                   JZ, LIWORK, LWORK, M, N, NZ
      REAL               ABSTOL, ORFAC, VL, VU
*     ..
*     .. Array Arguments ..
*
      INTEGER            DESCA( * ), DESCB( * ), DESCZ( * ),
     $                   ICLUSTR( * ), IFAIL( * ), IWORK( * )
      REAL               A( * ), B( * ), GAP( * ), W( * ), WORK( * ),
     $                   Z( * )
*     ..
*
*  Purpose
*  =======
*
*  PSSYGVX computes all the eigenvalues, and optionally,
*  the eigenvectors
*  of a real generalized symmetric-definite eigenproblem, of the form
*  sub( A )*x=(lambda)*sub( B )*x,  sub( A )*sub( B )x=(lambda)*x,  or
*  sub( B )*sub( A )*x=(lambda)*x.
*  Here sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ) is assumed to be
*  symmetric, and sub( B ) denoting B( IB:IB+N-1, JB:JB+N-1 ) is
*  assumed to be symmetric positive definite.
*
*  The routine validates its arguments, forms the Cholesky
*  factorization of sub( B ) (PSPOTRF), reduces the problem to
*  standard form (PSSYNGST), solves it (PSSYEVX), back-transforms the
*  eigenvectors (PSTRSM or PSTRMM) and finally rescales the
*  eigenvalues by the factor SCALE returned by PSSYNGST.
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclicly distributed array.
*  Such a global array has an associated description vector DESCA.
*  In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- -----------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
*                                 DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) DESCA( M_ )    The number of rows in the global
*                                 array A.
*  N_A    (global) DESCA( N_ )    The number of columns in the global
*                                 array A.
*  MB_A   (global) DESCA( MB_ )   The blocking factor used to
*                                 distribute the rows of the array.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to
*                                 distribute the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Arguments
*  =========
*
*  IBTYPE  (global input) INTEGER
*          Specifies the problem type to be solved:
*          = 1:  sub( A )*x = (lambda)*sub( B )*x
*          = 2:  sub( A )*sub( B )*x = (lambda)*x
*          = 3:  sub( B )*sub( A )*x = (lambda)*x
*
*  JOBZ    (global input) CHARACTER*1
*          = 'N':  Compute eigenvalues only;
*          = 'V':  Compute eigenvalues and eigenvectors.
*
*  RANGE   (global input) CHARACTER*1
*          = 'A': all eigenvalues will be found.
*          = 'V': all eigenvalues in the interval [VL,VU] will be
*                 found.
*          = 'I': the IL-th through IU-th eigenvalues will be found.
*
*  UPLO    (global input) CHARACTER*1
*          = 'U':  Upper triangles of sub( A ) and sub( B ) are stored;
*          = 'L':  Lower triangles of sub( A ) and sub( B ) are stored.
*
*  N       (global input) INTEGER
*          The order of the matrices sub( A ) and sub( B ).  N >= 0.
*
*  A       (local input/local output) REAL pointer into the
*          local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
*          On entry, this array contains the local pieces of the
*          N-by-N symmetric distributed matrix sub( A ). If UPLO = 'U',
*          the leading N-by-N upper triangular part of sub( A )
*          contains the upper triangular part of the matrix.  If
*          UPLO = 'L', the leading N-by-N lower triangular part of
*          sub( A ) contains the lower triangular part of the matrix.
*
*          On exit, if JOBZ = 'V', then if INFO = 0, sub( A ) contains
*          the distributed matrix Z of eigenvectors.  The eigenvectors
*          are normalized as follows:
*          if IBTYPE = 1 or 2, Z**T*sub( B )*Z = I;
*          if IBTYPE = 3, Z**T*inv( sub( B ) )*Z = I.
*          If JOBZ = 'N', then on exit the upper triangle (if UPLO='U')
*          or the lower triangle (if UPLO='L') of sub( A ), including
*          the diagonal, is destroyed.
*
*  IA      (global input) INTEGER
*          The row index in the global array A indicating the first
*          row of sub( A ).
*
*  JA      (global input) INTEGER
*          The column index in the global array A indicating the
*          first column of sub( A ).
*
*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix A.
*          If DESCA( CTXT_ ) is incorrect, PSSYGVX cannot guarantee
*          correct error reporting.
*
*  B       (local input/local output) REAL pointer into the
*          local memory to an array of dimension (LLD_B, LOCc(JB+N-1)).
*          On entry, this array contains the local pieces of the
*          N-by-N symmetric distributed matrix sub( B ). If UPLO = 'U',
*          the leading N-by-N upper triangular part of sub( B )
*          contains the upper triangular part of the matrix.  If
*          UPLO = 'L', the leading N-by-N lower triangular part of
*          sub( B ) contains the lower triangular part of the matrix.
*
*          On exit, if INFO <= N, the part of sub( B ) containing the
*          matrix is overwritten by the triangular factor U or L from
*          the Cholesky factorization sub( B ) = U**T*U or
*          sub( B ) = L*L**T.
*
*  IB      (global input) INTEGER
*          The row index in the global array B indicating the first
*          row of sub( B ).
*
*  JB      (global input) INTEGER
*          The column index in the global array B indicating the
*          first column of sub( B ).
*
*  DESCB   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix B.
*          DESCB( CTXT_ ) must equal DESCA( CTXT_ )
*
*  VL      (global input) REAL
*          If RANGE='V', the lower bound of the interval to be searched
*          for eigenvalues.  Not referenced if RANGE = 'A' or 'I'.
*
*  VU      (global input) REAL
*          If RANGE='V', the upper bound of the interval to be searched
*          for eigenvalues.  Not referenced if RANGE = 'A' or 'I'.
*
*  IL      (global input) INTEGER
*          If RANGE='I', the index (from smallest to largest) of the
*          smallest eigenvalue to be returned.  IL >= 1.
*          Not referenced if RANGE = 'A' or 'V'.
*
*  IU      (global input) INTEGER
*          If RANGE='I', the index (from smallest to largest) of the
*          largest eigenvalue to be returned.  min(IL,N) <= IU <= N.
*          Not referenced if RANGE = 'A' or 'V'.
*
*  ABSTOL  (global input) REAL
*          If JOBZ='V', setting ABSTOL to PSLAMCH( CONTEXT, 'U') yields
*          the most orthogonal eigenvectors.
*
*          The absolute error tolerance for the eigenvalues.
*          An approximate eigenvalue is accepted as converged
*          when it is determined to lie in an interval [a,b]
*          of width less than or equal to
*
*                  ABSTOL + EPS *   max( |a|,|b| ) ,
*
*          where EPS is the machine precision.  If ABSTOL is less than
*          or equal to zero, then EPS*norm(T) will be used in its
*          place, where norm(T) is the 1-norm of the tridiagonal
*          matrix obtained by reducing A to tridiagonal form.
*
*          Eigenvalues will be computed most accurately when ABSTOL is
*          set to twice the underflow threshold 2*PSLAMCH('S') not
*          zero.  If this routine returns with
*          ((MOD(INFO,2).NE.0) .OR. (MOD(INFO/8,2).NE.0)), indicating
*          that some eigenvalues or eigenvectors did not converge, try
*          setting ABSTOL to 2*PSLAMCH('S').
*
*          ABSTOL (and VL, VU when RANGE='V') must be identical on all
*          processes; the values held by process (0,0) are broadcast
*          and compared against the local ones.
*
*          See "Computing Small Singular Values of Bidiagonal Matrices
*          with Guaranteed High Relative Accuracy," by Demmel and
*          Kahan, LAPACK Working Note #3.
*
*          See "On the correctness of Parallel Bisection in Floating
*          Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70
*
*  M       (global output) INTEGER
*          Total number of eigenvalues found.  0 <= M <= N.
*
*  NZ      (global output) INTEGER
*          Total number of eigenvectors computed.  0 <= NZ <= M.
*          The number of columns of Z that are filled.
*          If JOBZ .NE. 'V', NZ is not referenced.
*          If JOBZ .EQ. 'V', NZ = M unless the user supplies
*          insufficient space and PSSYGVX is not able to detect this
*          before beginning computation.  To get all the eigenvectors
*          requested, the user must supply both sufficient
*          space to hold the eigenvectors in Z (M .LE. DESCZ(N_))
*          and sufficient workspace to compute them.  (See LWORK
*          below.)  PSSYGVX is always able to detect insufficient
*          space without computation unless RANGE .EQ. 'V'.
*
*  W       (global output) REAL array, dimension (N)
*          On normal exit, the first M entries contain the selected
*          eigenvalues in ascending order.
*
*  ORFAC   (global input) REAL
*          Specifies which eigenvectors should be reorthogonalized.
*          Eigenvectors that correspond to eigenvalues which are within
*          tol=ORFAC*norm(A) of each other are to be reorthogonalized.
*          However, if the workspace is insufficient (see LWORK),
*          tol may be decreased until all eigenvectors to be
*          reorthogonalized can be stored in one process.
*          No reorthogonalization will be done if ORFAC equals zero.
*          A default value of 10^-3 is used if ORFAC is negative.
*          ORFAC should be identical on all processes.
*
*  Z       (local output) REAL array,
*          global dimension (N, N),
*          local dimension ( LLD_Z, LOCc(JZ+N-1) )
*          If JOBZ = 'V', then on normal exit the first M columns of Z
*          contain the orthonormal eigenvectors of the matrix
*          corresponding to the selected eigenvalues.  If an
*          eigenvector fails to converge, then that column of Z
*          contains the latest approximation to the eigenvector, and
*          the index of the eigenvector is returned in IFAIL.
*          If JOBZ = 'N', then Z is not referenced.
*
*  IZ      (global input) INTEGER
*          The row index in the global array Z indicating the first
*          row of sub( Z ).
*
*  JZ      (global input) INTEGER
*          The column index in the global array Z indicating the
*          first column of sub( Z ).
*
*  DESCZ   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix Z.
*          DESCZ( CTXT_ ) must equal DESCA( CTXT_ )
*
*  WORK    (local workspace/output) REAL array,
*             dimension max(3,LWORK)
*          if JOBZ='N' WORK(1) = optimal amount of workspace
*             required to compute eigenvalues efficiently
*          if JOBZ='V' WORK(1) = optimal amount of workspace
*             required to compute eigenvalues and eigenvectors
*             efficiently with no guarantee on orthogonality.
*             If RANGE='V', it is assumed that all eigenvectors
*             may be required.
*
*  LWORK   (local input) INTEGER
*          See below for definitions of variables used to define LWORK.
*          If no eigenvectors are requested (JOBZ = 'N') then
*             LWORK >= 5 * N + MAX( 5 * NN, NB * ( NP0 + 1 ) )
*          If eigenvectors are requested (JOBZ = 'V' ) then
*             the amount of workspace required to guarantee that all
*             eigenvectors are computed is:
*             LWORK >= 5 * N + MAX( 5*NN, NP0 * MQ0 + 2 * NB * NB ) +
*               ICEIL( NEIG, NPROW*NPCOL)*NN
*
*             The computed eigenvectors may not be orthogonal if the
*             minimal workspace is supplied and ORFAC is too small.
*             If you want to guarantee orthogonality (at the cost
*             of potentially poor performance) you should add
*             the following to LWORK:
*                (CLUSTERSIZE-1)*N
*             where CLUSTERSIZE is the number of eigenvalues in the
*             largest cluster, where a cluster is defined as a set of
*             close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) |
*                                  W(J+1) <= W(J) + ORFAC*2*norm(A) }
*          Variable definitions:
*             NEIG = number of eigenvectors requested
*             NB = DESCA( MB_ ) = DESCA( NB_ ) = DESCZ( MB_ ) =
*                  DESCZ( NB_ )
*             NN = MAX( N, NB, 2 )
*             DESCA( RSRC_ ) = DESCA( CSRC_ ) = DESCZ( RSRC_ ) =
*                              DESCZ( CSRC_ ) = 0
*             NP0 = NUMROC( NN, NB, 0, 0, NPROW )
*             MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL )
*             ICEIL( X, Y ) is a ScaLAPACK function returning
*             ceiling(X/Y)
*
*          When LWORK is too small:
*             If LWORK is too small to guarantee orthogonality,
*             PSSYGVX attempts to maintain orthogonality in
*             the clusters with the smallest
*             spacing between the eigenvalues.
*             If LWORK is too small to compute all the eigenvectors
*             requested, no computation is performed and INFO=-28
*             is returned.  Note that when RANGE='V', PSSYGVX does
*             not know how many eigenvectors are requested until
*             the eigenvalues are computed.  Therefore, when RANGE='V'
*             and as long as LWORK is large enough to allow PSSYGVX to
*             compute the eigenvalues, PSSYGVX will compute the
*             eigenvalues and as many eigenvectors as it can.
*
*          Relationship between workspace, orthogonality & performance:
*             Greater performance can be achieved if adequate workspace
*             is provided.  On the other hand, in some situations,
*             performance can decrease as the workspace provided
*             increases above the workspace amount shown below:
*
*             For optimal performance, greater workspace may be
*             needed.  The value returned in WORK(1) is
*                MAX( LWORK, N + NSYTRD_LWOPT, NSYGST_LWOPT )
*             Where:
*                LWORK, as defined previously, depends upon the number
*                   of eigenvectors requested, and
*                NSYTRD_LWOPT = 2*( ANB+1 )*( 4*NPS+2 ) +
*                               ( NPS + 4 ) * NPS
*                NSYGST_LWOPT =  2*NP0*NB + NQ0*NB + NB*NB
*
*                ANB = PJLAENV( DESCA( CTXT_), 3, 'PSSYTTRD', 'L',
*                   0, 0, 0, 0)
*                SQNPC = INT( SQRT( DBLE( NPROW * NPCOL ) ) )
*                NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB )
*                NB = DESCA( MB_ )
*                NP0 = NUMROC( N, NB, 0, 0, NPROW )
*                NQ0 = NUMROC( N, NB, 0, 0, NPCOL )
*
*             NOTE(review): earlier revisions of this header quoted
*             5*N + NSYTRD_LWOPT with NSYTRD_LWOPT = N +
*             2*( ANB+1 )*( 4*NPS+2 ) + ( NPS+3 )*NPS.  The formulas
*             above are the ones the code evaluates; confirm which is
*             intended against PSSYEVX.
*
*             NUMROC is a ScaLAPACK tool function;
*             PJLAENV is a ScaLAPACK environmental inquiry function
*             MYROW, MYCOL, NPROW and NPCOL can be determined by
*               calling the subroutine BLACS_GRIDINFO.
*
*             For large N, no extra workspace is needed, however the
*             biggest boost in performance comes for small N, so it
*             is wise to provide the extra workspace (typically less
*             than a Megabyte per process).
*
*             If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing
*             enough space to compute all the eigenvectors
*             orthogonally will cause serious degradation in
*             performance. In the limit (i.e. CLUSTERSIZE = N-1)
*             PSSTEIN will perform no better than SSTEIN on 1
*             processor.
*             For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing
*             all eigenvectors will increase the total execution time
*             by a factor of 2 or more.
*             For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will
*             grow as the square of the cluster size, all other factors
*             remaining equal and assuming enough workspace.  Less
*             workspace means less reorthogonalization but faster
*             execution.
*
*          If LWORK = -1, then LWORK is global input and a workspace
*          query is assumed; the routine only calculates the size
*          required for optimal performance on all work arrays.
*          Each of these values is returned in the first entry of the
*          corresponding work array, and no error message is issued by
*          PXERBLA.
*
*  IWORK   (local workspace) INTEGER array
*          On return, IWORK(1) contains the amount of integer workspace
*          required.
*
*  LIWORK  (local input) INTEGER
*          size of IWORK
*          LIWORK >= 6 * NNP
*          Where:
*            NNP = MAX( N, NPROW*NPCOL + 1, 4 )
*
*          If LIWORK = -1, then LIWORK is global input and a workspace
*          query is assumed; the routine only calculates the minimum
*          and optimal size for all work arrays. Each of these
*          values is returned in the first entry of the corresponding
*          work array, and no error message is issued by PXERBLA.
*
*  IFAIL   (output) INTEGER array, dimension (N)
*          IFAIL provides additional information when INFO .NE. 0
*          If (MOD(INFO/16,2).NE.0) then IFAIL(1) indicates the order
*          of the smallest minor which is not positive definite.
*          If (MOD(INFO,2).NE.0) on exit, then IFAIL contains the
*          indices of the eigenvectors that failed to converge.
*
*          If neither of the above error conditions hold and
*          JOBZ = 'V', then the first M elements of IFAIL are set to
*          zero.
*
*  ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL)
*          This array contains indices of eigenvectors corresponding to
*          a cluster of eigenvalues that could not be reorthogonalized
*          due to insufficient workspace (see LWORK, ORFAC and INFO).
*          Eigenvectors corresponding to clusters of eigenvalues
*          indexed ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be
*          reorthogonalized due to lack of workspace. Hence the
*          eigenvectors corresponding to these clusters may not be
*          orthogonal.  ICLUSTR() is a zero terminated array.
*          (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if
*          K is the number of clusters
*          ICLUSTR is not referenced if JOBZ = 'N'
*
*  GAP     (global output) REAL array,
*             dimension (NPROW*NPCOL)
*          This array contains the gap between eigenvalues whose
*          eigenvectors could not be reorthogonalized. The output
*          values in this array correspond to the clusters indicated
*          by the array ICLUSTR. As a result, the dot product between
*          eigenvectors corresponding to the I^th cluster may be as
*          high as ( C * n ) / GAP(I) where C is a small constant.
*
*  INFO    (global output) INTEGER
*          = 0:  successful exit
*          < 0:  If the i-th argument is an array and the j-entry had
*                an illegal value, then INFO = -(i*100+j), if the i-th
*                argument is a scalar and had an illegal value, then
*                INFO = -i.
*          > 0:  if (MOD(INFO,2).NE.0), then one or more eigenvectors
*                  failed to converge.  Their indices are stored
*                  in IFAIL.  Send e-mail to scalapack@cs.utk.edu
*                if (MOD(INFO/2,2).NE.0),then eigenvectors
*                  corresponding to one or more clusters of eigenvalues
*                  could not be reorthogonalized because of
*                  insufficient workspace.
*                  The indices of the clusters are stored in the array
*                  ICLUSTR.
*                if (MOD(INFO/4,2).NE.0), then space limit prevented
*                  PSSYGVX from computing all of the eigenvectors
*                  between VL and VU.  The number of eigenvectors
*                  computed is returned in NZ.
*                if (MOD(INFO/8,2).NE.0), then PSSTEBZ failed to
*                  compute eigenvalues.
*                  Send e-mail to scalapack@cs.utk.edu
*                if (MOD(INFO/16,2).NE.0), then B was not positive
*                  definite.  IFAIL(1) indicates the order of
*                  the smallest minor which is not positive definite.
*
*  Alignment requirements
*  ======================
*
*  The distributed submatrices sub( A ), sub( B ) and
*  Z( IZ:IZ+N-1, JZ:JZ+N-1 ) must verify some alignment properties,
*  namely the following expressions should be true:
*
*     DESCA(MB_) = DESCA(NB_)
*     IA = IB = IZ
*     JA = JB = JZ
*     DESCA(M_) = DESCB(M_) = DESCZ(M_)
*     DESCA(N_) = DESCB(N_) = DESCZ(N_)
*     DESCA(MB_) = DESCB(MB_) = DESCZ(MB_)
*     DESCA(NB_) = DESCB(NB_) = DESCZ(NB_)
*     DESCA(RSRC_) = DESCB(RSRC_) = DESCZ(RSRC_)
*     DESCA(CSRC_) = DESCB(CSRC_) = DESCZ(CSRC_)
*     MOD( IA-1, DESCA( MB_ ) ) = 0
*     MOD( JA-1, DESCA( NB_ ) ) = 0
*     MOD( IB-1, DESCB( MB_ ) ) = 0
*     MOD( JB-1, DESCB( NB_ ) ) = 0
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_,
     $                   N_, MB_, NB_, RSRC_, CSRC_, LLD_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9,
     $                   DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4,
     $                   MB_ = 5, NB_ = 6, RSRC_ = 7, CSRC_ = 8,
     $                   LLD_ = 9 )
      REAL               ONE
      PARAMETER          ( ONE = 1.0E+0 )
      REAL               FIVE, ZERO
      PARAMETER          ( FIVE = 5.0E+0, ZERO = 0.0E+0 )
*     Bit set in INFO when sub( B ) is not positive definite.
      INTEGER            IERRNPD
      PARAMETER          ( IERRNPD = 16 )
*     ..
*     .. Local Scalars ..
      LOGICAL            ALLEIG, INDEIG, LQUERY, UPPER, VALEIG, WANTZ
      CHARACTER          TRANS
      INTEGER            ANB, IACOL, IAROW, IBCOL, IBROW, ICOFFA,
     $                   ICOFFB, ICTXT, IROFFA, IROFFB, LIWMIN, LWMIN,
     $                   LWOPT, MQ0, MYCOL, MYROW, NB, NEIG, NN, NP0,
     $                   NPCOL, NPROW, NPS, NQ0, NSYGST_LWOPT,
     $                   NSYTRD_LWOPT, SQNPC
      REAL               EPS, SCALE
*     ..
*     .. Local Arrays ..
      INTEGER            IDUM1( 5 ), IDUM2( 5 )
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            ICEIL, INDXG2P, NUMROC, PJLAENV
      REAL               PSLAMCH
      EXTERNAL           LSAME, ICEIL, INDXG2P, NUMROC, PJLAENV,
     $                   PSLAMCH
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, CHK1MAT, PCHK1MAT, PCHK2MAT,
     $                   PSPOTRF, PSSYEVX, PSSYNGST, PSTRMM, PSTRSM,
     $                   PXERBLA, SGEBR2D, SGEBS2D, SSCAL
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          ABS, DBLE, ICHAR, INT, MAX, MIN, MOD, REAL,
     $                   SQRT
*     ..
*     .. Executable Statements ..
*       This is just to keep ftnchek and toolpack/1 happy
      IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_*
     $    RSRC_.LT.0 )RETURN
*
*     Get grid parameters
*
      ICTXT = DESCA( CTXT_ )
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*
*     Give every quantity that is read on the error paths below a
*     defined value up front.  LQUERY feeds IDUM1( 5 ) even when
*     CHK1MAT has already flagged an error, and LWOPT / LIWMIN are
*     stored in WORK( 1 ) / IWORK( 1 ) on every exit path.
*
      LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
      LWMIN = 0
      LWOPT = 0
      LIWMIN = 0
*
*     Test the input parameters
*
      INFO = 0
      IF( NPROW.EQ.-1 ) THEN
         INFO = -( 900+CTXT_ )
      ELSE IF( DESCA( CTXT_ ).NE.DESCB( CTXT_ ) ) THEN
         INFO = -( 1300+CTXT_ )
      ELSE IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN
         INFO = -( 2600+CTXT_ )
      ELSE
*
*        Get machine constants.
*
         EPS = PSLAMCH( DESCA( CTXT_ ), 'Precision' )
*
         WANTZ = LSAME( JOBZ, 'V' )
         UPPER = LSAME( UPLO, 'U' )
         ALLEIG = LSAME( RANGE, 'A' )
         VALEIG = LSAME( RANGE, 'V' )
         INDEIG = LSAME( RANGE, 'I' )
*
*        N is argument 5 of this routine; DESCA, DESCB and DESCZ are
*        arguments 9, 13 and 26.
*
         CALL CHK1MAT( N, 5, N, 5, IA, JA, DESCA, 9, INFO )
         CALL CHK1MAT( N, 5, N, 5, IB, JB, DESCB, 13, INFO )
         CALL CHK1MAT( N, 5, N, 5, IZ, JZ, DESCZ, 26, INFO )
         IF( INFO.EQ.0 ) THEN
*
*           Broadcast ABSTOL, VL and VU from process (0,0) so that
*           every process can verify it was given the same values.
*           WORK( 1:3 ) is used as the message buffer.
*
            IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN
               WORK( 1 ) = ABSTOL
               IF( VALEIG ) THEN
                  WORK( 2 ) = VL
                  WORK( 3 ) = VU
               ELSE
                  WORK( 2 ) = ZERO
                  WORK( 3 ) = ZERO
               END IF
               CALL SGEBS2D( DESCA( CTXT_ ), 'ALL', ' ', 3, 1, WORK,
     $                       3 )
            ELSE
               CALL SGEBR2D( DESCA( CTXT_ ), 'ALL', ' ', 3, 1, WORK,
     $                       3, 0, 0 )
            END IF
            IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW,
     $              DESCA( RSRC_ ), NPROW )
            IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW,
     $              DESCB( RSRC_ ), NPROW )
            IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL,
     $              DESCA( CSRC_ ), NPCOL )
            IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL,
     $              DESCB( CSRC_ ), NPCOL )
            IROFFA = MOD( IA-1, DESCA( MB_ ) )
            ICOFFA = MOD( JA-1, DESCA( NB_ ) )
            IROFFB = MOD( IB-1, DESCB( MB_ ) )
            ICOFFB = MOD( JB-1, DESCB( NB_ ) )
*
*           Compute the total amount of space needed
*
            LIWMIN = 6*MAX( N, ( NPROW*NPCOL )+1, 4 )
*
            NB = DESCA( MB_ )
            NN = MAX( N, NB, 2 )
            NP0 = NUMROC( NN, NB, 0, 0, NPROW )
*
            IF( ( .NOT.WANTZ ) .OR.
     $          ( VALEIG .AND. ( .NOT.LQUERY ) ) ) THEN
*
*              Eigenvalues only, or RANGE='V' where the number of
*              eigenvectors is not known yet: only the eigenvalue
*              workspace is demanded.
*
               LWMIN = 5*N + MAX( 5*NN, NB*( NP0+1 ) )
               IF( WANTZ ) THEN
                  MQ0 = NUMROC( MAX( N, NB, 2 ), NB, 0, 0, NPCOL )
                  LWOPT = 5*N + MAX( 5*NN, NP0*MQ0+2*NB*NB )
               ELSE
                  LWOPT = LWMIN
               END IF
               NEIG = 0
            ELSE
               IF( ALLEIG .OR. VALEIG ) THEN
                  NEIG = N
               ELSE IF( INDEIG ) THEN
                  NEIG = IU - IL + 1
               ELSE
*
*                 Invalid RANGE: INFO = -3 is set below; keep NEIG
*                 defined for the size formulas.
*
                  NEIG = 0
               END IF
               MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL )
               LWMIN = 5*N + MAX( 5*NN, NP0*MQ0+2*NB*NB ) +
     $                 ICEIL( NEIG, NPROW*NPCOL )*NN
               LWOPT = LWMIN
*
            END IF
*
*           Compute how much workspace is needed to use the
*           new TRD and GST algorithms
*
            ANB = PJLAENV( ICTXT, 3, 'PSSYTTRD', 'L', 0, 0, 0, 0 )
            SQNPC = INT( SQRT( DBLE( NPROW*NPCOL ) ) )
            NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB )
            NSYTRD_LWOPT = 2*( ANB+1 )*( 4*NPS+2 ) + ( NPS+4 )*NPS
            NB = DESCA( MB_ )
            NP0 = NUMROC( N, NB, 0, 0, NPROW )
            NQ0 = NUMROC( N, NB, 0, 0, NPCOL )
            NSYGST_LWOPT = 2*NP0*NB + NQ0*NB + NB*NB
            LWOPT = MAX( LWOPT, N+NSYTRD_LWOPT, NSYGST_LWOPT )
*
*           Version 1.0 Limitations
*
            IF( IBTYPE.LT.1 .OR. IBTYPE.GT.3 ) THEN
               INFO = -1
            ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
               INFO = -2
            ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
               INFO = -3
            ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
               INFO = -4
            ELSE IF( N.LT.0 ) THEN
               INFO = -5
            ELSE IF( IROFFA.NE.0 ) THEN
               INFO = -7
            ELSE IF( ICOFFA.NE.0 ) THEN
               INFO = -8
            ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN
               INFO = -( 900+NB_ )
            ELSE IF( DESCA( M_ ).NE.DESCB( M_ ) ) THEN
               INFO = -( 1300+M_ )
            ELSE IF( DESCA( N_ ).NE.DESCB( N_ ) ) THEN
               INFO = -( 1300+N_ )
            ELSE IF( DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN
               INFO = -( 1300+MB_ )
            ELSE IF( DESCA( NB_ ).NE.DESCB( NB_ ) ) THEN
               INFO = -( 1300+NB_ )
            ELSE IF( DESCA( RSRC_ ).NE.DESCB( RSRC_ ) ) THEN
               INFO = -( 1300+RSRC_ )
            ELSE IF( DESCA( CSRC_ ).NE.DESCB( CSRC_ ) ) THEN
               INFO = -( 1300+CSRC_ )
            ELSE IF( DESCA( CTXT_ ).NE.DESCB( CTXT_ ) ) THEN
               INFO = -( 1300+CTXT_ )
*
*           DESCZ is argument 26, so its entries are reported as
*           -( 2600+entry ), consistent with the context check above.
*
            ELSE IF( DESCA( M_ ).NE.DESCZ( M_ ) ) THEN
               INFO = -( 2600+M_ )
            ELSE IF( DESCA( N_ ).NE.DESCZ( N_ ) ) THEN
               INFO = -( 2600+N_ )
            ELSE IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN
               INFO = -( 2600+MB_ )
            ELSE IF( DESCA( NB_ ).NE.DESCZ( NB_ ) ) THEN
               INFO = -( 2600+NB_ )
            ELSE IF( DESCA( RSRC_ ).NE.DESCZ( RSRC_ ) ) THEN
               INFO = -( 2600+RSRC_ )
            ELSE IF( DESCA( CSRC_ ).NE.DESCZ( CSRC_ ) ) THEN
               INFO = -( 2600+CSRC_ )
            ELSE IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN
               INFO = -( 2600+CTXT_ )
            ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN
               INFO = -11
            ELSE IF( ICOFFB.NE.0 .OR. IBCOL.NE.IACOL ) THEN
               INFO = -12
            ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN
               INFO = -15
            ELSE IF( INDEIG .AND.
     $               ( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) ) THEN
               INFO = -16
            ELSE IF( INDEIG .AND.
     $               ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN
               INFO = -17
*
*           The next three tests compare the local VL, VU and ABSTOL
*           with the values broadcast from process (0,0).
*
            ELSE IF( VALEIG .AND. ( ABS( WORK( 2 )-VL ).GT.
     $               FIVE*EPS*ABS( VL ) ) ) THEN
               INFO = -14
            ELSE IF( VALEIG .AND. ( ABS( WORK( 3 )-VU ).GT.
     $               FIVE*EPS*ABS( VU ) ) ) THEN
               INFO = -15
            ELSE IF( ABS( WORK( 1 )-ABSTOL ).GT.
     $               FIVE*EPS*ABS( ABSTOL ) ) THEN
               INFO = -18
            ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
               INFO = -28
            ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
               INFO = -30
            END IF
         END IF
*
*        Values that must agree on all processes, each paired with
*        the position of the corresponding argument of PSSYGVX:
*        IBTYPE (1), JOBZ (2), UPLO (4), RANGE (3) and the
*        workspace-query flag, reported against LWORK (28).
*
         IDUM1( 1 ) = IBTYPE
         IDUM2( 1 ) = 1
         IF( WANTZ ) THEN
            IDUM1( 2 ) = ICHAR( 'V' )
         ELSE
            IDUM1( 2 ) = ICHAR( 'N' )
         END IF
         IDUM2( 2 ) = 2
         IF( UPPER ) THEN
            IDUM1( 3 ) = ICHAR( 'U' )
         ELSE
            IDUM1( 3 ) = ICHAR( 'L' )
         END IF
         IDUM2( 3 ) = 4
         IF( ALLEIG ) THEN
            IDUM1( 4 ) = ICHAR( 'A' )
         ELSE IF( INDEIG ) THEN
            IDUM1( 4 ) = ICHAR( 'I' )
         ELSE
            IDUM1( 4 ) = ICHAR( 'V' )
         END IF
         IDUM2( 4 ) = 3
         IF( LQUERY ) THEN
            IDUM1( 5 ) = -1
         ELSE
            IDUM1( 5 ) = 1
         END IF
         IDUM2( 5 ) = 28
         CALL PCHK2MAT( N, 5, N, 5, IA, JA, DESCA, 9, N, 5, N, 5, IB,
     $                  JB, DESCB, 13, 5, IDUM1, IDUM2, INFO )
         CALL PCHK1MAT( N, 5, N, 5, IZ, JZ, DESCZ, 26, 0, IDUM1,
     $                  IDUM2, INFO )
      END IF
*
*     Report the workspace sizes (this also overwrites the broadcast
*     buffer held in WORK( 1 )).
*
      IWORK( 1 ) = LIWMIN
      WORK( 1 ) = REAL( LWOPT )
*
      IF( INFO.NE.0 ) THEN
         CALL PXERBLA( ICTXT, 'PSSYGVX ', -INFO )
         RETURN
      ELSE IF( LQUERY ) THEN
         RETURN
      END IF
*
*     Form a Cholesky factorization of sub( B ).
*
      CALL PSPOTRF( UPLO, N, B, IB, JB, DESCB, INFO )
      IF( INFO.NE.0 ) THEN
*
*        sub( B ) is not positive definite: pass the PSPOTRF code
*        back in IFAIL( 1 ) and flag the failure in INFO.
*
         IWORK( 1 ) = LIWMIN
         WORK( 1 ) = REAL( LWOPT )
         IFAIL( 1 ) = INFO
         INFO = IERRNPD
         RETURN
      END IF
*
*     Transform problem to standard eigenvalue problem and solve.
*
      CALL PSSYNGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB,
     $               DESCB, SCALE, WORK, LWORK, INFO )
      CALL PSSYEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, VU,
     $              IL, IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ,
     $              DESCZ, WORK, LWORK, IWORK, LIWORK, IFAIL,
     $              ICLUSTR, GAP, INFO )
*
      IF( WANTZ ) THEN
*
*        Backtransform eigenvectors to the original problem.
*
         NEIG = M
         IF( IBTYPE.EQ.1 .OR. IBTYPE.EQ.2 ) THEN
*
*           For sub( A )*x=(lambda)*sub( B )*x and
*           sub( A )*sub( B )*x=(lambda)*x; backtransform eigenvectors:
*           x = inv(L)'*y or inv(U)*y
*
            IF( UPPER ) THEN
               TRANS = 'N'
            ELSE
               TRANS = 'T'
            END IF
*
            CALL PSTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG,
     $                   ONE, B, IB, JB, DESCB, Z, IZ, JZ, DESCZ )
*
         ELSE IF( IBTYPE.EQ.3 ) THEN
*
*           For sub( B )*sub( A )*x=(lambda)*x;
*           backtransform eigenvectors: x = L*y or U'*y
*
            IF( UPPER ) THEN
               TRANS = 'T'
            ELSE
               TRANS = 'N'
            END IF
*
            CALL PSTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG,
     $                   ONE, B, IB, JB, DESCB, Z, IZ, JZ, DESCZ )
         END IF
      END IF
*
*     Undo the scaling applied by PSSYNGST.
*
      IF( SCALE.NE.ONE ) THEN
         CALL SSCAL( N, SCALE, W, 1 )
      END IF
*
      IWORK( 1 ) = LIWMIN
      WORK( 1 ) = REAL( LWOPT )
      RETURN
*
*     End of PSSYGVX
*
      END
pssygvx
subroutine pssygvx(IBTYPE, JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, DESCB, VL, VU, IL, IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ, WORK, LWORK, IWORK, LIWORK, IFAIL, ICLUSTR, GAP, INFO)
Definition: pssygvx.f:6
max
#define max(A, B)
Definition: pcgemr.c:180
pssyevx
subroutine pssyevx(JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, VU, IL, IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ, WORK, LWORK, IWORK, LIWORK, IFAIL, ICLUSTR, GAP, INFO)
Definition: pssyevx.f:5
pchk1mat
subroutine pchk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, NEXTRA, EX, EXPOS, INFO)
Definition: pchkxmat.f:3
pchk2mat
subroutine pchk2mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, MB, MBPOS0, NB, NBPOS0, IB, JB, DESCB, DESCBPOS0, NEXTRA, EX, EXPOS, INFO)
Definition: pchkxmat.f:175
chk1mat
subroutine chk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, INFO)
Definition: chk1mat.f:3
pspotrf
subroutine pspotrf(UPLO, N, A, IA, JA, DESCA, INFO)
Definition: pspotrf.f:2
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
pssyngst
subroutine pssyngst(IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, DESCB, SCALE, WORK, LWORK, INFO)
Definition: pssyngst.f:3
min
#define min(A, B)
Definition: pcgemr.c:181