ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pcheevx.f
Go to the documentation of this file.
1  SUBROUTINE pcheevx( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL,
2  $ VU, IL, IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ,
3  $ JZ, DESCZ, WORK, LWORK, RWORK, LRWORK, IWORK,
4  $ LIWORK, IFAIL, ICLUSTR, GAP, INFO )
5 *
6 * -- ScaLAPACK routine (version 1.7) --
7 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8 * and University of California, Berkeley.
9 * May 25, 2001
10 *
11 * .. Scalar Arguments ..
12  CHARACTER JOBZ, RANGE, UPLO
13  INTEGER IA, IL, INFO, IU, IZ, JA, JZ, LIWORK, LRWORK,
14  $ LWORK, M, N, NZ
15  REAL ABSTOL, ORFAC, VL, VU
16 * ..
17 * .. Array Arguments ..
18  INTEGER DESCA( * ), DESCZ( * ), ICLUSTR( * ),
19  $ IFAIL( * ), IWORK( * )
20  REAL GAP( * ), RWORK( * ), W( * )
21  COMPLEX A( * ), WORK( * ), Z( * )
22 * ..
23 *
24 * Purpose
25 * =======
26 *
27 * PCHEEVX computes selected eigenvalues and, optionally, eigenvectors
28 * of a complex hermitian matrix A by calling the recommended sequence
29 * of ScaLAPACK routines. Eigenvalues/vectors can be selected by
30 * specifying a range of values or a range of indices for the desired
31 * eigenvalues.
32 *
33 * Notes
34 * =====
35 *
36 * Each global data object is described by an associated description
37 * vector. This vector stores the information required to establish
38 * the mapping between an object element and its corresponding process
39 * and memory location.
40 *
41 * Let A be a generic term for any 2D block cyclicly distributed array.
42 * Such a global array has an associated description vector DESCA.
43 * In the following comments, the character _ should be read as
44 * "of the global array".
45 *
46 * NOTATION STORED IN EXPLANATION
47 * --------------- -------------- --------------------------------------
48 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
49 * DTYPE_A = 1.
50 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
51 * the BLACS process grid A is distribu-
52 * ted over. The context itself is glo-
53 * bal, but the handle (the integer
54 * value) may vary.
55 * M_A (global) DESCA( M_ ) The number of rows in the global
56 * array A.
57 * N_A (global) DESCA( N_ ) The number of columns in the global
58 * array A.
59 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
60 * the rows of the array.
61 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
62 * the columns of the array.
63 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
64 * row of the array A is distributed.
65 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
66 * first column of the array A is
67 * distributed.
68 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
69 * array. LLD_A >= MAX(1,LOCr(M_A)).
70 *
71 * Let K be the number of rows or columns of a distributed matrix,
72 * and assume that its process grid has dimension p x q.
73 * LOCr( K ) denotes the number of elements of K that a process
74 * would receive if K were distributed over the p processes of its
75 * process column.
76 * Similarly, LOCc( K ) denotes the number of elements of K that a
77 * process would receive if K were distributed over the q processes of
78 * its process row.
79 * The values of LOCr() and LOCc() may be determined via a call to the
80 * ScaLAPACK tool function, NUMROC:
81 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
82 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
83 * An upper bound for these quantities may be computed by:
84 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
85 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
86 *
87 * PCHEEVX assumes IEEE 754 standard compliant arithmetic. To port
88 * to a system which does not have IEEE 754 arithmetic, modify
89 * the appropriate SLmake.inc file to include the compiler switch
90 * -DNO_IEEE. This switch only affects the compilation of pslaiect.c.
91 *
92 * Arguments
93 * =========
94 *
95 * NP = the number of rows local to a given process.
96 * NQ = the number of columns local to a given process.
97 *
98 * JOBZ (global input) CHARACTER*1
99 * Specifies whether or not to compute the eigenvectors:
100 * = 'N': Compute eigenvalues only.
101 * = 'V': Compute eigenvalues and eigenvectors.
102 *
103 * RANGE (global input) CHARACTER*1
104 * = 'A': all eigenvalues will be found.
105 * = 'V': all eigenvalues in the interval [VL,VU] will be found.
106 * = 'I': the IL-th through IU-th eigenvalues will be found.
107 *
108 * UPLO (global input) CHARACTER*1
109 * Specifies whether the upper or lower triangular part of the
110 * Hermitian matrix A is stored:
111 * = 'U': Upper triangular
112 * = 'L': Lower triangular
113 *
114 * N (global input) INTEGER
115 * The number of rows and columns of the matrix A. N >= 0.
116 *
117 * A (local input/workspace) block cyclic COMPLEX array,
118 * global dimension (N, N),
119 * local dimension ( LLD_A, LOCc(JA+N-1) )
120 *
121 * On entry, the Hermitian matrix A. If UPLO = 'U', only the
122 * upper triangular part of A is used to define the elements of
123 * the Hermitian matrix. If UPLO = 'L', only the lower
124 * triangular part of A is used to define the elements of the
125 * Hermitian matrix.
126 *
127 * On exit, the lower triangle (if UPLO='L') or the upper
128 * triangle (if UPLO='U') of A, including the diagonal, is
129 * destroyed.
130 *
131 * IA (global input) INTEGER
132 * A's global row index, which points to the beginning of the
133 * submatrix which is to be operated on.
134 *
135 * JA (global input) INTEGER
136 * A's global column index, which points to the beginning of
137 * the submatrix which is to be operated on.
138 *
139 * DESCA (global and local input) INTEGER array of dimension DLEN_.
140 * The array descriptor for the distributed matrix A.
141 * If DESCA( CTXT_ ) is incorrect, PCHEEVX cannot guarantee
142 * correct error reporting.
143 *
144 * VL (global input) REAL
145 * If RANGE='V', the lower bound of the interval to be searched
146 * for eigenvalues. Not referenced if RANGE = 'A' or 'I'.
147 *
148 * VU (global input) REAL
149 * If RANGE='V', the upper bound of the interval to be searched
150 * for eigenvalues. Not referenced if RANGE = 'A' or 'I'.
151 *
152 * IL (global input) INTEGER
153 * If RANGE='I', the index (from smallest to largest) of the
154 * smallest eigenvalue to be returned. IL >= 1.
155 * Not referenced if RANGE = 'A' or 'V'.
156 *
157 * IU (global input) INTEGER
158 * If RANGE='I', the index (from smallest to largest) of the
159 * largest eigenvalue to be returned. min(IL,N) <= IU <= N.
160 * Not referenced if RANGE = 'A' or 'V'.
161 *
162 * ABSTOL (global input) REAL
163 * If JOBZ='V', setting ABSTOL to PSLAMCH( CONTEXT, 'U') yields
164 * the most orthogonal eigenvectors.
165 *
166 * The absolute error tolerance for the eigenvalues.
167 * An approximate eigenvalue is accepted as converged
168 * when it is determined to lie in an interval [a,b]
169 * of width less than or equal to
170 *
171 * ABSTOL + EPS * max( |a|,|b| ) ,
172 *
173 * where EPS is the machine precision. If ABSTOL is less than
174 * or equal to zero, then EPS*norm(T) will be used in its place,
175 * where norm(T) is the 1-norm of the tridiagonal matrix
176 * obtained by reducing A to tridiagonal form.
177 *
178 * Eigenvalues will be computed most accurately when ABSTOL is
179 * set to twice the underflow threshold 2*PSLAMCH('S') not zero.
180 * If this routine returns with ((MOD(INFO,2).NE.0) .OR.
181 * (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or
182 * eigenvectors did not converge, try setting ABSTOL to
183 * 2*PSLAMCH('S').
184 *
185 * See "Computing Small Singular Values of Bidiagonal Matrices
186 * with Guaranteed High Relative Accuracy," by Demmel and
187 * Kahan, LAPACK Working Note #3.
188 *
189 * See "On the correctness of Parallel Bisection in Floating
190 * Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70
191 *
192 * M (global output) INTEGER
193 * Total number of eigenvalues found. 0 <= M <= N.
194 *
195 * NZ (global output) INTEGER
196 * Total number of eigenvectors computed. 0 <= NZ <= M.
197 * The number of columns of Z that are filled.
198 * If JOBZ .NE. 'V', NZ is not referenced.
199 * If JOBZ .EQ. 'V', NZ = M unless the user supplies
200 * insufficient space and PCHEEVX is not able to detect this
201 * before beginning computation. To get all the eigenvectors
202 * requested, the user must supply both sufficient
203 * space to hold the eigenvectors in Z (M .LE. DESCZ(N_))
204 * and sufficient workspace to compute them. (See LWORK below.)
205 * PCHEEVX is always able to detect insufficient space without
206 * computation unless RANGE .EQ. 'V'.
207 *
208 * W (global output) REAL array, dimension (N)
209 * On normal exit, the first M entries contain the selected
210 * eigenvalues in ascending order.
211 *
212 * ORFAC (global input) REAL
213 * Specifies which eigenvectors should be reorthogonalized.
214 * Eigenvectors that correspond to eigenvalues which are within
215 * tol=ORFAC*norm(A) of each other are to be reorthogonalized.
216 * However, if the workspace is insufficient (see LWORK),
217 * tol may be decreased until all eigenvectors to be
218 * reorthogonalized can be stored in one process.
219 * No reorthogonalization will be done if ORFAC equals zero.
220 * A default value of 10^-3 is used if ORFAC is negative.
221 * ORFAC should be identical on all processes.
222 *
223 * Z (local output) COMPLEX array,
224 * global dimension (N, N),
225 * local dimension ( LLD_Z, LOCc(JZ+N-1) )
226 * If JOBZ = 'V', then on normal exit the first M columns of Z
227 * contain the orthonormal eigenvectors of the matrix
228 * corresponding to the selected eigenvalues. If an eigenvector
229 * fails to converge, then that column of Z contains the latest
230 * approximation to the eigenvector, and the index of the
231 * eigenvector is returned in IFAIL.
232 * If JOBZ = 'N', then Z is not referenced.
233 *
234 * IZ (global input) INTEGER
235 * Z's global row index, which points to the beginning of the
236 * submatrix which is to be operated on.
237 *
238 * JZ (global input) INTEGER
239 * Z's global column index, which points to the beginning of
240 * the submatrix which is to be operated on.
241 *
242 * DESCZ (global and local input) INTEGER array of dimension DLEN_.
243 * The array descriptor for the distributed matrix Z.
244 * DESCZ( CTXT_ ) must equal DESCA( CTXT_ )
245 *
246 * WORK (local workspace/output) COMPLEX array,
247 * dimension (LWORK)
248 * WORK(1) returns workspace adequate workspace to allow
249 * optimal performance.
250 *
251 * LWORK (local input) INTEGER
252 * Size of WORK array. If only eigenvalues are requested:
253 * LWORK >= N + MAX( NB * ( NP0 + 1 ), 3 )
254 * If eigenvectors are requested:
255 * LWORK >= N + ( NP0 + MQ0 + NB ) * NB
256 * with NQ0 = NUMROC( NN, NB, 0, 0, NPCOL ).
257 *
258 * For optimal performance, greater workspace is needed, i.e.
259 * LWORK >= MAX( LWORK, NHETRD_LWORK )
260 * Where LWORK is as defined above, and
261 * NHETRD_LWORK = N + 2*( ANB+1 )*( 4*NPS+2 ) +
262 * ( NPS + 1 ) * NPS
263 *
264 * ICTXT = DESCA( CTXT_ )
265 * ANB = PJLAENV( ICTXT, 3, 'PCHETTRD', 'L', 0, 0, 0, 0 )
266 * SQNPC = SQRT( DBLE( NPROW * NPCOL ) )
267 * NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB )
268 *
269 * NUMROC is a ScaLAPACK tool functions;
270 * PJLAENV is a ScaLAPACK envionmental inquiry function
271 * MYROW, MYCOL, NPROW and NPCOL can be determined by calling
272 * the subroutine BLACS_GRIDINFO.
273 *
274 * If LWORK = -1, then LWORK is global input and a workspace
275 * query is assumed; the routine only calculates the
276 * optimal size for all work arrays. Each of these
277 * values is returned in the first entry of the corresponding
278 * work array, and no error message is issued by PXERBLA.
279 *
280 * RWORK (local workspace/output) REAL array,
281 * dimension max(3,LRWORK)
282 * On return, WORK(1) contains the optimal amount of
283 * workspace required for efficient execution.
284 * if JOBZ='N' RWORK(1) = optimal amount of workspace
285 * required to compute eigenvalues efficiently
286 * if JOBZ='V' RWORK(1) = optimal amount of workspace
287 * required to compute eigenvalues and eigenvectors
288 * efficiently with no guarantee on orthogonality.
289 * If RANGE='V', it is assumed that all eigenvectors
290 * may be required.
291 *
292 * LRWORK (local input) INTEGER
293 * Size of RWORK
294 * See below for definitions of variables used to define LRWORK.
295 * If no eigenvectors are requested (JOBZ = 'N') then
296 * LRWORK >= 5 * NN + 4 * N
297 * If eigenvectors are requested (JOBZ = 'V' ) then
298 * the amount of workspace required to guarantee that all
299 * eigenvectors are computed is:
300 * LRWORK >= 4*N + MAX( 5*NN, NP0 * MQ0 ) +
301 * ICEIL( NEIG, NPROW*NPCOL)*NN
302 *
303 * The computed eigenvectors may not be orthogonal if the
304 * minimal workspace is supplied and ORFAC is too small.
305 * If you want to guarantee orthogonality (at the cost
306 * of potentially poor performance) you should add
307 * the following to LRWORK:
308 * (CLUSTERSIZE-1)*N
309 * where CLUSTERSIZE is the number of eigenvalues in the
310 * largest cluster, where a cluster is defined as a set of
311 * close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) |
312 * W(J+1) <= W(J) + ORFAC*2*norm(A) }
313 * Variable definitions:
314 * NEIG = number of eigenvectors requested
315 * NB = DESCA( MB_ ) = DESCA( NB_ ) =
316 * DESCZ( MB_ ) = DESCZ( NB_ )
317 * NN = MAX( N, NB, 2 )
318 * DESCA( RSRC_ ) = DESCA( NB_ ) = DESCZ( RSRC_ ) =
319 * DESCZ( CSRC_ ) = 0
320 * NP0 = NUMROC( NN, NB, 0, 0, NPROW )
321 * MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL )
322 * ICEIL( X, Y ) is a ScaLAPACK function returning
323 * ceiling(X/Y)
324 *
325 * When LRWORK is too small:
326 * If LRWORK is too small to guarantee orthogonality,
327 * PCHEEVX attempts to maintain orthogonality in
328 * the clusters with the smallest
329 * spacing between the eigenvalues.
330 * If LRWORK is too small to compute all the eigenvectors
331 * requested, no computation is performed and INFO=-25
332 * is returned. Note that when RANGE='V', PCHEEVX does
333 * not know how many eigenvectors are requested until
334 * the eigenvalues are computed. Therefore, when RANGE='V'
335 * and as long as LRWORK is large enough to allow PCHEEVX to
336 * compute the eigenvalues, PCHEEVX will compute the
337 * eigenvalues and as many eigenvectors as it can.
338 *
339 * Relationship between workspace, orthogonality & performance:
340 * If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing
341 * enough space to compute all the eigenvectors
342 * orthogonally will cause serious degradation in
343 * performance. In the limit (i.e. CLUSTERSIZE = N-1)
344 * PCSTEIN will perform no better than CSTEIN on 1
345 * processor.
346 * For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing
347 * all eigenvectors will increase the total execution time
348 * by a factor of 2 or more.
349 * For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will
350 * grow as the square of the cluster size, all other factors
351 * remaining equal and assuming enough workspace. Less
352 * workspace means less reorthogonalization but faster
353 * execution.
354 *
355 * If LRWORK = -1, then LRWORK is global input and a workspace
356 * query is assumed; the routine only calculates the size
357 * required for optimal performance for all work arrays. Each of
358 * these values is returned in the first entry of the
359 * corresponding work arrays, and no error message is issued by
360 * PXERBLA.
361 *
362 * IWORK (local workspace) INTEGER array
363 * On return, IWORK(1) contains the amount of integer workspace
364 * required.
365 *
366 * LIWORK (local input) INTEGER
367 * size of IWORK
368 * LIWORK >= 6 * NNP
369 * Where:
370 * NNP = MAX( N, NPROW*NPCOL + 1, 4 )
371 * If LIWORK = -1, then LIWORK is global input and a workspace
372 * query is assumed; the routine only calculates the minimum
373 * and optimal size for all work arrays. Each of these
374 * values is returned in the first entry of the corresponding
375 * work array, and no error message is issued by PXERBLA.
376 *
377 * IFAIL (global output) INTEGER array, dimension (N)
378 * If JOBZ = 'V', then on normal exit, the first M elements of
379 * IFAIL are zero. If (MOD(INFO,2).NE.0) on exit, then
380 * IFAIL contains the
381 * indices of the eigenvectors that failed to converge.
382 * If JOBZ = 'N', then IFAIL is not referenced.
383 *
384 * ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL)
385 * This array contains indices of eigenvectors corresponding to
386 * a cluster of eigenvalues that could not be reorthogonalized
387 * due to insufficient workspace (see LWORK, ORFAC and INFO).
388 * Eigenvectors corresponding to clusters of eigenvalues indexed
389 * ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be
390 * reorthogonalized due to lack of workspace. Hence the
391 * eigenvectors corresponding to these clusters may not be
392 * orthogonal. ICLUSTR() is a zero terminated array.
393 * (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if
394 * K is the number of clusters
395 * ICLUSTR is not referenced if JOBZ = 'N'
396 *
397 * GAP (global output) REAL array,
398 * dimension (NPROW*NPCOL)
399 * This array contains the gap between eigenvalues whose
400 * eigenvectors could not be reorthogonalized. The output
401 * values in this array correspond to the clusters indicated
402 * by the array ICLUSTR. As a result, the dot product between
403 * eigenvectors correspoding to the I^th cluster may be as high
404 * as ( C * n ) / GAP(I) where C is a small constant.
405 *
406 * INFO (global output) INTEGER
407 * = 0: successful exit
408 * < 0: If the i-th argument is an array and the j-entry had
409 * an illegal value, then INFO = -(i*100+j), if the i-th
410 * argument is a scalar and had an illegal value, then
411 * INFO = -i.
412 * > 0: if (MOD(INFO,2).NE.0), then one or more eigenvectors
413 * failed to converge. Their indices are stored
414 * in IFAIL. Ensure ABSTOL=2.0*PSLAMCH( 'U' )
415 * Send e-mail to scalapack@cs.utk.edu
416 * if (MOD(INFO/2,2).NE.0),then eigenvectors corresponding
417 * to one or more clusters of eigenvalues could not be
418 * reorthogonalized because of insufficient workspace.
419 * The indices of the clusters are stored in the array
420 * ICLUSTR.
421 * if (MOD(INFO/4,2).NE.0), then space limit prevented
422 * PCHEEVX from computing all of the eigenvectors
423 * between VL and VU. The number of eigenvectors
424 * computed is returned in NZ.
425 * if (MOD(INFO/8,2).NE.0), then PCSTEBZ failed to compute
426 * eigenvalues. Ensure ABSTOL=2.0*PSLAMCH( 'U' )
427 * Send e-mail to scalapack@cs.utk.edu
428 *
429 * Alignment requirements
430 * ======================
431 *
432 * The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1)
433 * must verify some alignment properties, namely the following
434 * expressions should be true:
435 *
436 * ( MB_A.EQ.NB_A.EQ.MB_Z .AND. IROFFA.EQ.IROFFZ .AND. IROFFA.EQ.0 .AND.
437 * IAROW.EQ.IZROW )
438 * where
439 * IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ).
440 *
441 * =====================================================================
442 *
443 * Differences between PCHEEVX and CHEEVX
444 * ======================================
445 *
446 * A, LDA -> A, IA, JA, DESCA
447 * Z, LDZ -> Z, IZ, JZ, DESCZ
448 * WORKSPACE needs are larger for PCHEEVX.
449 * LIWORK parameter added
450 *
451 * ORFAC, ICLUSTER() and GAP() parameters added
452 * meaning of INFO is changed
453 *
454 * Functional differences:
455 * PCHEEVX does not promise orthogonality for eigenvectors associated
456 * with tighly clustered eigenvalues.
457 * PCHEEVX does not reorthogonalize eigenvectors
458 * that are on different processes. The extent of reorthogonalization
459 * is controlled by the input parameter LWORK.
460 *
461 * Version 1.4 limitations:
462 * DESCA(MB_) = DESCA(NB_)
463 * DESCA(M_) = DESCZ(M_)
464 * DESCA(N_) = DESCZ(N_)
465 * DESCA(MB_) = DESCZ(MB_)
466 * DESCA(NB_) = DESCZ(NB_)
467 * DESCA(RSRC_) = DESCZ(RSRC_)
468 *
469 * .. Parameters ..
470  INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
471  $ MB_, NB_, RSRC_, CSRC_, LLD_
472  PARAMETER ( BLOCK_CYCLIC_2D = 1, dlen_ = 9, dtype_ = 1,
473  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
474  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
475  REAL ZERO, ONE, TEN, FIVE
476  PARAMETER ( ZERO = 0.0e+0, one = 1.0e+0, ten = 10.0e+0,
477  $ five = 5.0e+0 )
478  INTEGER IERREIN, IERRCLS, IERRSPC, IERREBZ
479  parameter( ierrein = 1, ierrcls = 2, ierrspc = 4,
480  $ ierrebz = 8 )
481 * ..
482 * .. Local Scalars ..
483  LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, QUICKRETURN,
484  $ VALEIG, WANTZ
485  CHARACTER ORDER
486  INTEGER ANB, CSRC_A, I, IAROW, ICOFFA, ICTXT, IINFO,
487  $ INDD, INDD2, INDE, INDE2, INDIBL, INDISP,
488  $ indrwork, indtau, indwork, iroffa, iroffz,
489  $ iscale, isizestebz, isizestein, izrow,
490  $ lallwork, liwmin, llrwork, llwork, lrwmin,
491  $ lrwopt, lwmin, lwopt, maxeigs, mb_a, mq0,
492  $ mycol, myrow, nb, nb_a, neig, nhetrd_lwopt, nn,
493  $ nnp, np0, npcol, nprocs, nprow, nps, nq0,
494  $ nsplit, nzz, offset, rsrc_a, rsrc_z, sizeheevx,
495  $ sizestein, sqnpc
496  REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN,
497  $ SIGMA, SMLNUM, VLL, VUU
498 * ..
499 * .. Local Arrays ..
500  INTEGER IDUM1( 4 ), IDUM2( 4 )
501 * ..
502 * .. External Functions ..
503  LOGICAL LSAME
504  INTEGER ICEIL, INDXG2P, NUMROC, PJLAENV
505  REAL PCLANHE, PSLAMCH
506  EXTERNAL lsame, iceil, indxg2p, numroc, pjlaenv,
507  $ pclanhe, pslamch
508 * ..
509 * .. External Subroutines ..
510  EXTERNAL blacs_gridinfo, chk1mat, igamn2d, pcelget,
512  $ pcunmtr, pslared1d, psstebz, pxerbla, sgebr2d,
513  $ sgebs2d, slasrt, sscal
514 * ..
515 * .. Intrinsic Functions ..
516  INTRINSIC abs, cmplx, dble, ichar, int, max, min, mod,
517  $ real, sqrt
518 * ..
519 * .. Executable Statements ..
520 * This is just to keep ftnchek and toolpack/1 happy
521  IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
522  $ rsrc_.LT.0 )RETURN
523 *
524  quickreturn = ( n.EQ.0 )
525 *
526 * Test the input arguments.
527 *
528  ictxt = desca( ctxt_ )
529  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
530  info = 0
531 *
532  wantz = lsame( jobz, 'V' )
533  IF( nprow.EQ.-1 ) THEN
534  info = -( 800+ctxt_ )
535  ELSE IF( wantz ) THEN
536  IF( ictxt.NE.descz( ctxt_ ) ) THEN
537  info = -( 2100+ctxt_ )
538  END IF
539  END IF
540  IF( info.EQ.0 ) THEN
541  CALL chk1mat( n, 4, n, 4, ia, ja, desca, 8, info )
542  IF( wantz )
543  $ CALL chk1mat( n, 4, n, 4, iz, jz, descz, 21, info )
544 *
545  IF( info.EQ.0 ) THEN
546 *
547 * Get machine constants.
548 *
549  safmin = pslamch( ictxt, 'Safe minimum' )
550  eps = pslamch( ictxt, 'Precision' )
551  smlnum = safmin / eps
552  bignum = one / smlnum
553  rmin = sqrt( smlnum )
554  rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) )
555 *
556  nprocs = nprow*npcol
557  lower = lsame( uplo, 'L' )
558  alleig = lsame( range, 'A' )
559  valeig = lsame( range, 'V' )
560  indeig = lsame( range, 'I' )
561 *
562 * Set up pointers into the WORK array
563 *
564  indtau = 1
565  indwork = indtau + n
566  llwork = lwork - indwork + 1
567 *
568 * Set up pointers into the RWORK array
569 *
570  inde = 1
571  indd = inde + n
572  indd2 = indd + n
573  inde2 = indd2 + n
574  indrwork = inde2 + n
575  llrwork = lrwork - indrwork + 1
576 *
577 * Set up pointers into the IWORK array
578 *
579  isizestein = 3*n + nprocs + 1
580  isizestebz = max( 4*n, 14, nprocs )
581  indibl = ( max( isizestein, isizestebz ) ) + 1
582  indisp = indibl + n
583 *
584 * Compute the total amount of space needed
585 *
586  lquery = .false.
587  IF( lwork.EQ.-1 .OR. liwork.EQ.-1 .OR. lrwork.EQ.-1 )
588  $ lquery = .true.
589 *
590  nnp = max( n, nprocs+1, 4 )
591  liwmin = 6*nnp
592 *
593  nprocs = nprow*npcol
594  nb_a = desca( nb_ )
595  mb_a = desca( mb_ )
596  nb = nb_a
597  nn = max( n, nb, 2 )
598 *
599  rsrc_a = desca( rsrc_ )
600  csrc_a = desca( csrc_ )
601  iroffa = mod( ia-1, mb_a )
602  icoffa = mod( ja-1, nb_a )
603  iarow = indxg2p( 1, nb_a, myrow, rsrc_a, nprow )
604  np0 = numroc( n+iroffa, nb, 0, 0, nprow )
605  mq0 = numroc( n+icoffa, nb, 0, 0, npcol )
606  IF( wantz ) THEN
607  rsrc_z = descz( rsrc_ )
608  iroffz = mod( iz-1, mb_a )
609  izrow = indxg2p( 1, nb_a, myrow, rsrc_z, nprow )
610  ELSE
611  iroffz = 0
612  izrow = 0
613  END IF
614 *
615  IF( ( .NOT.wantz ) .OR. ( valeig .AND. ( .NOT.lquery ) ) )
616  $ THEN
617  lwmin = n + max( nb*( np0+1 ), 3 )
618  lwopt = lwmin
619  lrwmin = 5*nn + 4*n
620  IF( wantz ) THEN
621  mq0 = numroc( max( n, nb, 2 ), nb, 0, 0, npcol )
622  lrwopt = 4*n + max( 5*nn, np0*mq0 ) +
623  $ iceil( n, nprow*npcol )*nn
624  ELSE
625  lrwopt = lrwmin
626  END IF
627  neig = 0
628  ELSE
629  IF( alleig .OR. valeig ) THEN
630  neig = n
631  ELSE IF( indeig ) THEN
632  neig = iu - il + 1
633  END IF
634  mq0 = numroc( max( neig, nb, 2 ), nb, 0, 0, npcol )
635  nq0 = numroc( nn, nb, 0, 0, npcol )
636  lwmin = n + ( np0+nq0+nb )*nb
637  lrwmin = 4*n + max( 5*nn, np0*mq0 ) +
638  $ iceil( neig, nprow*npcol )*nn
639  lrwopt = lrwmin
640  lwopt = lwmin
641 *
642  END IF
643 *
644 * Compute how much workspace is needed to use the
645 * new TRD code
646 *
647  anb = pjlaenv( ictxt, 3, 'PCHETTRD', 'L', 0, 0, 0, 0 )
648  sqnpc = int( sqrt( dble( nprow*npcol ) ) )
649  nps = max( numroc( n, 1, 0, 0, sqnpc ), 2*anb )
650  nhetrd_lwopt = 2*( anb+1 )*( 4*nps+2 ) + ( nps+2 )*nps
651  lwopt = max( lwopt, n+nhetrd_lwopt )
652 *
653  END IF
654  IF( info.EQ.0 ) THEN
655  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
656  rwork( 1 ) = abstol
657  IF( valeig ) THEN
658  rwork( 2 ) = vl
659  rwork( 3 ) = vu
660  ELSE
661  rwork( 2 ) = zero
662  rwork( 3 ) = zero
663  END IF
664  CALL sgebs2d( ictxt, 'ALL', ' ', 3, 1, rwork, 3 )
665  ELSE
666  CALL sgebr2d( ictxt, 'ALL', ' ', 3, 1, rwork, 3, 0, 0 )
667  END IF
668  IF( .NOT.( wantz .OR. lsame( jobz, 'N' ) ) ) THEN
669  info = -1
670  ELSE IF( .NOT.( alleig .OR. valeig .OR. indeig ) ) THEN
671  info = -2
672  ELSE IF( .NOT.( lower .OR. lsame( uplo, 'U' ) ) ) THEN
673  info = -3
674  ELSE IF( valeig .AND. n.GT.0 .AND. vu.LE.vl ) THEN
675  info = -10
676  ELSE IF( indeig .AND. ( il.LT.1 .OR. il.GT.max( 1, n ) ) )
677  $ THEN
678  info = -11
679  ELSE IF( indeig .AND. ( iu.LT.min( n, il ) .OR. iu.GT.n ) )
680  $ THEN
681  info = -12
682  ELSE IF( lwork.LT.lwmin .AND. lwork.NE.-1 ) THEN
683  info = -23
684  ELSE IF( lrwork.LT.lrwmin .AND. lrwork.NE.-1 ) THEN
685  info = -25
686  ELSE IF( liwork.LT.liwmin .AND. liwork.NE.-1 ) THEN
687  info = -27
688  ELSE IF( valeig .AND. ( abs( rwork( 2 )-vl ).GT.five*eps*
689  $ abs( vl ) ) ) THEN
690  info = -9
691  ELSE IF( valeig .AND. ( abs( rwork( 3 )-vu ).GT.five*eps*
692  $ abs( vu ) ) ) THEN
693  info = -10
694  ELSE IF( abs( rwork( 1 )-abstol ).GT.five*eps*
695  $ abs( abstol ) ) THEN
696  info = -13
697  ELSE IF( iroffa.NE.0 ) THEN
698  info = -6
699  ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
700  info = -( 800+nb_ )
701  END IF
702  IF( wantz ) THEN
703  IF( iroffa.NE.iroffz ) THEN
704  info = -19
705  ELSE IF( iarow.NE.izrow ) THEN
706  info = -19
707  ELSE IF( desca( m_ ).NE.descz( m_ ) ) THEN
708  info = -( 2100+m_ )
709  ELSE IF( desca( n_ ).NE.descz( n_ ) ) THEN
710  info = -( 2100+n_ )
711  ELSE IF( desca( mb_ ).NE.descz( mb_ ) ) THEN
712  info = -( 2100+mb_ )
713  ELSE IF( desca( nb_ ).NE.descz( nb_ ) ) THEN
714  info = -( 2100+nb_ )
715  ELSE IF( desca( rsrc_ ).NE.descz( rsrc_ ) ) THEN
716  info = -( 2100+rsrc_ )
717  ELSE IF( desca( csrc_ ).NE.descz( csrc_ ) ) THEN
718  info = -( 2100+csrc_ )
719  ELSE IF( ictxt.NE.descz( ctxt_ ) ) THEN
720  info = -( 2100+ctxt_ )
721  END IF
722  END IF
723  END IF
724  IF( wantz ) THEN
725  idum1( 1 ) = ichar( 'V' )
726  ELSE
727  idum1( 1 ) = ichar( 'N' )
728  END IF
729  idum2( 1 ) = 1
730  IF( lower ) THEN
731  idum1( 2 ) = ichar( 'L' )
732  ELSE
733  idum1( 2 ) = ichar( 'U' )
734  END IF
735  idum2( 2 ) = 2
736  IF( alleig ) THEN
737  idum1( 3 ) = ichar( 'A' )
738  ELSE IF( indeig ) THEN
739  idum1( 3 ) = ichar( 'I' )
740  ELSE
741  idum1( 3 ) = ichar( 'V' )
742  END IF
743  idum2( 3 ) = 3
744  IF( lquery ) THEN
745  idum1( 4 ) = -1
746  ELSE
747  idum1( 4 ) = 1
748  END IF
749  idum2( 4 ) = 4
750  IF( wantz ) THEN
751  CALL pchk2mat( n, 4, n, 4, ia, ja, desca, 8, n, 4, n, 4, iz,
752  $ jz, descz, 21, 4, idum1, idum2, info )
753  ELSE
754  CALL pchk1mat( n, 4, n, 4, ia, ja, desca, 8, 4, idum1,
755  $ idum2, info )
756  END IF
757  work( 1 ) = cmplx( lwopt )
758  rwork( 1 ) = real( lrwopt )
759  iwork( 1 ) = liwmin
760  END IF
761 *
762  IF( info.NE.0 ) THEN
763  CALL pxerbla( ictxt, 'PCHEEVX', -info )
764  RETURN
765  ELSE IF( lquery ) THEN
766  RETURN
767  END IF
768 *
769 * Quick return if possible
770 *
771  IF( quickreturn ) THEN
772  IF( wantz ) THEN
773  nz = 0
774  iclustr( 1 ) = 0
775  END IF
776  m = 0
777  work( 1 ) = cmplx( lwopt )
778  rwork( 1 ) = real( lrwmin )
779  iwork( 1 ) = liwmin
780  RETURN
781  END IF
782 *
783 * Scale matrix to allowable range, if necessary.
784 *
785  abstll = abstol
786  iscale = 0
787  IF( valeig ) THEN
788  vll = vl
789  vuu = vu
790  ELSE
791  vll = zero
792  vuu = zero
793  END IF
794 *
795  anrm = pclanhe( 'M', uplo, n, a, ia, ja, desca,
796  $ rwork( indrwork ) )
797 *
798  IF( anrm.GT.zero .AND. anrm.LT.rmin ) THEN
799  iscale = 1
800  sigma = rmin / anrm
801  anrm = anrm*sigma
802  ELSE IF( anrm.GT.rmax ) THEN
803  iscale = 1
804  sigma = rmax / anrm
805  anrm = anrm*sigma
806  END IF
807 *
808  IF( iscale.EQ.1 ) THEN
809  CALL pclascl( uplo, one, sigma, n, n, a, ia, ja, desca, iinfo )
810  IF( abstol.GT.0 )
811  $ abstll = abstol*sigma
812  IF( valeig ) THEN
813  vll = vl*sigma
814  vuu = vu*sigma
815  IF( vuu.EQ.vll ) THEN
816  vuu = vuu + 2*max( abs( vuu )*eps, safmin )
817  END IF
818  END IF
819  END IF
820 *
821 * Call PCHENTRD to reduce Hermitian matrix to tridiagonal form.
822 *
823  lallwork = llrwork
824 *
825  CALL pchentrd( uplo, n, a, ia, ja, desca, rwork( indd ),
826  $ rwork( inde ), work( indtau ), work( indwork ),
827  $ llwork, rwork( indrwork ), llrwork, iinfo )
828 *
829 *
830 * Copy the values of D, E to all processes
831 *
832 * Here PxLARED1D is used to redistribute the tridiagonal matrix.
833 * PxLARED1D, however, doesn't yet work with arbritary matrix
834 * distributions so we have PxELGET as a backup.
835 *
836  offset = 0
837  IF( ia.EQ.1 .AND. ja.EQ.1 .AND. rsrc_a.EQ.0 .AND. csrc_a.EQ.0 )
838  $ THEN
839  CALL pslared1d( n, ia, ja, desca, rwork( indd ),
840  $ rwork( indd2 ), rwork( indrwork ), llrwork )
841 *
842  CALL pslared1d( n, ia, ja, desca, rwork( inde ),
843  $ rwork( inde2 ), rwork( indrwork ), llrwork )
844  IF( .NOT.lower )
845  $ offset = 1
846  ELSE
847  DO 10 i = 1, n
848  CALL pcelget( 'A', ' ', work( indd2+i-1 ), a, i+ia-1,
849  $ i+ja-1, desca )
850  rwork( indd2+i-1 ) = real( work( indd2+i-1 ) )
851  10 CONTINUE
852  IF( lsame( uplo, 'U' ) ) THEN
853  DO 20 i = 1, n - 1
854  CALL pcelget( 'A', ' ', work( inde2+i-1 ), a, i+ia-1,
855  $ i+ja, desca )
856  rwork( inde2+i-1 ) = real( work( inde2+i-1 ) )
857  20 CONTINUE
858  ELSE
859  DO 30 i = 1, n - 1
860  CALL pcelget( 'A', ' ', work( inde2+i-1 ), a, i+ia,
861  $ i+ja-1, desca )
862  rwork( inde2+i-1 ) = real( work( inde2+i-1 ) )
863  30 CONTINUE
864  END IF
865  END IF
866 *
867 * Call PSSTEBZ and, if eigenvectors are desired, PCSTEIN.
868 *
869  IF( wantz ) THEN
870  order = 'B'
871  ELSE
872  order = 'E'
873  END IF
874 *
875  CALL psstebz( ictxt, range, order, n, vll, vuu, il, iu, abstll,
876  $ rwork( indd2 ), rwork( inde2+offset ), m, nsplit, w,
877  $ iwork( indibl ), iwork( indisp ), rwork( indrwork ),
878  $ llrwork, iwork( 1 ), isizestebz, iinfo )
879 *
880 *
881 * IF PSSTEBZ fails, the error propogates to INFO, but
882 * we do not propogate the eigenvalue(s) which failed because:
883 * 1) This should never happen if the user specifies
884 * ABSTOL = 2 * PSLAMCH( 'U' )
885 * 2) PSSTEIN will confirm/deny whether the eigenvalues are
886 * close enough.
887 *
888  IF( iinfo.NE.0 ) THEN
889  info = info + ierrebz
890  DO 40 i = 1, m
891  iwork( indibl+i-1 ) = abs( iwork( indibl+i-1 ) )
892  40 CONTINUE
893  END IF
894  IF( wantz ) THEN
895 *
896  IF( valeig ) THEN
897 *
898 * Compute the maximum number of eigenvalues that we can
899 * compute in the
900 * workspace that we have, and that we can store in Z.
901 *
902 * Loop through the possibilities looking for the largest
903 * NZ that we can feed to PCSTEIN and PCUNMTR
904 *
905 * Since all processes must end up with the same value
906 * of NZ, we first compute the minimum of LALLWORK
907 *
908  CALL igamn2d( ictxt, 'A', ' ', 1, 1, lallwork, 1, 1, 1, -1,
909  $ -1, -1 )
910 *
911  maxeigs = descz( n_ )
912 *
913  DO 50 nz = min( maxeigs, m ), 0, -1
914  mq0 = numroc( nz, nb, 0, 0, npcol )
915  sizestein = iceil( nz, nprocs )*n + max( 5*n, np0*mq0 )
916  sizeheevx = sizestein
917  IF( sizeheevx.LE.lallwork )
918  $ GO TO 60
919  50 CONTINUE
920  60 CONTINUE
921  ELSE
922  nz = m
923  END IF
924  nz = max( nz, 0 )
925  IF( nz.NE.m ) THEN
926  info = info + ierrspc
927 *
928  DO 70 i = 1, m
929  ifail( i ) = 0
930  70 CONTINUE
931 *
932 * The following code handles a rare special case
933 * - NZ .NE. M means that we don't have enough room to store
934 * all the vectors.
935 * - NSPLIT .GT. 1 means that the matrix split
936 * In this case, we cannot simply take the first NZ eigenvalues
937 * because PSSTEBZ sorts the eigenvalues by block when
938 * a split occurs. So, we have to make another call to
939 * PSSTEBZ with a new upper limit - VUU.
940 *
941  IF( nsplit.GT.1 ) THEN
942  CALL slasrt( 'I', m, w, iinfo )
943  nzz = 0
944  IF( nz.GT.0 ) THEN
945 *
946  vuu = w( nz ) - ten*( eps*anrm+safmin )
947  IF( vll.GE.vuu ) THEN
948  nzz = 0
949  ELSE
950  CALL psstebz( ictxt, range, order, n, vll, vuu, il,
951  $ iu, abstll, rwork( indd2 ),
952  $ rwork( inde2+offset ), nzz, nsplit,
953  $ w, iwork( indibl ), iwork( indisp ),
954  $ rwork( indrwork ), llrwork,
955  $ iwork( 1 ), isizestebz, iinfo )
956  END IF
957 *
958  IF( mod( info / ierrebz, 1 ).EQ.0 ) THEN
959  IF( nzz.GT.nz .OR. iinfo.NE.0 ) THEN
960  info = info + ierrebz
961  END IF
962  END IF
963  END IF
964  nz = min( nz, nzz )
965 *
966  END IF
967  END IF
968  CALL pcstein( n, rwork( indd2 ), rwork( inde2+offset ), nz, w,
969  $ iwork( indibl ), iwork( indisp ), orfac, z, iz,
970  $ jz, descz, rwork( indrwork ), lallwork,
971  $ iwork( 1 ), isizestein, ifail, iclustr, gap,
972  $ iinfo )
973 *
974  IF( iinfo.GE.nz+1 )
975  $ info = info + ierrcls
976  IF( mod( iinfo, nz+1 ).NE.0 )
977  $ info = info + ierrein
978 *
979 * Z = Q * Z
980 *
981 *
982  IF( nz.GT.0 ) THEN
983  CALL pcunmtr( 'L', uplo, 'N', n, nz, a, ia, ja, desca,
984  $ work( indtau ), z, iz, jz, descz,
985  $ work( indwork ), llwork, iinfo )
986  END IF
987 *
988  END IF
989 *
990 * If matrix was scaled, then rescale eigenvalues appropriately.
991 *
992  IF( iscale.EQ.1 ) THEN
993  CALL sscal( m, one / sigma, w, 1 )
994  END IF
995 *
996  work( 1 ) = cmplx( lwopt )
997  rwork( 1 ) = real( lrwopt )
998  iwork( 1 ) = liwmin
999 *
1000  RETURN
1001 *
1002 * End of PCHEEVX
1003 *
1004  END
cmplx
float cmplx[2]
Definition: pblas.h:132
max
#define max(A, B)
Definition: pcgemr.c:180
pcelget
subroutine pcelget(SCOPE, TOP, ALPHA, A, IA, JA, DESCA)
Definition: pcelget.f:2
psstebz
subroutine psstebz(ICTXT, RANGE, ORDER, N, VL, VU, IL, IU, ABSTOL, D, E, M, NSPLIT, W, IBLOCK, ISPLIT, WORK, LWORK, IWORK, LIWORK, INFO)
Definition: psstebz.f:4
pchk1mat
subroutine pchk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, NEXTRA, EX, EXPOS, INFO)
Definition: pchkxmat.f:3
pchk2mat
subroutine pchk2mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, MB, MBPOS0, NB, NBPOS0, IB, JB, DESCB, DESCBPOS0, NEXTRA, EX, EXPOS, INFO)
Definition: pchkxmat.f:175
pslared1d
subroutine pslared1d(N, IA, JA, DESC, BYCOL, BYALL, WORK, LWORK)
Definition: pslared1d.f:2
pchentrd
subroutine pchentrd(UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, LWORK, RWORK, LRWORK, INFO)
Definition: pchentrd.f:3
pcunmtr
subroutine pcunmtr(SIDE, UPLO, TRANS, M, N, A, IA, JA, DESCA, TAU, C, IC, JC, DESCC, WORK, LWORK, INFO)
Definition: pcunmtr.f:3
pcheevx
subroutine pcheevx(JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, VU, IL, IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ, WORK, LWORK, RWORK, LRWORK, IWORK, LIWORK, IFAIL, ICLUSTR, GAP, INFO)
Definition: pcheevx.f:5
chk1mat
subroutine chk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, INFO)
Definition: chk1mat.f:3
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
pcstein
subroutine pcstein(N, D, E, M, W, IBLOCK, ISPLIT, ORFAC, Z, IZ, JZ, DESCZ, WORK, LWORK, IWORK, LIWORK, IFAIL, ICLUSTR, GAP, INFO)
Definition: pcstein.f:4
pclascl
subroutine pclascl(TYPE, CFROM, CTO, M, N, A, IA, JA, DESCA, INFO)
Definition: pclascl.f:3
min
#define min(A, B)
Definition: pcgemr.c:181