LAPACK  3.8.0
LAPACK: Linear Algebra PACKage
cheevd_2stage.f
Go to the documentation of this file.
1 *> \brief <b> CHEEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices</b>
2 *
3 * @generated from zheevd_2stage.f, fortran z -> c, Sat Nov 5 23:18:14 2016
4 *
5 * =========== DOCUMENTATION ===========
6 *
7 * Online html documentation available at
8 * http://www.netlib.org/lapack/explore-html/
9 *
10 *> \htmlonly
11 *> Download CHEEVD_2STAGE + dependencies
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cheevd_2stage.f">
13 *> [TGZ]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cheevd_2stage.f">
15 *> [ZIP]</a>
16 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cheevd_2stage.f">
17 *> [TXT]</a>
18 *> \endhtmlonly
19 *
20 * Definition:
21 * ===========
22 *
23 * SUBROUTINE CHEEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK,
24 * RWORK, LRWORK, IWORK, LIWORK, INFO )
25 *
26 * IMPLICIT NONE
27 *
28 * .. Scalar Arguments ..
29 * CHARACTER JOBZ, UPLO
30 * INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N
31 * ..
32 * .. Array Arguments ..
33 * INTEGER IWORK( * )
34 * REAL RWORK( * ), W( * )
35 * COMPLEX A( LDA, * ), WORK( * )
36 * ..
37 *
38 *
39 *> \par Purpose:
40 * =============
41 *>
42 *> \verbatim
43 *>
44 *> CHEEVD_2STAGE computes all eigenvalues and, optionally, eigenvectors of a
45 *> complex Hermitian matrix A using the 2stage technique for
46 *> the reduction to tridiagonal. If eigenvectors are desired, it uses a
47 *> divide and conquer algorithm.
48 *>
49 *> The divide and conquer algorithm makes very mild assumptions about
50 *> floating point arithmetic. It will work on machines with a guard
51 *> digit in add/subtract, or on those binary machines without guard
52 *> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
53 *> Cray-2. It could conceivably fail on hexadecimal or decimal machines
54 *> without guard digits, but we know of none.
55 *> \endverbatim
56 *
57 * Arguments:
58 * ==========
59 *
60 *> \param[in] JOBZ
61 *> \verbatim
62 *> JOBZ is CHARACTER*1
63 *> = 'N': Compute eigenvalues only;
64 *> = 'V': Compute eigenvalues and eigenvectors.
65 *> Not available in this release.
66 *> \endverbatim
67 *>
68 *> \param[in] UPLO
69 *> \verbatim
70 *> UPLO is CHARACTER*1
71 *> = 'U': Upper triangle of A is stored;
72 *> = 'L': Lower triangle of A is stored.
73 *> \endverbatim
74 *>
75 *> \param[in] N
76 *> \verbatim
77 *> N is INTEGER
78 *> The order of the matrix A. N >= 0.
79 *> \endverbatim
80 *>
81 *> \param[in,out] A
82 *> \verbatim
83 *> A is COMPLEX array, dimension (LDA, N)
84 *> On entry, the Hermitian matrix A. If UPLO = 'U', the
85 *> leading N-by-N upper triangular part of A contains the
86 *> upper triangular part of the matrix A. If UPLO = 'L',
87 *> the leading N-by-N lower triangular part of A contains
88 *> the lower triangular part of the matrix A.
89 *> On exit, if JOBZ = 'V', then if INFO = 0, A contains the
90 *> orthonormal eigenvectors of the matrix A.
91 *> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L')
92 *> or the upper triangle (if UPLO='U') of A, including the
93 *> diagonal, is destroyed.
94 *> \endverbatim
95 *>
96 *> \param[in] LDA
97 *> \verbatim
98 *> LDA is INTEGER
99 *> The leading dimension of the array A. LDA >= max(1,N).
100 *> \endverbatim
101 *>
102 *> \param[out] W
103 *> \verbatim
104 *> W is REAL array, dimension (N)
105 *> If INFO = 0, the eigenvalues in ascending order.
106 *> \endverbatim
107 *>
108 *> \param[out] WORK
109 *> \verbatim
110 *> WORK is COMPLEX array, dimension (MAX(1,LWORK))
111 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
112 *> \endverbatim
113 *>
114 *> \param[in] LWORK
115 *> \verbatim
116 *> LWORK is INTEGER
117 *> The dimension of the array WORK.
118 *> If N <= 1, LWORK must be at least 1.
119 *> If JOBZ = 'N' and N > 1, LWORK must be queried.
120 *> LWORK = MAX(1, dimension) where
121 *> dimension = max(stage1,stage2) + (KD+1)*N + N+1
122 *> = N*KD + N*max(KD+1,FACTOPTNB)
123 *> + max(2*KD*KD, KD*NTHREADS)
124 *> + (KD+1)*N + N+1
125 *> where KD is the blocking size of the reduction,
126 *> FACTOPTNB is the blocking used by the QR or LQ
127 *> algorithm, usually FACTOPTNB=128 is a good choice
128 *> NTHREADS is the number of threads used when
129 *> openMP compilation is enabled, otherwise =1.
130 *> If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2
131 *>
132 *> If LWORK = -1, then a workspace query is assumed; the routine
133 *> only calculates the optimal sizes of the WORK, RWORK and
134 *> IWORK arrays, returns these values as the first entries of
135 *> the WORK, RWORK and IWORK arrays, and no error message
136 *> related to LWORK or LRWORK or LIWORK is issued by XERBLA.
137 *> \endverbatim
138 *>
139 *> \param[out] RWORK
140 *> \verbatim
141 *> RWORK is REAL array,
142 *> dimension (LRWORK)
143 *> On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
144 *> \endverbatim
145 *>
146 *> \param[in] LRWORK
147 *> \verbatim
148 *> LRWORK is INTEGER
149 *> The dimension of the array RWORK.
150 *> If N <= 1, LRWORK must be at least 1.
151 *> If JOBZ = 'N' and N > 1, LRWORK must be at least N.
152 *> If JOBZ = 'V' and N > 1, LRWORK must be at least
153 *> 1 + 5*N + 2*N**2.
154 *>
155 *> If LRWORK = -1, then a workspace query is assumed; the
156 *> routine only calculates the optimal sizes of the WORK, RWORK
157 *> and IWORK arrays, returns these values as the first entries
158 *> of the WORK, RWORK and IWORK arrays, and no error message
159 *> related to LWORK or LRWORK or LIWORK is issued by XERBLA.
160 *> \endverbatim
161 *>
162 *> \param[out] IWORK
163 *> \verbatim
164 *> IWORK is INTEGER array, dimension (MAX(1,LIWORK))
165 *> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
166 *> \endverbatim
167 *>
168 *> \param[in] LIWORK
169 *> \verbatim
170 *> LIWORK is INTEGER
171 *> The dimension of the array IWORK.
172 *> If N <= 1, LIWORK must be at least 1.
173 *> If JOBZ = 'N' and N > 1, LIWORK must be at least 1.
174 *> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N.
175 *>
176 *> If LIWORK = -1, then a workspace query is assumed; the
177 *> routine only calculates the optimal sizes of the WORK, RWORK
178 *> and IWORK arrays, returns these values as the first entries
179 *> of the WORK, RWORK and IWORK arrays, and no error message
180 *> related to LWORK or LRWORK or LIWORK is issued by XERBLA.
181 *> \endverbatim
182 *>
183 *> \param[out] INFO
184 *> \verbatim
185 *> INFO is INTEGER
186 *> = 0: successful exit
187 *> < 0: if INFO = -i, the i-th argument had an illegal value
188 *> > 0: if INFO = i and JOBZ = 'N', then the algorithm failed
189 *> to converge; i off-diagonal elements of an intermediate
190 *> tridiagonal form did not converge to zero;
191 *> if INFO = i and JOBZ = 'V', then the algorithm failed
192 *> to compute an eigenvalue while working on the submatrix
193 *> lying in rows and columns INFO/(N+1) through
194 *> mod(INFO,N+1).
195 *> \endverbatim
196 *
197 * Authors:
198 * ========
199 *
200 *> \author Univ. of Tennessee
201 *> \author Univ. of California Berkeley
202 *> \author Univ. of Colorado Denver
203 *> \author NAG Ltd.
204 *
205 *> \date November 2017
206 *
207 *> \ingroup complexHEeigen
208 *
209 *> \par Further Details:
210 * =====================
211 *>
212 *> Modified description of INFO. Sven, 16 Feb 05.
213 *
214 *> \par Contributors:
215 * ==================
216 *>
217 *> Jeff Rutter, Computer Science Division, University of California
218 *> at Berkeley, USA
219 *>
220 *> \par Further Details:
221 * =====================
222 *>
223 *> \verbatim
224 *>
225 *> All details about the 2stage techniques are available in:
226 *>
227 *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
228 *> Parallel reduction to condensed forms for symmetric eigenvalue problems
229 *> using aggregated fine-grained and memory-aware kernels. In Proceedings
230 *> of 2011 International Conference for High Performance Computing,
231 *> Networking, Storage and Analysis (SC '11), New York, NY, USA,
232 *> Article 8 , 11 pages.
233 *> http://doi.acm.org/10.1145/2063384.2063394
234 *>
235 *> A. Haidar, J. Kurzak, P. Luszczek, 2013.
236 *> An improved parallel singular value algorithm and its implementation
237 *> for multicore hardware, In Proceedings of 2013 International Conference
238 *> for High Performance Computing, Networking, Storage and Analysis (SC '13).
239 *> Denver, Colorado, USA, 2013.
240 *> Article 90, 12 pages.
241 *> http://doi.acm.org/10.1145/2503210.2503292
242 *>
243 *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
244 *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
245 *> calculations based on fine-grained memory aware tasks.
246 *> International Journal of High Performance Computing Applications.
247 *> Volume 28 Issue 2, Pages 196-209, May 2014.
248 *> http://hpc.sagepub.com/content/28/2/196
249 *>
250 *> \endverbatim
251 *
252 * =====================================================================
253  SUBROUTINE cheevd_2stage( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK,
254  $ RWORK, LRWORK, IWORK, LIWORK, INFO )
255 *
256  IMPLICIT NONE
257 *
258 * -- LAPACK driver routine (version 3.8.0) --
259 * -- LAPACK is a software package provided by Univ. of Tennessee, --
260 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
261 * November 2017
262 *
263 * .. Scalar Arguments ..
264  CHARACTER JOBZ, UPLO
265  INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N
266 * ..
267 * .. Array Arguments ..
268  INTEGER IWORK( * )
269  REAL RWORK( * ), W( * )
270  COMPLEX A( lda, * ), WORK( * )
271 * ..
272 *
273 * =====================================================================
274 *
275 * .. Parameters ..
276  REAL ZERO, ONE
277  parameter( zero = 0.0e0, one = 1.0e0 )
278  COMPLEX CONE
279  parameter( cone = ( 1.0e0, 0.0e0 ) )
280 * ..
281 * .. Local Scalars ..
282  LOGICAL LOWER, LQUERY, WANTZ
283  INTEGER IINFO, IMAX, INDE, INDRWK, INDTAU, INDWK2,
284  $ indwrk, iscale, liwmin, llrwk, llwork,
285  $ llwrk2, lrwmin, lwmin,
286  $ lhtrd, lwtrd, kd, ib, indhous
287 
288 
289  REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
290  $ smlnum
291 * ..
292 * .. External Functions ..
293  LOGICAL LSAME
294  INTEGER ILAENV2STAGE
295  REAL SLAMCH, CLANHE
296  EXTERNAL lsame, slamch, clanhe, ilaenv2stage
297 * ..
298 * .. External Subroutines ..
299  EXTERNAL sscal, ssterf, xerbla, clacpy, clascl,
301 * ..
302 * .. Intrinsic Functions ..
303  INTRINSIC REAL, MAX, SQRT
304 * ..
305 * .. Executable Statements ..
306 *
307 * Test the input parameters.
308 *
309  wantz = lsame( jobz, 'V' )
310  lower = lsame( uplo, 'L' )
311  lquery = ( lwork.EQ.-1 .OR. lrwork.EQ.-1 .OR. liwork.EQ.-1 )
312 *
313  info = 0
314  IF( .NOT.( lsame( jobz, 'N' ) ) ) THEN
315  info = -1
316  ELSE IF( .NOT.( lower .OR. lsame( uplo, 'U' ) ) ) THEN
317  info = -2
318  ELSE IF( n.LT.0 ) THEN
319  info = -3
320  ELSE IF( lda.LT.max( 1, n ) ) THEN
321  info = -5
322  END IF
323 *
324  IF( info.EQ.0 ) THEN
325  IF( n.LE.1 ) THEN
326  lwmin = 1
327  lrwmin = 1
328  liwmin = 1
329  ELSE
330  kd = ilaenv2stage( 1, 'CHETRD_2STAGE', jobz,
331  $ n, -1, -1, -1 )
332  ib = ilaenv2stage( 2, 'CHETRD_2STAGE', jobz,
333  $ n, kd, -1, -1 )
334  lhtrd = ilaenv2stage( 3, 'CHETRD_2STAGE', jobz,
335  $ n, kd, ib, -1 )
336  lwtrd = ilaenv2stage( 4, 'CHETRD_2STAGE', jobz,
337  $ n, kd, ib, -1 )
338  IF( wantz ) THEN
339  lwmin = 2*n + n*n
340  lrwmin = 1 + 5*n + 2*n**2
341  liwmin = 3 + 5*n
342  ELSE
343  lwmin = n + 1 + lhtrd + lwtrd
344  lrwmin = n
345  liwmin = 1
346  END IF
347  END IF
348  work( 1 ) = lwmin
349  rwork( 1 ) = lrwmin
350  iwork( 1 ) = liwmin
351 *
352  IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
353  info = -8
354  ELSE IF( lrwork.LT.lrwmin .AND. .NOT.lquery ) THEN
355  info = -10
356  ELSE IF( liwork.LT.liwmin .AND. .NOT.lquery ) THEN
357  info = -12
358  END IF
359  END IF
360 *
361  IF( info.NE.0 ) THEN
362  CALL xerbla( 'CHEEVD_2STAGE', -info )
363  RETURN
364  ELSE IF( lquery ) THEN
365  RETURN
366  END IF
367 *
368 * Quick return if possible
369 *
370  IF( n.EQ.0 )
371  $ RETURN
372 *
373  IF( n.EQ.1 ) THEN
374  w( 1 ) = REAL( A( 1, 1 ) )
375  IF( wantz )
376  $ a( 1, 1 ) = cone
377  RETURN
378  END IF
379 *
380 * Get machine constants.
381 *
382  safmin = slamch( 'Safe minimum' )
383  eps = slamch( 'Precision' )
384  smlnum = safmin / eps
385  bignum = one / smlnum
386  rmin = sqrt( smlnum )
387  rmax = sqrt( bignum )
388 *
389 * Scale matrix to allowable range, if necessary.
390 *
391  anrm = clanhe( 'M', uplo, n, a, lda, rwork )
392  iscale = 0
393  IF( anrm.GT.zero .AND. anrm.LT.rmin ) THEN
394  iscale = 1
395  sigma = rmin / anrm
396  ELSE IF( anrm.GT.rmax ) THEN
397  iscale = 1
398  sigma = rmax / anrm
399  END IF
400  IF( iscale.EQ.1 )
401  $ CALL clascl( uplo, 0, 0, one, sigma, n, n, a, lda, info )
402 *
403 * Call CHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form.
404 *
405  inde = 1
406  indrwk = inde + n
407  llrwk = lrwork - indrwk + 1
408  indtau = 1
409  indhous = indtau + n
410  indwrk = indhous + lhtrd
411  llwork = lwork - indwrk + 1
412  indwk2 = indwrk + n*n
413  llwrk2 = lwork - indwk2 + 1
414 *
415  CALL chetrd_2stage( jobz, uplo, n, a, lda, w, rwork( inde ),
416  $ work( indtau ), work( indhous ), lhtrd,
417  $ work( indwrk ), llwork, iinfo )
418 *
419 * For eigenvalues only, call SSTERF. For eigenvectors, first call
420 * CSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the
421 * tridiagonal matrix, then call CUNMTR to multiply it to the
422 * Householder transformations represented as Householder vectors in
423 * A.
424 *
425  IF( .NOT.wantz ) THEN
426  CALL ssterf( n, w, rwork( inde ), info )
427  ELSE
428  CALL cstedc( 'I', n, w, rwork( inde ), work( indwrk ), n,
429  $ work( indwk2 ), llwrk2, rwork( indrwk ), llrwk,
430  $ iwork, liwork, info )
431  CALL cunmtr( 'L', uplo, 'N', n, n, a, lda, work( indtau ),
432  $ work( indwrk ), n, work( indwk2 ), llwrk2, iinfo )
433  CALL clacpy( 'A', n, n, work( indwrk ), n, a, lda )
434  END IF
435 *
436 * If matrix was scaled, then rescale eigenvalues appropriately.
437 *
438  IF( iscale.EQ.1 ) THEN
439  IF( info.EQ.0 ) THEN
440  imax = n
441  ELSE
442  imax = info - 1
443  END IF
444  CALL sscal( imax, one / sigma, w, 1 )
445  END IF
446 *
447  work( 1 ) = lwmin
448  rwork( 1 ) = lrwmin
449  iwork( 1 ) = liwmin
450 *
451  RETURN
452 *
453 * End of CHEEVD_2STAGE
454 *
455  END
subroutine cunmtr(SIDE, UPLO, TRANS, M, N, A, LDA, TAU, C, LDC, WORK, LWORK, INFO)
CUNMTR
Definition: cunmtr.f:174
subroutine cheevd_2stage(JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO)
CHEEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE ma...
subroutine clascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
CLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: clascl.f:145
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
subroutine clacpy(UPLO, M, N, A, LDA, B, LDB)
CLACPY copies all or part of one two-dimensional array to another.
Definition: clacpy.f:105
subroutine cstedc(COMPZ, N, D, E, Z, LDZ, WORK, LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO)
CSTEDC
Definition: cstedc.f:214
subroutine sscal(N, SA, SX, INCX)
SSCAL
Definition: sscal.f:81
subroutine chetrd_2stage(VECT, UPLO, N, A, LDA, D, E, TAU, HOUS2, LHOUS2, WORK, LWORK, INFO)
CHETRD_2STAGE
subroutine ssterf(N, D, E, INFO)
SSTERF
Definition: ssterf.f:88