LAPACK  3.10.0
LAPACK: Linear Algebra PACKage
cheevd_2stage.f
Go to the documentation of this file.
1 *> \brief <b> CHEEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices</b>
2 *
3 * @generated from zheevd_2stage.f, fortran z -> c, Sat Nov 5 23:18:14 2016
4 *
5 * =========== DOCUMENTATION ===========
6 *
7 * Online html documentation available at
8 * http://www.netlib.org/lapack/explore-html/
9 *
10 *> \htmlonly
11 *> Download CHEEVD_2STAGE + dependencies
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cheevd_2stage.f">
13 *> [TGZ]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cheevd_2stage.f">
15 *> [ZIP]</a>
16 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cheevd_2stage.f">
17 *> [TXT]</a>
18 *> \endhtmlonly
19 *
20 * Definition:
21 * ===========
22 *
23 * SUBROUTINE CHEEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK,
24 * RWORK, LRWORK, IWORK, LIWORK, INFO )
25 *
26 * IMPLICIT NONE
27 *
28 * .. Scalar Arguments ..
29 * CHARACTER JOBZ, UPLO
30 * INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N
31 * ..
32 * .. Array Arguments ..
33 * INTEGER IWORK( * )
34 * REAL RWORK( * ), W( * )
35 * COMPLEX A( LDA, * ), WORK( * )
36 * ..
37 *
38 *
39 *> \par Purpose:
40 * =============
41 *>
42 *> \verbatim
43 *>
44 *> CHEEVD_2STAGE computes all eigenvalues and, optionally, eigenvectors of a
45 *> complex Hermitian matrix A using the 2stage technique for
46 *> the reduction to tridiagonal. If eigenvectors are desired, it uses a
47 *> divide and conquer algorithm.
48 *>
49 *> The divide and conquer algorithm makes very mild assumptions about
50 *> floating point arithmetic. It will work on machines with a guard
51 *> digit in add/subtract, or on those binary machines without guard
52 *> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
53 *> Cray-2. It could conceivably fail on hexadecimal or decimal machines
54 *> without guard digits, but we know of none.
55 *> \endverbatim
56 *
57 * Arguments:
58 * ==========
59 *
60 *> \param[in] JOBZ
61 *> \verbatim
62 *> JOBZ is CHARACTER*1
63 *> = 'N': Compute eigenvalues only;
64 *> = 'V': Compute eigenvalues and eigenvectors.
65 *> Not available in this release.
66 *> \endverbatim
67 *>
68 *> \param[in] UPLO
69 *> \verbatim
70 *> UPLO is CHARACTER*1
71 *> = 'U': Upper triangle of A is stored;
72 *> = 'L': Lower triangle of A is stored.
73 *> \endverbatim
74 *>
75 *> \param[in] N
76 *> \verbatim
77 *> N is INTEGER
78 *> The order of the matrix A. N >= 0.
79 *> \endverbatim
80 *>
81 *> \param[in,out] A
82 *> \verbatim
83 *> A is COMPLEX array, dimension (LDA, N)
84 *> On entry, the Hermitian matrix A. If UPLO = 'U', the
85 *> leading N-by-N upper triangular part of A contains the
86 *> upper triangular part of the matrix A. If UPLO = 'L',
87 *> the leading N-by-N lower triangular part of A contains
88 *> the lower triangular part of the matrix A.
89 *> On exit, if JOBZ = 'V', then if INFO = 0, A contains the
90 *> orthonormal eigenvectors of the matrix A.
91 *> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L')
92 *> or the upper triangle (if UPLO='U') of A, including the
93 *> diagonal, is destroyed.
94 *> \endverbatim
95 *>
96 *> \param[in] LDA
97 *> \verbatim
98 *> LDA is INTEGER
99 *> The leading dimension of the array A. LDA >= max(1,N).
100 *> \endverbatim
101 *>
102 *> \param[out] W
103 *> \verbatim
104 *> W is REAL array, dimension (N)
105 *> If INFO = 0, the eigenvalues in ascending order.
106 *> \endverbatim
107 *>
108 *> \param[out] WORK
109 *> \verbatim
110 *> WORK is COMPLEX array, dimension (MAX(1,LWORK))
111 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
112 *> \endverbatim
113 *>
114 *> \param[in] LWORK
115 *> \verbatim
116 *> LWORK is INTEGER
117 *> The dimension of the array WORK.
118 *> If N <= 1, LWORK must be at least 1.
119 *> If JOBZ = 'N' and N > 1, LWORK must be queried.
120 *> LWORK = MAX(1, dimension) where
121 *> dimension = max(stage1,stage2) + (KD+1)*N + N+1
122 *> = N*KD + N*max(KD+1,FACTOPTNB)
123 *> + max(2*KD*KD, KD*NTHREADS)
124 *> + (KD+1)*N + N+1
125 *> where KD is the blocking size of the reduction,
126 *> FACTOPTNB is the blocking used by the QR or LQ
127 *> algorithm, usually FACTOPTNB=128 is a good choice
128 *> NTHREADS is the number of threads used when
129 *> openMP compilation is enabled, otherwise =1.
130 *> If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2
131 *>
132 *> If LWORK = -1, then a workspace query is assumed; the routine
133 *> only calculates the optimal sizes of the WORK, RWORK and
134 *> IWORK arrays, returns these values as the first entries of
135 *> the WORK, RWORK and IWORK arrays, and no error message
136 *> related to LWORK or LRWORK or LIWORK is issued by XERBLA.
137 *> \endverbatim
138 *>
139 *> \param[out] RWORK
140 *> \verbatim
141 *> RWORK is REAL array,
142 *> dimension (LRWORK)
143 *> On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
144 *> \endverbatim
145 *>
146 *> \param[in] LRWORK
147 *> \verbatim
148 *> LRWORK is INTEGER
149 *> The dimension of the array RWORK.
150 *> If N <= 1, LRWORK must be at least 1.
151 *> If JOBZ = 'N' and N > 1, LRWORK must be at least N.
152 *> If JOBZ = 'V' and N > 1, LRWORK must be at least
153 *> 1 + 5*N + 2*N**2.
154 *>
155 *> If LRWORK = -1, then a workspace query is assumed; the
156 *> routine only calculates the optimal sizes of the WORK, RWORK
157 *> and IWORK arrays, returns these values as the first entries
158 *> of the WORK, RWORK and IWORK arrays, and no error message
159 *> related to LWORK or LRWORK or LIWORK is issued by XERBLA.
160 *> \endverbatim
161 *>
162 *> \param[out] IWORK
163 *> \verbatim
164 *> IWORK is INTEGER array, dimension (MAX(1,LIWORK))
165 *> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
166 *> \endverbatim
167 *>
168 *> \param[in] LIWORK
169 *> \verbatim
170 *> LIWORK is INTEGER
171 *> The dimension of the array IWORK.
172 *> If N <= 1, LIWORK must be at least 1.
173 *> If JOBZ = 'N' and N > 1, LIWORK must be at least 1.
174 *> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N.
175 *>
176 *> If LIWORK = -1, then a workspace query is assumed; the
177 *> routine only calculates the optimal sizes of the WORK, RWORK
178 *> and IWORK arrays, returns these values as the first entries
179 *> of the WORK, RWORK and IWORK arrays, and no error message
180 *> related to LWORK or LRWORK or LIWORK is issued by XERBLA.
181 *> \endverbatim
182 *>
183 *> \param[out] INFO
184 *> \verbatim
185 *> INFO is INTEGER
186 *> = 0: successful exit
187 *> < 0: if INFO = -i, the i-th argument had an illegal value
188 *> > 0: if INFO = i and JOBZ = 'N', then the algorithm failed
189 *> to converge; i off-diagonal elements of an intermediate
190 *> tridiagonal form did not converge to zero;
191 *> if INFO = i and JOBZ = 'V', then the algorithm failed
192 *> to compute an eigenvalue while working on the submatrix
193 *> lying in rows and columns INFO/(N+1) through
194 *> mod(INFO,N+1).
195 *> \endverbatim
196 *
197 * Authors:
198 * ========
199 *
200 *> \author Univ. of Tennessee
201 *> \author Univ. of California Berkeley
202 *> \author Univ. of Colorado Denver
203 *> \author NAG Ltd.
204 *
205 *> \ingroup complexHEeigen
206 *
207 *> \par Further Details:
208 * =====================
209 *>
210 *> Modified description of INFO. Sven, 16 Feb 05.
211 *
212 *> \par Contributors:
213 * ==================
214 *>
215 *> Jeff Rutter, Computer Science Division, University of California
216 *> at Berkeley, USA
217 *>
218 *> \par Further Details:
219 * =====================
220 *>
221 *> \verbatim
222 *>
223 *> All details about the 2stage techniques are available in:
224 *>
225 *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
226 *> Parallel reduction to condensed forms for symmetric eigenvalue problems
227 *> using aggregated fine-grained and memory-aware kernels. In Proceedings
228 *> of 2011 International Conference for High Performance Computing,
229 *> Networking, Storage and Analysis (SC '11), New York, NY, USA,
230 *> Article 8 , 11 pages.
231 *> http://doi.acm.org/10.1145/2063384.2063394
232 *>
233 *> A. Haidar, J. Kurzak, P. Luszczek, 2013.
234 *> An improved parallel singular value algorithm and its implementation
235 *> for multicore hardware, In Proceedings of 2013 International Conference
236 *> for High Performance Computing, Networking, Storage and Analysis (SC '13).
237 *> Denver, Colorado, USA, 2013.
238 *> Article 90, 12 pages.
239 *> http://doi.acm.org/10.1145/2503210.2503292
240 *>
241 *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
242 *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
243 *> calculations based on fine-grained memory aware tasks.
244 *> International Journal of High Performance Computing Applications.
245 *> Volume 28 Issue 2, Pages 196-209, May 2014.
246 *> http://hpc.sagepub.com/content/28/2/196
247 *>
248 *> \endverbatim
249 *
250 * =====================================================================
251  SUBROUTINE cheevd_2stage( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK,
252  $ RWORK, LRWORK, IWORK, LIWORK, INFO )
253 *
254  IMPLICIT NONE
255 *
256 * -- LAPACK driver routine --
257 * -- LAPACK is a software package provided by Univ. of Tennessee, --
258 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
259 *
260 * .. Scalar Arguments ..
261  CHARACTER JOBZ, UPLO
262  INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N
263 * ..
264 * .. Array Arguments ..
265  INTEGER IWORK( * )
266  REAL RWORK( * ), W( * )
267  COMPLEX A( LDA, * ), WORK( * )
268 * ..
269 *
270 * =====================================================================
271 *
272 * .. Parameters ..
273  REAL ZERO, ONE
274  parameter( zero = 0.0e0, one = 1.0e0 )
275  COMPLEX CONE
276  parameter( cone = ( 1.0e0, 0.0e0 ) )
277 * ..
278 * .. Local Scalars ..
279  LOGICAL LOWER, LQUERY, WANTZ
280  INTEGER IINFO, IMAX, INDE, INDRWK, INDTAU, INDWK2,
281  $ indwrk, iscale, liwmin, llrwk, llwork,
282  $ llwrk2, lrwmin, lwmin,
283  $ lhtrd, lwtrd, kd, ib, indhous
284 
285 
286  REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
287  $ smlnum
288 * ..
289 * .. External Functions ..
290  LOGICAL LSAME
291  INTEGER ILAENV2STAGE
292  REAL SLAMCH, CLANHE
293  EXTERNAL lsame, slamch, clanhe, ilaenv2stage
294 * ..
295 * .. External Subroutines ..
296  EXTERNAL sscal, ssterf, xerbla, clacpy, clascl,
298 * ..
299 * .. Intrinsic Functions ..
300  INTRINSIC real, max, sqrt
301 * ..
302 * .. Executable Statements ..
303 *
304 * Test the input parameters.
305 *
306  wantz = lsame( jobz, 'V' )
307  lower = lsame( uplo, 'L' )
308  lquery = ( lwork.EQ.-1 .OR. lrwork.EQ.-1 .OR. liwork.EQ.-1 )
309 *
310  info = 0
311  IF( .NOT.( lsame( jobz, 'N' ) ) ) THEN
312  info = -1
313  ELSE IF( .NOT.( lower .OR. lsame( uplo, 'U' ) ) ) THEN
314  info = -2
315  ELSE IF( n.LT.0 ) THEN
316  info = -3
317  ELSE IF( lda.LT.max( 1, n ) ) THEN
318  info = -5
319  END IF
320 *
321  IF( info.EQ.0 ) THEN
322  IF( n.LE.1 ) THEN
323  lwmin = 1
324  lrwmin = 1
325  liwmin = 1
326  ELSE
327  kd = ilaenv2stage( 1, 'CHETRD_2STAGE', jobz,
328  $ n, -1, -1, -1 )
329  ib = ilaenv2stage( 2, 'CHETRD_2STAGE', jobz,
330  $ n, kd, -1, -1 )
331  lhtrd = ilaenv2stage( 3, 'CHETRD_2STAGE', jobz,
332  $ n, kd, ib, -1 )
333  lwtrd = ilaenv2stage( 4, 'CHETRD_2STAGE', jobz,
334  $ n, kd, ib, -1 )
335  IF( wantz ) THEN
336  lwmin = 2*n + n*n
337  lrwmin = 1 + 5*n + 2*n**2
338  liwmin = 3 + 5*n
339  ELSE
340  lwmin = n + 1 + lhtrd + lwtrd
341  lrwmin = n
342  liwmin = 1
343  END IF
344  END IF
345  work( 1 ) = lwmin
346  rwork( 1 ) = lrwmin
347  iwork( 1 ) = liwmin
348 *
349  IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
350  info = -8
351  ELSE IF( lrwork.LT.lrwmin .AND. .NOT.lquery ) THEN
352  info = -10
353  ELSE IF( liwork.LT.liwmin .AND. .NOT.lquery ) THEN
354  info = -12
355  END IF
356  END IF
357 *
358  IF( info.NE.0 ) THEN
359  CALL xerbla( 'CHEEVD_2STAGE', -info )
360  RETURN
361  ELSE IF( lquery ) THEN
362  RETURN
363  END IF
364 *
365 * Quick return if possible
366 *
367  IF( n.EQ.0 )
368  $ RETURN
369 *
370  IF( n.EQ.1 ) THEN
371  w( 1 ) = real( a( 1, 1 ) )
372  IF( wantz )
373  $ a( 1, 1 ) = cone
374  RETURN
375  END IF
376 *
377 * Get machine constants.
378 *
379  safmin = slamch( 'Safe minimum' )
380  eps = slamch( 'Precision' )
381  smlnum = safmin / eps
382  bignum = one / smlnum
383  rmin = sqrt( smlnum )
384  rmax = sqrt( bignum )
385 *
386 * Scale matrix to allowable range, if necessary.
387 *
388  anrm = clanhe( 'M', uplo, n, a, lda, rwork )
389  iscale = 0
390  IF( anrm.GT.zero .AND. anrm.LT.rmin ) THEN
391  iscale = 1
392  sigma = rmin / anrm
393  ELSE IF( anrm.GT.rmax ) THEN
394  iscale = 1
395  sigma = rmax / anrm
396  END IF
397  IF( iscale.EQ.1 )
398  $ CALL clascl( uplo, 0, 0, one, sigma, n, n, a, lda, info )
399 *
400 * Call CHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form.
401 *
402  inde = 1
403  indrwk = inde + n
404  llrwk = lrwork - indrwk + 1
405  indtau = 1
406  indhous = indtau + n
407  indwrk = indhous + lhtrd
408  llwork = lwork - indwrk + 1
409  indwk2 = indwrk + n*n
410  llwrk2 = lwork - indwk2 + 1
411 *
412  CALL chetrd_2stage( jobz, uplo, n, a, lda, w, rwork( inde ),
413  $ work( indtau ), work( indhous ), lhtrd,
414  $ work( indwrk ), llwork, iinfo )
415 *
416 * For eigenvalues only, call SSTERF. For eigenvectors, first call
417 * CSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the
418 * tridiagonal matrix, then call CUNMTR to multiply it to the
419 * Householder transformations represented as Householder vectors in
420 * A.
421 *
422  IF( .NOT.wantz ) THEN
423  CALL ssterf( n, w, rwork( inde ), info )
424  ELSE
425  CALL cstedc( 'I', n, w, rwork( inde ), work( indwrk ), n,
426  $ work( indwk2 ), llwrk2, rwork( indrwk ), llrwk,
427  $ iwork, liwork, info )
428  CALL cunmtr( 'L', uplo, 'N', n, n, a, lda, work( indtau ),
429  $ work( indwrk ), n, work( indwk2 ), llwrk2, iinfo )
430  CALL clacpy( 'A', n, n, work( indwrk ), n, a, lda )
431  END IF
432 *
433 * If matrix was scaled, then rescale eigenvalues appropriately.
434 *
435  IF( iscale.EQ.1 ) THEN
436  IF( info.EQ.0 ) THEN
437  imax = n
438  ELSE
439  imax = info - 1
440  END IF
441  CALL sscal( imax, one / sigma, w, 1 )
442  END IF
443 *
444  work( 1 ) = lwmin
445  rwork( 1 ) = lrwmin
446  iwork( 1 ) = liwmin
447 *
448  RETURN
449 *
450 * End of CHEEVD_2STAGE
451 *
452  END
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:60
subroutine ssterf(N, D, E, INFO)
SSTERF
Definition: ssterf.f:86
subroutine chetrd_2stage(VECT, UPLO, N, A, LDA, D, E, TAU, HOUS2, LHOUS2, WORK, LWORK, INFO)
CHETRD_2STAGE
subroutine cheevd_2stage(JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO)
CHEEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE mat...
subroutine clascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
CLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: clascl.f:143
subroutine clacpy(UPLO, M, N, A, LDA, B, LDB)
CLACPY copies all or part of one two-dimensional array to another.
Definition: clacpy.f:103
subroutine cunmtr(SIDE, UPLO, TRANS, M, N, A, LDA, TAU, C, LDC, WORK, LWORK, INFO)
CUNMTR
Definition: cunmtr.f:172
subroutine cstedc(COMPZ, N, D, E, Z, LDZ, WORK, LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO)
CSTEDC
Definition: cstedc.f:212
subroutine sscal(N, SA, SX, INCX)
SSCAL
Definition: sscal.f:79