LAPACK 3.12.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
zheevd_2stage.f
Go to the documentation of this file.
1*> \brief <b> ZHEEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices</b>
2*
3* @precisions fortran z -> s d c
4*
5* =========== DOCUMENTATION ===========
6*
7* Online html documentation available at
8* http://www.netlib.org/lapack/explore-html/
9*
10*> \htmlonly
11*> Download ZHEEVD_2STAGE + dependencies
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zheevd_2stage.f">
13*> [TGZ]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zheevd_2stage.f">
15*> [ZIP]</a>
16*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zheevd_2stage.f">
17*> [TXT]</a>
18*> \endhtmlonly
19*
20* Definition:
21* ===========
22*
23* SUBROUTINE ZHEEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK,
24* RWORK, LRWORK, IWORK, LIWORK, INFO )
25*
26* IMPLICIT NONE
27*
28* .. Scalar Arguments ..
29* CHARACTER JOBZ, UPLO
30* INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N
31* ..
32* .. Array Arguments ..
33* INTEGER IWORK( * )
34* DOUBLE PRECISION RWORK( * ), W( * )
35* COMPLEX*16 A( LDA, * ), WORK( * )
36* ..
37*
38*
39*> \par Purpose:
40* =============
41*>
42*> \verbatim
43*>
44*> ZHEEVD_2STAGE computes all eigenvalues and, optionally, eigenvectors of a
45*> complex Hermitian matrix A using the 2stage technique for
46*> the reduction to tridiagonal. If eigenvectors are desired, it uses a
47*> divide and conquer algorithm.
48*>
49*> \endverbatim
50*
51* Arguments:
52* ==========
53*
54*> \param[in] JOBZ
55*> \verbatim
56*> JOBZ is CHARACTER*1
57*> = 'N': Compute eigenvalues only;
58*> = 'V': Compute eigenvalues and eigenvectors.
59*> Not available in this release.
60*> \endverbatim
61*>
62*> \param[in] UPLO
63*> \verbatim
64*> UPLO is CHARACTER*1
65*> = 'U': Upper triangle of A is stored;
66*> = 'L': Lower triangle of A is stored.
67*> \endverbatim
68*>
69*> \param[in] N
70*> \verbatim
71*> N is INTEGER
72*> The order of the matrix A. N >= 0.
73*> \endverbatim
74*>
75*> \param[in,out] A
76*> \verbatim
77*> A is COMPLEX*16 array, dimension (LDA, N)
78*> On entry, the Hermitian matrix A. If UPLO = 'U', the
79*> leading N-by-N upper triangular part of A contains the
80*> upper triangular part of the matrix A. If UPLO = 'L',
81*> the leading N-by-N lower triangular part of A contains
82*> the lower triangular part of the matrix A.
83*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the
84*> orthonormal eigenvectors of the matrix A.
85*> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L')
86*> or the upper triangle (if UPLO='U') of A, including the
87*> diagonal, is destroyed.
88*> \endverbatim
89*>
90*> \param[in] LDA
91*> \verbatim
92*> LDA is INTEGER
93*> The leading dimension of the array A. LDA >= max(1,N).
94*> \endverbatim
95*>
96*> \param[out] W
97*> \verbatim
98*> W is DOUBLE PRECISION array, dimension (N)
99*> If INFO = 0, the eigenvalues in ascending order.
100*> \endverbatim
101*>
102*> \param[out] WORK
103*> \verbatim
104*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
105*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
106*> \endverbatim
107*>
108*> \param[in] LWORK
109*> \verbatim
110*> LWORK is INTEGER
111*> The dimension of the array WORK.
112*> If N <= 1, LWORK must be at least 1.
113*> If JOBZ = 'N' and N > 1, LWORK must be queried.
114*> LWORK = MAX(1, dimension) where
115*> dimension = max(stage1,stage2) + (KD+1)*N + N+1
116*> = N*KD + N*max(KD+1,FACTOPTNB)
117*> + max(2*KD*KD, KD*NTHREADS)
118*> + (KD+1)*N + N+1
119*> where KD is the blocking size of the reduction,
120*> FACTOPTNB is the blocking used by the QR or LQ
121*> algorithm, usually FACTOPTNB=128 is a good choice
122*> NTHREADS is the number of threads used when
123*> openMP compilation is enabled, otherwise =1.
124*> If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2
125*>
126*> If LWORK = -1, then a workspace query is assumed; the routine
127*> only calculates the optimal sizes of the WORK, RWORK and
128*> IWORK arrays, returns these values as the first entries of
129*> the WORK, RWORK and IWORK arrays, and no error message
130*> related to LWORK or LRWORK or LIWORK is issued by XERBLA.
131*> \endverbatim
132*>
133*> \param[out] RWORK
134*> \verbatim
135*> RWORK is DOUBLE PRECISION array,
136*> dimension (LRWORK)
137*> On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
138*> \endverbatim
139*>
140*> \param[in] LRWORK
141*> \verbatim
142*> LRWORK is INTEGER
143*> The dimension of the array RWORK.
144*> If N <= 1, LRWORK must be at least 1.
145*> If JOBZ = 'N' and N > 1, LRWORK must be at least N.
146*> If JOBZ = 'V' and N > 1, LRWORK must be at least
147*> 1 + 5*N + 2*N**2.
148*>
149*> If LRWORK = -1, then a workspace query is assumed; the
150*> routine only calculates the optimal sizes of the WORK, RWORK
151*> and IWORK arrays, returns these values as the first entries
152*> of the WORK, RWORK and IWORK arrays, and no error message
153*> related to LWORK or LRWORK or LIWORK is issued by XERBLA.
154*> \endverbatim
155*>
156*> \param[out] IWORK
157*> \verbatim
158*> IWORK is INTEGER array, dimension (MAX(1,LIWORK))
159*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
160*> \endverbatim
161*>
162*> \param[in] LIWORK
163*> \verbatim
164*> LIWORK is INTEGER
165*> The dimension of the array IWORK.
166*> If N <= 1, LIWORK must be at least 1.
167*> If JOBZ = 'N' and N > 1, LIWORK must be at least 1.
168*> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N.
169*>
170*> If LIWORK = -1, then a workspace query is assumed; the
171*> routine only calculates the optimal sizes of the WORK, RWORK
172*> and IWORK arrays, returns these values as the first entries
173*> of the WORK, RWORK and IWORK arrays, and no error message
174*> related to LWORK or LRWORK or LIWORK is issued by XERBLA.
175*> \endverbatim
176*>
177*> \param[out] INFO
178*> \verbatim
179*> INFO is INTEGER
180*> = 0: successful exit
181*> < 0: if INFO = -i, the i-th argument had an illegal value
182*> > 0: if INFO = i and JOBZ = 'N', then the algorithm failed
183*> to converge; i off-diagonal elements of an intermediate
184*> tridiagonal form did not converge to zero;
185*> if INFO = i and JOBZ = 'V', then the algorithm failed
186*> to compute an eigenvalue while working on the submatrix
187*> lying in rows and columns INFO/(N+1) through
188*> mod(INFO,N+1).
189*> \endverbatim
190*
191* Authors:
192* ========
193*
194*> \author Univ. of Tennessee
195*> \author Univ. of California Berkeley
196*> \author Univ. of Colorado Denver
197*> \author NAG Ltd.
198*
199*> \ingroup heevd_2stage
200*
201*> \par Further Details:
202* =====================
203*>
204*> Modified description of INFO. Sven, 16 Feb 05.
205*
206*> \par Contributors:
207* ==================
208*>
209*> Jeff Rutter, Computer Science Division, University of California
210*> at Berkeley, USA
211*>
212*> \par Further Details:
213* =====================
214*>
215*> \verbatim
216*>
217*> All details about the 2stage techniques are available in:
218*>
219*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
220*> Parallel reduction to condensed forms for symmetric eigenvalue problems
221*> using aggregated fine-grained and memory-aware kernels. In Proceedings
222*> of 2011 International Conference for High Performance Computing,
223*> Networking, Storage and Analysis (SC '11), New York, NY, USA,
224*> Article 8 , 11 pages.
225*> http://doi.acm.org/10.1145/2063384.2063394
226*>
227*> A. Haidar, J. Kurzak, P. Luszczek, 2013.
228*> An improved parallel singular value algorithm and its implementation
229*> for multicore hardware, In Proceedings of 2013 International Conference
230*> for High Performance Computing, Networking, Storage and Analysis (SC '13).
231*> Denver, Colorado, USA, 2013.
232*> Article 90, 12 pages.
233*> http://doi.acm.org/10.1145/2503210.2503292
234*>
235*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
236*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
237*> calculations based on fine-grained memory aware tasks.
238*> International Journal of High Performance Computing Applications.
239*> Volume 28 Issue 2, Pages 196-209, May 2014.
240*> http://hpc.sagepub.com/content/28/2/196
241*>
242*> \endverbatim
243*
244* =====================================================================
245 SUBROUTINE zheevd_2stage( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK,
246 $ RWORK, LRWORK, IWORK, LIWORK, INFO )
247*
248 IMPLICIT NONE
249*
250* -- LAPACK driver routine --
251* -- LAPACK is a software package provided by Univ. of Tennessee, --
252* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
253*
254* .. Scalar Arguments ..
255 CHARACTER JOBZ, UPLO
256 INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N
257* ..
258* .. Array Arguments ..
259 INTEGER IWORK( * )
260 DOUBLE PRECISION RWORK( * ), W( * )
261 COMPLEX*16 A( LDA, * ), WORK( * )
262* ..
263*
264* =====================================================================
265*
266* .. Parameters ..
267 DOUBLE PRECISION ZERO, ONE
268 parameter( zero = 0.0d0, one = 1.0d0 )
269 COMPLEX*16 CONE
270 parameter( cone = ( 1.0d0, 0.0d0 ) )
271* ..
272* .. Local Scalars ..
273 LOGICAL LOWER, LQUERY, WANTZ
274 INTEGER IINFO, IMAX, INDE, INDRWK, INDTAU, INDWK2,
275 $ indwrk, iscale, liwmin, llrwk, llwork,
276 $ llwrk2, lrwmin, lwmin,
277 $ lhtrd, lwtrd, kd, ib, indhous
278
279
280 DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
281 $ smlnum
282* ..
283* .. External Functions ..
284 LOGICAL LSAME
285 INTEGER ILAENV2STAGE
286 DOUBLE PRECISION DLAMCH, ZLANHE
287 EXTERNAL lsame, dlamch, zlanhe, ilaenv2stage
288* ..
289* .. External Subroutines ..
290 EXTERNAL dscal, dsterf, xerbla, zlacpy, zlascl,
292* ..
293* .. Intrinsic Functions ..
294 INTRINSIC dble, max, sqrt
295* ..
296* .. Executable Statements ..
297*
298* Test the input parameters.
299*
300 wantz = lsame( jobz, 'V' )
301 lower = lsame( uplo, 'L' )
302 lquery = ( lwork.EQ.-1 .OR. lrwork.EQ.-1 .OR. liwork.EQ.-1 )
303*
304 info = 0
305 IF( .NOT.( lsame( jobz, 'N' ) ) ) THEN
306 info = -1
307 ELSE IF( .NOT.( lower .OR. lsame( uplo, 'U' ) ) ) THEN
308 info = -2
309 ELSE IF( n.LT.0 ) THEN
310 info = -3
311 ELSE IF( lda.LT.max( 1, n ) ) THEN
312 info = -5
313 END IF
314*
315 IF( info.EQ.0 ) THEN
316 IF( n.LE.1 ) THEN
317 lwmin = 1
318 lrwmin = 1
319 liwmin = 1
320 ELSE
321 kd = ilaenv2stage( 1, 'ZHETRD_2STAGE', jobz,
322 $ n, -1, -1, -1 )
323 ib = ilaenv2stage( 2, 'ZHETRD_2STAGE', jobz,
324 $ n, kd, -1, -1 )
325 lhtrd = ilaenv2stage( 3, 'ZHETRD_2STAGE', jobz,
326 $ n, kd, ib, -1 )
327 lwtrd = ilaenv2stage( 4, 'ZHETRD_2STAGE', jobz,
328 $ n, kd, ib, -1 )
329 IF( wantz ) THEN
330 lwmin = 2*n + n*n
331 lrwmin = 1 + 5*n + 2*n**2
332 liwmin = 3 + 5*n
333 ELSE
334 lwmin = n + 1 + lhtrd + lwtrd
335 lrwmin = n
336 liwmin = 1
337 END IF
338 END IF
339 work( 1 ) = lwmin
340 rwork( 1 ) = lrwmin
341 iwork( 1 ) = liwmin
342*
343 IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
344 info = -8
345 ELSE IF( lrwork.LT.lrwmin .AND. .NOT.lquery ) THEN
346 info = -10
347 ELSE IF( liwork.LT.liwmin .AND. .NOT.lquery ) THEN
348 info = -12
349 END IF
350 END IF
351*
352 IF( info.NE.0 ) THEN
353 CALL xerbla( 'ZHEEVD_2STAGE', -info )
354 RETURN
355 ELSE IF( lquery ) THEN
356 RETURN
357 END IF
358*
359* Quick return if possible
360*
361 IF( n.EQ.0 )
362 $ RETURN
363*
364 IF( n.EQ.1 ) THEN
365 w( 1 ) = dble( a( 1, 1 ) )
366 IF( wantz )
367 $ a( 1, 1 ) = cone
368 RETURN
369 END IF
370*
371* Get machine constants.
372*
373 safmin = dlamch( 'Safe minimum' )
374 eps = dlamch( 'Precision' )
375 smlnum = safmin / eps
376 bignum = one / smlnum
377 rmin = sqrt( smlnum )
378 rmax = sqrt( bignum )
379*
380* Scale matrix to allowable range, if necessary.
381*
382 anrm = zlanhe( 'M', uplo, n, a, lda, rwork )
383 iscale = 0
384 IF( anrm.GT.zero .AND. anrm.LT.rmin ) THEN
385 iscale = 1
386 sigma = rmin / anrm
387 ELSE IF( anrm.GT.rmax ) THEN
388 iscale = 1
389 sigma = rmax / anrm
390 END IF
391 IF( iscale.EQ.1 )
392 $ CALL zlascl( uplo, 0, 0, one, sigma, n, n, a, lda, info )
393*
394* Call ZHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form.
395*
396 inde = 1
397 indrwk = inde + n
398 llrwk = lrwork - indrwk + 1
399 indtau = 1
400 indhous = indtau + n
401 indwrk = indhous + lhtrd
402 llwork = lwork - indwrk + 1
403 indwk2 = indwrk + n*n
404 llwrk2 = lwork - indwk2 + 1
405*
406 CALL zhetrd_2stage( jobz, uplo, n, a, lda, w, rwork( inde ),
407 $ work( indtau ), work( indhous ), lhtrd,
408 $ work( indwrk ), llwork, iinfo )
409*
410* For eigenvalues only, call DSTERF. For eigenvectors, first call
411* ZSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the
412* tridiagonal matrix, then call ZUNMTR to multiply it to the
413* Householder transformations represented as Householder vectors in
414* A.
415*
416 IF( .NOT.wantz ) THEN
417 CALL dsterf( n, w, rwork( inde ), info )
418 ELSE
419 CALL zstedc( 'I', n, w, rwork( inde ), work( indwrk ), n,
420 $ work( indwk2 ), llwrk2, rwork( indrwk ), llrwk,
421 $ iwork, liwork, info )
422 CALL zunmtr( 'L', uplo, 'N', n, n, a, lda, work( indtau ),
423 $ work( indwrk ), n, work( indwk2 ), llwrk2, iinfo )
424 CALL zlacpy( 'A', n, n, work( indwrk ), n, a, lda )
425 END IF
426*
427* If matrix was scaled, then rescale eigenvalues appropriately.
428*
429 IF( iscale.EQ.1 ) THEN
430 IF( info.EQ.0 ) THEN
431 imax = n
432 ELSE
433 imax = info - 1
434 END IF
435 CALL dscal( imax, one / sigma, w, 1 )
436 END IF
437*
438 work( 1 ) = lwmin
439 rwork( 1 ) = lrwmin
440 iwork( 1 ) = liwmin
441*
442 RETURN
443*
444* End of ZHEEVD_2STAGE
445*
446 END
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine zheevd_2stage(jobz, uplo, n, a, lda, w, work, lwork, rwork, lrwork, iwork, liwork, info)
ZHEEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE mat...
subroutine zhetrd_2stage(vect, uplo, n, a, lda, d, e, tau, hous2, lhous2, work, lwork, info)
ZHETRD_2STAGE
subroutine zlacpy(uplo, m, n, a, lda, b, ldb)
ZLACPY copies all or part of one two-dimensional array to another.
Definition zlacpy.f:103
subroutine zlascl(type, kl, ku, cfrom, cto, m, n, a, lda, info)
ZLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition zlascl.f:143
subroutine dscal(n, da, dx, incx)
DSCAL
Definition dscal.f:79
subroutine zstedc(compz, n, d, e, z, ldz, work, lwork, rwork, lrwork, iwork, liwork, info)
ZSTEDC
Definition zstedc.f:206
subroutine dsterf(n, d, e, info)
DSTERF
Definition dsterf.f:86
subroutine zunmtr(side, uplo, trans, m, n, a, lda, tau, c, ldc, work, lwork, info)
ZUNMTR
Definition zunmtr.f:171