LAPACK  3.10.0
LAPACK: Linear Algebra PACKage
dsyevd_2stage.f
Go to the documentation of this file.
1 *> \brief <b> DSYEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matrices</b>
2 *
3 * @precisions fortran d -> s
4 *
5 * =========== DOCUMENTATION ===========
6 *
7 * Online html documentation available at
8 * http://www.netlib.org/lapack/explore-html/
9 *
10 *> \htmlonly
11 *> Download DSYEVD_2STAGE + dependencies
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsyevd_2stage.f">
13 *> [TGZ]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsyevd_2stage.f">
15 *> [ZIP]</a>
16 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsyevd_2stage.f">
17 *> [TXT]</a>
18 *> \endhtmlonly
19 *
20 * Definition:
21 * ===========
22 *
23 * SUBROUTINE DSYEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK,
24 * IWORK, LIWORK, INFO )
25 *
26 * IMPLICIT NONE
27 *
28 * .. Scalar Arguments ..
29 * CHARACTER JOBZ, UPLO
30 * INTEGER INFO, LDA, LIWORK, LWORK, N
31 * ..
32 * .. Array Arguments ..
33 * INTEGER IWORK( * )
34 * DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * )
35 * ..
36 *
37 *
38 *> \par Purpose:
39 * =============
40 *>
41 *> \verbatim
42 *>
43 *> DSYEVD_2STAGE computes all eigenvalues and, optionally, eigenvectors of a
44 *> real symmetric matrix A using the 2stage technique for
45 *> the reduction to tridiagonal. If eigenvectors are desired, it uses a
46 *> divide and conquer algorithm.
47 *>
48 *> The divide and conquer algorithm makes very mild assumptions about
49 *> floating point arithmetic. It will work on machines with a guard
50 *> digit in add/subtract, or on those binary machines without guard
51 *> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
52 *> Cray-2. It could conceivably fail on hexadecimal or decimal machines
53 *> without guard digits, but we know of none.
54 *> \endverbatim
55 *
56 * Arguments:
57 * ==========
58 *
59 *> \param[in] JOBZ
60 *> \verbatim
61 *> JOBZ is CHARACTER*1
62 *> = 'N': Compute eigenvalues only;
63 *> = 'V': Compute eigenvalues and eigenvectors.
64 *> Not available in this release.
65 *> \endverbatim
66 *>
67 *> \param[in] UPLO
68 *> \verbatim
69 *> UPLO is CHARACTER*1
70 *> = 'U': Upper triangle of A is stored;
71 *> = 'L': Lower triangle of A is stored.
72 *> \endverbatim
73 *>
74 *> \param[in] N
75 *> \verbatim
76 *> N is INTEGER
77 *> The order of the matrix A. N >= 0.
78 *> \endverbatim
79 *>
80 *> \param[in,out] A
81 *> \verbatim
82 *> A is DOUBLE PRECISION array, dimension (LDA, N)
83 *> On entry, the symmetric matrix A. If UPLO = 'U', the
84 *> leading N-by-N upper triangular part of A contains the
85 *> upper triangular part of the matrix A. If UPLO = 'L',
86 *> the leading N-by-N lower triangular part of A contains
87 *> the lower triangular part of the matrix A.
88 *> On exit, if JOBZ = 'V', then if INFO = 0, A contains the
89 *> orthonormal eigenvectors of the matrix A.
90 *> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L')
91 *> or the upper triangle (if UPLO='U') of A, including the
92 *> diagonal, is destroyed.
93 *> \endverbatim
94 *>
95 *> \param[in] LDA
96 *> \verbatim
97 *> LDA is INTEGER
98 *> The leading dimension of the array A. LDA >= max(1,N).
99 *> \endverbatim
100 *>
101 *> \param[out] W
102 *> \verbatim
103 *> W is DOUBLE PRECISION array, dimension (N)
104 *> If INFO = 0, the eigenvalues in ascending order.
105 *> \endverbatim
106 *>
107 *> \param[out] WORK
108 *> \verbatim
109 *> WORK is DOUBLE PRECISION array,
110 *> dimension (LWORK)
111 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
112 *> \endverbatim
113 *>
114 *> \param[in] LWORK
115 *> \verbatim
116 *> LWORK is INTEGER
117 *> The dimension of the array WORK.
118 *> If N <= 1, LWORK must be at least 1.
119 *> If JOBZ = 'N' and N > 1, LWORK must be queried.
120 *> LWORK = MAX(1, dimension) where
121 *> dimension = max(stage1,stage2) + (KD+1)*N + 2*N+1
122 *> = N*KD + N*max(KD+1,FACTOPTNB)
123 *> + max(2*KD*KD, KD*NTHREADS)
124 *> + (KD+1)*N + 2*N+1
125 *> where KD is the blocking size of the reduction,
126 *> FACTOPTNB is the blocking used by the QR or LQ
127 *> algorithm, usually FACTOPTNB=128 is a good choice
128 *> NTHREADS is the number of threads used when
129 *> openMP compilation is enabled, otherwise =1.
130 *> If JOBZ = 'V' and N > 1, LWORK must be at least
131 *> 1 + 6*N + 2*N**2.
132 *>
133 *> If LWORK = -1, then a workspace query is assumed; the routine
134 *> only calculates the optimal sizes of the WORK and IWORK
135 *> arrays, returns these values as the first entries of the WORK
136 *> and IWORK arrays, and no error message related to LWORK or
137 *> LIWORK is issued by XERBLA.
138 *> \endverbatim
139 *>
140 *> \param[out] IWORK
141 *> \verbatim
142 *> IWORK is INTEGER array, dimension (MAX(1,LIWORK))
143 *> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
144 *> \endverbatim
145 *>
146 *> \param[in] LIWORK
147 *> \verbatim
148 *> LIWORK is INTEGER
149 *> The dimension of the array IWORK.
150 *> If N <= 1, LIWORK must be at least 1.
151 *> If JOBZ = 'N' and N > 1, LIWORK must be at least 1.
152 *> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N.
153 *>
154 *> If LIWORK = -1, then a workspace query is assumed; the
155 *> routine only calculates the optimal sizes of the WORK and
156 *> IWORK arrays, returns these values as the first entries of
157 *> the WORK and IWORK arrays, and no error message related to
158 *> LWORK or LIWORK is issued by XERBLA.
159 *> \endverbatim
160 *>
161 *> \param[out] INFO
162 *> \verbatim
163 *> INFO is INTEGER
164 *> = 0: successful exit
165 *> < 0: if INFO = -i, the i-th argument had an illegal value
166 *> > 0: if INFO = i and JOBZ = 'N', then the algorithm failed
167 *> to converge; i off-diagonal elements of an intermediate
168 *> tridiagonal form did not converge to zero;
169 *> if INFO = i and JOBZ = 'V', then the algorithm failed
170 *> to compute an eigenvalue while working on the submatrix
171 *> lying in rows and columns INFO/(N+1) through
172 *> mod(INFO,N+1).
173 *> \endverbatim
174 *
175 * Authors:
176 * ========
177 *
178 *> \author Univ. of Tennessee
179 *> \author Univ. of California Berkeley
180 *> \author Univ. of Colorado Denver
181 *> \author NAG Ltd.
182 *
183 *> \ingroup doubleSYeigen
184 *
185 *> \par Contributors:
186 * ==================
187 *>
188 *> Jeff Rutter, Computer Science Division, University of California
189 *> at Berkeley, USA \n
190 *> Modified by Francoise Tisseur, University of Tennessee \n
191 *> Modified description of INFO. Sven, 16 Feb 05. \n
192 *> \par Further Details:
193 * =====================
194 *>
195 *> \verbatim
196 *>
197 *> All details about the 2stage techniques are available in:
198 *>
199 *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
200 *> Parallel reduction to condensed forms for symmetric eigenvalue problems
201 *> using aggregated fine-grained and memory-aware kernels. In Proceedings
202 *> of 2011 International Conference for High Performance Computing,
203 *> Networking, Storage and Analysis (SC '11), New York, NY, USA,
204 *> Article 8 , 11 pages.
205 *> http://doi.acm.org/10.1145/2063384.2063394
206 *>
207 *> A. Haidar, J. Kurzak, P. Luszczek, 2013.
208 *> An improved parallel singular value algorithm and its implementation
209 *> for multicore hardware, In Proceedings of 2013 International Conference
210 *> for High Performance Computing, Networking, Storage and Analysis (SC '13).
211 *> Denver, Colorado, USA, 2013.
212 *> Article 90, 12 pages.
213 *> http://doi.acm.org/10.1145/2503210.2503292
214 *>
215 *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
216 *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
217 *> calculations based on fine-grained memory aware tasks.
218 *> International Journal of High Performance Computing Applications.
219 *> Volume 28 Issue 2, Pages 196-209, May 2014.
220 *> http://hpc.sagepub.com/content/28/2/196
221 *>
222 *> \endverbatim
223 *
224 * =====================================================================
225  SUBROUTINE dsyevd_2stage( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK,
226  $ IWORK, LIWORK, INFO )
227 *
228  IMPLICIT NONE
229 *
230 * -- LAPACK driver routine --
231 * -- LAPACK is a software package provided by Univ. of Tennessee, --
232 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
233 *
234 * .. Scalar Arguments ..
235  CHARACTER JOBZ, UPLO
236  INTEGER INFO, LDA, LIWORK, LWORK, N
237 * ..
238 * .. Array Arguments ..
239  INTEGER IWORK( * )
240  DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * )
241 * ..
242 *
243 * =====================================================================
244 *
245 * .. Parameters ..
246  DOUBLE PRECISION ZERO, ONE
247  parameter( zero = 0.0d+0, one = 1.0d+0 )
248 * ..
249 * .. Local Scalars ..
250 *
251  LOGICAL LOWER, LQUERY, WANTZ
252  INTEGER IINFO, INDE, INDTAU, INDWK2, INDWRK, ISCALE,
253  $ liwmin, llwork, llwrk2, lwmin,
254  $ lhtrd, lwtrd, kd, ib, indhous
255  DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
256  $ smlnum
257 * ..
258 * .. External Functions ..
259  LOGICAL LSAME
260  INTEGER ILAENV2STAGE
261  DOUBLE PRECISION DLAMCH, DLANSY
262  EXTERNAL lsame, dlamch, dlansy, ilaenv2stage
263 * ..
264 * .. External Subroutines ..
265  EXTERNAL dlacpy, dlascl, dormtr, dscal, dstedc, dsterf,
267 * ..
268 * .. Intrinsic Functions ..
269  INTRINSIC max, sqrt
270 * ..
271 * .. Executable Statements ..
272 *
273 * Test the input parameters.
274 *
275  wantz = lsame( jobz, 'V' )
276  lower = lsame( uplo, 'L' )
277  lquery = ( lwork.EQ.-1 .OR. liwork.EQ.-1 )
278 *
279  info = 0
280  IF( .NOT.( lsame( jobz, 'N' ) ) ) THEN
281  info = -1
282  ELSE IF( .NOT.( lower .OR. lsame( uplo, 'U' ) ) ) THEN
283  info = -2
284  ELSE IF( n.LT.0 ) THEN
285  info = -3
286  ELSE IF( lda.LT.max( 1, n ) ) THEN
287  info = -5
288  END IF
289 *
290  IF( info.EQ.0 ) THEN
291  IF( n.LE.1 ) THEN
292  liwmin = 1
293  lwmin = 1
294  ELSE
295  kd = ilaenv2stage( 1, 'DSYTRD_2STAGE', jobz,
296  $ n, -1, -1, -1 )
297  ib = ilaenv2stage( 2, 'DSYTRD_2STAGE', jobz,
298  $ n, kd, -1, -1 )
299  lhtrd = ilaenv2stage( 3, 'DSYTRD_2STAGE', jobz,
300  $ n, kd, ib, -1 )
301  lwtrd = ilaenv2stage( 4, 'DSYTRD_2STAGE', jobz,
302  $ n, kd, ib, -1 )
303  IF( wantz ) THEN
304  liwmin = 3 + 5*n
305  lwmin = 1 + 6*n + 2*n**2
306  ELSE
307  liwmin = 1
308  lwmin = 2*n + 1 + lhtrd + lwtrd
309  END IF
310  END IF
311  work( 1 ) = lwmin
312  iwork( 1 ) = liwmin
313 *
314  IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
315  info = -8
316  ELSE IF( liwork.LT.liwmin .AND. .NOT.lquery ) THEN
317  info = -10
318  END IF
319  END IF
320 *
321  IF( info.NE.0 ) THEN
322  CALL xerbla( 'DSYEVD_2STAGE', -info )
323  RETURN
324  ELSE IF( lquery ) THEN
325  RETURN
326  END IF
327 *
328 * Quick return if possible
329 *
330  IF( n.EQ.0 )
331  $ RETURN
332 *
333  IF( n.EQ.1 ) THEN
334  w( 1 ) = a( 1, 1 )
335  IF( wantz )
336  $ a( 1, 1 ) = one
337  RETURN
338  END IF
339 *
340 * Get machine constants.
341 *
342  safmin = dlamch( 'Safe minimum' )
343  eps = dlamch( 'Precision' )
344  smlnum = safmin / eps
345  bignum = one / smlnum
346  rmin = sqrt( smlnum )
347  rmax = sqrt( bignum )
348 *
349 * Scale matrix to allowable range, if necessary.
350 *
351  anrm = dlansy( 'M', uplo, n, a, lda, work )
352  iscale = 0
353  IF( anrm.GT.zero .AND. anrm.LT.rmin ) THEN
354  iscale = 1
355  sigma = rmin / anrm
356  ELSE IF( anrm.GT.rmax ) THEN
357  iscale = 1
358  sigma = rmax / anrm
359  END IF
360  IF( iscale.EQ.1 )
361  $ CALL dlascl( uplo, 0, 0, one, sigma, n, n, a, lda, info )
362 *
363 * Call DSYTRD_2STAGE to reduce symmetric matrix to tridiagonal form.
364 *
365  inde = 1
366  indtau = inde + n
367  indhous = indtau + n
368  indwrk = indhous + lhtrd
369  llwork = lwork - indwrk + 1
370  indwk2 = indwrk + n*n
371  llwrk2 = lwork - indwk2 + 1
372 *
373  CALL dsytrd_2stage( jobz, uplo, n, a, lda, w, work( inde ),
374  $ work( indtau ), work( indhous ), lhtrd,
375  $ work( indwrk ), llwork, iinfo )
376 *
377 * For eigenvalues only, call DSTERF. For eigenvectors, first call
378 * DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the
379 * tridiagonal matrix, then call DORMTR to multiply it by the
380 * Householder transformations stored in A.
381 *
382  IF( .NOT.wantz ) THEN
383  CALL dsterf( n, w, work( inde ), info )
384  ELSE
385 * Not available in this release, and argument checking should not
386 * let it getting here
387  RETURN
388  CALL dstedc( 'I', n, w, work( inde ), work( indwrk ), n,
389  $ work( indwk2 ), llwrk2, iwork, liwork, info )
390  CALL dormtr( 'L', uplo, 'N', n, n, a, lda, work( indtau ),
391  $ work( indwrk ), n, work( indwk2 ), llwrk2, iinfo )
392  CALL dlacpy( 'A', n, n, work( indwrk ), n, a, lda )
393  END IF
394 *
395 * If matrix was scaled, then rescale eigenvalues appropriately.
396 *
397  IF( iscale.EQ.1 )
398  $ CALL dscal( n, one / sigma, w, 1 )
399 *
400  work( 1 ) = lwmin
401  iwork( 1 ) = liwmin
402 *
403  RETURN
404 *
405 * End of DSYEVD_2STAGE
406 *
407  END
subroutine dlascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
DLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: dlascl.f:143
subroutine dlacpy(UPLO, M, N, A, LDA, B, LDB)
DLACPY copies all or part of one two-dimensional array to another.
Definition: dlacpy.f:103
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:60
subroutine dstedc(COMPZ, N, D, E, Z, LDZ, WORK, LWORK, IWORK, LIWORK, INFO)
DSTEDC
Definition: dstedc.f:188
subroutine dsterf(N, D, E, INFO)
DSTERF
Definition: dsterf.f:86
subroutine dscal(N, DA, DX, INCX)
DSCAL
Definition: dscal.f:79
subroutine dormtr(SIDE, UPLO, TRANS, M, N, A, LDA, TAU, C, LDC, WORK, LWORK, INFO)
DORMTR
Definition: dormtr.f:171
subroutine dsytrd_2stage(VECT, UPLO, N, A, LDA, D, E, TAU, HOUS2, LHOUS2, WORK, LWORK, INFO)
DSYTRD_2STAGE
subroutine dsyevd_2stage(JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, IWORK, LIWORK, INFO)
DSYEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY mat...