LAPACK  3.10.0
LAPACK: Linear Algebra PACKage
dsbevd_2stage.f
Go to the documentation of this file.
1 *> \brief <b> DSBEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices</b>
2 *
3 * @precisions fortran d -> s
4 *
5 * =========== DOCUMENTATION ===========
6 *
7 * Online html documentation available at
8 * http://www.netlib.org/lapack/explore-html/
9 *
10 *> \htmlonly
11 *> Download DSBEVD_2STAGE + dependencies
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsbevd_2stage.f">
13 *> [TGZ]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsbevd_2stage.f">
15 *> [ZIP]</a>
16 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsbevd_2stage.f">
17 *> [TXT]</a>
18 *> \endhtmlonly
19 *
20 * Definition:
21 * ===========
22 *
23 * SUBROUTINE DSBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ,
24 * WORK, LWORK, IWORK, LIWORK, INFO )
25 *
26 * IMPLICIT NONE
27 *
28 * .. Scalar Arguments ..
29 * CHARACTER JOBZ, UPLO
30 * INTEGER INFO, KD, LDAB, LDZ, LIWORK, LWORK, N
31 * ..
32 * .. Array Arguments ..
33 * INTEGER IWORK( * )
34 * DOUBLE PRECISION AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * )
35 * ..
36 *
37 *
38 *> \par Purpose:
39 * =============
40 *>
41 *> \verbatim
42 *>
43 *> DSBEVD_2STAGE computes all the eigenvalues and, optionally, eigenvectors of
44 *> a real symmetric band matrix A using the 2stage technique for
45 *> the reduction to tridiagonal. If eigenvectors are desired, it uses
46 *> a divide and conquer algorithm.
47 *>
48 *> The divide and conquer algorithm makes very mild assumptions about
49 *> floating point arithmetic. It will work on machines with a guard
50 *> digit in add/subtract, or on those binary machines without guard
51 *> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
52 *> Cray-2. It could conceivably fail on hexadecimal or decimal machines
53 *> without guard digits, but we know of none.
54 *> \endverbatim
55 *
56 * Arguments:
57 * ==========
58 *
59 *> \param[in] JOBZ
60 *> \verbatim
61 *> JOBZ is CHARACTER*1
62 *> = 'N': Compute eigenvalues only;
63 *> = 'V': Compute eigenvalues and eigenvectors.
64 *> Not available in this release.
65 *> \endverbatim
66 *>
67 *> \param[in] UPLO
68 *> \verbatim
69 *> UPLO is CHARACTER*1
70 *> = 'U': Upper triangle of A is stored;
71 *> = 'L': Lower triangle of A is stored.
72 *> \endverbatim
73 *>
74 *> \param[in] N
75 *> \verbatim
76 *> N is INTEGER
77 *> The order of the matrix A. N >= 0.
78 *> \endverbatim
79 *>
80 *> \param[in] KD
81 *> \verbatim
82 *> KD is INTEGER
83 *> The number of superdiagonals of the matrix A if UPLO = 'U',
84 *> or the number of subdiagonals if UPLO = 'L'. KD >= 0.
85 *> \endverbatim
86 *>
87 *> \param[in,out] AB
88 *> \verbatim
89 *> AB is DOUBLE PRECISION array, dimension (LDAB, N)
90 *> On entry, the upper or lower triangle of the symmetric band
91 *> matrix A, stored in the first KD+1 rows of the array. The
92 *> j-th column of A is stored in the j-th column of the array AB
93 *> as follows:
94 *> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
95 *> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd).
96 *>
97 *> On exit, AB is overwritten by values generated during the
98 *> reduction to tridiagonal form. If UPLO = 'U', the first
99 *> superdiagonal and the diagonal of the tridiagonal matrix T
100 *> are returned in rows KD and KD+1 of AB, and if UPLO = 'L',
101 *> the diagonal and first subdiagonal of T are returned in the
102 *> first two rows of AB.
103 *> \endverbatim
104 *>
105 *> \param[in] LDAB
106 *> \verbatim
107 *> LDAB is INTEGER
108 *> The leading dimension of the array AB. LDAB >= KD + 1.
109 *> \endverbatim
110 *>
111 *> \param[out] W
112 *> \verbatim
113 *> W is DOUBLE PRECISION array, dimension (N)
114 *> If INFO = 0, the eigenvalues in ascending order.
115 *> \endverbatim
116 *>
117 *> \param[out] Z
118 *> \verbatim
119 *> Z is DOUBLE PRECISION array, dimension (LDZ, N)
120 *> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal
121 *> eigenvectors of the matrix A, with the i-th column of Z
122 *> holding the eigenvector associated with W(i).
123 *> If JOBZ = 'N', then Z is not referenced.
124 *> \endverbatim
125 *>
126 *> \param[in] LDZ
127 *> \verbatim
128 *> LDZ is INTEGER
129 *> The leading dimension of the array Z. LDZ >= 1, and if
130 *> JOBZ = 'V', LDZ >= max(1,N).
131 *> \endverbatim
132 *>
133 *> \param[out] WORK
134 *> \verbatim
135 *> WORK is DOUBLE PRECISION array, dimension LWORK
136 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
137 *> \endverbatim
138 *>
139 *> \param[in] LWORK
140 *> \verbatim
141 *> LWORK is INTEGER
142 *> The length of the array WORK. LWORK >= 1, when N <= 1;
143 *> otherwise
144 *> If JOBZ = 'N' and N > 1, LWORK must be queried.
145 *> LWORK = MAX(1, dimension) where
146 *> dimension = (2KD+1)*N + KD*NTHREADS + N
147 *> where KD is the size of the band.
148 *> NTHREADS is the number of threads used when
149 *> openMP compilation is enabled, otherwise =1.
150 *> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available.
151 *>
152 *> If LWORK = -1, then a workspace query is assumed; the routine
153 *> only calculates the optimal sizes of the WORK and IWORK
154 *> arrays, returns these values as the first entries of the WORK
155 *> and IWORK arrays, and no error message related to LWORK or
156 *> LIWORK is issued by XERBLA.
157 *> \endverbatim
158 *>
159 *> \param[out] IWORK
160 *> \verbatim
161 *> IWORK is INTEGER array, dimension (MAX(1,LIWORK))
162 *> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
163 *> \endverbatim
164 *>
165 *> \param[in] LIWORK
166 *> \verbatim
167 *> LIWORK is INTEGER
168 *> The dimension of the array IWORK.
169 *> If JOBZ = 'N' or N <= 1, LIWORK must be at least 1.
170 *> If JOBZ = 'V' and N > 2, LIWORK must be at least 3 + 5*N.
171 *>
172 *> If LIWORK = -1, then a workspace query is assumed; the
173 *> routine only calculates the optimal sizes of the WORK and
174 *> IWORK arrays, returns these values as the first entries of
175 *> the WORK and IWORK arrays, and no error message related to
176 *> LWORK or LIWORK is issued by XERBLA.
177 *> \endverbatim
178 *>
179 *> \param[out] INFO
180 *> \verbatim
181 *> INFO is INTEGER
182 *> = 0: successful exit
183 *> < 0: if INFO = -i, the i-th argument had an illegal value
184 *> > 0: if INFO = i, the algorithm failed to converge; i
185 *> off-diagonal elements of an intermediate tridiagonal
186 *> form did not converge to zero.
187 *> \endverbatim
188 *
189 * Authors:
190 * ========
191 *
192 *> \author Univ. of Tennessee
193 *> \author Univ. of California Berkeley
194 *> \author Univ. of Colorado Denver
195 *> \author NAG Ltd.
196 *
197 *> \ingroup doubleOTHEReigen
198 *
199 *> \par Further Details:
200 * =====================
201 *>
202 *> \verbatim
203 *>
204 *> All details about the 2stage techniques are available in:
205 *>
206 *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
207 *> Parallel reduction to condensed forms for symmetric eigenvalue problems
208 *> using aggregated fine-grained and memory-aware kernels. In Proceedings
209 *> of 2011 International Conference for High Performance Computing,
210 *> Networking, Storage and Analysis (SC '11), New York, NY, USA,
211 *> Article 8 , 11 pages.
212 *> http://doi.acm.org/10.1145/2063384.2063394
213 *>
214 *> A. Haidar, J. Kurzak, P. Luszczek, 2013.
215 *> An improved parallel singular value algorithm and its implementation
216 *> for multicore hardware, In Proceedings of 2013 International Conference
217 *> for High Performance Computing, Networking, Storage and Analysis (SC '13).
218 *> Denver, Colorado, USA, 2013.
219 *> Article 90, 12 pages.
220 *> http://doi.acm.org/10.1145/2503210.2503292
221 *>
222 *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
223 *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
224 *> calculations based on fine-grained memory aware tasks.
225 *> International Journal of High Performance Computing Applications.
226 *> Volume 28 Issue 2, Pages 196-209, May 2014.
227 *> http://hpc.sagepub.com/content/28/2/196
228 *>
229 *> \endverbatim
230 *
231 * =====================================================================
232  SUBROUTINE dsbevd_2stage( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ,
233  $ WORK, LWORK, IWORK, LIWORK, INFO )
234 *
235  IMPLICIT NONE
236 *
237 * -- LAPACK driver routine --
238 * -- LAPACK is a software package provided by Univ. of Tennessee, --
239 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
240 *
241 * .. Scalar Arguments ..
242  CHARACTER JOBZ, UPLO
243  INTEGER INFO, KD, LDAB, LDZ, LIWORK, LWORK, N
244 * ..
245 * .. Array Arguments ..
246  INTEGER IWORK( * )
247  DOUBLE PRECISION AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * )
248 * ..
249 *
250 * =====================================================================
251 *
252 * .. Parameters ..
253  DOUBLE PRECISION ZERO, ONE
254  parameter( zero = 0.0d+0, one = 1.0d+0 )
255 * ..
256 * .. Local Scalars ..
257  LOGICAL LOWER, LQUERY, WANTZ
258  INTEGER IINFO, INDE, INDWK2, INDWRK, ISCALE, LIWMIN,
259  $ llwork, lwmin, lhtrd, lwtrd, ib, indhous,
260  $ llwrk2
261  DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
262  $ smlnum
263 * ..
264 * .. External Functions ..
265  LOGICAL LSAME
266  INTEGER ILAENV2STAGE
267  DOUBLE PRECISION DLAMCH, DLANSB
268  EXTERNAL lsame, dlamch, dlansb, ilaenv2stage
269 * ..
270 * .. External Subroutines ..
271  EXTERNAL dgemm, dlacpy, dlascl, dscal, dstedc,
273 * ..
274 * .. Intrinsic Functions ..
275  INTRINSIC sqrt
276 * ..
277 * .. Executable Statements ..
278 *
279 * Test the input parameters.
280 *
281  wantz = lsame( jobz, 'V' )
282  lower = lsame( uplo, 'L' )
283  lquery = ( lwork.EQ.-1 .OR. liwork.EQ.-1 )
284 *
285  info = 0
286  IF( n.LE.1 ) THEN
287  liwmin = 1
288  lwmin = 1
289  ELSE
290  ib = ilaenv2stage( 2, 'DSYTRD_SB2ST', jobz, n, kd, -1, -1 )
291  lhtrd = ilaenv2stage( 3, 'DSYTRD_SB2ST', jobz, n, kd, ib, -1 )
292  lwtrd = ilaenv2stage( 4, 'DSYTRD_SB2ST', jobz, n, kd, ib, -1 )
293  IF( wantz ) THEN
294  liwmin = 3 + 5*n
295  lwmin = 1 + 5*n + 2*n**2
296  ELSE
297  liwmin = 1
298  lwmin = max( 2*n, n+lhtrd+lwtrd )
299  END IF
300  END IF
301  IF( .NOT.( lsame( jobz, 'N' ) ) ) THEN
302  info = -1
303  ELSE IF( .NOT.( lower .OR. lsame( uplo, 'U' ) ) ) THEN
304  info = -2
305  ELSE IF( n.LT.0 ) THEN
306  info = -3
307  ELSE IF( kd.LT.0 ) THEN
308  info = -4
309  ELSE IF( ldab.LT.kd+1 ) THEN
310  info = -6
311  ELSE IF( ldz.LT.1 .OR. ( wantz .AND. ldz.LT.n ) ) THEN
312  info = -9
313  END IF
314 *
315  IF( info.EQ.0 ) THEN
316  work( 1 ) = lwmin
317  iwork( 1 ) = liwmin
318 *
319  IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
320  info = -11
321  ELSE IF( liwork.LT.liwmin .AND. .NOT.lquery ) THEN
322  info = -13
323  END IF
324  END IF
325 *
326  IF( info.NE.0 ) THEN
327  CALL xerbla( 'DSBEVD_2STAGE', -info )
328  RETURN
329  ELSE IF( lquery ) THEN
330  RETURN
331  END IF
332 *
333 * Quick return if possible
334 *
335  IF( n.EQ.0 )
336  $ RETURN
337 *
338  IF( n.EQ.1 ) THEN
339  w( 1 ) = ab( 1, 1 )
340  IF( wantz )
341  $ z( 1, 1 ) = one
342  RETURN
343  END IF
344 *
345 * Get machine constants.
346 *
347  safmin = dlamch( 'Safe minimum' )
348  eps = dlamch( 'Precision' )
349  smlnum = safmin / eps
350  bignum = one / smlnum
351  rmin = sqrt( smlnum )
352  rmax = sqrt( bignum )
353 *
354 * Scale matrix to allowable range, if necessary.
355 *
356  anrm = dlansb( 'M', uplo, n, kd, ab, ldab, work )
357  iscale = 0
358  IF( anrm.GT.zero .AND. anrm.LT.rmin ) THEN
359  iscale = 1
360  sigma = rmin / anrm
361  ELSE IF( anrm.GT.rmax ) THEN
362  iscale = 1
363  sigma = rmax / anrm
364  END IF
365  IF( iscale.EQ.1 ) THEN
366  IF( lower ) THEN
367  CALL dlascl( 'B', kd, kd, one, sigma, n, n, ab, ldab, info )
368  ELSE
369  CALL dlascl( 'Q', kd, kd, one, sigma, n, n, ab, ldab, info )
370  END IF
371  END IF
372 *
373 * Call DSYTRD_SB2ST to reduce band symmetric matrix to tridiagonal form.
374 *
375  inde = 1
376  indhous = inde + n
377  indwrk = indhous + lhtrd
378  llwork = lwork - indwrk + 1
379  indwk2 = indwrk + n*n
380  llwrk2 = lwork - indwk2 + 1
381 *
382  CALL dsytrd_sb2st( "N", jobz, uplo, n, kd, ab, ldab, w,
383  $ work( inde ), work( indhous ), lhtrd,
384  $ work( indwrk ), llwork, iinfo )
385 *
386 * For eigenvalues only, call DSTERF. For eigenvectors, call SSTEDC.
387 *
388  IF( .NOT.wantz ) THEN
389  CALL dsterf( n, w, work( inde ), info )
390  ELSE
391  CALL dstedc( 'I', n, w, work( inde ), work( indwrk ), n,
392  $ work( indwk2 ), llwrk2, iwork, liwork, info )
393  CALL dgemm( 'N', 'N', n, n, n, one, z, ldz, work( indwrk ), n,
394  $ zero, work( indwk2 ), n )
395  CALL dlacpy( 'A', n, n, work( indwk2 ), n, z, ldz )
396  END IF
397 *
398 * If matrix was scaled, then rescale eigenvalues appropriately.
399 *
400  IF( iscale.EQ.1 )
401  $ CALL dscal( n, one / sigma, w, 1 )
402 *
403  work( 1 ) = lwmin
404  iwork( 1 ) = liwmin
405  RETURN
406 *
407 * End of DSBEVD_2STAGE
408 *
409  END
subroutine dsytrd_sb2st(STAGE1, VECT, UPLO, N, KD, AB, LDAB, D, E, HOUS, LHOUS, WORK, LWORK, INFO)
DSYTRD_SB2ST reduces a real symmetric band matrix A to real symmetric tridiagonal form T
Definition: dsytrd_sb2st.F:230
subroutine dlascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
DLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: dlascl.f:143
subroutine dlacpy(UPLO, M, N, A, LDA, B, LDB)
DLACPY copies all or part of one two-dimensional array to another.
Definition: dlacpy.f:103
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:60
subroutine dstedc(COMPZ, N, D, E, Z, LDZ, WORK, LWORK, IWORK, LIWORK, INFO)
DSTEDC
Definition: dstedc.f:188
subroutine dsterf(N, D, E, INFO)
DSTERF
Definition: dsterf.f:86
subroutine dscal(N, DA, DX, INCX)
DSCAL
Definition: dscal.f:79
subroutine dgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
DGEMM
Definition: dgemm.f:187
subroutine dsbevd_2stage(JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, LWORK, IWORK, LIWORK, INFO)
DSBEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER ...