LAPACK  3.10.0
LAPACK: Linear Algebra PACKage
dsytrd_sy2sb.f
Go to the documentation of this file.
1 *> \brief \b DSYTRD_SY2SB
2 *
3 * @generated from zhetrd_he2hb.f, fortran z -> d, Wed Dec 7 08:22:39 2016
4 *
5 * =========== DOCUMENTATION ===========
6 *
7 * Online html documentation available at
8 * http://www.netlib.org/lapack/explore-html/
9 *
10 *> \htmlonly
11 *> Download DSYTRD_SY2SB + dependencies
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsytrd_sy2sb.f">
13 *> [TGZ]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsytrd_sy2sb.f">
15 *> [ZIP]</a>
16 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsytrd_sy2sb.f">
17 *> [TXT]</a>
18 *> \endhtmlonly
19 *
20 * Definition:
21 * ===========
22 *
23 * SUBROUTINE DSYTRD_SY2SB( UPLO, N, KD, A, LDA, AB, LDAB, TAU,
24 * WORK, LWORK, INFO )
25 *
26 * IMPLICIT NONE
27 *
28 * .. Scalar Arguments ..
29 * CHARACTER UPLO
30 * INTEGER INFO, LDA, LDAB, LWORK, N, KD
31 * ..
32 * .. Array Arguments ..
33 * DOUBLE PRECISION A( LDA, * ), AB( LDAB, * ),
34 * TAU( * ), WORK( * )
35 * ..
36 *
37 *
38 *> \par Purpose:
39 * =============
40 *>
41 *> \verbatim
42 *>
43 *> DSYTRD_SY2SB reduces a real symmetric matrix A to real symmetric
44 *> band-diagonal form AB by a orthogonal similarity transformation:
45 *> Q**T * A * Q = AB.
46 *> \endverbatim
47 *
48 * Arguments:
49 * ==========
50 *
51 *> \param[in] UPLO
52 *> \verbatim
53 *> UPLO is CHARACTER*1
54 *> = 'U': Upper triangle of A is stored;
55 *> = 'L': Lower triangle of A is stored.
56 *> \endverbatim
57 *>
58 *> \param[in] N
59 *> \verbatim
60 *> N is INTEGER
61 *> The order of the matrix A. N >= 0.
62 *> \endverbatim
63 *>
64 *> \param[in] KD
65 *> \verbatim
66 *> KD is INTEGER
67 *> The number of superdiagonals of the reduced matrix if UPLO = 'U',
68 *> or the number of subdiagonals if UPLO = 'L'. KD >= 0.
69 *> The reduced matrix is stored in the array AB.
70 *> \endverbatim
71 *>
72 *> \param[in,out] A
73 *> \verbatim
74 *> A is DOUBLE PRECISION array, dimension (LDA,N)
75 *> On entry, the symmetric matrix A. If UPLO = 'U', the leading
76 *> N-by-N upper triangular part of A contains the upper
77 *> triangular part of the matrix A, and the strictly lower
78 *> triangular part of A is not referenced. If UPLO = 'L', the
79 *> leading N-by-N lower triangular part of A contains the lower
80 *> triangular part of the matrix A, and the strictly upper
81 *> triangular part of A is not referenced.
82 *> On exit, if UPLO = 'U', the diagonal and first superdiagonal
83 *> of A are overwritten by the corresponding elements of the
84 *> tridiagonal matrix T, and the elements above the first
85 *> superdiagonal, with the array TAU, represent the orthogonal
86 *> matrix Q as a product of elementary reflectors; if UPLO
87 *> = 'L', the diagonal and first subdiagonal of A are over-
88 *> written by the corresponding elements of the tridiagonal
89 *> matrix T, and the elements below the first subdiagonal, with
90 *> the array TAU, represent the orthogonal matrix Q as a product
91 *> of elementary reflectors. See Further Details.
92 *> \endverbatim
93 *>
94 *> \param[in] LDA
95 *> \verbatim
96 *> LDA is INTEGER
97 *> The leading dimension of the array A. LDA >= max(1,N).
98 *> \endverbatim
99 *>
100 *> \param[out] AB
101 *> \verbatim
102 *> AB is DOUBLE PRECISION array, dimension (LDAB,N)
103 *> On exit, the upper or lower triangle of the symmetric band
104 *> matrix A, stored in the first KD+1 rows of the array. The
105 *> j-th column of A is stored in the j-th column of the array AB
106 *> as follows:
107 *> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
108 *> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd).
109 *> \endverbatim
110 *>
111 *> \param[in] LDAB
112 *> \verbatim
113 *> LDAB is INTEGER
114 *> The leading dimension of the array AB. LDAB >= KD+1.
115 *> \endverbatim
116 *>
117 *> \param[out] TAU
118 *> \verbatim
119 *> TAU is DOUBLE PRECISION array, dimension (N-KD)
120 *> The scalar factors of the elementary reflectors (see Further
121 *> Details).
122 *> \endverbatim
123 *>
124 *> \param[out] WORK
125 *> \verbatim
126 *> WORK is DOUBLE PRECISION array, dimension (LWORK)
127 *> On exit, if INFO = 0, or if LWORK=-1,
128 *> WORK(1) returns the size of LWORK.
129 *> \endverbatim
130 *>
131 *> \param[in] LWORK
132 *> \verbatim
133 *> LWORK is INTEGER
134 *> The dimension of the array WORK which should be calculated
135 *> by a workspace query. LWORK = MAX(1, LWORK_QUERY)
136 *> If LWORK = -1, then a workspace query is assumed; the routine
137 *> only calculates the optimal size of the WORK array, returns
138 *> this value as the first entry of the WORK array, and no error
139 *> message related to LWORK is issued by XERBLA.
140 *> LWORK_QUERY = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
141 *> where FACTOPTNB is the blocking used by the QR or LQ
142 *> algorithm, usually FACTOPTNB=128 is a good choice otherwise
143 *> putting LWORK=-1 will provide the size of WORK.
144 *> \endverbatim
145 *>
146 *> \param[out] INFO
147 *> \verbatim
148 *> INFO is INTEGER
149 *> = 0: successful exit
150 *> < 0: if INFO = -i, the i-th argument had an illegal value
151 *> \endverbatim
152 *
153 * Authors:
154 * ========
155 *
156 *> \author Univ. of Tennessee
157 *> \author Univ. of California Berkeley
158 *> \author Univ. of Colorado Denver
159 *> \author NAG Ltd.
160 *
161 *> \ingroup doubleSYcomputational
162 *
163 *> \par Further Details:
164 * =====================
165 *>
166 *> \verbatim
167 *>
168 *> Implemented by Azzam Haidar.
169 *>
170 *> All details are available on technical report, SC11, SC13 papers.
171 *>
172 *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
173 *> Parallel reduction to condensed forms for symmetric eigenvalue problems
174 *> using aggregated fine-grained and memory-aware kernels. In Proceedings
175 *> of 2011 International Conference for High Performance Computing,
176 *> Networking, Storage and Analysis (SC '11), New York, NY, USA,
177 *> Article 8 , 11 pages.
178 *> http://doi.acm.org/10.1145/2063384.2063394
179 *>
180 *> A. Haidar, J. Kurzak, P. Luszczek, 2013.
181 *> An improved parallel singular value algorithm and its implementation
182 *> for multicore hardware, In Proceedings of 2013 International Conference
183 *> for High Performance Computing, Networking, Storage and Analysis (SC '13).
184 *> Denver, Colorado, USA, 2013.
185 *> Article 90, 12 pages.
186 *> http://doi.acm.org/10.1145/2503210.2503292
187 *>
188 *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
189 *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
190 *> calculations based on fine-grained memory aware tasks.
191 *> International Journal of High Performance Computing Applications.
192 *> Volume 28 Issue 2, Pages 196-209, May 2014.
193 *> http://hpc.sagepub.com/content/28/2/196
194 *>
195 *> \endverbatim
196 *>
197 *> \verbatim
198 *>
199 *> If UPLO = 'U', the matrix Q is represented as a product of elementary
200 *> reflectors
201 *>
202 *> Q = H(k)**T . . . H(2)**T H(1)**T, where k = n-kd.
203 *>
204 *> Each H(i) has the form
205 *>
206 *> H(i) = I - tau * v * v**T
207 *>
208 *> where tau is a real scalar, and v is a real vector with
209 *> v(1:i+kd-1) = 0 and v(i+kd) = 1; conjg(v(i+kd+1:n)) is stored on exit in
210 *> A(i,i+kd+1:n), and tau in TAU(i).
211 *>
212 *> If UPLO = 'L', the matrix Q is represented as a product of elementary
213 *> reflectors
214 *>
215 *> Q = H(1) H(2) . . . H(k), where k = n-kd.
216 *>
217 *> Each H(i) has the form
218 *>
219 *> H(i) = I - tau * v * v**T
220 *>
221 *> where tau is a real scalar, and v is a real vector with
222 *> v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in
223 *> A(i+kd+2:n,i), and tau in TAU(i).
224 *>
225 *> The contents of A on exit are illustrated by the following examples
226 *> with n = 5:
227 *>
228 *> if UPLO = 'U': if UPLO = 'L':
229 *>
230 *> ( ab ab/v1 v1 v1 v1 ) ( ab )
231 *> ( ab ab/v2 v2 v2 ) ( ab/v1 ab )
232 *> ( ab ab/v3 v3 ) ( v1 ab/v2 ab )
233 *> ( ab ab/v4 ) ( v1 v2 ab/v3 ab )
234 *> ( ab ) ( v1 v2 v3 ab/v4 ab )
235 *>
236 *> where d and e denote diagonal and off-diagonal elements of T, and vi
237 *> denotes an element of the vector defining H(i).
238 *> \endverbatim
239 *>
240 * =====================================================================
241  SUBROUTINE dsytrd_sy2sb( UPLO, N, KD, A, LDA, AB, LDAB, TAU,
242  $ WORK, LWORK, INFO )
243 *
244  IMPLICIT NONE
245 *
246 * -- LAPACK computational routine --
247 * -- LAPACK is a software package provided by Univ. of Tennessee, --
248 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
249 *
250 * .. Scalar Arguments ..
251  CHARACTER UPLO
252  INTEGER INFO, LDA, LDAB, LWORK, N, KD
253 * ..
254 * .. Array Arguments ..
255  DOUBLE PRECISION A( LDA, * ), AB( LDAB, * ),
256  $ tau( * ), work( * )
257 * ..
258 *
259 * =====================================================================
260 *
261 * .. Parameters ..
262  DOUBLE PRECISION RONE
263  DOUBLE PRECISION ZERO, ONE, HALF
264  parameter( rone = 1.0d+0,
265  $ zero = 0.0d+0,
266  $ one = 1.0d+0,
267  $ half = 0.5d+0 )
268 * ..
269 * .. Local Scalars ..
270  LOGICAL LQUERY, UPPER
271  INTEGER I, J, IINFO, LWMIN, PN, PK, LK,
272  $ ldt, ldw, lds2, lds1,
273  $ ls2, ls1, lw, lt,
274  $ tpos, wpos, s2pos, s1pos
275 * ..
276 * .. External Subroutines ..
277  EXTERNAL xerbla, dsyr2k, dsymm, dgemm, dcopy,
279 * ..
280 * .. Intrinsic Functions ..
281  INTRINSIC min, max
282 * ..
283 * .. External Functions ..
284  LOGICAL LSAME
285  INTEGER ILAENV2STAGE
286  EXTERNAL lsame, ilaenv2stage
287 * ..
288 * .. Executable Statements ..
289 *
290 * Determine the minimal workspace size required
291 * and test the input parameters
292 *
293  info = 0
294  upper = lsame( uplo, 'U' )
295  lquery = ( lwork.EQ.-1 )
296  lwmin = ilaenv2stage( 4, 'DSYTRD_SY2SB', '', n, kd, -1, -1 )
297 
298  IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
299  info = -1
300  ELSE IF( n.LT.0 ) THEN
301  info = -2
302  ELSE IF( kd.LT.0 ) THEN
303  info = -3
304  ELSE IF( lda.LT.max( 1, n ) ) THEN
305  info = -5
306  ELSE IF( ldab.LT.max( 1, kd+1 ) ) THEN
307  info = -7
308  ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
309  info = -10
310  END IF
311 *
312  IF( info.NE.0 ) THEN
313  CALL xerbla( 'DSYTRD_SY2SB', -info )
314  RETURN
315  ELSE IF( lquery ) THEN
316  work( 1 ) = lwmin
317  RETURN
318  END IF
319 *
320 * Quick return if possible
321 * Copy the upper/lower portion of A into AB
322 *
323  IF( n.LE.kd+1 ) THEN
324  IF( upper ) THEN
325  DO 100 i = 1, n
326  lk = min( kd+1, i )
327  CALL dcopy( lk, a( i-lk+1, i ), 1,
328  $ ab( kd+1-lk+1, i ), 1 )
329  100 CONTINUE
330  ELSE
331  DO 110 i = 1, n
332  lk = min( kd+1, n-i+1 )
333  CALL dcopy( lk, a( i, i ), 1, ab( 1, i ), 1 )
334  110 CONTINUE
335  ENDIF
336  work( 1 ) = 1
337  RETURN
338  END IF
339 *
340 * Determine the pointer position for the workspace
341 *
342  ldt = kd
343  lds1 = kd
344  lt = ldt*kd
345  lw = n*kd
346  ls1 = lds1*kd
347  ls2 = lwmin - lt - lw - ls1
348 * LS2 = N*MAX(KD,FACTOPTNB)
349  tpos = 1
350  wpos = tpos + lt
351  s1pos = wpos + lw
352  s2pos = s1pos + ls1
353  IF( upper ) THEN
354  ldw = kd
355  lds2 = kd
356  ELSE
357  ldw = n
358  lds2 = n
359  ENDIF
360 *
361 *
362 * Set the workspace of the triangular matrix T to zero once such a
363 * way every time T is generated the upper/lower portion will be always zero
364 *
365  CALL dlaset( "A", ldt, kd, zero, zero, work( tpos ), ldt )
366 *
367  IF( upper ) THEN
368  DO 10 i = 1, n - kd, kd
369  pn = n-i-kd+1
370  pk = min( n-i-kd+1, kd )
371 *
372 * Compute the LQ factorization of the current block
373 *
374  CALL dgelqf( kd, pn, a( i, i+kd ), lda,
375  $ tau( i ), work( s2pos ), ls2, iinfo )
376 *
377 * Copy the upper portion of A into AB
378 *
379  DO 20 j = i, i+pk-1
380  lk = min( kd, n-j ) + 1
381  CALL dcopy( lk, a( j, j ), lda, ab( kd+1, j ), ldab-1 )
382  20 CONTINUE
383 *
384  CALL dlaset( 'Lower', pk, pk, zero, one,
385  $ a( i, i+kd ), lda )
386 *
387 * Form the matrix T
388 *
389  CALL dlarft( 'Forward', 'Rowwise', pn, pk,
390  $ a( i, i+kd ), lda, tau( i ),
391  $ work( tpos ), ldt )
392 *
393 * Compute W:
394 *
395  CALL dgemm( 'Conjugate', 'No transpose', pk, pn, pk,
396  $ one, work( tpos ), ldt,
397  $ a( i, i+kd ), lda,
398  $ zero, work( s2pos ), lds2 )
399 *
400  CALL dsymm( 'Right', uplo, pk, pn,
401  $ one, a( i+kd, i+kd ), lda,
402  $ work( s2pos ), lds2,
403  $ zero, work( wpos ), ldw )
404 *
405  CALL dgemm( 'No transpose', 'Conjugate', pk, pk, pn,
406  $ one, work( wpos ), ldw,
407  $ work( s2pos ), lds2,
408  $ zero, work( s1pos ), lds1 )
409 *
410  CALL dgemm( 'No transpose', 'No transpose', pk, pn, pk,
411  $ -half, work( s1pos ), lds1,
412  $ a( i, i+kd ), lda,
413  $ one, work( wpos ), ldw )
414 *
415 *
416 * Update the unreduced submatrix A(i+kd:n,i+kd:n), using
417 * an update of the form: A := A - V'*W - W'*V
418 *
419  CALL dsyr2k( uplo, 'Conjugate', pn, pk,
420  $ -one, a( i, i+kd ), lda,
421  $ work( wpos ), ldw,
422  $ rone, a( i+kd, i+kd ), lda )
423  10 CONTINUE
424 *
425 * Copy the upper band to AB which is the band storage matrix
426 *
427  DO 30 j = n-kd+1, n
428  lk = min(kd, n-j) + 1
429  CALL dcopy( lk, a( j, j ), lda, ab( kd+1, j ), ldab-1 )
430  30 CONTINUE
431 *
432  ELSE
433 *
434 * Reduce the lower triangle of A to lower band matrix
435 *
436  DO 40 i = 1, n - kd, kd
437  pn = n-i-kd+1
438  pk = min( n-i-kd+1, kd )
439 *
440 * Compute the QR factorization of the current block
441 *
442  CALL dgeqrf( pn, kd, a( i+kd, i ), lda,
443  $ tau( i ), work( s2pos ), ls2, iinfo )
444 *
445 * Copy the upper portion of A into AB
446 *
447  DO 50 j = i, i+pk-1
448  lk = min( kd, n-j ) + 1
449  CALL dcopy( lk, a( j, j ), 1, ab( 1, j ), 1 )
450  50 CONTINUE
451 *
452  CALL dlaset( 'Upper', pk, pk, zero, one,
453  $ a( i+kd, i ), lda )
454 *
455 * Form the matrix T
456 *
457  CALL dlarft( 'Forward', 'Columnwise', pn, pk,
458  $ a( i+kd, i ), lda, tau( i ),
459  $ work( tpos ), ldt )
460 *
461 * Compute W:
462 *
463  CALL dgemm( 'No transpose', 'No transpose', pn, pk, pk,
464  $ one, a( i+kd, i ), lda,
465  $ work( tpos ), ldt,
466  $ zero, work( s2pos ), lds2 )
467 *
468  CALL dsymm( 'Left', uplo, pn, pk,
469  $ one, a( i+kd, i+kd ), lda,
470  $ work( s2pos ), lds2,
471  $ zero, work( wpos ), ldw )
472 *
473  CALL dgemm( 'Conjugate', 'No transpose', pk, pk, pn,
474  $ one, work( s2pos ), lds2,
475  $ work( wpos ), ldw,
476  $ zero, work( s1pos ), lds1 )
477 *
478  CALL dgemm( 'No transpose', 'No transpose', pn, pk, pk,
479  $ -half, a( i+kd, i ), lda,
480  $ work( s1pos ), lds1,
481  $ one, work( wpos ), ldw )
482 *
483 *
484 * Update the unreduced submatrix A(i+kd:n,i+kd:n), using
485 * an update of the form: A := A - V*W' - W*V'
486 *
487  CALL dsyr2k( uplo, 'No transpose', pn, pk,
488  $ -one, a( i+kd, i ), lda,
489  $ work( wpos ), ldw,
490  $ rone, a( i+kd, i+kd ), lda )
491 * ==================================================================
492 * RESTORE A FOR COMPARISON AND CHECKING TO BE REMOVED
493 * DO 45 J = I, I+PK-1
494 * LK = MIN( KD, N-J ) + 1
495 * CALL DCOPY( LK, AB( 1, J ), 1, A( J, J ), 1 )
496 * 45 CONTINUE
497 * ==================================================================
498  40 CONTINUE
499 *
500 * Copy the lower band to AB which is the band storage matrix
501 *
502  DO 60 j = n-kd+1, n
503  lk = min(kd, n-j) + 1
504  CALL dcopy( lk, a( j, j ), 1, ab( 1, j ), 1 )
505  60 CONTINUE
506 
507  END IF
508 *
509  work( 1 ) = lwmin
510  RETURN
511 *
512 * End of DSYTRD_SY2SB
513 *
514  END
subroutine dlaset(UPLO, M, N, ALPHA, BETA, A, LDA)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition: dlaset.f:110
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:60
subroutine dcopy(N, DX, INCX, DY, INCY)
DCOPY
Definition: dcopy.f:82
subroutine dsymm(SIDE, UPLO, M, N, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
DSYMM
Definition: dsymm.f:189
subroutine dsyr2k(UPLO, TRANS, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
DSYR2K
Definition: dsyr2k.f:192
subroutine dgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
DGEMM
Definition: dgemm.f:187
subroutine dgeqrf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
DGEQRF
Definition: dgeqrf.f:145
subroutine dgelqf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
DGELQF
Definition: dgelqf.f:143
subroutine dlarft(DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT)
DLARFT forms the triangular factor T of a block reflector H = I - vtvH
Definition: dlarft.f:163
subroutine dsytrd_sy2sb(UPLO, N, KD, A, LDA, AB, LDAB, TAU, WORK, LWORK, INFO)
DSYTRD_SY2SB
Definition: dsytrd_sy2sb.f:243