LAPACK 3.12.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
zhetrd_he2hb.f
Go to the documentation of this file.
1*> \brief \b ZHETRD_HE2HB
2*
3* @precisions fortran z -> s d c
4*
5* =========== DOCUMENTATION ===========
6*
7* Online html documentation available at
8* http://www.netlib.org/lapack/explore-html/
9*
10*> \htmlonly
11*> Download ZHETRD_HE2HB + dependencies
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhetrd_he2hb.f">
13*> [TGZ]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhetrd_he2hb.f">
15*> [ZIP]</a>
16*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhetrd_he2hb.f">
17*> [TXT]</a>
18*> \endhtmlonly
19*
20* Definition:
21* ===========
22*
23* SUBROUTINE ZHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU,
24* WORK, LWORK, INFO )
25*
26* IMPLICIT NONE
27*
28* .. Scalar Arguments ..
29* CHARACTER UPLO
30* INTEGER INFO, LDA, LDAB, LWORK, N, KD
31* ..
32* .. Array Arguments ..
33* COMPLEX*16 A( LDA, * ), AB( LDAB, * ),
34* TAU( * ), WORK( * )
35* ..
36*
37*
38*> \par Purpose:
39* =============
40*>
41*> \verbatim
42*>
43*> ZHETRD_HE2HB reduces a complex Hermitian matrix A to complex Hermitian
44*> band-diagonal form AB by a unitary similarity transformation:
45*> Q**H * A * Q = AB.
46*> \endverbatim
47*
48* Arguments:
49* ==========
50*
51*> \param[in] UPLO
52*> \verbatim
53*> UPLO is CHARACTER*1
54*> = 'U': Upper triangle of A is stored;
55*> = 'L': Lower triangle of A is stored.
56*> \endverbatim
57*>
58*> \param[in] N
59*> \verbatim
60*> N is INTEGER
61*> The order of the matrix A. N >= 0.
62*> \endverbatim
63*>
64*> \param[in] KD
65*> \verbatim
66*> KD is INTEGER
67*> The number of superdiagonals of the reduced matrix if UPLO = 'U',
68*> or the number of subdiagonals if UPLO = 'L'. KD >= 0.
69*> The reduced matrix is stored in the array AB.
70*> \endverbatim
71*>
72*> \param[in,out] A
73*> \verbatim
74*> A is COMPLEX*16 array, dimension (LDA,N)
75*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading
76*> N-by-N upper triangular part of A contains the upper
77*> triangular part of the matrix A, and the strictly lower
78*> triangular part of A is not referenced. If UPLO = 'L', the
79*> leading N-by-N lower triangular part of A contains the lower
80*> triangular part of the matrix A, and the strictly upper
81*> triangular part of A is not referenced.
82*> On exit, if UPLO = 'U', the diagonal and first superdiagonal
83*> of A are overwritten by the corresponding elements of the
84*> tridiagonal matrix T, and the elements above the first
85*> superdiagonal, with the array TAU, represent the unitary
86*> matrix Q as a product of elementary reflectors; if UPLO
87*> = 'L', the diagonal and first subdiagonal of A are over-
88*> written by the corresponding elements of the tridiagonal
89*> matrix T, and the elements below the first subdiagonal, with
90*> the array TAU, represent the unitary matrix Q as a product
91*> of elementary reflectors. See Further Details.
92*> \endverbatim
93*>
94*> \param[in] LDA
95*> \verbatim
96*> LDA is INTEGER
97*> The leading dimension of the array A. LDA >= max(1,N).
98*> \endverbatim
99*>
100*> \param[out] AB
101*> \verbatim
102*> AB is COMPLEX*16 array, dimension (LDAB,N)
103*> On exit, the upper or lower triangle of the Hermitian band
104*> matrix A, stored in the first KD+1 rows of the array. The
105*> j-th column of A is stored in the j-th column of the array AB
106*> as follows:
107*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
108*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd).
109*> \endverbatim
110*>
111*> \param[in] LDAB
112*> \verbatim
113*> LDAB is INTEGER
114*> The leading dimension of the array AB. LDAB >= KD+1.
115*> \endverbatim
116*>
117*> \param[out] TAU
118*> \verbatim
119*> TAU is COMPLEX*16 array, dimension (N-KD)
120*> The scalar factors of the elementary reflectors (see Further
121*> Details).
122*> \endverbatim
123*>
124*> \param[out] WORK
125*> \verbatim
126*> WORK is COMPLEX*16 array, dimension (LWORK)
127*> On exit, if INFO = 0, or if LWORK=-1,
128*> WORK(1) returns the size of LWORK.
129*> \endverbatim
130*>
131*> \param[in] LWORK
132*> \verbatim
133*> LWORK is INTEGER
134*> The dimension of the array WORK which should be calculated
135*> by a workspace query. LWORK = MAX(1, LWORK_QUERY)
136*> If LWORK = -1, then a workspace query is assumed; the routine
137*> only calculates the optimal size of the WORK array, returns
138*> this value as the first entry of the WORK array, and no error
139*> message related to LWORK is issued by XERBLA.
140*> LWORK_QUERY = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
141*> where FACTOPTNB is the blocking used by the QR or LQ
142*> algorithm, usually FACTOPTNB=128 is a good choice otherwise
143*> putting LWORK=-1 will provide the size of WORK.
144*> \endverbatim
145*>
146*> \param[out] INFO
147*> \verbatim
148*> INFO is INTEGER
149*> = 0: successful exit
150*> < 0: if INFO = -i, the i-th argument had an illegal value
151*> \endverbatim
152*
153* Authors:
154* ========
155*
156*> \author Univ. of Tennessee
157*> \author Univ. of California Berkeley
158*> \author Univ. of Colorado Denver
159*> \author NAG Ltd.
160*
161*> \ingroup hetrd_he2hb
162*
163*> \par Further Details:
164* =====================
165*>
166*> \verbatim
167*>
168*> Implemented by Azzam Haidar.
169*>
170*> All details are available on technical report, SC11, SC13 papers.
171*>
172*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
173*> Parallel reduction to condensed forms for symmetric eigenvalue problems
174*> using aggregated fine-grained and memory-aware kernels. In Proceedings
175*> of 2011 International Conference for High Performance Computing,
176*> Networking, Storage and Analysis (SC '11), New York, NY, USA,
177*> Article 8 , 11 pages.
178*> http://doi.acm.org/10.1145/2063384.2063394
179*>
180*> A. Haidar, J. Kurzak, P. Luszczek, 2013.
181*> An improved parallel singular value algorithm and its implementation
182*> for multicore hardware, In Proceedings of 2013 International Conference
183*> for High Performance Computing, Networking, Storage and Analysis (SC '13).
184*> Denver, Colorado, USA, 2013.
185*> Article 90, 12 pages.
186*> http://doi.acm.org/10.1145/2503210.2503292
187*>
188*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
189*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
190*> calculations based on fine-grained memory aware tasks.
191*> International Journal of High Performance Computing Applications.
192*> Volume 28 Issue 2, Pages 196-209, May 2014.
193*> http://hpc.sagepub.com/content/28/2/196
194*>
195*> \endverbatim
196*>
197*> \verbatim
198*>
199*> If UPLO = 'U', the matrix Q is represented as a product of elementary
200*> reflectors
201*>
202*> Q = H(k)**H . . . H(2)**H H(1)**H, where k = n-kd.
203*>
204*> Each H(i) has the form
205*>
206*> H(i) = I - tau * v * v**H
207*>
208*> where tau is a complex scalar, and v is a complex vector with
209*> v(1:i+kd-1) = 0 and v(i+kd) = 1; conjg(v(i+kd+1:n)) is stored on exit in
210*> A(i,i+kd+1:n), and tau in TAU(i).
211*>
212*> If UPLO = 'L', the matrix Q is represented as a product of elementary
213*> reflectors
214*>
215*> Q = H(1) H(2) . . . H(k), where k = n-kd.
216*>
217*> Each H(i) has the form
218*>
219*> H(i) = I - tau * v * v**H
220*>
221*> where tau is a complex scalar, and v is a complex vector with
222*> v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in
223*> A(i+kd+2:n,i), and tau in TAU(i).
224*>
225*> The contents of A on exit are illustrated by the following examples
226*> with n = 5:
227*>
228*> if UPLO = 'U': if UPLO = 'L':
229*>
230*> ( ab ab/v1 v1 v1 v1 ) ( ab )
231*> ( ab ab/v2 v2 v2 ) ( ab/v1 ab )
232*> ( ab ab/v3 v3 ) ( v1 ab/v2 ab )
233*> ( ab ab/v4 ) ( v1 v2 ab/v3 ab )
234*> ( ab ) ( v1 v2 v3 ab/v4 ab )
235*>
236*> where d and e denote diagonal and off-diagonal elements of T, and vi
237*> denotes an element of the vector defining H(i).
238*> \endverbatim
239*>
240* =====================================================================
241 SUBROUTINE zhetrd_he2hb( UPLO, N, KD, A, LDA, AB, LDAB, TAU,
242 $ WORK, LWORK, INFO )
243*
244 IMPLICIT NONE
245*
246* -- LAPACK computational routine --
247* -- LAPACK is a software package provided by Univ. of Tennessee, --
248* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
249*
250* .. Scalar Arguments ..
251 CHARACTER UPLO
252 INTEGER INFO, LDA, LDAB, LWORK, N, KD
253* ..
254* .. Array Arguments ..
255 COMPLEX*16 A( LDA, * ), AB( LDAB, * ),
256 $ tau( * ), work( * )
257* ..
258*
259* =====================================================================
260*
261* .. Parameters ..
262 DOUBLE PRECISION RONE
263 COMPLEX*16 ZERO, ONE, HALF
264 parameter( rone = 1.0d+0,
265 $ zero = ( 0.0d+0, 0.0d+0 ),
266 $ one = ( 1.0d+0, 0.0d+0 ),
267 $ half = ( 0.5d+0, 0.0d+0 ) )
268* ..
269* .. Local Scalars ..
270 LOGICAL LQUERY, UPPER
271 INTEGER I, J, IINFO, LWMIN, PN, PK, LK,
272 $ ldt, ldw, lds2, lds1,
273 $ ls2, ls1, lw, lt,
274 $ tpos, wpos, s2pos, s1pos
275* ..
276* .. External Subroutines ..
277 EXTERNAL xerbla, zher2k, zhemm, zgemm, zcopy,
279* ..
280* .. Intrinsic Functions ..
281 INTRINSIC min, max
282* ..
283* .. External Functions ..
284 LOGICAL LSAME
285 INTEGER ILAENV2STAGE
286 EXTERNAL lsame, ilaenv2stage
287* ..
288* .. Executable Statements ..
289*
290* Determine the minimal workspace size required
291* and test the input parameters
292*
293 info = 0
294 upper = lsame( uplo, 'U' )
295 lquery = ( lwork.EQ.-1 )
296 lwmin = ilaenv2stage( 4, 'ZHETRD_HE2HB', ' ', n, kd, -1, -1 )
297
298 IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
299 info = -1
300 ELSE IF( n.LT.0 ) THEN
301 info = -2
302 ELSE IF( kd.LT.0 ) THEN
303 info = -3
304 ELSE IF( lda.LT.max( 1, n ) ) THEN
305 info = -5
306 ELSE IF( ldab.LT.max( 1, kd+1 ) ) THEN
307 info = -7
308 ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
309 info = -10
310 END IF
311*
312 IF( info.NE.0 ) THEN
313 CALL xerbla( 'ZHETRD_HE2HB', -info )
314 RETURN
315 ELSE IF( lquery ) THEN
316 work( 1 ) = lwmin
317 RETURN
318 END IF
319*
320* Quick return if possible
321* Copy the upper/lower portion of A into AB
322*
323 IF( n.LE.kd+1 ) THEN
324 IF( upper ) THEN
325 DO 100 i = 1, n
326 lk = min( kd+1, i )
327 CALL zcopy( lk, a( i-lk+1, i ), 1,
328 $ ab( kd+1-lk+1, i ), 1 )
329 100 CONTINUE
330 ELSE
331 DO 110 i = 1, n
332 lk = min( kd+1, n-i+1 )
333 CALL zcopy( lk, a( i, i ), 1, ab( 1, i ), 1 )
334 110 CONTINUE
335 ENDIF
336 work( 1 ) = 1
337 RETURN
338 END IF
339*
340* Determine the pointer position for the workspace
341*
342 ldt = kd
343 lds1 = kd
344 lt = ldt*kd
345 lw = n*kd
346 ls1 = lds1*kd
347 ls2 = lwmin - lt - lw - ls1
348* LS2 = N*MAX(KD,FACTOPTNB)
349 tpos = 1
350 wpos = tpos + lt
351 s1pos = wpos + lw
352 s2pos = s1pos + ls1
353 IF( upper ) THEN
354 ldw = kd
355 lds2 = kd
356 ELSE
357 ldw = n
358 lds2 = n
359 ENDIF
360*
361*
362* Set the workspace of the triangular matrix T to zero once such a
363* way every time T is generated the upper/lower portion will be always zero
364*
365 CALL zlaset( "A", ldt, kd, zero, zero, work( tpos ), ldt )
366*
367 IF( upper ) THEN
368 DO 10 i = 1, n - kd, kd
369 pn = n-i-kd+1
370 pk = min( n-i-kd+1, kd )
371*
372* Compute the LQ factorization of the current block
373*
374 CALL zgelqf( kd, pn, a( i, i+kd ), lda,
375 $ tau( i ), work( s2pos ), ls2, iinfo )
376*
377* Copy the upper portion of A into AB
378*
379 DO 20 j = i, i+pk-1
380 lk = min( kd, n-j ) + 1
381 CALL zcopy( lk, a( j, j ), lda, ab( kd+1, j ), ldab-1 )
382 20 CONTINUE
383*
384 CALL zlaset( 'Lower', pk, pk, zero, one,
385 $ a( i, i+kd ), lda )
386*
387* Form the matrix T
388*
389 CALL zlarft( 'Forward', 'Rowwise', pn, pk,
390 $ a( i, i+kd ), lda, tau( i ),
391 $ work( tpos ), ldt )
392*
393* Compute W:
394*
395 CALL zgemm( 'Conjugate', 'No transpose', pk, pn, pk,
396 $ one, work( tpos ), ldt,
397 $ a( i, i+kd ), lda,
398 $ zero, work( s2pos ), lds2 )
399*
400 CALL zhemm( 'Right', uplo, pk, pn,
401 $ one, a( i+kd, i+kd ), lda,
402 $ work( s2pos ), lds2,
403 $ zero, work( wpos ), ldw )
404*
405 CALL zgemm( 'No transpose', 'Conjugate', pk, pk, pn,
406 $ one, work( wpos ), ldw,
407 $ work( s2pos ), lds2,
408 $ zero, work( s1pos ), lds1 )
409*
410 CALL zgemm( 'No transpose', 'No transpose', pk, pn, pk,
411 $ -half, work( s1pos ), lds1,
412 $ a( i, i+kd ), lda,
413 $ one, work( wpos ), ldw )
414*
415*
416* Update the unreduced submatrix A(i+kd:n,i+kd:n), using
417* an update of the form: A := A - V'*W - W'*V
418*
419 CALL zher2k( uplo, 'Conjugate', pn, pk,
420 $ -one, a( i, i+kd ), lda,
421 $ work( wpos ), ldw,
422 $ rone, a( i+kd, i+kd ), lda )
423 10 CONTINUE
424*
425* Copy the upper band to AB which is the band storage matrix
426*
427 DO 30 j = n-kd+1, n
428 lk = min(kd, n-j) + 1
429 CALL zcopy( lk, a( j, j ), lda, ab( kd+1, j ), ldab-1 )
430 30 CONTINUE
431*
432 ELSE
433*
434* Reduce the lower triangle of A to lower band matrix
435*
436 DO 40 i = 1, n - kd, kd
437 pn = n-i-kd+1
438 pk = min( n-i-kd+1, kd )
439*
440* Compute the QR factorization of the current block
441*
442 CALL zgeqrf( pn, kd, a( i+kd, i ), lda,
443 $ tau( i ), work( s2pos ), ls2, iinfo )
444*
445* Copy the upper portion of A into AB
446*
447 DO 50 j = i, i+pk-1
448 lk = min( kd, n-j ) + 1
449 CALL zcopy( lk, a( j, j ), 1, ab( 1, j ), 1 )
450 50 CONTINUE
451*
452 CALL zlaset( 'Upper', pk, pk, zero, one,
453 $ a( i+kd, i ), lda )
454*
455* Form the matrix T
456*
457 CALL zlarft( 'Forward', 'Columnwise', pn, pk,
458 $ a( i+kd, i ), lda, tau( i ),
459 $ work( tpos ), ldt )
460*
461* Compute W:
462*
463 CALL zgemm( 'No transpose', 'No transpose', pn, pk, pk,
464 $ one, a( i+kd, i ), lda,
465 $ work( tpos ), ldt,
466 $ zero, work( s2pos ), lds2 )
467*
468 CALL zhemm( 'Left', uplo, pn, pk,
469 $ one, a( i+kd, i+kd ), lda,
470 $ work( s2pos ), lds2,
471 $ zero, work( wpos ), ldw )
472*
473 CALL zgemm( 'Conjugate', 'No transpose', pk, pk, pn,
474 $ one, work( s2pos ), lds2,
475 $ work( wpos ), ldw,
476 $ zero, work( s1pos ), lds1 )
477*
478 CALL zgemm( 'No transpose', 'No transpose', pn, pk, pk,
479 $ -half, a( i+kd, i ), lda,
480 $ work( s1pos ), lds1,
481 $ one, work( wpos ), ldw )
482*
483*
484* Update the unreduced submatrix A(i+kd:n,i+kd:n), using
485* an update of the form: A := A - V*W' - W*V'
486*
487 CALL zher2k( uplo, 'No transpose', pn, pk,
488 $ -one, a( i+kd, i ), lda,
489 $ work( wpos ), ldw,
490 $ rone, a( i+kd, i+kd ), lda )
491* ==================================================================
492* RESTORE A FOR COMPARISON AND CHECKING TO BE REMOVED
493* DO 45 J = I, I+PK-1
494* LK = MIN( KD, N-J ) + 1
495* CALL ZCOPY( LK, AB( 1, J ), 1, A( J, J ), 1 )
496* 45 CONTINUE
497* ==================================================================
498 40 CONTINUE
499*
500* Copy the lower band to AB which is the band storage matrix
501*
502 DO 60 j = n-kd+1, n
503 lk = min(kd, n-j) + 1
504 CALL zcopy( lk, a( j, j ), 1, ab( 1, j ), 1 )
505 60 CONTINUE
506
507 END IF
508*
509 work( 1 ) = lwmin
510 RETURN
511*
512* End of ZHETRD_HE2HB
513*
514 END
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine zcopy(n, zx, incx, zy, incy)
ZCOPY
Definition zcopy.f:81
subroutine zgelqf(m, n, a, lda, tau, work, lwork, info)
ZGELQF
Definition zgelqf.f:143
subroutine zgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
ZGEMM
Definition zgemm.f:188
subroutine zgeqrf(m, n, a, lda, tau, work, lwork, info)
ZGEQRF
Definition zgeqrf.f:146
subroutine zhemm(side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc)
ZHEMM
Definition zhemm.f:191
subroutine zher2k(uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
ZHER2K
Definition zher2k.f:198
subroutine zhetrd_he2hb(uplo, n, kd, a, lda, ab, ldab, tau, work, lwork, info)
ZHETRD_HE2HB
subroutine zlarft(direct, storev, n, k, v, ldv, tau, t, ldt)
ZLARFT forms the triangular factor T of a block reflector H = I - vtvH
Definition zlarft.f:163
subroutine zlaset(uplo, m, n, alpha, beta, a, lda)
ZLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition zlaset.f:106