LAPACK  3.10.1
LAPACK: Linear Algebra PACKage
zhetrf_aa.f
Go to the documentation of this file.
1 *> \brief \b ZHETRF_AA
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download ZHETRF_AA + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhetrf_aa.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhetrf_aa.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhetrf_aa.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE ZHETRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
22 *
23 * .. Scalar Arguments ..
24 * CHARACTER UPLO
25 * INTEGER N, LDA, LWORK, INFO
26 * ..
27 * .. Array Arguments ..
28 * INTEGER IPIV( * )
29 * COMPLEX*16 A( LDA, * ), WORK( * )
30 * ..
31 *
32 *> \par Purpose:
33 * =============
34 *>
35 *> \verbatim
36 *>
37 *> ZHETRF_AA computes the factorization of a complex hermitian matrix A
38 *> using the Aasen's algorithm. The form of the factorization is
39 *>
40 *> A = U**H*T*U or A = L*T*L**H
41 *>
42 *> where U (or L) is a product of permutation and unit upper (lower)
43 *> triangular matrices, and T is a hermitian tridiagonal matrix.
44 *>
45 *> This is the blocked version of the algorithm, calling Level 3 BLAS.
46 *> \endverbatim
47 *
48 * Arguments:
49 * ==========
50 *
51 *> \param[in] UPLO
52 *> \verbatim
53 *> UPLO is CHARACTER*1
54 *> = 'U': Upper triangle of A is stored;
55 *> = 'L': Lower triangle of A is stored.
56 *> \endverbatim
57 *>
58 *> \param[in] N
59 *> \verbatim
60 *> N is INTEGER
61 *> The order of the matrix A. N >= 0.
62 *> \endverbatim
63 *>
64 *> \param[in,out] A
65 *> \verbatim
66 *> A is COMPLEX*16 array, dimension (LDA,N)
67 *> On entry, the hermitian matrix A. If UPLO = 'U', the leading
68 *> N-by-N upper triangular part of A contains the upper
69 *> triangular part of the matrix A, and the strictly lower
70 *> triangular part of A is not referenced. If UPLO = 'L', the
71 *> leading N-by-N lower triangular part of A contains the lower
72 *> triangular part of the matrix A, and the strictly upper
73 *> triangular part of A is not referenced.
74 *>
75 *> On exit, the tridiagonal matrix is stored in the diagonals
76 *> and the subdiagonals of A just below (or above) the diagonals,
77 *> and L is stored below (or above) the subdiaonals, when UPLO
78 *> is 'L' (or 'U').
79 *> \endverbatim
80 *>
81 *> \param[in] LDA
82 *> \verbatim
83 *> LDA is INTEGER
84 *> The leading dimension of the array A. LDA >= max(1,N).
85 *> \endverbatim
86 *>
87 *> \param[out] IPIV
88 *> \verbatim
89 *> IPIV is INTEGER array, dimension (N)
90 *> On exit, it contains the details of the interchanges, i.e.,
91 *> the row and column k of A were interchanged with the
92 *> row and column IPIV(k).
93 *> \endverbatim
94 *>
95 *> \param[out] WORK
96 *> \verbatim
97 *> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
98 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
99 *> \endverbatim
100 *>
101 *> \param[in] LWORK
102 *> \verbatim
103 *> LWORK is INTEGER
104 *> The length of WORK. LWORK >= MAX(1,2*N). For optimum performance
105 *> LWORK >= N*(1+NB), where NB is the optimal blocksize.
106 *>
107 *> If LWORK = -1, then a workspace query is assumed; the routine
108 *> only calculates the optimal size of the WORK array, returns
109 *> this value as the first entry of the WORK array, and no error
110 *> message related to LWORK is issued by XERBLA.
111 *> \endverbatim
112 *>
113 *> \param[out] INFO
114 *> \verbatim
115 *> INFO is INTEGER
116 *> = 0: successful exit
117 *> < 0: if INFO = -i, the i-th argument had an illegal value.
118 *> \endverbatim
119 *
120 * Authors:
121 * ========
122 *
123 *> \author Univ. of Tennessee
124 *> \author Univ. of California Berkeley
125 *> \author Univ. of Colorado Denver
126 *> \author NAG Ltd.
127 *
128 *> \ingroup complex16HEcomputational
129 *
130 * =====================================================================
131  SUBROUTINE zhetrf_aa( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO)
132 *
133 * -- LAPACK computational routine --
134 * -- LAPACK is a software package provided by Univ. of Tennessee, --
135 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
136 *
137  IMPLICIT NONE
138 *
139 * .. Scalar Arguments ..
140  CHARACTER UPLO
141  INTEGER N, LDA, LWORK, INFO
142 * ..
143 * .. Array Arguments ..
144  INTEGER IPIV( * )
145  COMPLEX*16 A( LDA, * ), WORK( * )
146 * ..
147 *
148 * =====================================================================
149 * .. Parameters ..
150  COMPLEX*16 ZERO, ONE
151  parameter( zero = (0.0d+0, 0.0d+0), one = (1.0d+0, 0.0d+0) )
152 *
153 * .. Local Scalars ..
154  LOGICAL LQUERY, UPPER
155  INTEGER J, LWKOPT
156  INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB
157  COMPLEX*16 ALPHA
158 * ..
159 * .. External Functions ..
160  LOGICAL LSAME
161  INTEGER ILAENV
162  EXTERNAL lsame, ilaenv
163 * ..
164 * .. External Subroutines ..
165  EXTERNAL zlahef_aa, zgemm, zgemv, zcopy, zscal, zswap, xerbla
166 * ..
167 * .. Intrinsic Functions ..
168  INTRINSIC dble, dconjg, max
169 * ..
170 * .. Executable Statements ..
171 *
172 * Determine the block size
173 *
174  nb = ilaenv( 1, 'ZHETRF_AA', uplo, n, -1, -1, -1 )
175 *
176 * Test the input parameters.
177 *
178  info = 0
179  upper = lsame( uplo, 'U' )
180  lquery = ( lwork.EQ.-1 )
181  IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
182  info = -1
183  ELSE IF( n.LT.0 ) THEN
184  info = -2
185  ELSE IF( lda.LT.max( 1, n ) ) THEN
186  info = -4
187  ELSE IF( lwork.LT.max( 1, 2*n ) .AND. .NOT.lquery ) THEN
188  info = -7
189  END IF
190 *
191  IF( info.EQ.0 ) THEN
192  lwkopt = (nb+1)*n
193  work( 1 ) = lwkopt
194  END IF
195 *
196  IF( info.NE.0 ) THEN
197  CALL xerbla( 'ZHETRF_AA', -info )
198  RETURN
199  ELSE IF( lquery ) THEN
200  RETURN
201  END IF
202 *
203 * Quick return
204 *
205  IF ( n.EQ.0 ) THEN
206  RETURN
207  ENDIF
208  ipiv( 1 ) = 1
209  IF ( n.EQ.1 ) THEN
210  a( 1, 1 ) = dble( a( 1, 1 ) )
211  RETURN
212  END IF
213 *
214 * Adjust block size based on the workspace size
215 *
216  IF( lwork.LT.((1+nb)*n) ) THEN
217  nb = ( lwork-n ) / n
218  END IF
219 *
220  IF( upper ) THEN
221 *
222 * .....................................................
223 * Factorize A as U**H*D*U using the upper triangle of A
224 * .....................................................
225 *
226 * copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N))
227 *
228  CALL zcopy( n, a( 1, 1 ), lda, work( 1 ), 1 )
229 *
230 * J is the main loop index, increasing from 1 to N in steps of
231 * JB, where JB is the number of columns factorized by ZLAHEF;
232 * JB is either NB, or N-J+1 for the last block
233 *
234  j = 0
235  10 CONTINUE
236  IF( j.GE.n )
237  $ GO TO 20
238 *
239 * each step of the main loop
240 * J is the last column of the previous panel
241 * J1 is the first column of the current panel
242 * K1 identifies if the previous column of the panel has been
243 * explicitly stored, e.g., K1=1 for the first panel, and
244 * K1=0 for the rest
245 *
246  j1 = j + 1
247  jb = min( n-j1+1, nb )
248  k1 = max(1, j)-j
249 *
250 * Panel factorization
251 *
252  CALL zlahef_aa( uplo, 2-k1, n-j, jb,
253  $ a( max(1, j), j+1 ), lda,
254  $ ipiv( j+1 ), work, n, work( n*nb+1 ) )
255 *
256 * Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
257 *
258  DO j2 = j+2, min(n, j+jb+1)
259  ipiv( j2 ) = ipiv( j2 ) + j
260  IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
261  CALL zswap( j1-k1-2, a( 1, j2 ), 1,
262  $ a( 1, ipiv(j2) ), 1 )
263  END IF
264  END DO
265  j = j + jb
266 *
267 * Trailing submatrix update, where
268 * the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and
269 * WORK stores the current block of the auxiriarly matrix H
270 *
271  IF( j.LT.n ) THEN
272 *
273 * if the first panel and JB=1 (NB=1), then nothing to do
274 *
275  IF( j1.GT.1 .OR. jb.GT.1 ) THEN
276 *
277 * Merge rank-1 update with BLAS-3 update
278 *
279  alpha = dconjg( a( j, j+1 ) )
280  a( j, j+1 ) = one
281  CALL zcopy( n-j, a( j-1, j+1 ), lda,
282  $ work( (j+1-j1+1)+jb*n ), 1 )
283  CALL zscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
284 *
285 * K1 identifies if the previous column of the panel has been
286 * explicitly stored, e.g., K1=0 and K2=1 for the first panel,
287 * and K1=1 and K2=0 for the rest
288 *
289  IF( j1.GT.1 ) THEN
290 *
291 * Not first panel
292 *
293  k2 = 1
294  ELSE
295 *
296 * First panel
297 *
298  k2 = 0
299 *
300 * First update skips the first column
301 *
302  jb = jb - 1
303  END IF
304 *
305  DO j2 = j+1, n, nb
306  nj = min( nb, n-j2+1 )
307 *
308 * Update (J2, J2) diagonal block with ZGEMV
309 *
310  j3 = j2
311  DO mj = nj-1, 1, -1
312  CALL zgemm( 'Conjugate transpose', 'Transpose',
313  $ 1, mj, jb+1,
314  $ -one, a( j1-k2, j3 ), lda,
315  $ work( (j3-j1+1)+k1*n ), n,
316  $ one, a( j3, j3 ), lda )
317  j3 = j3 + 1
318  END DO
319 *
320 * Update off-diagonal block of J2-th block row with ZGEMM
321 *
322  CALL zgemm( 'Conjugate transpose', 'Transpose',
323  $ nj, n-j3+1, jb+1,
324  $ -one, a( j1-k2, j2 ), lda,
325  $ work( (j3-j1+1)+k1*n ), n,
326  $ one, a( j2, j3 ), lda )
327  END DO
328 *
329 * Recover T( J, J+1 )
330 *
331  a( j, j+1 ) = dconjg( alpha )
332  END IF
333 *
334 * WORK(J+1, 1) stores H(J+1, 1)
335 *
336  CALL zcopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 )
337  END IF
338  GO TO 10
339  ELSE
340 *
341 * .....................................................
342 * Factorize A as L*D*L**H using the lower triangle of A
343 * .....................................................
344 *
345 * copy first column A(1:N, 1) into H(1:N, 1)
346 * (stored in WORK(1:N))
347 *
348  CALL zcopy( n, a( 1, 1 ), 1, work( 1 ), 1 )
349 *
350 * J is the main loop index, increasing from 1 to N in steps of
351 * JB, where JB is the number of columns factorized by ZLAHEF;
352 * JB is either NB, or N-J+1 for the last block
353 *
354  j = 0
355  11 CONTINUE
356  IF( j.GE.n )
357  $ GO TO 20
358 *
359 * each step of the main loop
360 * J is the last column of the previous panel
361 * J1 is the first column of the current panel
362 * K1 identifies if the previous column of the panel has been
363 * explicitly stored, e.g., K1=1 for the first panel, and
364 * K1=0 for the rest
365 *
366  j1 = j+1
367  jb = min( n-j1+1, nb )
368  k1 = max(1, j)-j
369 *
370 * Panel factorization
371 *
372  CALL zlahef_aa( uplo, 2-k1, n-j, jb,
373  $ a( j+1, max(1, j) ), lda,
374  $ ipiv( j+1 ), work, n, work( n*nb+1 ) )
375 *
376 * Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
377 *
378  DO j2 = j+2, min(n, j+jb+1)
379  ipiv( j2 ) = ipiv( j2 ) + j
380  IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
381  CALL zswap( j1-k1-2, a( j2, 1 ), lda,
382  $ a( ipiv(j2), 1 ), lda )
383  END IF
384  END DO
385  j = j + jb
386 *
387 * Trailing submatrix update, where
388 * A(J2+1, J1-1) stores L(J2+1, J1) and
389 * WORK(J2+1, 1) stores H(J2+1, 1)
390 *
391  IF( j.LT.n ) THEN
392 *
393 * if the first panel and JB=1 (NB=1), then nothing to do
394 *
395  IF( j1.GT.1 .OR. jb.GT.1 ) THEN
396 *
397 * Merge rank-1 update with BLAS-3 update
398 *
399  alpha = dconjg( a( j+1, j ) )
400  a( j+1, j ) = one
401  CALL zcopy( n-j, a( j+1, j-1 ), 1,
402  $ work( (j+1-j1+1)+jb*n ), 1 )
403  CALL zscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
404 *
405 * K1 identifies if the previous column of the panel has been
406 * explicitly stored, e.g., K1=0 and K2=1 for the first panel,
407 * and K1=1 and K2=0 for the rest
408 *
409  IF( j1.GT.1 ) THEN
410 *
411 * Not first panel
412 *
413  k2 = 1
414  ELSE
415 *
416 * First panel
417 *
418  k2 = 0
419 *
420 * First update skips the first column
421 *
422  jb = jb - 1
423  END IF
424 *
425  DO j2 = j+1, n, nb
426  nj = min( nb, n-j2+1 )
427 *
428 * Update (J2, J2) diagonal block with ZGEMV
429 *
430  j3 = j2
431  DO mj = nj-1, 1, -1
432  CALL zgemm( 'No transpose', 'Conjugate transpose',
433  $ mj, 1, jb+1,
434  $ -one, work( (j3-j1+1)+k1*n ), n,
435  $ a( j3, j1-k2 ), lda,
436  $ one, a( j3, j3 ), lda )
437  j3 = j3 + 1
438  END DO
439 *
440 * Update off-diagonal block of J2-th block column with ZGEMM
441 *
442  CALL zgemm( 'No transpose', 'Conjugate transpose',
443  $ n-j3+1, nj, jb+1,
444  $ -one, work( (j3-j1+1)+k1*n ), n,
445  $ a( j2, j1-k2 ), lda,
446  $ one, a( j3, j2 ), lda )
447  END DO
448 *
449 * Recover T( J+1, J )
450 *
451  a( j+1, j ) = dconjg( alpha )
452  END IF
453 *
454 * WORK(J+1, 1) stores H(J+1, 1)
455 *
456  CALL zcopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 )
457  END IF
458  GO TO 11
459  END IF
460 *
461  20 CONTINUE
462  work( 1 ) = lwkopt
463  RETURN
464 *
465 * End of ZHETRF_AA
466 *
467  END
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:60
subroutine zswap(N, ZX, INCX, ZY, INCY)
ZSWAP
Definition: zswap.f:81
subroutine zscal(N, ZA, ZX, INCX)
ZSCAL
Definition: zscal.f:78
subroutine zcopy(N, ZX, INCX, ZY, INCY)
ZCOPY
Definition: zcopy.f:81
subroutine zgemv(TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY)
ZGEMV
Definition: zgemv.f:158
subroutine zgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
ZGEMM
Definition: zgemm.f:187
subroutine zhetrf_aa(UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO)
ZHETRF_AA
Definition: zhetrf_aa.f:132
subroutine zlahef_aa(UPLO, J1, M, NB, A, LDA, IPIV, H, LDH, WORK)
ZLAHEF_AA
Definition: zlahef_aa.f:144