LAPACK  3.10.0
LAPACK: Linear Algebra PACKage

◆ zhetrf_aa()

subroutine zhetrf_aa ( character  UPLO,
integer  N,
complex*16, dimension( lda, * )  A,
integer  LDA,
integer, dimension( * )  IPIV,
complex*16, dimension( * )  WORK,
integer  LWORK,
integer  INFO 
)

ZHETRF_AA

Download ZHETRF_AA + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 ZHETRF_AA computes the factorization of a complex hermitian matrix A
 using the Aasen's algorithm.  The form of the factorization is

    A = U**H*T*U  or  A = L*T*L**H

 where U (or L) is a product of permutation and unit upper (lower)
 triangular matrices, and T is a hermitian tridiagonal matrix.

 This is the blocked version of the algorithm, calling Level 3 BLAS.
Parameters
[in]UPLO
          UPLO is CHARACTER*1
          = 'U':  Upper triangle of A is stored;
          = 'L':  Lower triangle of A is stored.
[in]N
          N is INTEGER
          The order of the matrix A.  N >= 0.
[in,out]A
          A is COMPLEX*16 array, dimension (LDA,N)
          On entry, the hermitian matrix A.  If UPLO = 'U', the leading
          N-by-N upper triangular part of A contains the upper
          triangular part of the matrix A, and the strictly lower
          triangular part of A is not referenced.  If UPLO = 'L', the
          leading N-by-N lower triangular part of A contains the lower
          triangular part of the matrix A, and the strictly upper
          triangular part of A is not referenced.

          On exit, the tridiagonal matrix is stored in the diagonals
          and the subdiagonals of A just below (or above) the diagonals,
          and L is stored below (or above) the subdiaonals, when UPLO
          is 'L' (or 'U').
[in]LDA
          LDA is INTEGER
          The leading dimension of the array A.  LDA >= max(1,N).
[out]IPIV
          IPIV is INTEGER array, dimension (N)
          On exit, it contains the details of the interchanges, i.e.,
          the row and column k of A were interchanged with the
          row and column IPIV(k).
[out]WORK
          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
[in]LWORK
          LWORK is INTEGER
          The length of WORK. LWORK >= MAX(1,2*N). For optimum performance
          LWORK >= N*(1+NB), where NB is the optimal blocksize.

          If LWORK = -1, then a workspace query is assumed; the routine
          only calculates the optimal size of the WORK array, returns
          this value as the first entry of the WORK array, and no error
          message related to LWORK is issued by XERBLA.
[out]INFO
          INFO is INTEGER
          = 0:  successful exit
          < 0:  if INFO = -i, the i-th argument had an illegal value.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.

Definition at line 131 of file zhetrf_aa.f.

132 *
133 * -- LAPACK computational routine --
134 * -- LAPACK is a software package provided by Univ. of Tennessee, --
135 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
136 *
137  IMPLICIT NONE
138 *
139 * .. Scalar Arguments ..
140  CHARACTER UPLO
141  INTEGER N, LDA, LWORK, INFO
142 * ..
143 * .. Array Arguments ..
144  INTEGER IPIV( * )
145  COMPLEX*16 A( LDA, * ), WORK( * )
146 * ..
147 *
148 * =====================================================================
149 * .. Parameters ..
150  COMPLEX*16 ZERO, ONE
151  parameter( zero = (0.0d+0, 0.0d+0), one = (1.0d+0, 0.0d+0) )
152 *
153 * .. Local Scalars ..
154  LOGICAL LQUERY, UPPER
155  INTEGER J, LWKOPT
156  INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB
157  COMPLEX*16 ALPHA
158 * ..
159 * .. External Functions ..
160  LOGICAL LSAME
161  INTEGER ILAENV
162  EXTERNAL lsame, ilaenv
163 * ..
164 * .. External Subroutines ..
165  EXTERNAL zlahef_aa, zgemm, zgemv, zcopy, zscal, zswap, xerbla
166 * ..
167 * .. Intrinsic Functions ..
168  INTRINSIC dble, dconjg, max
169 * ..
170 * .. Executable Statements ..
171 *
172 * Determine the block size
173 *
174  nb = ilaenv( 1, 'ZHETRF_AA', uplo, n, -1, -1, -1 )
175 *
176 * Test the input parameters.
177 *
178  info = 0
179  upper = lsame( uplo, 'U' )
180  lquery = ( lwork.EQ.-1 )
181  IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
182  info = -1
183  ELSE IF( n.LT.0 ) THEN
184  info = -2
185  ELSE IF( lda.LT.max( 1, n ) ) THEN
186  info = -4
187  ELSE IF( lwork.LT.max( 1, 2*n ) .AND. .NOT.lquery ) THEN
188  info = -7
189  END IF
190 *
191  IF( info.EQ.0 ) THEN
192  lwkopt = (nb+1)*n
193  work( 1 ) = lwkopt
194  END IF
195 *
196  IF( info.NE.0 ) THEN
197  CALL xerbla( 'ZHETRF_AA', -info )
198  RETURN
199  ELSE IF( lquery ) THEN
200  RETURN
201  END IF
202 *
203 * Quick return
204 *
205  IF ( n.EQ.0 ) THEN
206  RETURN
207  ENDIF
208  ipiv( 1 ) = 1
209  IF ( n.EQ.1 ) THEN
210  a( 1, 1 ) = dble( a( 1, 1 ) )
211  RETURN
212  END IF
213 *
214 * Adjust block size based on the workspace size
215 *
216  IF( lwork.LT.((1+nb)*n) ) THEN
217  nb = ( lwork-n ) / n
218  END IF
219 *
220  IF( upper ) THEN
221 *
222 * .....................................................
223 * Factorize A as U**H*D*U using the upper triangle of A
224 * .....................................................
225 *
226 * copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N))
227 *
228  CALL zcopy( n, a( 1, 1 ), lda, work( 1 ), 1 )
229 *
230 * J is the main loop index, increasing from 1 to N in steps of
231 * JB, where JB is the number of columns factorized by ZLAHEF;
232 * JB is either NB, or N-J+1 for the last block
233 *
234  j = 0
235  10 CONTINUE
236  IF( j.GE.n )
237  $ GO TO 20
238 *
239 * each step of the main loop
240 * J is the last column of the previous panel
241 * J1 is the first column of the current panel
242 * K1 identifies if the previous column of the panel has been
243 * explicitly stored, e.g., K1=1 for the first panel, and
244 * K1=0 for the rest
245 *
246  j1 = j + 1
247  jb = min( n-j1+1, nb )
248  k1 = max(1, j)-j
249 *
250 * Panel factorization
251 *
252  CALL zlahef_aa( uplo, 2-k1, n-j, jb,
253  $ a( max(1, j), j+1 ), lda,
254  $ ipiv( j+1 ), work, n, work( n*nb+1 ) )
255 *
256 * Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
257 *
258  DO j2 = j+2, min(n, j+jb+1)
259  ipiv( j2 ) = ipiv( j2 ) + j
260  IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
261  CALL zswap( j1-k1-2, a( 1, j2 ), 1,
262  $ a( 1, ipiv(j2) ), 1 )
263  END IF
264  END DO
265  j = j + jb
266 *
267 * Trailing submatrix update, where
268 * the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and
269 * WORK stores the current block of the auxiriarly matrix H
270 *
271  IF( j.LT.n ) THEN
272 *
273 * if the first panel and JB=1 (NB=1), then nothing to do
274 *
275  IF( j1.GT.1 .OR. jb.GT.1 ) THEN
276 *
277 * Merge rank-1 update with BLAS-3 update
278 *
279  alpha = dconjg( a( j, j+1 ) )
280  a( j, j+1 ) = one
281  CALL zcopy( n-j, a( j-1, j+1 ), lda,
282  $ work( (j+1-j1+1)+jb*n ), 1 )
283  CALL zscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
284 *
285 * K1 identifies if the previous column of the panel has been
286 * explicitly stored, e.g., K1=0 and K2=1 for the first panel,
287 * and K1=1 and K2=0 for the rest
288 *
289  IF( j1.GT.1 ) THEN
290 *
291 * Not first panel
292 *
293  k2 = 1
294  ELSE
295 *
296 * First panel
297 *
298  k2 = 0
299 *
300 * First update skips the first column
301 *
302  jb = jb - 1
303  END IF
304 *
305  DO j2 = j+1, n, nb
306  nj = min( nb, n-j2+1 )
307 *
308 * Update (J2, J2) diagonal block with ZGEMV
309 *
310  j3 = j2
311  DO mj = nj-1, 1, -1
312  CALL zgemm( 'Conjugate transpose', 'Transpose',
313  $ 1, mj, jb+1,
314  $ -one, a( j1-k2, j3 ), lda,
315  $ work( (j3-j1+1)+k1*n ), n,
316  $ one, a( j3, j3 ), lda )
317  j3 = j3 + 1
318  END DO
319 *
320 * Update off-diagonal block of J2-th block row with ZGEMM
321 *
322  CALL zgemm( 'Conjugate transpose', 'Transpose',
323  $ nj, n-j3+1, jb+1,
324  $ -one, a( j1-k2, j2 ), lda,
325  $ work( (j3-j1+1)+k1*n ), n,
326  $ one, a( j2, j3 ), lda )
327  END DO
328 *
329 * Recover T( J, J+1 )
330 *
331  a( j, j+1 ) = dconjg( alpha )
332  END IF
333 *
334 * WORK(J+1, 1) stores H(J+1, 1)
335 *
336  CALL zcopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 )
337  END IF
338  GO TO 10
339  ELSE
340 *
341 * .....................................................
342 * Factorize A as L*D*L**H using the lower triangle of A
343 * .....................................................
344 *
345 * copy first column A(1:N, 1) into H(1:N, 1)
346 * (stored in WORK(1:N))
347 *
348  CALL zcopy( n, a( 1, 1 ), 1, work( 1 ), 1 )
349 *
350 * J is the main loop index, increasing from 1 to N in steps of
351 * JB, where JB is the number of columns factorized by ZLAHEF;
352 * JB is either NB, or N-J+1 for the last block
353 *
354  j = 0
355  11 CONTINUE
356  IF( j.GE.n )
357  $ GO TO 20
358 *
359 * each step of the main loop
360 * J is the last column of the previous panel
361 * J1 is the first column of the current panel
362 * K1 identifies if the previous column of the panel has been
363 * explicitly stored, e.g., K1=1 for the first panel, and
364 * K1=0 for the rest
365 *
366  j1 = j+1
367  jb = min( n-j1+1, nb )
368  k1 = max(1, j)-j
369 *
370 * Panel factorization
371 *
372  CALL zlahef_aa( uplo, 2-k1, n-j, jb,
373  $ a( j+1, max(1, j) ), lda,
374  $ ipiv( j+1 ), work, n, work( n*nb+1 ) )
375 *
376 * Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
377 *
378  DO j2 = j+2, min(n, j+jb+1)
379  ipiv( j2 ) = ipiv( j2 ) + j
380  IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
381  CALL zswap( j1-k1-2, a( j2, 1 ), lda,
382  $ a( ipiv(j2), 1 ), lda )
383  END IF
384  END DO
385  j = j + jb
386 *
387 * Trailing submatrix update, where
388 * A(J2+1, J1-1) stores L(J2+1, J1) and
389 * WORK(J2+1, 1) stores H(J2+1, 1)
390 *
391  IF( j.LT.n ) THEN
392 *
393 * if the first panel and JB=1 (NB=1), then nothing to do
394 *
395  IF( j1.GT.1 .OR. jb.GT.1 ) THEN
396 *
397 * Merge rank-1 update with BLAS-3 update
398 *
399  alpha = dconjg( a( j+1, j ) )
400  a( j+1, j ) = one
401  CALL zcopy( n-j, a( j+1, j-1 ), 1,
402  $ work( (j+1-j1+1)+jb*n ), 1 )
403  CALL zscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
404 *
405 * K1 identifies if the previous column of the panel has been
406 * explicitly stored, e.g., K1=0 and K2=1 for the first panel,
407 * and K1=1 and K2=0 for the rest
408 *
409  IF( j1.GT.1 ) THEN
410 *
411 * Not first panel
412 *
413  k2 = 1
414  ELSE
415 *
416 * First panel
417 *
418  k2 = 0
419 *
420 * First update skips the first column
421 *
422  jb = jb - 1
423  END IF
424 *
425  DO j2 = j+1, n, nb
426  nj = min( nb, n-j2+1 )
427 *
428 * Update (J2, J2) diagonal block with ZGEMV
429 *
430  j3 = j2
431  DO mj = nj-1, 1, -1
432  CALL zgemm( 'No transpose', 'Conjugate transpose',
433  $ mj, 1, jb+1,
434  $ -one, work( (j3-j1+1)+k1*n ), n,
435  $ a( j3, j1-k2 ), lda,
436  $ one, a( j3, j3 ), lda )
437  j3 = j3 + 1
438  END DO
439 *
440 * Update off-diagonal block of J2-th block column with ZGEMM
441 *
442  CALL zgemm( 'No transpose', 'Conjugate transpose',
443  $ n-j3+1, nj, jb+1,
444  $ -one, work( (j3-j1+1)+k1*n ), n,
445  $ a( j2, j1-k2 ), lda,
446  $ one, a( j3, j2 ), lda )
447  END DO
448 *
449 * Recover T( J+1, J )
450 *
451  a( j+1, j ) = dconjg( alpha )
452  END IF
453 *
454 * WORK(J+1, 1) stores H(J+1, 1)
455 *
456  CALL zcopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 )
457  END IF
458  GO TO 11
459  END IF
460 *
461  20 CONTINUE
462  work( 1 ) = lwkopt
463  RETURN
464 *
465 * End of ZHETRF_AA
466 *
integer function ilaenv(ISPEC, NAME, OPTS, N1, N2, N3, N4)
ILAENV
Definition: ilaenv.f:162
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:60
logical function lsame(CA, CB)
LSAME
Definition: lsame.f:53
subroutine zswap(N, ZX, INCX, ZY, INCY)
ZSWAP
Definition: zswap.f:81
subroutine zscal(N, ZA, ZX, INCX)
ZSCAL
Definition: zscal.f:78
subroutine zcopy(N, ZX, INCX, ZY, INCY)
ZCOPY
Definition: zcopy.f:81
subroutine zgemv(TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY)
ZGEMV
Definition: zgemv.f:158
subroutine zgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
ZGEMM
Definition: zgemm.f:187
subroutine zlahef_aa(UPLO, J1, M, NB, A, LDA, IPIV, H, LDH, WORK)
ZLAHEF_AA
Definition: zlahef_aa.f:144
Here is the call graph for this function:
Here is the caller graph for this function: