LAPACK 3.12.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ csytrf_aa()

subroutine csytrf_aa ( character  uplo,
integer  n,
complex, dimension( lda, * )  a,
integer  lda,
integer, dimension( * )  ipiv,
complex, dimension( * )  work,
integer  lwork,
integer  info 
)

CSYTRF_AA

Download CSYTRF_AA + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 CSYTRF_AA computes the factorization of a complex symmetric matrix A
 using the Aasen's algorithm.  The form of the factorization is

    A = U**T*T*U  or  A = L*T*L**T

 where U (or L) is a product of permutation and unit upper (lower)
 triangular matrices, and T is a complex symmetric tridiagonal matrix.

 This is the blocked version of the algorithm, calling Level 3 BLAS.
Parameters
[in]UPLO
          UPLO is CHARACTER*1
          = 'U':  Upper triangle of A is stored;
          = 'L':  Lower triangle of A is stored.
[in]N
          N is INTEGER
          The order of the matrix A.  N >= 0.
[in,out]A
          A is COMPLEX array, dimension (LDA,N)
          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
          N-by-N upper triangular part of A contains the upper
          triangular part of the matrix A, and the strictly lower
          triangular part of A is not referenced.  If UPLO = 'L', the
          leading N-by-N lower triangular part of A contains the lower
          triangular part of the matrix A, and the strictly upper
          triangular part of A is not referenced.

          On exit, the tridiagonal matrix is stored in the diagonals
          and the subdiagonals of A just below (or above) the diagonals,
          and L is stored below (or above) the subdiagonals, when UPLO
          is 'L' (or 'U').
[in]LDA
          LDA is INTEGER
          The leading dimension of the array A.  LDA >= max(1,N).
[out]IPIV
          IPIV is INTEGER array, dimension (N)
          On exit, it contains the details of the interchanges, i.e.,
          the row and column k of A were interchanged with the
          row and column IPIV(k).
[out]WORK
          WORK is COMPLEX array, dimension (MAX(1,LWORK))
          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
[in]LWORK
          LWORK is INTEGER
          The length of WORK. LWORK >= MAX(1,2*N). For optimum performance
          LWORK >= N*(1+NB), where NB is the optimal blocksize.

          If LWORK = -1, then a workspace query is assumed; the routine
          only calculates the optimal size of the WORK array, returns
          this value as the first entry of the WORK array, and no error
          message related to LWORK is issued by XERBLA.
[out]INFO
          INFO is INTEGER
          = 0:  successful exit
          < 0:  if INFO = -i, the i-th argument had an illegal value.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.

Definition at line 131 of file csytrf_aa.f.

132*
133* -- LAPACK computational routine --
134* -- LAPACK is a software package provided by Univ. of Tennessee, --
135* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
136*
137 IMPLICIT NONE
138*
139* .. Scalar Arguments ..
140 CHARACTER UPLO
141 INTEGER N, LDA, LWORK, INFO
142* ..
143* .. Array Arguments ..
144 INTEGER IPIV( * )
145 COMPLEX A( LDA, * ), WORK( * )
146* ..
147*
148* =====================================================================
149* .. Parameters ..
150 COMPLEX ZERO, ONE
151 parameter( zero = 0.0e+0, one = 1.0e+0 )
152*
153* .. Local Scalars ..
154 LOGICAL LQUERY, UPPER
155 INTEGER J, LWKOPT
156 INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB
157 COMPLEX ALPHA
158* ..
159* .. External Functions ..
160 LOGICAL LSAME
161 INTEGER ILAENV
162 REAL SROUNDUP_LWORK
163 EXTERNAL lsame, ilaenv, sroundup_lwork
164* ..
165* .. External Subroutines ..
166 EXTERNAL clasyf_aa, cgemm, cgemv, cscal, cswap, ccopy,
167 $ xerbla
168* ..
169* .. Intrinsic Functions ..
170 INTRINSIC max
171* ..
172* .. Executable Statements ..
173*
174* Determine the block size
175*
176 nb = ilaenv( 1, 'CSYTRF_AA', uplo, n, -1, -1, -1 )
177*
178* Test the input parameters.
179*
180 info = 0
181 upper = lsame( uplo, 'U' )
182 lquery = ( lwork.EQ.-1 )
183 IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
184 info = -1
185 ELSE IF( n.LT.0 ) THEN
186 info = -2
187 ELSE IF( lda.LT.max( 1, n ) ) THEN
188 info = -4
189 ELSE IF( lwork.LT.max( 1, 2*n ) .AND. .NOT.lquery ) THEN
190 info = -7
191 END IF
192*
193 IF( info.EQ.0 ) THEN
194 lwkopt = (nb+1)*n
195 work( 1 ) = sroundup_lwork(lwkopt)
196 END IF
197*
198 IF( info.NE.0 ) THEN
199 CALL xerbla( 'CSYTRF_AA', -info )
200 RETURN
201 ELSE IF( lquery ) THEN
202 RETURN
203 END IF
204*
205* Quick return
206*
207 IF ( n.EQ.0 ) THEN
208 RETURN
209 ENDIF
210 ipiv( 1 ) = 1
211 IF ( n.EQ.1 ) THEN
212 RETURN
213 END IF
214*
215* Adjust block size based on the workspace size
216*
217 IF( lwork.LT.((1+nb)*n) ) THEN
218 nb = ( lwork-n ) / n
219 END IF
220*
221 IF( upper ) THEN
222*
223* .....................................................
224* Factorize A as U**T*D*U using the upper triangle of A
225* .....................................................
226*
227* Copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N))
228*
229 CALL ccopy( n, a( 1, 1 ), lda, work( 1 ), 1 )
230*
231* J is the main loop index, increasing from 1 to N in steps of
232* JB, where JB is the number of columns factorized by CLASYF;
233* JB is either NB, or N-J+1 for the last block
234*
235 j = 0
236 10 CONTINUE
237 IF( j.GE.n )
238 $ GO TO 20
239*
240* each step of the main loop
241* J is the last column of the previous panel
242* J1 is the first column of the current panel
243* K1 identifies if the previous column of the panel has been
244* explicitly stored, e.g., K1=1 for the first panel, and
245* K1=0 for the rest
246*
247 j1 = j + 1
248 jb = min( n-j1+1, nb )
249 k1 = max(1, j)-j
250*
251* Panel factorization
252*
253 CALL clasyf_aa( uplo, 2-k1, n-j, jb,
254 $ a( max(1, j), j+1 ), lda,
255 $ ipiv( j+1 ), work, n, work( n*nb+1 ) )
256*
257* Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
258*
259 DO j2 = j+2, min(n, j+jb+1)
260 ipiv( j2 ) = ipiv( j2 ) + j
261 IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
262 CALL cswap( j1-k1-2, a( 1, j2 ), 1,
263 $ a( 1, ipiv(j2) ), 1 )
264 END IF
265 END DO
266 j = j + jb
267*
268* Trailing submatrix update, where
269* the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and
270* WORK stores the current block of the auxiriarly matrix H
271*
272 IF( j.LT.n ) THEN
273*
274* If first panel and JB=1 (NB=1), then nothing to do
275*
276 IF( j1.GT.1 .OR. jb.GT.1 ) THEN
277*
278* Merge rank-1 update with BLAS-3 update
279*
280 alpha = a( j, j+1 )
281 a( j, j+1 ) = one
282 CALL ccopy( n-j, a( j-1, j+1 ), lda,
283 $ work( (j+1-j1+1)+jb*n ), 1 )
284 CALL cscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
285*
286* K1 identifies if the previous column of the panel has been
287* explicitly stored, e.g., K1=1 and K2= 0 for the first panel,
288* while K1=0 and K2=1 for the rest
289*
290 IF( j1.GT.1 ) THEN
291*
292* Not first panel
293*
294 k2 = 1
295 ELSE
296*
297* First panel
298*
299 k2 = 0
300*
301* First update skips the first column
302*
303 jb = jb - 1
304 END IF
305*
306 DO j2 = j+1, n, nb
307 nj = min( nb, n-j2+1 )
308*
309* Update (J2, J2) diagonal block with CGEMV
310*
311 j3 = j2
312 DO mj = nj-1, 1, -1
313 CALL cgemv( 'No transpose', mj, jb+1,
314 $ -one, work( j3-j1+1+k1*n ), n,
315 $ a( j1-k2, j3 ), 1,
316 $ one, a( j3, j3 ), lda )
317 j3 = j3 + 1
318 END DO
319*
320* Update off-diagonal block of J2-th block row with CGEMM
321*
322 CALL cgemm( 'Transpose', 'Transpose',
323 $ nj, n-j3+1, jb+1,
324 $ -one, a( j1-k2, j2 ), lda,
325 $ work( j3-j1+1+k1*n ), n,
326 $ one, a( j2, j3 ), lda )
327 END DO
328*
329* Recover T( J, J+1 )
330*
331 a( j, j+1 ) = alpha
332 END IF
333*
334* WORK(J+1, 1) stores H(J+1, 1)
335*
336 CALL ccopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 )
337 END IF
338 GO TO 10
339 ELSE
340*
341* .....................................................
342* Factorize A as L*D*L**T using the lower triangle of A
343* .....................................................
344*
345* copy first column A(1:N, 1) into H(1:N, 1)
346* (stored in WORK(1:N))
347*
348 CALL ccopy( n, a( 1, 1 ), 1, work( 1 ), 1 )
349*
350* J is the main loop index, increasing from 1 to N in steps of
351* JB, where JB is the number of columns factorized by CLASYF;
352* JB is either NB, or N-J+1 for the last block
353*
354 j = 0
355 11 CONTINUE
356 IF( j.GE.n )
357 $ GO TO 20
358*
359* each step of the main loop
360* J is the last column of the previous panel
361* J1 is the first column of the current panel
362* K1 identifies if the previous column of the panel has been
363* explicitly stored, e.g., K1=1 for the first panel, and
364* K1=0 for the rest
365*
366 j1 = j+1
367 jb = min( n-j1+1, nb )
368 k1 = max(1, j)-j
369*
370* Panel factorization
371*
372 CALL clasyf_aa( uplo, 2-k1, n-j, jb,
373 $ a( j+1, max(1, j) ), lda,
374 $ ipiv( j+1 ), work, n, work( n*nb+1 ) )
375*
376* Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
377*
378 DO j2 = j+2, min(n, j+jb+1)
379 ipiv( j2 ) = ipiv( j2 ) + j
380 IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
381 CALL cswap( j1-k1-2, a( j2, 1 ), lda,
382 $ a( ipiv(j2), 1 ), lda )
383 END IF
384 END DO
385 j = j + jb
386*
387* Trailing submatrix update, where
388* A(J2+1, J1-1) stores L(J2+1, J1) and
389* WORK(J2+1, 1) stores H(J2+1, 1)
390*
391 IF( j.LT.n ) THEN
392*
393* if first panel and JB=1 (NB=1), then nothing to do
394*
395 IF( j1.GT.1 .OR. jb.GT.1 ) THEN
396*
397* Merge rank-1 update with BLAS-3 update
398*
399 alpha = a( j+1, j )
400 a( j+1, j ) = one
401 CALL ccopy( n-j, a( j+1, j-1 ), 1,
402 $ work( (j+1-j1+1)+jb*n ), 1 )
403 CALL cscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
404*
405* K1 identifies if the previous column of the panel has been
406* explicitly stored, e.g., K1=1 and K2= 0 for the first panel,
407* while K1=0 and K2=1 for the rest
408*
409 IF( j1.GT.1 ) THEN
410*
411* Not first panel
412*
413 k2 = 1
414 ELSE
415*
416* First panel
417*
418 k2 = 0
419*
420* First update skips the first column
421*
422 jb = jb - 1
423 END IF
424*
425 DO j2 = j+1, n, nb
426 nj = min( nb, n-j2+1 )
427*
428* Update (J2, J2) diagonal block with CGEMV
429*
430 j3 = j2
431 DO mj = nj-1, 1, -1
432 CALL cgemv( 'No transpose', mj, jb+1,
433 $ -one, work( j3-j1+1+k1*n ), n,
434 $ a( j3, j1-k2 ), lda,
435 $ one, a( j3, j3 ), 1 )
436 j3 = j3 + 1
437 END DO
438*
439* Update off-diagonal block in J2-th block column with CGEMM
440*
441 CALL cgemm( 'No transpose', 'Transpose',
442 $ n-j3+1, nj, jb+1,
443 $ -one, work( j3-j1+1+k1*n ), n,
444 $ a( j2, j1-k2 ), lda,
445 $ one, a( j3, j2 ), lda )
446 END DO
447*
448* Recover T( J+1, J )
449*
450 a( j+1, j ) = alpha
451 END IF
452*
453* WORK(J+1, 1) stores H(J+1, 1)
454*
455 CALL ccopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 )
456 END IF
457 GO TO 11
458 END IF
459*
460 20 CONTINUE
461 work( 1 ) = sroundup_lwork(lwkopt)
462 RETURN
463*
464* End of CSYTRF_AA
465*
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine ccopy(n, cx, incx, cy, incy)
CCOPY
Definition ccopy.f:81
subroutine cgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
CGEMM
Definition cgemm.f:188
subroutine cgemv(trans, m, n, alpha, a, lda, x, incx, beta, y, incy)
CGEMV
Definition cgemv.f:160
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
Definition ilaenv.f:162
subroutine clasyf_aa(uplo, j1, m, nb, a, lda, ipiv, h, ldh, work)
CLASYF_AA
Definition clasyf_aa.f:144
logical function lsame(ca, cb)
LSAME
Definition lsame.f:48
real function sroundup_lwork(lwork)
SROUNDUP_LWORK
subroutine cscal(n, ca, cx, incx)
CSCAL
Definition cscal.f:78
subroutine cswap(n, cx, incx, cy, incy)
CSWAP
Definition cswap.f:81
Here is the call graph for this function:
Here is the caller graph for this function: