LAPACK 3.12.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ ssytrf_aa()

subroutine ssytrf_aa ( character  uplo,
integer  n,
real, dimension( lda, * )  a,
integer  lda,
integer, dimension( * )  ipiv,
real, dimension( * )  work,
integer  lwork,
integer  info 
)

SSYTRF_AA

Download SSYTRF_AA + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 SSYTRF_AA computes the factorization of a real symmetric matrix A
 using the Aasen's algorithm.  The form of the factorization is

    A = U**T*T*U  or  A = L*T*L**T

 where U (or L) is a product of permutation and unit upper (lower)
 triangular matrices, and T is a symmetric tridiagonal matrix.

 This is the blocked version of the algorithm, calling Level 3 BLAS.
Parameters
[in]UPLO
          UPLO is CHARACTER*1
          = 'U':  Upper triangle of A is stored;
          = 'L':  Lower triangle of A is stored.
[in]N
          N is INTEGER
          The order of the matrix A.  N >= 0.
[in,out]A
          A is REAL array, dimension (LDA,N)
          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
          N-by-N upper triangular part of A contains the upper
          triangular part of the matrix A, and the strictly lower
          triangular part of A is not referenced.  If UPLO = 'L', the
          leading N-by-N lower triangular part of A contains the lower
          triangular part of the matrix A, and the strictly upper
          triangular part of A is not referenced.

          On exit, the tridiagonal matrix is stored in the diagonals
          and the subdiagonals of A just below (or above) the diagonals,
          and L is stored below (or above) the subdiagonals, when UPLO
          is 'L' (or 'U').
[in]LDA
          LDA is INTEGER
          The leading dimension of the array A.  LDA >= max(1,N).
[out]IPIV
          IPIV is INTEGER array, dimension (N)
          On exit, it contains the details of the interchanges, i.e.,
          the row and column k of A were interchanged with the
          row and column IPIV(k).
[out]WORK
          WORK is REAL array, dimension (MAX(1,LWORK))
          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
[in]LWORK
          LWORK is INTEGER
          The length of WORK.  LWORK >= MAX(1,2*N). For optimum performance
          LWORK >= N*(1+NB), where NB is the optimal blocksize.

          If LWORK = -1, then a workspace query is assumed; the routine
          only calculates the optimal size of the WORK array, returns
          this value as the first entry of the WORK array, and no error
          message related to LWORK is issued by XERBLA.
[out]INFO
          INFO is INTEGER
          = 0:  successful exit
          < 0:  if INFO = -i, the i-th argument had an illegal value.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.

Definition at line 131 of file ssytrf_aa.f.

132*
133* -- LAPACK computational routine --
134* -- LAPACK is a software package provided by Univ. of Tennessee, --
135* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
136*
137 IMPLICIT NONE
138*
139* .. Scalar Arguments ..
140 CHARACTER UPLO
141 INTEGER N, LDA, LWORK, INFO
142* ..
143* .. Array Arguments ..
144 INTEGER IPIV( * )
145 REAL A( LDA, * ), WORK( * )
146* ..
147*
148* =====================================================================
149* .. Parameters ..
150 REAL ZERO, ONE
151 parameter( zero = 0.0e+0, one = 1.0e+0 )
152*
153* .. Local Scalars ..
154 LOGICAL LQUERY, UPPER
155 INTEGER J, LWKOPT
156 INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB
157 REAL ALPHA
158* ..
159* .. External Functions ..
160 LOGICAL LSAME
161 INTEGER ILAENV
162 REAL SROUNDUP_LWORK
163 EXTERNAL lsame, ilaenv, sroundup_lwork
164* ..
165* .. External Subroutines ..
166 EXTERNAL slasyf_aa, sgemv, sscal, scopy, sswap, sgemm,
167 $ xerbla
168* ..
169* .. Intrinsic Functions ..
170 INTRINSIC max
171* ..
172* .. Executable Statements ..
173*
174* Determine the block size
175*
176 nb = ilaenv( 1, 'SSYTRF_AA', uplo, n, -1, -1, -1 )
177*
178* Test the input parameters.
179*
180 info = 0
181 upper = lsame( uplo, 'U' )
182 lquery = ( lwork.EQ.-1 )
183 IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
184 info = -1
185 ELSE IF( n.LT.0 ) THEN
186 info = -2
187 ELSE IF( lda.LT.max( 1, n ) ) THEN
188 info = -4
189 ELSE IF( lwork.LT.max( 1, 2*n ) .AND. .NOT.lquery ) THEN
190 info = -7
191 END IF
192*
193 IF( info.EQ.0 ) THEN
194 lwkopt = (nb+1)*n
195 work( 1 ) = sroundup_lwork(lwkopt)
196 END IF
197*
198 IF( info.NE.0 ) THEN
199 CALL xerbla( 'SSYTRF_AA', -info )
200 RETURN
201 ELSE IF( lquery ) THEN
202 RETURN
203 END IF
204*
205* Quick return
206*
207 IF ( n.EQ.0 ) THEN
208 RETURN
209 ENDIF
210 ipiv( 1 ) = 1
211 IF ( n.EQ.1 ) THEN
212 RETURN
213 END IF
214*
215* Adjust block size based on the workspace size
216*
217 IF( lwork.LT.((1+nb)*n) ) THEN
218 nb = ( lwork-n ) / n
219 END IF
220*
221 IF( upper ) THEN
222*
223* .....................................................
224* Factorize A as U**T*D*U using the upper triangle of A
225* .....................................................
226*
227* Copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N))
228*
229 CALL scopy( n, a( 1, 1 ), lda, work( 1 ), 1 )
230*
231* J is the main loop index, increasing from 1 to N in steps of
232* JB, where JB is the number of columns factorized by SLASYF;
233* JB is either NB, or N-J+1 for the last block
234*
235 j = 0
236 10 CONTINUE
237 IF( j.GE.n )
238 $ GO TO 20
239*
240* each step of the main loop
241* J is the last column of the previous panel
242* J1 is the first column of the current panel
243* K1 identifies if the previous column of the panel has been
244* explicitly stored, e.g., K1=1 for the first panel, and
245* K1=0 for the rest
246*
247 j1 = j + 1
248 jb = min( n-j1+1, nb )
249 k1 = max(1, j)-j
250*
251* Panel factorization
252*
253 CALL slasyf_aa( uplo, 2-k1, n-j, jb,
254 $ a( max(1, j), j+1 ), lda,
255 $ ipiv( j+1 ), work, n, work( n*nb+1 ) )
256*
257* Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
258*
259 DO j2 = j+2, min(n, j+jb+1)
260 ipiv( j2 ) = ipiv( j2 ) + j
261 IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
262 CALL sswap( j1-k1-2, a( 1, j2 ), 1,
263 $ a( 1, ipiv(j2) ), 1 )
264 END IF
265 END DO
266 j = j + jb
267*
268* Trailing submatrix update, where
269* the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and
270* WORK stores the current block of the auxiriarly matrix H
271*
272 IF( j.LT.n ) THEN
273*
274* If first panel and JB=1 (NB=1), then nothing to do
275*
276 IF( j1.GT.1 .OR. jb.GT.1 ) THEN
277*
278* Merge rank-1 update with BLAS-3 update
279*
280 alpha = a( j, j+1 )
281 a( j, j+1 ) = one
282 CALL scopy( n-j, a( j-1, j+1 ), lda,
283 $ work( (j+1-j1+1)+jb*n ), 1 )
284 CALL sscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
285*
286* K1 identifies if the previous column of the panel has been
287* explicitly stored, e.g., K1=1 and K2= 0 for the first panel,
288* while K1=0 and K2=1 for the rest
289*
290 IF( j1.GT.1 ) THEN
291*
292* Not first panel
293*
294 k2 = 1
295 ELSE
296*
297* First panel
298*
299 k2 = 0
300*
301* First update skips the first column
302*
303 jb = jb - 1
304 END IF
305*
306 DO j2 = j+1, n, nb
307 nj = min( nb, n-j2+1 )
308*
309* Update (J2, J2) diagonal block with SGEMV
310*
311 j3 = j2
312 DO mj = nj-1, 1, -1
313 CALL sgemv( 'No transpose', mj, jb+1,
314 $ -one, work( j3-j1+1+k1*n ), n,
315 $ a( j1-k2, j3 ), 1,
316 $ one, a( j3, j3 ), lda )
317 j3 = j3 + 1
318 END DO
319*
320* Update off-diagonal block of J2-th block row with SGEMM
321*
322 CALL sgemm( 'Transpose', 'Transpose',
323 $ nj, n-j3+1, jb+1,
324 $ -one, a( j1-k2, j2 ), lda,
325 $ work( j3-j1+1+k1*n ), n,
326 $ one, a( j2, j3 ), lda )
327 END DO
328*
329* Recover T( J, J+1 )
330*
331 a( j, j+1 ) = alpha
332 END IF
333*
334* WORK(J+1, 1) stores H(J+1, 1)
335*
336 CALL scopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 )
337 END IF
338 GO TO 10
339 ELSE
340*
341* .....................................................
342* Factorize A as L*D*L**T using the lower triangle of A
343* .....................................................
344*
345* copy first column A(1:N, 1) into H(1:N, 1)
346* (stored in WORK(1:N))
347*
348 CALL scopy( n, a( 1, 1 ), 1, work( 1 ), 1 )
349*
350* J is the main loop index, increasing from 1 to N in steps of
351* JB, where JB is the number of columns factorized by SLASYF;
352* JB is either NB, or N-J+1 for the last block
353*
354 j = 0
355 11 CONTINUE
356 IF( j.GE.n )
357 $ GO TO 20
358*
359* each step of the main loop
360* J is the last column of the previous panel
361* J1 is the first column of the current panel
362* K1 identifies if the previous column of the panel has been
363* explicitly stored, e.g., K1=1 for the first panel, and
364* K1=0 for the rest
365*
366 j1 = j+1
367 jb = min( n-j1+1, nb )
368 k1 = max(1, j)-j
369*
370* Panel factorization
371*
372 CALL slasyf_aa( uplo, 2-k1, n-j, jb,
373 $ a( j+1, max(1, j) ), lda,
374 $ ipiv( j+1 ), work, n, work( n*nb+1 ) )
375*
376* Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
377*
378 DO j2 = j+2, min(n, j+jb+1)
379 ipiv( j2 ) = ipiv( j2 ) + j
380 IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
381 CALL sswap( j1-k1-2, a( j2, 1 ), lda,
382 $ a( ipiv(j2), 1 ), lda )
383 END IF
384 END DO
385 j = j + jb
386*
387* Trailing submatrix update, where
388* A(J2+1, J1-1) stores L(J2+1, J1) and
389* WORK(J2+1, 1) stores H(J2+1, 1)
390*
391 IF( j.LT.n ) THEN
392*
393* if first panel and JB=1 (NB=1), then nothing to do
394*
395 IF( j1.GT.1 .OR. jb.GT.1 ) THEN
396*
397* Merge rank-1 update with BLAS-3 update
398*
399 alpha = a( j+1, j )
400 a( j+1, j ) = one
401 CALL scopy( n-j, a( j+1, j-1 ), 1,
402 $ work( (j+1-j1+1)+jb*n ), 1 )
403 CALL sscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
404*
405* K1 identifies if the previous column of the panel has been
406* explicitly stored, e.g., K1=1 and K2= 0 for the first panel,
407* while K1=0 and K2=1 for the rest
408*
409 IF( j1.GT.1 ) THEN
410*
411* Not first panel
412*
413 k2 = 1
414 ELSE
415*
416* First panel
417*
418 k2 = 0
419*
420* First update skips the first column
421*
422 jb = jb - 1
423 END IF
424*
425 DO j2 = j+1, n, nb
426 nj = min( nb, n-j2+1 )
427*
428* Update (J2, J2) diagonal block with SGEMV
429*
430 j3 = j2
431 DO mj = nj-1, 1, -1
432 CALL sgemv( 'No transpose', mj, jb+1,
433 $ -one, work( j3-j1+1+k1*n ), n,
434 $ a( j3, j1-k2 ), lda,
435 $ one, a( j3, j3 ), 1 )
436 j3 = j3 + 1
437 END DO
438*
439* Update off-diagonal block in J2-th block column with SGEMM
440*
441 CALL sgemm( 'No transpose', 'Transpose',
442 $ n-j3+1, nj, jb+1,
443 $ -one, work( j3-j1+1+k1*n ), n,
444 $ a( j2, j1-k2 ), lda,
445 $ one, a( j3, j2 ), lda )
446 END DO
447*
448* Recover T( J+1, J )
449*
450 a( j+1, j ) = alpha
451 END IF
452*
453* WORK(J+1, 1) stores H(J+1, 1)
454*
455 CALL scopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 )
456 END IF
457 GO TO 11
458 END IF
459*
460 20 CONTINUE
461 work( 1 ) = sroundup_lwork(lwkopt)
462 RETURN
463*
464* End of SSYTRF_AA
465*
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine scopy(n, sx, incx, sy, incy)
SCOPY
Definition scopy.f:82
subroutine sgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
SGEMM
Definition sgemm.f:188
subroutine sgemv(trans, m, n, alpha, a, lda, x, incx, beta, y, incy)
SGEMV
Definition sgemv.f:158
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
Definition ilaenv.f:162
subroutine slasyf_aa(uplo, j1, m, nb, a, lda, ipiv, h, ldh, work)
SLASYF_AA
Definition slasyf_aa.f:144
logical function lsame(ca, cb)
LSAME
Definition lsame.f:48
real function sroundup_lwork(lwork)
SROUNDUP_LWORK
subroutine sscal(n, sa, sx, incx)
SSCAL
Definition sscal.f:79
subroutine sswap(n, sx, incx, sy, incy)
SSWAP
Definition sswap.f:82
Here is the call graph for this function:
Here is the caller graph for this function: