LAPACK  3.8.0
LAPACK: Linear Algebra PACKage
ssytrf_aa.f
Go to the documentation of this file.
1 *> \brief \b SSYTRF_AA
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download SSYTRF_AA + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ssytrf_aa.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ssytrf_aa.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ssytrf_aa.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE SSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
22 *
23 * .. Scalar Arguments ..
24 * CHARACTER UPLO
25 * INTEGER N, LDA, LWORK, INFO
26 * ..
27 * .. Array Arguments ..
28 * INTEGER IPIV( * )
29 * REAL A( LDA, * ), WORK( * )
30 * ..
31 *
32 *> \par Purpose:
33 * =============
34 *>
35 *> \verbatim
36 *>
37 *> SSYTRF_AA computes the factorization of a real symmetric matrix A
38 *> using the Aasen's algorithm. The form of the factorization is
39 *>
40 *> A = U*T*U**T or A = L*T*L**T
41 *>
42 *> where U (or L) is a product of permutation and unit upper (lower)
43 *> triangular matrices, and T is a symmetric tridiagonal matrix.
44 *>
45 *> This is the blocked version of the algorithm, calling Level 3 BLAS.
46 *> \endverbatim
47 *
48 * Arguments:
49 * ==========
50 *
51 *> \param[in] UPLO
52 *> \verbatim
53 *> UPLO is CHARACTER*1
54 *> = 'U': Upper triangle of A is stored;
55 *> = 'L': Lower triangle of A is stored.
56 *> \endverbatim
57 *>
58 *> \param[in] N
59 *> \verbatim
60 *> N is INTEGER
61 *> The order of the matrix A. N >= 0.
62 *> \endverbatim
63 *>
64 *> \param[in,out] A
65 *> \verbatim
66 *> A is REAL array, dimension (LDA,N)
67 *> On entry, the symmetric matrix A. If UPLO = 'U', the leading
68 *> N-by-N upper triangular part of A contains the upper
69 *> triangular part of the matrix A, and the strictly lower
70 *> triangular part of A is not referenced. If UPLO = 'L', the
71 *> leading N-by-N lower triangular part of A contains the lower
72 *> triangular part of the matrix A, and the strictly upper
73 *> triangular part of A is not referenced.
74 *>
75 *> On exit, the tridiagonal matrix is stored in the diagonals
76 *> and the subdiagonals of A just below (or above) the diagonals,
77 *> and L is stored below (or above) the subdiaonals, when UPLO
78 *> is 'L' (or 'U').
79 *> \endverbatim
80 *>
81 *> \param[in] LDA
82 *> \verbatim
83 *> LDA is INTEGER
84 *> The leading dimension of the array A. LDA >= max(1,N).
85 *> \endverbatim
86 *>
87 *> \param[out] IPIV
88 *> \verbatim
89 *> IPIV is INTEGER array, dimension (N)
90 *> On exit, it contains the details of the interchanges, i.e.,
91 *> the row and column k of A were interchanged with the
92 *> row and column IPIV(k).
93 *> \endverbatim
94 *>
95 *> \param[out] WORK
96 *> \verbatim
97 *> WORK is REAL array, dimension (MAX(1,LWORK))
98 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
99 *> \endverbatim
100 *>
101 *> \param[in] LWORK
102 *> \verbatim
103 *> LWORK is INTEGER
104 *> The length of WORK. LWORK >= MAX(1,2*N). For optimum performance
105 *> LWORK >= N*(1+NB), where NB is the optimal blocksize.
106 *>
107 *> If LWORK = -1, then a workspace query is assumed; the routine
108 *> only calculates the optimal size of the WORK array, returns
109 *> this value as the first entry of the WORK array, and no error
110 *> message related to LWORK is issued by XERBLA.
111 *> \endverbatim
112 *>
113 *> \param[out] INFO
114 *> \verbatim
115 *> INFO is INTEGER
116 *> = 0: successful exit
117 *> < 0: if INFO = -i, the i-th argument had an illegal value.
118 *> \endverbatim
119 *
120 * Authors:
121 * ========
122 *
123 *> \author Univ. of Tennessee
124 *> \author Univ. of California Berkeley
125 *> \author Univ. of Colorado Denver
126 *> \author NAG Ltd.
127 *
128 *> \date November 2017
129 *
130 *> \ingroup realSYcomputational
131 *
132 * =====================================================================
133  SUBROUTINE ssytrf_aa( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO)
134 *
135 * -- LAPACK computational routine (version 3.8.0) --
136 * -- LAPACK is a software package provided by Univ. of Tennessee, --
137 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
138 * November 2017
139 *
140  IMPLICIT NONE
141 *
142 * .. Scalar Arguments ..
143  CHARACTER UPLO
144  INTEGER N, LDA, LWORK, INFO
145 * ..
146 * .. Array Arguments ..
147  INTEGER IPIV( * )
148  REAL A( lda, * ), WORK( * )
149 * ..
150 *
151 * =====================================================================
152 * .. Parameters ..
153  REAL ZERO, ONE
154  parameter( zero = 0.0e+0, one = 1.0e+0 )
155 *
156 * .. Local Scalars ..
157  LOGICAL LQUERY, UPPER
158  INTEGER J, LWKOPT
159  INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB
160  REAL ALPHA
161 * ..
162 * .. External Functions ..
163  LOGICAL LSAME
164  INTEGER ILAENV
165  EXTERNAL lsame, ilaenv
166 * ..
167 * .. External Subroutines ..
168  EXTERNAL slasyf_aa, sgemv, sscal, scopy, sswap, sgemm,
169  $ xerbla
170 * ..
171 * .. Intrinsic Functions ..
172  INTRINSIC max
173 * ..
174 * .. Executable Statements ..
175 *
176 * Determine the block size
177 *
178  nb = ilaenv( 1, 'SSYTRF_AA', uplo, n, -1, -1, -1 )
179 *
180 * Test the input parameters.
181 *
182  info = 0
183  upper = lsame( uplo, 'U' )
184  lquery = ( lwork.EQ.-1 )
185  IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
186  info = -1
187  ELSE IF( n.LT.0 ) THEN
188  info = -2
189  ELSE IF( lda.LT.max( 1, n ) ) THEN
190  info = -4
191  ELSE IF( lwork.LT.max( 1, 2*n ) .AND. .NOT.lquery ) THEN
192  info = -7
193  END IF
194 *
195  IF( info.EQ.0 ) THEN
196  lwkopt = (nb+1)*n
197  work( 1 ) = lwkopt
198  END IF
199 *
200  IF( info.NE.0 ) THEN
201  CALL xerbla( 'SSYTRF_AA', -info )
202  RETURN
203  ELSE IF( lquery ) THEN
204  RETURN
205  END IF
206 *
207 * Quick return
208 *
209  IF ( n.EQ.0 ) THEN
210  RETURN
211  ENDIF
212  ipiv( 1 ) = 1
213  IF ( n.EQ.1 ) THEN
214  RETURN
215  END IF
216 *
217 * Adjust block size based on the workspace size
218 *
219  IF( lwork.LT.((1+nb)*n) ) THEN
220  nb = ( lwork-n ) / n
221  END IF
222 *
223  IF( upper ) THEN
224 *
225 * .....................................................
226 * Factorize A as L*D*L**T using the upper triangle of A
227 * .....................................................
228 *
229 * Copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N))
230 *
231  CALL scopy( n, a( 1, 1 ), lda, work( 1 ), 1 )
232 *
233 * J is the main loop index, increasing from 1 to N in steps of
234 * JB, where JB is the number of columns factorized by SLASYF;
235 * JB is either NB, or N-J+1 for the last block
236 *
237  j = 0
238  10 CONTINUE
239  IF( j.GE.n )
240  $ GO TO 20
241 *
242 * each step of the main loop
243 * J is the last column of the previous panel
244 * J1 is the first column of the current panel
245 * K1 identifies if the previous column of the panel has been
246 * explicitly stored, e.g., K1=1 for the first panel, and
247 * K1=0 for the rest
248 *
249  j1 = j + 1
250  jb = min( n-j1+1, nb )
251  k1 = max(1, j)-j
252 *
253 * Panel factorization
254 *
255  CALL slasyf_aa( uplo, 2-k1, n-j, jb,
256  $ a( max(1, j), j+1 ), lda,
257  $ ipiv( j+1 ), work, n, work( n*nb+1 ) )
258 *
259 * Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot)
260 *
261  DO j2 = j+2, min(n, j+jb+1)
262  ipiv( j2 ) = ipiv( j2 ) + j
263  IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
264  CALL sswap( j1-k1-2, a( 1, j2 ), 1,
265  $ a( 1, ipiv(j2) ), 1 )
266  END IF
267  END DO
268  j = j + jb
269 *
270 * Trailing submatrix update, where
271 * the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and
272 * WORK stores the current block of the auxiriarly matrix H
273 *
274  IF( j.LT.n ) THEN
275 *
276 * If first panel and JB=1 (NB=1), then nothing to do
277 *
278  IF( j1.GT.1 .OR. jb.GT.1 ) THEN
279 *
280 * Merge rank-1 update with BLAS-3 update
281 *
282  alpha = a( j, j+1 )
283  a( j, j+1 ) = one
284  CALL scopy( n-j, a( j-1, j+1 ), lda,
285  $ work( (j+1-j1+1)+jb*n ), 1 )
286  CALL sscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
287 *
288 * K1 identifies if the previous column of the panel has been
289 * explicitly stored, e.g., K1=1 and K2= 0 for the first panel,
290 * while K1=0 and K2=1 for the rest
291 *
292  IF( j1.GT.1 ) THEN
293 *
294 * Not first panel
295 *
296  k2 = 1
297  ELSE
298 *
299 * First panel
300 *
301  k2 = 0
302 *
303 * First update skips the first column
304 *
305  jb = jb - 1
306  END IF
307 *
308  DO j2 = j+1, n, nb
309  nj = min( nb, n-j2+1 )
310 *
311 * Update (J2, J2) diagonal block with SGEMV
312 *
313  j3 = j2
314  DO mj = nj-1, 1, -1
315  CALL sgemv( 'No transpose', mj, jb+1,
316  $ -one, work( j3-j1+1+k1*n ), n,
317  $ a( j1-k2, j3 ), 1,
318  $ one, a( j3, j3 ), lda )
319  j3 = j3 + 1
320  END DO
321 *
322 * Update off-diagonal block of J2-th block row with SGEMM
323 *
324  CALL sgemm( 'Transpose', 'Transpose',
325  $ nj, n-j3+1, jb+1,
326  $ -one, a( j1-k2, j2 ), lda,
327  $ work( j3-j1+1+k1*n ), n,
328  $ one, a( j2, j3 ), lda )
329  END DO
330 *
331 * Recover T( J, J+1 )
332 *
333  a( j, j+1 ) = alpha
334  END IF
335 *
336 * WORK(J+1, 1) stores H(J+1, 1)
337 *
338  CALL scopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 )
339  END IF
340  GO TO 10
341  ELSE
342 *
343 * .....................................................
344 * Factorize A as L*D*L**T using the lower triangle of A
345 * .....................................................
346 *
347 * copy first column A(1:N, 1) into H(1:N, 1)
348 * (stored in WORK(1:N))
349 *
350  CALL scopy( n, a( 1, 1 ), 1, work( 1 ), 1 )
351 *
352 * J is the main loop index, increasing from 1 to N in steps of
353 * JB, where JB is the number of columns factorized by SLASYF;
354 * JB is either NB, or N-J+1 for the last block
355 *
356  j = 0
357  11 CONTINUE
358  IF( j.GE.n )
359  $ GO TO 20
360 *
361 * each step of the main loop
362 * J is the last column of the previous panel
363 * J1 is the first column of the current panel
364 * K1 identifies if the previous column of the panel has been
365 * explicitly stored, e.g., K1=1 for the first panel, and
366 * K1=0 for the rest
367 *
368  j1 = j+1
369  jb = min( n-j1+1, nb )
370  k1 = max(1, j)-j
371 *
372 * Panel factorization
373 *
374  CALL slasyf_aa( uplo, 2-k1, n-j, jb,
375  $ a( j+1, max(1, j) ), lda,
376  $ ipiv( j+1 ), work, n, work( n*nb+1 ) )
377 *
378 * Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot)
379 *
380  DO j2 = j+2, min(n, j+jb+1)
381  ipiv( j2 ) = ipiv( j2 ) + j
382  IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
383  CALL sswap( j1-k1-2, a( j2, 1 ), lda,
384  $ a( ipiv(j2), 1 ), lda )
385  END IF
386  END DO
387  j = j + jb
388 *
389 * Trailing submatrix update, where
390 * A(J2+1, J1-1) stores L(J2+1, J1) and
391 * WORK(J2+1, 1) stores H(J2+1, 1)
392 *
393  IF( j.LT.n ) THEN
394 *
395 * if first panel and JB=1 (NB=1), then nothing to do
396 *
397  IF( j1.GT.1 .OR. jb.GT.1 ) THEN
398 *
399 * Merge rank-1 update with BLAS-3 update
400 *
401  alpha = a( j+1, j )
402  a( j+1, j ) = one
403  CALL scopy( n-j, a( j+1, j-1 ), 1,
404  $ work( (j+1-j1+1)+jb*n ), 1 )
405  CALL sscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
406 *
407 * K1 identifies if the previous column of the panel has been
408 * explicitly stored, e.g., K1=1 and K2= 0 for the first panel,
409 * while K1=0 and K2=1 for the rest
410 *
411  IF( j1.GT.1 ) THEN
412 *
413 * Not first panel
414 *
415  k2 = 1
416  ELSE
417 *
418 * First panel
419 *
420  k2 = 0
421 *
422 * First update skips the first column
423 *
424  jb = jb - 1
425  END IF
426 *
427  DO j2 = j+1, n, nb
428  nj = min( nb, n-j2+1 )
429 *
430 * Update (J2, J2) diagonal block with SGEMV
431 *
432  j3 = j2
433  DO mj = nj-1, 1, -1
434  CALL sgemv( 'No transpose', mj, jb+1,
435  $ -one, work( j3-j1+1+k1*n ), n,
436  $ a( j3, j1-k2 ), lda,
437  $ one, a( j3, j3 ), 1 )
438  j3 = j3 + 1
439  END DO
440 *
441 * Update off-diagonal block in J2-th block column with SGEMM
442 *
443  CALL sgemm( 'No transpose', 'Transpose',
444  $ n-j3+1, nj, jb+1,
445  $ -one, work( j3-j1+1+k1*n ), n,
446  $ a( j2, j1-k2 ), lda,
447  $ one, a( j3, j2 ), lda )
448  END DO
449 *
450 * Recover T( J+1, J )
451 *
452  a( j+1, j ) = alpha
453  END IF
454 *
455 * WORK(J+1, 1) stores H(J+1, 1)
456 *
457  CALL scopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 )
458  END IF
459  GO TO 11
460  END IF
461 *
462  20 CONTINUE
463  RETURN
464 *
465 * End of SSYTRF_AA
466 *
467  END
subroutine scopy(N, SX, INCX, SY, INCY)
SCOPY
Definition: scopy.f:84
subroutine slasyf_aa(UPLO, J1, M, NB, A, LDA, IPIV, H, LDH, WORK)
SLASYF_AA
Definition: slasyf_aa.f:146
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
subroutine sswap(N, SX, INCX, SY, INCY)
SSWAP
Definition: sswap.f:84
subroutine sgemv(TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY)
SGEMV
Definition: sgemv.f:158
subroutine sgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
SGEMM
Definition: sgemm.f:189
subroutine sscal(N, SA, SX, INCX)
SSCAL
Definition: sscal.f:81
subroutine ssytrf_aa(UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO)
SSYTRF_AA
Definition: ssytrf_aa.f:134