SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
cdbtrf.f
Go to the documentation of this file.
1 SUBROUTINE cdbtrf( M, N, KL, KU, AB, LDAB, INFO )
2*
3* -- ScaLAPACK auxiliary routine (version 2.0) --
4* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
5*
6* Written by Andrew J. Cleary, University of Tennessee.
7* August, 1996.
8* Modified from CGBTRF:
9* -- LAPACK routine (preliminary version) --
10* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
11* Courant Institute, Argonne National Lab, and Rice University
12* August 6, 1991
13*
14* .. Scalar Arguments ..
15 INTEGER INFO, KL, KU, LDAB, M, N
16* ..
17* .. Array Arguments ..
18 COMPLEX AB( LDAB, * )
19* ..
20*
21* Purpose
22* =======
23*
24* Cdbtrf computes an LU factorization of a real m-by-n band matrix A
25* without using partial pivoting or row interchanges.
26*
27* This is the blocked version of the algorithm, calling Level 3 BLAS.
28*
29* Arguments
30* =========
31*
32* M (input) INTEGER
33* The number of rows of the matrix A. M >= 0.
34*
35* N (input) INTEGER
36* The number of columns of the matrix A. N >= 0.
37*
38* KL (input) INTEGER
39* The number of subdiagonals within the band of A. KL >= 0.
40*
41* KU (input) INTEGER
42* The number of superdiagonals within the band of A. KU >= 0.
43*
44* AB (input/output) REAL array, dimension (LDAB,N)
45* On entry, the matrix A in band storage, in rows KL+1 to
46* 2*KL+KU+1; rows 1 to KL of the array need not be set.
47* The j-th column of A is stored in the j-th column of the
48* array AB as follows:
49* AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
50*
51* On exit, details of the factorization: U is stored as an
52* upper triangular band matrix with KL+KU superdiagonals in
53* rows 1 to KL+KU+1, and the multipliers used during the
54* factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
55* See below for further details.
56*
57* LDAB (input) INTEGER
58* The leading dimension of the array AB. LDAB >= 2*KL+KU+1.
59*
60* INFO (output) INTEGER
61* = 0: successful exit
62* < 0: if INFO = -i, the i-th argument had an illegal value
63* > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
64* has been completed, but the factor U is exactly
65* singular, and division by zero will occur if it is used
66* to solve a system of equations.
67*
68* Further Details
69* ===============
70*
71* The band storage scheme is illustrated by the following example, when
72* M = N = 6, KL = 2, KU = 1:
73*
74* On entry: On exit:
75*
76* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56
77* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66
78* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 *
79* a31 a42 a53 a64 * * m31 m42 m53 m64 * *
80*
81* Array elements marked * are not used by the routine.
82*
83* =====================================================================
84*
85* .. Parameters ..
86 REAL ONE, ZERO
87 parameter( one = 1.0e+0 )
88 parameter( zero = 0.0e+0 )
89 COMPLEX CONE, CZERO
90 parameter( cone = ( 1.0e+0, 0.0e+0 ) )
91 parameter( czero = ( 0.0e+0, 0.0e+0 ) )
92 INTEGER NBMAX, LDWORK
93 parameter( nbmax = 64, ldwork = nbmax+1 )
94* ..
95* .. Local Scalars ..
96 INTEGER I, I2, I3, II, J, J2, J3, JB, JJ, JM, JP,
97 $ JU, KM, KV, NB, NW
98* ..
99* .. Local Arrays ..
100 COMPLEX WORK13( LDWORK, NBMAX ),
101 $ WORK31( LDWORK, NBMAX )
102* ..
103* .. External Functions ..
104 INTEGER ILAENV, ISAMAX
105 EXTERNAL ilaenv, isamax
106* ..
107* .. External Subroutines ..
108 EXTERNAL ccopy, cdbtf2, cgemm, cgeru, cscal,
109 $ cswap, ctrsm, xerbla
110* ..
111* .. Intrinsic Functions ..
112 INTRINSIC max, min
113* ..
114* .. Executable Statements ..
115*
116* KV is the number of superdiagonals in the factor U
117*
118 kv = ku
119*
120* Test the input parameters.
121*
122 info = 0
123 IF( m.LT.0 ) THEN
124 info = -1
125 ELSE IF( n.LT.0 ) THEN
126 info = -2
127 ELSE IF( kl.LT.0 ) THEN
128 info = -3
129 ELSE IF( ku.LT.0 ) THEN
130 info = -4
131 ELSE IF( ldab.LT.min( min( kl+kv+1,m ),n ) ) THEN
132 info = -6
133 END IF
134 IF( info.NE.0 ) THEN
135 CALL xerbla( 'CDBTRF', -info )
136 RETURN
137 END IF
138*
139* Quick return if possible
140*
141 IF( m.EQ.0 .OR. n.EQ.0 )
142 $ RETURN
143*
144* Determine the block size for this environment
145*
146 nb = ilaenv( 1, 'CDBTRF', ' ', m, n, kl, ku )
147*
148* The block size must not exceed the limit set by the size of the
149* local arrays WORK13 and WORK31.
150*
151 nb = min( nb, nbmax )
152*
153 IF( nb.LE.1 .OR. nb.GT.kl ) THEN
154*
155* Use unblocked code
156*
157 CALL cdbtf2( m, n, kl, ku, ab, ldab, info )
158 ELSE
159*
160* Use blocked code
161*
162* Zero the superdiagonal elements of the work array WORK13
163*
164 DO 20 j = 1, nb
165 DO 10 i = 1, j - 1
166 work13( i, j ) = zero
167 10 CONTINUE
168 20 CONTINUE
169*
170* Zero the subdiagonal elements of the work array WORK31
171*
172 DO 40 j = 1, nb
173 DO 30 i = j + 1, nb
174 work31( i, j ) = zero
175 30 CONTINUE
176 40 CONTINUE
177*
178* JU is the index of the last column affected by the current
179* stage of the factorization
180*
181 ju = 1
182*
183 DO 180 j = 1, min( m, n ), nb
184 jb = min( nb, min( m, n )-j+1 )
185*
186* The active part of the matrix is partitioned
187*
188* A11 A12 A13
189* A21 A22 A23
190* A31 A32 A33
191*
192* Here A11, A21 and A31 denote the current block of JB columns
193* which is about to be factorized. The number of rows in the
194* partitioning are JB, I2, I3 respectively, and the numbers
195* of columns are JB, J2, J3. The superdiagonal elements of A13
196* and the subdiagonal elements of A31 lie outside the band.
197*
198 i2 = min( kl-jb, m-j-jb+1 )
199 i3 = min( jb, m-j-kl+1 )
200*
201* J2 and J3 are computed after JU has been updated.
202*
203* Factorize the current block of JB columns
204*
205 DO 80 jj = j, j + jb - 1
206*
207* Find pivot and test for singularity. KM is the number of
208* subdiagonal elements in the current column.
209*
210 km = min( kl, m-jj )
211 jp = 1
212 IF( ab( kv+jp, jj ).NE.zero ) THEN
213 ju = max( ju, min( jj+ku+jp-1, n ) )
214*
215* Compute multipliers
216*
217 CALL cscal( km, one / ab( kv+1, jj ), ab( kv+2, jj ),
218 $ 1 )
219*
220* Update trailing submatrix within the band and within
221* the current block. JM is the index of the last column
222* which needs to be updated.
223*
224 jm = min( ju, j+jb-1 )
225 IF( jm.GT.jj ) THEN
226 CALL cgeru( km, jm-jj, -cone, ab( kv+2, jj ), 1,
227 $ ab( kv, jj+1 ), ldab-1,
228 $ ab( kv+1, jj+1 ), ldab-1 )
229 END IF
230 END IF
231*
232* Copy current column of A31 into the work array WORK31
233*
234 nw = min( jj-j+1, i3 )
235 IF( nw.GT.0 )
236 $ CALL ccopy( nw, ab( kv+kl+1-jj+j, jj ), 1,
237 $ work31( 1, jj-j+1 ), 1 )
238 80 CONTINUE
239 IF( j+jb.LE.n ) THEN
240*
241* Apply the row interchanges to the other blocks.
242*
243 j2 = min( ju-j+1, kv ) - jb
244 j3 = max( 0, ju-j-kv+1 )
245*
246* Update the relevant part of the trailing submatrix
247*
248 IF( j2.GT.0 ) THEN
249*
250* Update A12
251*
252 CALL ctrsm( 'Left', 'Lower', 'No transpose', 'Unit',
253 $ jb, j2, cone, ab( kv+1, j ), ldab-1,
254 $ ab( kv+1-jb, j+jb ), ldab-1 )
255*
256 IF( i2.GT.0 ) THEN
257*
258* Update A22
259*
260 CALL cgemm( 'No transpose', 'No transpose', i2, j2,
261 $ jb, -cone, ab( kv+1+jb, j ), ldab-1,
262 $ ab( kv+1-jb, j+jb ), ldab-1, cone,
263 $ ab( kv+1, j+jb ), ldab-1 )
264 END IF
265*
266 IF( i3.GT.0 ) THEN
267*
268* Update A32
269*
270 CALL cgemm( 'No transpose', 'No transpose', i3, j2,
271 $ jb, -cone, work31, ldwork,
272 $ ab( kv+1-jb, j+jb ), ldab-1, cone,
273 $ ab( kv+kl+1-jb, j+jb ), ldab-1 )
274 END IF
275 END IF
276*
277 IF( j3.GT.0 ) THEN
278*
279* Copy the lower triangle of A13 into the work array
280* WORK13
281*
282 DO 130 jj = 1, j3
283 DO 120 ii = jj, jb
284 work13( ii, jj ) = ab( ii-jj+1, jj+j+kv-1 )
285 120 CONTINUE
286 130 CONTINUE
287*
288* Update A13 in the work array
289*
290 CALL ctrsm( 'Left', 'Lower', 'No transpose', 'Unit',
291 $ jb, j3, cone, ab( kv+1, j ), ldab-1,
292 $ work13, ldwork )
293*
294 IF( i2.GT.0 ) THEN
295*
296* Update A23
297*
298 CALL cgemm( 'No transpose', 'No transpose', i2, j3,
299 $ jb, -cone, ab( kv+1+jb, j ), ldab-1,
300 $ work13, ldwork, cone, ab( 1+jb, j+kv ),
301 $ ldab-1 )
302 END IF
303*
304 IF( i3.GT.0 ) THEN
305*
306* Update A33
307*
308 CALL cgemm( 'No transpose', 'No transpose', i3, j3,
309 $ jb, -cone, work31, ldwork, work13,
310 $ ldwork, cone, ab( 1+kl, j+kv ), ldab-1 )
311 END IF
312*
313* Copy the lower triangle of A13 back into place
314*
315 DO 150 jj = 1, j3
316 DO 140 ii = jj, jb
317 ab( ii-jj+1, jj+j+kv-1 ) = work13( ii, jj )
318 140 CONTINUE
319 150 CONTINUE
320 END IF
321 ELSE
322 END IF
323*
324* copy the upper triangle of A31 back into place
325*
326 DO 170 jj = j + jb - 1, j, -1
327*
328* Copy the current column of A31 back into place
329*
330 nw = min( i3, jj-j+1 )
331 IF( nw.GT.0 )
332 $ CALL ccopy( nw, work31( 1, jj-j+1 ), 1,
333 $ ab( kv+kl+1-jj+j, jj ), 1 )
334 170 CONTINUE
335 180 CONTINUE
336 END IF
337*
338 RETURN
339*
340* End of CDBTRF
341*
342 END
subroutine cdbtf2(m, n, kl, ku, ab, ldab, info)
Definition cdbtf2.f:2
subroutine cdbtrf(m, n, kl, ku, ab, ldab, info)
Definition cdbtrf.f:2
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181