ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
cdbtrf.f
Go to the documentation of this file.
1  SUBROUTINE cdbtrf( M, N, KL, KU, AB, LDAB, INFO )
2 *
3 * -- ScaLAPACK auxiliary routine (version 2.0) --
4 * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
5 *
6 * Written by Andrew J. Cleary, University of Tennessee.
7 * August, 1996.
8 * Modified from CGBTRF:
9 * -- LAPACK routine (preliminary version) --
10 * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
11 * Courant Institute, Argonne National Lab, and Rice University
12 * August 6, 1991
13 *
14 * .. Scalar Arguments ..
15  INTEGER INFO, KL, KU, LDAB, M, N
16 * ..
17 * .. Array Arguments ..
18  COMPLEX AB( LDAB, * )
19 * ..
20 *
21 * Purpose
22 * =======
23 *
24 * Cdbtrf computes an LU factorization of a real m-by-n band matrix A
25 * without using partial pivoting or row interchanges.
26 *
27 * This is the blocked version of the algorithm, calling Level 3 BLAS.
28 *
29 * Arguments
30 * =========
31 *
32 * M (input) INTEGER
33 * The number of rows of the matrix A. M >= 0.
34 *
35 * N (input) INTEGER
36 * The number of columns of the matrix A. N >= 0.
37 *
38 * KL (input) INTEGER
39 * The number of subdiagonals within the band of A. KL >= 0.
40 *
41 * KU (input) INTEGER
42 * The number of superdiagonals within the band of A. KU >= 0.
43 *
44 * AB (input/output) REAL array, dimension (LDAB,N)
45 * On entry, the matrix A in band storage, in rows KL+1 to
46 * 2*KL+KU+1; rows 1 to KL of the array need not be set.
47 * The j-th column of A is stored in the j-th column of the
48 * array AB as follows:
49 * AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
50 *
51 * On exit, details of the factorization: U is stored as an
52 * upper triangular band matrix with KL+KU superdiagonals in
53 * rows 1 to KL+KU+1, and the multipliers used during the
54 * factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
55 * See below for further details.
56 *
57 * LDAB (input) INTEGER
58 * The leading dimension of the array AB. LDAB >= 2*KL+KU+1.
59 *
60 * INFO (output) INTEGER
61 * = 0: successful exit
62 * < 0: if INFO = -i, the i-th argument had an illegal value
63 * > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
64 * has been completed, but the factor U is exactly
65 * singular, and division by zero will occur if it is used
66 * to solve a system of equations.
67 *
68 * Further Details
69 * ===============
70 *
71 * The band storage scheme is illustrated by the following example, when
72 * M = N = 6, KL = 2, KU = 1:
73 *
74 * On entry: On exit:
75 *
76 * * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56
77 * a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66
78 * a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 *
79 * a31 a42 a53 a64 * * m31 m42 m53 m64 * *
80 *
81 * Array elements marked * are not used by the routine.
82 *
83 * =====================================================================
84 *
85 * .. Parameters ..
86  REAL ONE, ZERO
87  parameter( one = 1.0e+0 )
88  parameter( zero = 0.0e+0 )
89  COMPLEX CONE, CZERO
90  parameter( cone = ( 1.0e+0, 0.0e+0 ) )
91  parameter( czero = ( 0.0e+0, 0.0e+0 ) )
92  INTEGER NBMAX, LDWORK
93  parameter( nbmax = 64, ldwork = nbmax+1 )
94 * ..
95 * .. Local Scalars ..
96  INTEGER I, I2, I3, II, J, J2, J3, JB, JJ, JM, JP,
97  $ JU, KM, KV, NB, NW
98 * ..
99 * .. Local Arrays ..
100  COMPLEX WORK13( LDWORK, NBMAX ),
101  $ WORK31( LDWORK, NBMAX )
102 * ..
103 * .. External Functions ..
104  INTEGER ILAENV, ISAMAX
105  EXTERNAL ilaenv, isamax
106 * ..
107 * .. External Subroutines ..
108  EXTERNAL ccopy, cdbtf2, cgemm, cgeru, cscal,
109  $ cswap, ctrsm, xerbla
110 * ..
111 * .. Intrinsic Functions ..
112  INTRINSIC max, min
113 * ..
114 * .. Executable Statements ..
115 *
116 * KV is the number of superdiagonals in the factor U
117 *
118  kv = ku
119 *
120 * Test the input parameters.
121 *
122  info = 0
123  IF( m.LT.0 ) THEN
124  info = -1
125  ELSE IF( n.LT.0 ) THEN
126  info = -2
127  ELSE IF( kl.LT.0 ) THEN
128  info = -3
129  ELSE IF( ku.LT.0 ) THEN
130  info = -4
131  ELSE IF( ldab.LT.min( min( kl+kv+1,m ),n ) ) THEN
132  info = -6
133  END IF
134  IF( info.NE.0 ) THEN
135  CALL xerbla( 'CDBTRF', -info )
136  RETURN
137  END IF
138 *
139 * Quick return if possible
140 *
141  IF( m.EQ.0 .OR. n.EQ.0 )
142  $ RETURN
143 *
144 * Determine the block size for this environment
145 *
146  nb = ilaenv( 1, 'CDBTRF', ' ', m, n, kl, ku )
147 *
148 * The block size must not exceed the limit set by the size of the
149 * local arrays WORK13 and WORK31.
150 *
151  nb = min( nb, nbmax )
152 *
153  IF( nb.LE.1 .OR. nb.GT.kl ) THEN
154 *
155 * Use unblocked code
156 *
157  CALL cdbtf2( m, n, kl, ku, ab, ldab, info )
158  ELSE
159 *
160 * Use blocked code
161 *
162 * Zero the superdiagonal elements of the work array WORK13
163 *
164  DO 20 j = 1, nb
165  DO 10 i = 1, j - 1
166  work13( i, j ) = zero
167  10 CONTINUE
168  20 CONTINUE
169 *
170 * Zero the subdiagonal elements of the work array WORK31
171 *
172  DO 40 j = 1, nb
173  DO 30 i = j + 1, nb
174  work31( i, j ) = zero
175  30 CONTINUE
176  40 CONTINUE
177 *
178 * JU is the index of the last column affected by the current
179 * stage of the factorization
180 *
181  ju = 1
182 *
183  DO 180 j = 1, min( m, n ), nb
184  jb = min( nb, min( m, n )-j+1 )
185 *
186 * The active part of the matrix is partitioned
187 *
188 * A11 A12 A13
189 * A21 A22 A23
190 * A31 A32 A33
191 *
192 * Here A11, A21 and A31 denote the current block of JB columns
193 * which is about to be factorized. The number of rows in the
194 * partitioning are JB, I2, I3 respectively, and the numbers
195 * of columns are JB, J2, J3. The superdiagonal elements of A13
196 * and the subdiagonal elements of A31 lie outside the band.
197 *
198  i2 = min( kl-jb, m-j-jb+1 )
199  i3 = min( jb, m-j-kl+1 )
200 *
201 * J2 and J3 are computed after JU has been updated.
202 *
203 * Factorize the current block of JB columns
204 *
205  DO 80 jj = j, j + jb - 1
206 *
207 * Find pivot and test for singularity. KM is the number of
208 * subdiagonal elements in the current column.
209 *
210  km = min( kl, m-jj )
211  jp = 1
212  IF( ab( kv+jp, jj ).NE.zero ) THEN
213  ju = max( ju, min( jj+ku+jp-1, n ) )
214 *
215 * Compute multipliers
216 *
217  CALL cscal( km, one / ab( kv+1, jj ), ab( kv+2, jj ),
218  $ 1 )
219 *
220 * Update trailing submatrix within the band and within
221 * the current block. JM is the index of the last column
222 * which needs to be updated.
223 *
224  jm = min( ju, j+jb-1 )
225  IF( jm.GT.jj ) THEN
226  CALL cgeru( km, jm-jj, -cone, ab( kv+2, jj ), 1,
227  $ ab( kv, jj+1 ), ldab-1,
228  $ ab( kv+1, jj+1 ), ldab-1 )
229  END IF
230  END IF
231 *
232 * Copy current column of A31 into the work array WORK31
233 *
234  nw = min( jj-j+1, i3 )
235  IF( nw.GT.0 )
236  $ CALL ccopy( nw, ab( kv+kl+1-jj+j, jj ), 1,
237  $ work31( 1, jj-j+1 ), 1 )
238  80 CONTINUE
239  IF( j+jb.LE.n ) THEN
240 *
241 * Apply the row interchanges to the other blocks.
242 *
243  j2 = min( ju-j+1, kv ) - jb
244  j3 = max( 0, ju-j-kv+1 )
245 *
246 * Update the relevant part of the trailing submatrix
247 *
248  IF( j2.GT.0 ) THEN
249 *
250 * Update A12
251 *
252  CALL ctrsm( 'Left', 'Lower', 'No transpose', 'Unit',
253  $ jb, j2, cone, ab( kv+1, j ), ldab-1,
254  $ ab( kv+1-jb, j+jb ), ldab-1 )
255 *
256  IF( i2.GT.0 ) THEN
257 *
258 * Update A22
259 *
260  CALL cgemm( 'No transpose', 'No transpose', i2, j2,
261  $ jb, -cone, ab( kv+1+jb, j ), ldab-1,
262  $ ab( kv+1-jb, j+jb ), ldab-1, cone,
263  $ ab( kv+1, j+jb ), ldab-1 )
264  END IF
265 *
266  IF( i3.GT.0 ) THEN
267 *
268 * Update A32
269 *
270  CALL cgemm( 'No transpose', 'No transpose', i3, j2,
271  $ jb, -cone, work31, ldwork,
272  $ ab( kv+1-jb, j+jb ), ldab-1, cone,
273  $ ab( kv+kl+1-jb, j+jb ), ldab-1 )
274  END IF
275  END IF
276 *
277  IF( j3.GT.0 ) THEN
278 *
279 * Copy the lower triangle of A13 into the work array
280 * WORK13
281 *
282  DO 130 jj = 1, j3
283  DO 120 ii = jj, jb
284  work13( ii, jj ) = ab( ii-jj+1, jj+j+kv-1 )
285  120 CONTINUE
286  130 CONTINUE
287 *
288 * Update A13 in the work array
289 *
290  CALL ctrsm( 'Left', 'Lower', 'No transpose', 'Unit',
291  $ jb, j3, cone, ab( kv+1, j ), ldab-1,
292  $ work13, ldwork )
293 *
294  IF( i2.GT.0 ) THEN
295 *
296 * Update A23
297 *
298  CALL cgemm( 'No transpose', 'No transpose', i2, j3,
299  $ jb, -cone, ab( kv+1+jb, j ), ldab-1,
300  $ work13, ldwork, cone, ab( 1+jb, j+kv ),
301  $ ldab-1 )
302  END IF
303 *
304  IF( i3.GT.0 ) THEN
305 *
306 * Update A33
307 *
308  CALL cgemm( 'No transpose', 'No transpose', i3, j3,
309  $ jb, -cone, work31, ldwork, work13,
310  $ ldwork, cone, ab( 1+kl, j+kv ), ldab-1 )
311  END IF
312 *
313 * Copy the lower triangle of A13 back into place
314 *
315  DO 150 jj = 1, j3
316  DO 140 ii = jj, jb
317  ab( ii-jj+1, jj+j+kv-1 ) = work13( ii, jj )
318  140 CONTINUE
319  150 CONTINUE
320  END IF
321  ELSE
322  END IF
323 *
324 * copy the upper triangle of A31 back into place
325 *
326  DO 170 jj = j + jb - 1, j, -1
327 *
328 * Copy the current column of A31 back into place
329 *
330  nw = min( i3, jj-j+1 )
331  IF( nw.GT.0 )
332  $ CALL ccopy( nw, work31( 1, jj-j+1 ), 1,
333  $ ab( kv+kl+1-jj+j, jj ), 1 )
334  170 CONTINUE
335  180 CONTINUE
336  END IF
337 *
338  RETURN
339 *
340 * End of CDBTRF
341 *
342  END
max
#define max(A, B)
Definition: pcgemr.c:180
cdbtrf
subroutine cdbtrf(M, N, KL, KU, AB, LDAB, INFO)
Definition: cdbtrf.f:2
cdbtf2
subroutine cdbtf2(M, N, KL, KU, AB, LDAB, INFO)
Definition: cdbtf2.f:2
min
#define min(A, B)
Definition: pcgemr.c:181