001:       SUBROUTINE DGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
002: *
003: *  -- LAPACK routine (version 3.2) --
004: *     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
005: *     November 2006
006: *
007: *     .. Scalar Arguments ..
008:       INTEGER            INFO, KL, KU, LDAB, M, N
009: *     ..
010: *     .. Array Arguments ..
011:       INTEGER            IPIV( * )
012:       DOUBLE PRECISION   AB( LDAB, * )
013: *     ..
014: *
015: *  Purpose
016: *  =======
017: *
018: *  DGBTRF computes an LU factorization of a real m-by-n band matrix A
019: *  using partial pivoting with row interchanges.
020: *
021: *  This is the blocked version of the algorithm, calling Level 3 BLAS.
022: *
023: *  Arguments
024: *  =========
025: *
026: *  M       (input) INTEGER
027: *          The number of rows of the matrix A.  M >= 0.
028: *
029: *  N       (input) INTEGER
030: *          The number of columns of the matrix A.  N >= 0.
031: *
032: *  KL      (input) INTEGER
033: *          The number of subdiagonals within the band of A.  KL >= 0.
034: *
035: *  KU      (input) INTEGER
036: *          The number of superdiagonals within the band of A.  KU >= 0.
037: *
038: *  AB      (input/output) DOUBLE PRECISION array, dimension (LDAB,N)
039: *          On entry, the matrix A in band storage, in rows KL+1 to
040: *          2*KL+KU+1; rows 1 to KL of the array need not be set.
041: *          The j-th column of A is stored in the j-th column of the
042: *          array AB as follows:
043: *          AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
044: *
045: *          On exit, details of the factorization: U is stored as an
046: *          upper triangular band matrix with KL+KU superdiagonals in
047: *          rows 1 to KL+KU+1, and the multipliers used during the
048: *          factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
049: *          See below for further details.
050: *
051: *  LDAB    (input) INTEGER
052: *          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
053: *
054: *  IPIV    (output) INTEGER array, dimension (min(M,N))
055: *          The pivot indices; for 1 <= i <= min(M,N), row i of the
056: *          matrix was interchanged with row IPIV(i).
057: *
058: *  INFO    (output) INTEGER
059: *          = 0: successful exit
060: *          < 0: if INFO = -i, the i-th argument had an illegal value
061: *          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
062: *               has been completed, but the factor U is exactly
063: *               singular, and division by zero will occur if it is used
064: *               to solve a system of equations.
065: *
066: *  Further Details
067: *  ===============
068: *
069: *  The band storage scheme is illustrated by the following example, when
070: *  M = N = 6, KL = 2, KU = 1:
071: *
072: *  On entry:                       On exit:
073: *
074: *      *    *    *    +    +    +       *    *    *   u14  u25  u36
075: *      *    *    +    +    +    +       *    *   u13  u24  u35  u46
076: *      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
077: *     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
078: *     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
079: *     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *
080: *
081: *  Array elements marked * are not used by the routine; elements marked
082: *  + need not be set on entry, but are required by the routine to store
083: *  elements of U because of fill-in resulting from the row interchanges.
084: *
085: *  =====================================================================
086: *
087: *     .. Parameters ..
088:       DOUBLE PRECISION   ONE, ZERO
089:       PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
090:       INTEGER            NBMAX, LDWORK
091:       PARAMETER          ( NBMAX = 64, LDWORK = NBMAX+1 )
092: *     ..
093: *     .. Local Scalars ..
094:       INTEGER            I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
095:      $                   JU, K2, KM, KV, NB, NW
096:       DOUBLE PRECISION   TEMP
097: *     ..
098: *     .. Local Arrays ..
099:       DOUBLE PRECISION   WORK13( LDWORK, NBMAX ),
100:      $                   WORK31( LDWORK, NBMAX )
101: *     ..
102: *     .. External Functions ..
103:       INTEGER            IDAMAX, ILAENV
104:       EXTERNAL           IDAMAX, ILAENV
105: *     ..
106: *     .. External Subroutines ..
107:       EXTERNAL           DCOPY, DGBTF2, DGEMM, DGER, DLASWP, DSCAL,
108:      $                   DSWAP, DTRSM, XERBLA
109: *     ..
110: *     .. Intrinsic Functions ..
111:       INTRINSIC          MAX, MIN
112: *     ..
113: *     .. Executable Statements ..
114: *
115: *     KV is the number of superdiagonals in the factor U, allowing for
116: *     fill-in
117: *
118:       KV = KU + KL
119: *
120: *     Test the input parameters.
121: *
122:       INFO = 0
123:       IF( M.LT.0 ) THEN
124:          INFO = -1
125:       ELSE IF( N.LT.0 ) THEN
126:          INFO = -2
127:       ELSE IF( KL.LT.0 ) THEN
128:          INFO = -3
129:       ELSE IF( KU.LT.0 ) THEN
130:          INFO = -4
131:       ELSE IF( LDAB.LT.KL+KV+1 ) THEN
132:          INFO = -6
133:       END IF
134:       IF( INFO.NE.0 ) THEN
135:          CALL XERBLA( 'DGBTRF', -INFO )
136:          RETURN
137:       END IF
138: *
139: *     Quick return if possible
140: *
141:       IF( M.EQ.0 .OR. N.EQ.0 )
142:      $   RETURN
143: *
144: *     Determine the block size for this environment
145: *
146:       NB = ILAENV( 1, 'DGBTRF', ' ', M, N, KL, KU )
147: *
148: *     The block size must not exceed the limit set by the size of the
149: *     local arrays WORK13 and WORK31.
150: *
151:       NB = MIN( NB, NBMAX )
152: *
153:       IF( NB.LE.1 .OR. NB.GT.KL ) THEN
154: *
155: *        Use unblocked code
156: *
157:          CALL DGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
158:       ELSE
159: *
160: *        Use blocked code
161: *
162: *        Zero the superdiagonal elements of the work array WORK13
163: *
164:          DO 20 J = 1, NB
165:             DO 10 I = 1, J - 1
166:                WORK13( I, J ) = ZERO
167:    10       CONTINUE
168:    20    CONTINUE
169: *
170: *        Zero the subdiagonal elements of the work array WORK31
171: *
172:          DO 40 J = 1, NB
173:             DO 30 I = J + 1, NB
174:                WORK31( I, J ) = ZERO
175:    30       CONTINUE
176:    40    CONTINUE
177: *
178: *        Gaussian elimination with partial pivoting
179: *
180: *        Set fill-in elements in columns KU+2 to KV to zero
181: *
182:          DO 60 J = KU + 2, MIN( KV, N )
183:             DO 50 I = KV - J + 2, KL
184:                AB( I, J ) = ZERO
185:    50       CONTINUE
186:    60    CONTINUE
187: *
188: *        JU is the index of the last column affected by the current
189: *        stage of the factorization
190: *
191:          JU = 1
192: *
193:          DO 180 J = 1, MIN( M, N ), NB
194:             JB = MIN( NB, MIN( M, N )-J+1 )
195: *
196: *           The active part of the matrix is partitioned
197: *
198: *              A11   A12   A13
199: *              A21   A22   A23
200: *              A31   A32   A33
201: *
202: *           Here A11, A21 and A31 denote the current block of JB columns
203: *           which is about to be factorized. The number of rows in the
204: *           partitioning are JB, I2, I3 respectively, and the numbers
205: *           of columns are JB, J2, J3. The superdiagonal elements of A13
206: *           and the subdiagonal elements of A31 lie outside the band.
207: *
208:             I2 = MIN( KL-JB, M-J-JB+1 )
209:             I3 = MIN( JB, M-J-KL+1 )
210: *
211: *           J2 and J3 are computed after JU has been updated.
212: *
213: *           Factorize the current block of JB columns
214: *
215:             DO 80 JJ = J, J + JB - 1
216: *
217: *              Set fill-in elements in column JJ+KV to zero
218: *
219:                IF( JJ+KV.LE.N ) THEN
220:                   DO 70 I = 1, KL
221:                      AB( I, JJ+KV ) = ZERO
222:    70             CONTINUE
223:                END IF
224: *
225: *              Find pivot and test for singularity. KM is the number of
226: *              subdiagonal elements in the current column.
227: *
228:                KM = MIN( KL, M-JJ )
229:                JP = IDAMAX( KM+1, AB( KV+1, JJ ), 1 )
230:                IPIV( JJ ) = JP + JJ - J
231:                IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
232:                   JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
233:                   IF( JP.NE.1 ) THEN
234: *
235: *                    Apply interchange to columns J to J+JB-1
236: *
237:                      IF( JP+JJ-1.LT.J+KL ) THEN
238: *
239:                         CALL DSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
240:      $                              AB( KV+JP+JJ-J, J ), LDAB-1 )
241:                      ELSE
242: *
243: *                       The interchange affects columns J to JJ-1 of A31
244: *                       which are stored in the work array WORK31
245: *
246:                         CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
247:      $                              WORK31( JP+JJ-J-KL, 1 ), LDWORK )
248:                         CALL DSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
249:      $                              AB( KV+JP, JJ ), LDAB-1 )
250:                      END IF
251:                   END IF
252: *
253: *                 Compute multipliers
254: *
255:                   CALL DSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
256:      $                        1 )
257: *
258: *                 Update trailing submatrix within the band and within
259: *                 the current block. JM is the index of the last column
260: *                 which needs to be updated.
261: *
262:                   JM = MIN( JU, J+JB-1 )
263:                   IF( JM.GT.JJ )
264:      $               CALL DGER( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
265:      $                          AB( KV, JJ+1 ), LDAB-1,
266:      $                          AB( KV+1, JJ+1 ), LDAB-1 )
267:                ELSE
268: *
269: *                 If pivot is zero, set INFO to the index of the pivot
270: *                 unless a zero pivot has already been found.
271: *
272:                   IF( INFO.EQ.0 )
273:      $               INFO = JJ
274:                END IF
275: *
276: *              Copy current column of A31 into the work array WORK31
277: *
278:                NW = MIN( JJ-J+1, I3 )
279:                IF( NW.GT.0 )
280:      $            CALL DCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
281:      $                        WORK31( 1, JJ-J+1 ), 1 )
282:    80       CONTINUE
283:             IF( J+JB.LE.N ) THEN
284: *
285: *              Apply the row interchanges to the other blocks.
286: *
287:                J2 = MIN( JU-J+1, KV ) - JB
288:                J3 = MAX( 0, JU-J-KV+1 )
289: *
290: *              Use DLASWP to apply the row interchanges to A12, A22, and
291: *              A32.
292: *
293:                CALL DLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB,
294:      $                      IPIV( J ), 1 )
295: *
296: *              Adjust the pivot indices.
297: *
298:                DO 90 I = J, J + JB - 1
299:                   IPIV( I ) = IPIV( I ) + J - 1
300:    90          CONTINUE
301: *
302: *              Apply the row interchanges to A13, A23, and A33
303: *              columnwise.
304: *
305:                K2 = J - 1 + JB + J2
306:                DO 110 I = 1, J3
307:                   JJ = K2 + I
308:                   DO 100 II = J + I - 1, J + JB - 1
309:                      IP = IPIV( II )
310:                      IF( IP.NE.II ) THEN
311:                         TEMP = AB( KV+1+II-JJ, JJ )
312:                         AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
313:                         AB( KV+1+IP-JJ, JJ ) = TEMP
314:                      END IF
315:   100             CONTINUE
316:   110          CONTINUE
317: *
318: *              Update the relevant part of the trailing submatrix
319: *
320:                IF( J2.GT.0 ) THEN
321: *
322: *                 Update A12
323: *
324:                   CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
325:      $                        JB, J2, ONE, AB( KV+1, J ), LDAB-1,
326:      $                        AB( KV+1-JB, J+JB ), LDAB-1 )
327: *
328:                   IF( I2.GT.0 ) THEN
329: *
330: *                    Update A22
331: *
332:                      CALL DGEMM( 'No transpose', 'No transpose', I2, J2,
333:      $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
334:      $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
335:      $                           AB( KV+1, J+JB ), LDAB-1 )
336:                   END IF
337: *
338:                   IF( I3.GT.0 ) THEN
339: *
340: *                    Update A32
341: *
342:                      CALL DGEMM( 'No transpose', 'No transpose', I3, J2,
343:      $                           JB, -ONE, WORK31, LDWORK,
344:      $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
345:      $                           AB( KV+KL+1-JB, J+JB ), LDAB-1 )
346:                   END IF
347:                END IF
348: *
349:                IF( J3.GT.0 ) THEN
350: *
351: *                 Copy the lower triangle of A13 into the work array
352: *                 WORK13
353: *
354:                   DO 130 JJ = 1, J3
355:                      DO 120 II = JJ, JB
356:                         WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
357:   120                CONTINUE
358:   130             CONTINUE
359: *
360: *                 Update A13 in the work array
361: *
362:                   CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
363:      $                        JB, J3, ONE, AB( KV+1, J ), LDAB-1,
364:      $                        WORK13, LDWORK )
365: *
366:                   IF( I2.GT.0 ) THEN
367: *
368: *                    Update A23
369: *
370:                      CALL DGEMM( 'No transpose', 'No transpose', I2, J3,
371:      $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
372:      $                           WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
373:      $                           LDAB-1 )
374:                   END IF
375: *
376:                   IF( I3.GT.0 ) THEN
377: *
378: *                    Update A33
379: *
380:                      CALL DGEMM( 'No transpose', 'No transpose', I3, J3,
381:      $                           JB, -ONE, WORK31, LDWORK, WORK13,
382:      $                           LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
383:                   END IF
384: *
385: *                 Copy the lower triangle of A13 back into place
386: *
387:                   DO 150 JJ = 1, J3
388:                      DO 140 II = JJ, JB
389:                         AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
390:   140                CONTINUE
391:   150             CONTINUE
392:                END IF
393:             ELSE
394: *
395: *              Adjust the pivot indices.
396: *
397:                DO 160 I = J, J + JB - 1
398:                   IPIV( I ) = IPIV( I ) + J - 1
399:   160          CONTINUE
400:             END IF
401: *
402: *           Partially undo the interchanges in the current block to
403: *           restore the upper triangular form of A31 and copy the upper
404: *           triangle of A31 back into place
405: *
406:             DO 170 JJ = J + JB - 1, J, -1
407:                JP = IPIV( JJ ) - JJ + 1
408:                IF( JP.NE.1 ) THEN
409: *
410: *                 Apply interchange to columns J to JJ-1
411: *
412:                   IF( JP+JJ-1.LT.J+KL ) THEN
413: *
414: *                    The interchange does not affect A31
415: *
416:                      CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
417:      $                           AB( KV+JP+JJ-J, J ), LDAB-1 )
418:                   ELSE
419: *
420: *                    The interchange does affect A31
421: *
422:                      CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
423:      $                           WORK31( JP+JJ-J-KL, 1 ), LDWORK )
424:                   END IF
425:                END IF
426: *
427: *              Copy the current column of A31 back into place
428: *
429:                NW = MIN( I3, JJ-J+1 )
430:                IF( NW.GT.0 )
431:      $            CALL DCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
432:      $                        AB( KV+KL+1-JJ+J, JJ ), 1 )
433:   170       CONTINUE
434:   180    CONTINUE
435:       END IF
436: *
437:       RETURN
438: *
439: *     End of DGBTRF
440: *
441:       END
442: