*     Source listing: d0/d57/ddbtrf_8f_source.html

      SUBROUTINE ddbtrf( M, N, KL, KU, AB, LDAB, INFO )
*
*  -- ScaLAPACK auxiliary routine (version 2.0) --
*     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
*
*     Written by Andrew J. Cleary, University of Tennessee.
*     August, 1996.
*     Modified from DGBTRF:
*  -- LAPACK routine (preliminary version) --
*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
*     Courant Institute, Argonne National Lab, and Rice University
*     August 6, 1991
*
*     .. Scalar Arguments ..
      INTEGER            INFO, KL, KU, LDAB, M, N
*     ..
*     .. Array Arguments ..
      DOUBLE PRECISION   AB( LDAB, * )
*     ..
*
*  Purpose
*  =======
*
*  DDBTRF computes an LU factorization of a real m-by-n band matrix A
*  without using partial pivoting or row interchanges.
*
*  This is the blocked version of the algorithm, calling Level 3 BLAS.
*  When the block size returned by ILAENV is <= 1 or exceeds KL, the
*  work is delegated to the unblocked routine DDBTF2.
*
*  Arguments
*  =========
*
*  M       (input) INTEGER
*          The number of rows of the matrix A.  M >= 0.
*
*  N       (input) INTEGER
*          The number of columns of the matrix A.  N >= 0.
*
*  KL      (input) INTEGER
*          The number of subdiagonals within the band of A.  KL >= 0.
*
*  KU      (input) INTEGER
*          The number of superdiagonals within the band of A.  KU >= 0.
*
*  AB      (input/output) DOUBLE PRECISION array, dimension (LDAB,N)
*          On entry, the matrix A in band storage, in rows 1 to
*          KL+KU+1.  Because no pivoting is done there is no fill-in,
*          so no extra rows are reserved above the band.
*          The j-th column of A is stored in the j-th column of the
*          array AB as follows:
*          AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
*
*          On exit, details of the factorization: U is stored as an
*          upper triangular band matrix with KU superdiagonals in
*          rows 1 to KU+1, and the multipliers used during the
*          factorization are stored in rows KU+2 to KL+KU+1.
*          See below for further details.
*
*  LDAB    (input) INTEGER
*          The leading dimension of the array AB.  The band storage
*          described above occupies KL+KU+1 rows.
*          NOTE: the argument test performed by this routine is the
*          weaker condition LDAB >= min( KL+KU+1, M, N ); it is kept
*          unchanged for compatibility with existing callers.
*
*  INFO    (output) INTEGER
*          = 0: successful exit
*          < 0: if INFO = -i, the i-th argument had an illegal value
*          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
*               has been completed, but the factor U is exactly
*               singular, and division by zero will occur if it is used
*               to solve a system of equations.
*
*  Further Details
*  ===============
*
*  The band storage scheme is illustrated by the following example, when
*  M = N = 6, KL = 2, KU = 1:
*
*  On entry:                       On exit:
*
*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
*     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
*     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *
*
*  Array elements marked * are not used by the routine.
*
*  =====================================================================
*
*     .. Parameters ..
      DOUBLE PRECISION   ONE, ZERO
      parameter( one = 1.0d+0 )
      parameter( zero = 0.0d+0 )
      INTEGER            NBMAX, LDWORK
      parameter( nbmax = 64, ldwork = nbmax+1 )
*     ..
*     .. Local Scalars ..
      INTEGER            I, I2, I3, II, J, J2, J3, JB, JJ, JM, JU, KM,
     $                   KV, NB, NW
*     ..
*     .. Local Arrays ..
      DOUBLE PRECISION   WORK13( LDWORK, NBMAX ),
     $                   WORK31( LDWORK, NBMAX )
*     ..
*     .. External Functions ..
      INTEGER            ILAENV
      EXTERNAL           ilaenv
*     ..
*     .. External Subroutines ..
      EXTERNAL           dcopy, ddbtf2, dgemm, dger, dscal, dtrsm,
     $                   xerbla
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          max, min
*     ..
*     .. Executable Statements ..
*
*     KV is the number of superdiagonals in the factor U.  Without
*     pivoting there is no fill-in, so it equals KU and the diagonal
*     of A lives in row KV+1 of AB.
*
      kv = ku
*
*     Test the input parameters.
*
      info = 0
      IF( m.LT.0 ) THEN
         info = -1
      ELSE IF( n.LT.0 ) THEN
         info = -2
      ELSE IF( kl.LT.0 ) THEN
         info = -3
      ELSE IF( ku.LT.0 ) THEN
         info = -4
      ELSE IF( ldab.LT.min( min( kl+kv+1,m ),n ) ) THEN
         info = -6
      END IF
      IF( info.NE.0 ) THEN
         CALL xerbla( 'DDBTRF', -info )
         RETURN
      END IF
*
*     Quick return if possible
*
      IF( m.EQ.0 .OR. n.EQ.0 )
     $     RETURN
*
*     Determine the block size for this environment
*
      nb = ilaenv( 1, 'DDBTRF', ' ', m, n, kl, ku )
*
*     The block size must not exceed the limit set by the size of the
*     local arrays WORK13 and WORK31.
*
      nb = min( nb, nbmax )
*
      IF( nb.LE.1 .OR. nb.GT.kl ) THEN
*
*        Use unblocked code
*
         CALL ddbtf2( m, n, kl, ku, ab, ldab, info )
      ELSE
*
*        Use blocked code
*
*        Zero the superdiagonal elements of the work array WORK13
*
         DO 20 j = 1, nb
            DO 10 i = 1, j - 1
               work13( i, j ) = zero
   10       CONTINUE
   20    CONTINUE
*
*        Zero the subdiagonal elements of the work array WORK31
*
         DO 40 j = 1, nb
            DO 30 i = j + 1, nb
               work31( i, j ) = zero
   30       CONTINUE
   40    CONTINUE
*
*        JU is the index of the last column affected by the current
*        stage of the factorization
*
         ju = 1
*
         DO 180 j = 1, min( m, n ), nb
            jb = min( nb, min( m, n )-j+1 )
*
*           The active part of the matrix is partitioned
*
*              A11   A12   A13
*              A21   A22   A23
*              A31   A32   A33
*
*           Here A11, A21 and A31 denote the current block of JB columns
*           which is about to be factorized. The number of rows in the
*           partitioning are JB, I2, I3 respectively, and the numbers
*           of columns are JB, J2, J3. The superdiagonal elements of A13
*           and the subdiagonal elements of A31 lie outside the band.
*
            i2 = min( kl-jb, m-j-jb+1 )
            i3 = min( jb, m-j-kl+1 )
*
*           J2 and J3 are computed after JU has been updated.
*
*           Factorize the current block of JB columns
*
            DO 80 jj = j, j + jb - 1
*
*              Test the diagonal element for singularity. KM is the
*              number of subdiagonal elements in the current column.
*
               km = min( kl, m-jj )
               IF( ab( kv+1, jj ).NE.zero ) THEN
                  ju = max( ju, min( jj+ku, n ) )
*
*                 Compute multipliers
*
                  CALL dscal( km, one / ab( kv+1, jj ), ab( kv+2, jj ),
     $                 1 )
*
*                 Update trailing submatrix within the band and within
*                 the current block. JM is the index of the last column
*                 which needs to be updated.  The stride LDAB-1 walks
*                 along a row of A inside the band storage.
*
                  jm = min( ju, j+jb-1 )
                  IF( jm.GT.jj ) THEN
                     CALL dger( km, jm-jj, -one, ab( kv+2, jj ), 1,
     $                          ab( kv, jj+1 ), ldab-1,
     $                          ab( kv+1, jj+1 ), ldab-1 )
                  END IF
               ELSE
*
*                 U(jj,jj) is exactly zero.  Record the first such
*                 column in INFO, as stated in the description of the
*                 INFO argument, leave the column unscaled and carry
*                 on with the factorization.
*
                  IF( info.EQ.0 )
     $               info = jj
               END IF
*
*              Copy current column of A31 into the work array WORK31
*
               nw = min( jj-j+1, i3 )
               IF( nw.GT.0 )
     $            CALL dcopy( nw, ab( kv+kl+1-jj+j, jj ), 1,
     $                        work31( 1, jj-j+1 ), 1 )
   80       CONTINUE
            IF( j+jb.LE.n ) THEN
*
*              Column counts of the blocks to the right of the current
*              block column: J2 for A12/A22/A32 and J3 for A13/A23/A33.
*              (This routine performs no row interchanges.)
*
               j2 = min( ju-j+1, kv ) - jb
               j3 = max( 0, ju-j-kv+1 )
*
*              Update the relevant part of the trailing submatrix
*
               IF( j2.GT.0 ) THEN
*
*                 Update A12
*
                  CALL dtrsm( 'Left', 'Lower', 'No transpose', 'Unit',
     $                        jb, j2, one, ab( kv+1, j ), ldab-1,
     $                        ab( kv+1-jb, j+jb ), ldab-1 )
*
                  IF( i2.GT.0 ) THEN
*
*                    Update A22
*
                     CALL dgemm( 'No transpose', 'No transpose', i2, j2,
     $                           jb, -one, ab( kv+1+jb, j ), ldab-1,
     $                           ab( kv+1-jb, j+jb ), ldab-1, one,
     $                           ab( kv+1, j+jb ), ldab-1 )
                  END IF
*
                  IF( i3.GT.0 ) THEN
*
*                    Update A32
*
                     CALL dgemm( 'No transpose', 'No transpose', i3, j2,
     $                           jb, -one, work31, ldwork,
     $                           ab( kv+1-jb, j+jb ), ldab-1, one,
     $                           ab( kv+kl+1-jb, j+jb ), ldab-1 )
                  END IF
               END IF
*
               IF( j3.GT.0 ) THEN
*
*                 Copy the lower triangle of A13 into the work array
*                 WORK13
*
                  DO 130 jj = 1, j3
                     DO 120 ii = jj, jb
                        work13( ii, jj ) = ab( ii-jj+1, jj+j+kv-1 )
  120                CONTINUE
  130             CONTINUE
*
*                 Update A13 in the work array
*
                  CALL dtrsm( 'Left', 'Lower', 'No transpose', 'Unit',
     $                        jb, j3, one, ab( kv+1, j ), ldab-1,
     $                        work13, ldwork )
*
                  IF( i2.GT.0 ) THEN
*
*                    Update A23
*
                     CALL dgemm( 'No transpose', 'No transpose', i2, j3,
     $                           jb, -one, ab( kv+1+jb, j ), ldab-1,
     $                           work13, ldwork, one, ab( 1+jb, j+kv ),
     $                           ldab-1 )
                  END IF
*
                  IF( i3.GT.0 ) THEN
*
*                    Update A33
*
                     CALL dgemm( 'No transpose', 'No transpose', i3, j3,
     $                         jb, -one, work31, ldwork, work13,
     $                         ldwork, one, ab( 1+kl, j+kv ), ldab-1 )
                  END IF
*
*                 Copy the lower triangle of A13 back into place
*
                  DO 150 jj = 1, j3
                     DO 140 ii = jj, jb
                        ab( ii-jj+1, jj+j+kv-1 ) = work13( ii, jj )
  140                CONTINUE
  150             CONTINUE
               END IF
            END IF
*
*           Copy the upper triangle of A31 back into place, one column
*           at a time, from the last column of the block to the first
*
            DO 170 jj = j + jb - 1, j, -1
*
*              Copy the current column of A31 back into place
*
               nw = min( i3, jj-j+1 )
               IF( nw.GT.0 )
     $            CALL dcopy( nw, work31( 1, jj-j+1 ), 1,
     $                        ab( kv+kl+1-jj+j, jj ), 1 )
  170       CONTINUE
  180    CONTINUE
      END IF
*
      RETURN
*
*     End of DDBTRF
*
      END