ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
sdbtrf.f
Go to the documentation of this file.
00001       SUBROUTINE SDBTRF( M, N, KL, KU, AB, LDAB, INFO )
00002 *
00003 *  -- ScaLAPACK auxiliary routine (version 2.0) --
00004 *     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
00005 *
00006 *     Written by Andrew J. Cleary, University of Tennessee.
00007 *     August, 1996.
00008 *     Modified from SGBTRF:
00009 *  -- LAPACK routine (preliminary version) --
00010 *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
00011 *     Courant Institute, Argonne National Lab, and Rice University
00012 *     August 6, 1991
00013 *
00014 *     .. Scalar Arguments ..
00015       INTEGER            INFO, KL, KU, LDAB, M, N
00016 *     ..
00017 *     .. Array Arguments ..
00018       REAL               AB( LDAB, * )
00019 *     ..
00020 *
00021 *  Purpose
00022 *  =======
00023 *
00024 *  Sdbtrf computes an LU factorization of a real m-by-n band matrix A
00025 *  without using partial pivoting or row interchanges.
00026 *
00027 *  This is the blocked version of the algorithm, calling Level 3 BLAS.
00028 *
00029 *  Arguments
00030 *  =========
00031 *
00032 *  M       (input) INTEGER
00033 *          The number of rows of the matrix A.  M >= 0.
00034 *
00035 *  N       (input) INTEGER
00036 *          The number of columns of the matrix A.  N >= 0.
00037 *
00038 *  KL      (input) INTEGER
00039 *          The number of subdiagonals within the band of A.  KL >= 0.
00040 *
00041 *  KU      (input) INTEGER
00042 *          The number of superdiagonals within the band of A.  KU >= 0.
00043 *
00044 *  AB      (input/output) REAL array, dimension (LDAB,N)
00045 *          On entry, the matrix A in band storage, in rows KL+1 to
00046 *          2*KL+KU+1; rows 1 to KL of the array need not be set.
00047 *          The j-th column of A is stored in the j-th column of the
00048 *          array AB as follows:
00049 *          AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
00050 *
00051 *          On exit, details of the factorization: U is stored as an
00052 *          upper triangular band matrix with KL+KU superdiagonals in
00053 *          rows 1 to KL+KU+1, and the multipliers used during the
00054 *          factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
00055 *          See below for further details.
00056 *
00057 *  LDAB    (input) INTEGER
00058 *          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
00059 *
00060 *  INFO    (output) INTEGER
00061 *          = 0: successful exit
00062 *          < 0: if INFO = -i, the i-th argument had an illegal value
00063 *          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
00064 *               has been completed, but the factor U is exactly
00065 *               singular, and division by zero will occur if it is used
00066 *               to solve a system of equations.
00067 *
00068 *  Further Details
00069 *  ===============
00070 *
00071 *  The band storage scheme is illustrated by the following example, when
00072 *  M = N = 6, KL = 2, KU = 1:
00073 *
00074 *  On entry:                       On exit:
00075 *
00076 *      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
00077 *     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
00078 *     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
00079 *     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *
00080 *
00081 *  Array elements marked * are not used by the routine.
00082 *
00083 *  =====================================================================
00084 *
00085 *     .. Parameters ..
00086       REAL               ONE, ZERO
00087       PARAMETER          ( ONE = 1.0E+0 )
00088       PARAMETER          ( ZERO = 0.0E+0 )
00089       INTEGER            NBMAX, LDWORK
00090       PARAMETER          ( NBMAX = 64, LDWORK = NBMAX+1 )
00091 *     ..
00092 *     .. Local Scalars ..
00093       INTEGER            I, I2, I3, II, J, J2, J3, JB, JJ, JM, JP,
00094      $                   JU, KM, KV, NB, NW
00095 *     ..
00096 *     .. Local Arrays ..
00097       REAL                 WORK13( LDWORK, NBMAX ),
00098      $                   WORK31( LDWORK, NBMAX )
00099 *     ..
00100 *     .. External Functions ..
00101       INTEGER            ILAENV, ISAMAX
00102       EXTERNAL           ILAENV, ISAMAX
00103 *     ..
00104 *     .. External Subroutines ..
00105       EXTERNAL           SCOPY, SDBTF2, SGEMM, SGER, SSCAL,
00106      $                   SSWAP, STRSM, XERBLA
00107 *     ..
00108 *     .. Intrinsic Functions ..
00109       INTRINSIC          MAX, MIN
00110 *     ..
00111 *     .. Executable Statements ..
00112 *
00113 *     KV is the number of superdiagonals in the factor U
00114 *
00115       KV = KU
00116 *
00117 *     Test the input parameters.
00118 *
00119       INFO = 0
00120       IF( M.LT.0 ) THEN
00121          INFO = -1
00122       ELSE IF( N.LT.0 ) THEN
00123          INFO = -2
00124       ELSE IF( KL.LT.0 ) THEN
00125          INFO = -3
00126       ELSE IF( KU.LT.0 ) THEN
00127          INFO = -4
00128       ELSE IF( LDAB.LT.MIN( MIN( KL+KV+1,M ),N ) ) THEN
00129          INFO = -6
00130       END IF
00131       IF( INFO.NE.0 ) THEN
00132          CALL XERBLA( 'SDBTRF', -INFO )
00133          RETURN
00134       END IF
00135 *
00136 *     Quick return if possible
00137 *
00138       IF( M.EQ.0 .OR. N.EQ.0 )
00139      $     RETURN
00140 *
00141 *     Determine the block size for this environment
00142 *
00143       NB = ILAENV( 1, 'SDBTRF', ' ', M, N, KL, KU )
00144 *
00145 *     The block size must not exceed the limit set by the size of the
00146 *     local arrays WORK13 and WORK31.
00147 *
00148       NB = MIN( NB, NBMAX )
00149 *
00150       IF( NB.LE.1 .OR. NB.GT.KL ) THEN
00151 *
00152 *        Use unblocked code
00153 *
00154          CALL SDBTF2( M, N, KL, KU, AB, LDAB, INFO )
00155       ELSE
00156 *
00157 *        Use blocked code
00158 *
00159 *        Zero the superdiagonal elements of the work array WORK13
00160 *
00161          DO 20 J = 1, NB
00162             DO 10 I = 1, J - 1
00163                WORK13( I, J ) = ZERO
00164    10       CONTINUE
00165    20    CONTINUE
00166 *
00167 *        Zero the subdiagonal elements of the work array WORK31
00168 *
00169          DO 40 J = 1, NB
00170             DO 30 I = J + 1, NB
00171                WORK31( I, J ) = ZERO
00172    30       CONTINUE
00173    40    CONTINUE
00174 *
00175 *        JU is the index of the last column affected by the current
00176 *        stage of the factorization
00177 *
00178          JU = 1
00179 *
00180          DO 180 J = 1, MIN( M, N ), NB
00181             JB = MIN( NB, MIN( M, N )-J+1 )
00182 *
00183 *           The active part of the matrix is partitioned
00184 *
00185 *              A11   A12   A13
00186 *              A21   A22   A23
00187 *              A31   A32   A33
00188 *
00189 *           Here A11, A21 and A31 denote the current block of JB columns
00190 *           which is about to be factorized. The number of rows in the
00191 *           partitioning are JB, I2, I3 respectively, and the numbers
00192 *           of columns are JB, J2, J3. The superdiagonal elements of A13
00193 *           and the subdiagonal elements of A31 lie outside the band.
00194 *
00195             I2 = MIN( KL-JB, M-J-JB+1 )
00196             I3 = MIN( JB, M-J-KL+1 )
00197 *
00198 *           J2 and J3 are computed after JU has been updated.
00199 *
00200 *           Factorize the current block of JB columns
00201 *
00202             DO 80 JJ = J, J + JB - 1
00203 *
00204 *              Find pivot and test for singularity. KM is the number of
00205 *              subdiagonal elements in the current column.
00206 *
00207                KM = MIN( KL, M-JJ )
00208                JP = 1
00209                IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
00210                   JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
00211 *
00212 *                 Compute multipliers
00213 *
00214                   CALL SSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
00215      $                 1 )
00216 *
00217 *                 Update trailing submatrix within the band and within
00218 *                 the current block. JM is the index of the last column
00219 *                 which needs to be updated.
00220 *
00221                   JM = MIN( JU, J+JB-1 )
00222                   IF( JM.GT.JJ ) THEN
00223                      CALL SGER( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
00224      $                          AB( KV, JJ+1 ), LDAB-1,
00225      $                          AB( KV+1, JJ+1 ), LDAB-1 )
00226                   END IF
00227                END IF
00228 *
00229 *              Copy current column of A31 into the work array WORK31
00230 *
00231                NW = MIN( JJ-J+1, I3 )
00232                IF( NW.GT.0 )
00233      $            CALL SCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
00234      $                        WORK31( 1, JJ-J+1 ), 1 )
00235    80       CONTINUE
00236             IF( J+JB.LE.N ) THEN
00237 *
00238 *              Apply the row interchanges to the other blocks.
00239 *
00240                J2 = MIN( JU-J+1, KV ) - JB
00241                J3 = MAX( 0, JU-J-KV+1 )
00242 *
00243 *              Update the relevant part of the trailing submatrix
00244 *
00245                IF( J2.GT.0 ) THEN
00246 *
00247 *                 Update A12
00248 *
00249                   CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit',
00250      $                        JB, J2, ONE, AB( KV+1, J ), LDAB-1,
00251      $                        AB( KV+1-JB, J+JB ), LDAB-1 )
00252 *
00253                   IF( I2.GT.0 ) THEN
00254 *
00255 *                    Update A22
00256 *
00257                      CALL SGEMM( 'No transpose', 'No transpose', I2, J2,
00258      $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
00259      $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
00260      $                           AB( KV+1, J+JB ), LDAB-1 )
00261                   END IF
00262 *
00263                   IF( I3.GT.0 ) THEN
00264 *
00265 *                    Update A32
00266 *
00267                      CALL SGEMM( 'No transpose', 'No transpose', I3, J2,
00268      $                           JB, -ONE, WORK31, LDWORK,
00269      $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
00270      $                           AB( KV+KL+1-JB, J+JB ), LDAB-1 )
00271                   END IF
00272                END IF
00273 *
00274                IF( J3.GT.0 ) THEN
00275 *
00276 *                 Copy the lower triangle of A13 into the work array
00277 *                 WORK13
00278 *
00279                   DO 130 JJ = 1, J3
00280                      DO 120 II = JJ, JB
00281                         WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
00282   120                CONTINUE
00283   130             CONTINUE
00284 *
00285 *                 Update A13 in the work array
00286 *
00287                   CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit',
00288      $                        JB, J3, ONE, AB( KV+1, J ), LDAB-1,
00289      $                        WORK13, LDWORK )
00290 *
00291                   IF( I2.GT.0 ) THEN
00292 *
00293 *                    Update A23
00294 *
00295                      CALL SGEMM( 'No transpose', 'No transpose', I2, J3,
00296      $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
00297      $                           WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
00298      $                           LDAB-1 )
00299                   END IF
00300 *
00301                   IF( I3.GT.0 ) THEN
00302 *
00303 *                    Update A33
00304 *
00305                      CALL SGEMM( 'No transpose', 'No transpose', I3, J3,
00306      $                         JB, -ONE, WORK31, LDWORK, WORK13,
00307      $                         LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
00308                   END IF
00309 *
00310 *                 Copy the lower triangle of A13 back into place
00311 *
00312                   DO 150 JJ = 1, J3
00313                      DO 140 II = JJ, JB
00314                         AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
00315   140                CONTINUE
00316   150             CONTINUE
00317                END IF
00318             ELSE
00319             END IF
00320 *
00321 *           copy the upper triangle of A31 back into place
00322 *
00323             DO 170 JJ = J + JB - 1, J, -1
00324 *
00325 *              Copy the current column of A31 back into place
00326 *
00327                NW = MIN( I3, JJ-J+1 )
00328                IF( NW.GT.0 )
00329      $            CALL SCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
00330      $                        AB( KV+KL+1-JJ+J, JJ ), 1 )
00331   170       CONTINUE
00332   180    CONTINUE
00333       END IF
00334 *
00335       RETURN
00336 *
00337 *     End of SDBTRF
00338 *
00339       END