|
ScaLAPACK
2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
|
00001 SUBROUTINE DDBTRF( M, N, KL, KU, AB, LDAB, INFO ) 00002 * 00003 * -- ScaLAPACK auxiliary routine (version 2.0) -- 00004 * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver 00005 * 00006 * Written by Andrew J. Cleary, University of Tennessee. 00007 * August, 1996. 00008 * Modified from DGBTRF: 00009 * -- LAPACK routine (preliminary version) -- 00010 * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 00011 * Courant Institute, Argonne National Lab, and Rice University 00012 * August 6, 1991 00013 * 00014 * .. Scalar Arguments .. 00015 INTEGER INFO, KL, KU, LDAB, M, N 00016 * .. 00017 * .. Array Arguments .. 00018 DOUBLE PRECISION AB( LDAB, * ) 00019 * .. 00020 * 00021 * Purpose 00022 * ======= 00023 * 00024 * Ddbtrf computes an LU factorization of a real m-by-n band matrix A 00025 * without using partial pivoting or row interchanges. 00026 * 00027 * This is the blocked version of the algorithm, calling Level 3 BLAS. 00028 * 00029 * Arguments 00030 * ========= 00031 * 00032 * M (input) INTEGER 00033 * The number of rows of the matrix A. M >= 0. 00034 * 00035 * N (input) INTEGER 00036 * The number of columns of the matrix A. N >= 0. 00037 * 00038 * KL (input) INTEGER 00039 * The number of subdiagonals within the band of A. KL >= 0. 00040 * 00041 * KU (input) INTEGER 00042 * The number of superdiagonals within the band of A. KU >= 0. 00043 * 00044 * AB (input/output) REAL array, dimension (LDAB,N) 00045 * On entry, the matrix A in band storage, in rows KL+1 to 00046 * 2*KL+KU+1; rows 1 to KL of the array need not be set. 00047 * The j-th column of A is stored in the j-th column of the 00048 * array AB as follows: 00049 * AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) 00050 * 00051 * On exit, details of the factorization: U is stored as an 00052 * upper triangular band matrix with KL+KU superdiagonals in 00053 * rows 1 to KL+KU+1, and the multipliers used during the 00054 * factorization are stored in rows KL+KU+2 to 2*KL+KU+1. 00055 * See below for further details. 00056 * 00057 * LDAB (input) INTEGER 00058 * The leading dimension of the array AB. LDAB >= 2*KL+KU+1. 00059 * 00060 * INFO (output) INTEGER 00061 * = 0: successful exit 00062 * < 0: if INFO = -i, the i-th argument had an illegal value 00063 * > 0: if INFO = +i, U(i,i) is exactly zero. The factorization 00064 * has been completed, but the factor U is exactly 00065 * singular, and division by zero will occur if it is used 00066 * to solve a system of equations. 00067 * 00068 * Further Details 00069 * =============== 00070 * 00071 * The band storage scheme is illustrated by the following example, when 00072 * M = N = 6, KL = 2, KU = 1: 00073 * 00074 * On entry: On exit: 00075 * 00076 * * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 00077 * a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 00078 * a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * 00079 * a31 a42 a53 a64 * * m31 m42 m53 m64 * * 00080 * 00081 * Array elements marked * are not used by the routine. 00082 * 00083 * ===================================================================== 00084 * 00085 * .. Parameters .. 00086 DOUBLE PRECISION ONE, ZERO 00087 PARAMETER ( ONE = 1.0D+0 ) 00088 PARAMETER ( ZERO = 0.0D+0 ) 00089 INTEGER NBMAX, LDWORK 00090 PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 ) 00091 * .. 00092 * .. Local Scalars .. 00093 INTEGER I, I2, I3, II, J, J2, J3, JB, JJ, JM, JP, 00094 $ JU, KM, KV, NB, NW 00095 * .. 00096 * .. Local Arrays .. 00097 DOUBLE PRECISION WORK13( LDWORK, NBMAX ), 00098 $ WORK31( LDWORK, NBMAX ) 00099 * .. 00100 * .. External Functions .. 00101 INTEGER ILAENV, ISAMAX 00102 EXTERNAL ILAENV, ISAMAX 00103 * .. 00104 * .. External Subroutines .. 00105 EXTERNAL DCOPY, DDBTF2, DGEMM, DGER, DSCAL, 00106 $ DSWAP, DTRSM, XERBLA 00107 * .. 00108 * .. Intrinsic Functions .. 00109 INTRINSIC MAX, MIN 00110 * .. 00111 * .. Executable Statements .. 00112 * 00113 * KV is the number of superdiagonals in the factor U 00114 * 00115 KV = KU 00116 * 00117 * Test the input parameters. 00118 * 00119 INFO = 0 00120 IF( M.LT.0 ) THEN 00121 INFO = -1 00122 ELSE IF( N.LT.0 ) THEN 00123 INFO = -2 00124 ELSE IF( KL.LT.0 ) THEN 00125 INFO = -3 00126 ELSE IF( KU.LT.0 ) THEN 00127 INFO = -4 00128 ELSE IF( LDAB.LT.MIN( MIN( KL+KV+1,M ),N ) ) THEN 00129 INFO = -6 00130 END IF 00131 IF( INFO.NE.0 ) THEN 00132 CALL XERBLA( 'DDBTRF', -INFO ) 00133 RETURN 00134 END IF 00135 * 00136 * Quick return if possible 00137 * 00138 IF( M.EQ.0 .OR. N.EQ.0 ) 00139 $ RETURN 00140 * 00141 * Determine the block size for this environment 00142 * 00143 NB = ILAENV( 1, 'DDBTRF', ' ', M, N, KL, KU ) 00144 * 00145 * The block size must not exceed the limit set by the size of the 00146 * local arrays WORK13 and WORK31. 00147 * 00148 NB = MIN( NB, NBMAX ) 00149 * 00150 IF( NB.LE.1 .OR. NB.GT.KL ) THEN 00151 * 00152 * Use unblocked code 00153 * 00154 CALL DDBTF2( M, N, KL, KU, AB, LDAB, INFO ) 00155 ELSE 00156 * 00157 * Use blocked code 00158 * 00159 * Zero the superdiagonal elements of the work array WORK13 00160 * 00161 DO 20 J = 1, NB 00162 DO 10 I = 1, J - 1 00163 WORK13( I, J ) = ZERO 00164 10 CONTINUE 00165 20 CONTINUE 00166 * 00167 * Zero the subdiagonal elements of the work array WORK31 00168 * 00169 DO 40 J = 1, NB 00170 DO 30 I = J + 1, NB 00171 WORK31( I, J ) = ZERO 00172 30 CONTINUE 00173 40 CONTINUE 00174 * 00175 * JU is the index of the last column affected by the current 00176 * stage of the factorization 00177 * 00178 JU = 1 00179 * 00180 DO 180 J = 1, MIN( M, N ), NB 00181 JB = MIN( NB, MIN( M, N )-J+1 ) 00182 * 00183 * The active part of the matrix is partitioned 00184 * 00185 * A11 A12 A13 00186 * A21 A22 A23 00187 * A31 A32 A33 00188 * 00189 * Here A11, A21 and A31 denote the current block of JB columns 00190 * which is about to be factorized. The number of rows in the 00191 * partitioning are JB, I2, I3 respectively, and the numbers 00192 * of columns are JB, J2, J3. The superdiagonal elements of A13 00193 * and the subdiagonal elements of A31 lie outside the band. 00194 * 00195 I2 = MIN( KL-JB, M-J-JB+1 ) 00196 I3 = MIN( JB, M-J-KL+1 ) 00197 * 00198 * J2 and J3 are computed after JU has been updated. 00199 * 00200 * Factorize the current block of JB columns 00201 * 00202 DO 80 JJ = J, J + JB - 1 00203 * 00204 * Find pivot and test for singularity. KM is the number of 00205 * subdiagonal elements in the current column. 00206 * 00207 KM = MIN( KL, M-JJ ) 00208 JP = 1 00209 IF( AB( KV+JP, JJ ).NE.ZERO ) THEN 00210 JU = MAX( JU, MIN( JJ+KU+JP-1, N ) ) 00211 * 00212 * Compute multipliers 00213 * 00214 CALL DSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ), 00215 $ 1 ) 00216 * 00217 * Update trailing submatrix within the band and within 00218 * the current block. JM is the index of the last column 00219 * which needs to be updated. 00220 * 00221 JM = MIN( JU, J+JB-1 ) 00222 IF( JM.GT.JJ ) THEN 00223 CALL DGER( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1, 00224 $ AB( KV, JJ+1 ), LDAB-1, 00225 $ AB( KV+1, JJ+1 ), LDAB-1 ) 00226 END IF 00227 END IF 00228 * 00229 * Copy current column of A31 into the work array WORK31 00230 * 00231 NW = MIN( JJ-J+1, I3 ) 00232 IF( NW.GT.0 ) 00233 $ CALL DCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1, 00234 $ WORK31( 1, JJ-J+1 ), 1 ) 00235 80 CONTINUE 00236 IF( J+JB.LE.N ) THEN 00237 * 00238 * Apply the row interchanges to the other blocks. 00239 * 00240 J2 = MIN( JU-J+1, KV ) - JB 00241 J3 = MAX( 0, JU-J-KV+1 ) 00242 * 00243 * Update the relevant part of the trailing submatrix 00244 * 00245 IF( J2.GT.0 ) THEN 00246 * 00247 * Update A12 00248 * 00249 CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', 00250 $ JB, J2, ONE, AB( KV+1, J ), LDAB-1, 00251 $ AB( KV+1-JB, J+JB ), LDAB-1 ) 00252 * 00253 IF( I2.GT.0 ) THEN 00254 * 00255 * Update A22 00256 * 00257 CALL DGEMM( 'No transpose', 'No transpose', I2, J2, 00258 $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, 00259 $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, 00260 $ AB( KV+1, J+JB ), LDAB-1 ) 00261 END IF 00262 * 00263 IF( I3.GT.0 ) THEN 00264 * 00265 * Update A32 00266 * 00267 CALL DGEMM( 'No transpose', 'No transpose', I3, J2, 00268 $ JB, -ONE, WORK31, LDWORK, 00269 $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, 00270 $ AB( KV+KL+1-JB, J+JB ), LDAB-1 ) 00271 END IF 00272 END IF 00273 * 00274 IF( J3.GT.0 ) THEN 00275 * 00276 * Copy the lower triangle of A13 into the work array 00277 * WORK13 00278 * 00279 DO 130 JJ = 1, J3 00280 DO 120 II = JJ, JB 00281 WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 ) 00282 120 CONTINUE 00283 130 CONTINUE 00284 * 00285 * Update A13 in the work array 00286 * 00287 CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', 00288 $ JB, J3, ONE, AB( KV+1, J ), LDAB-1, 00289 $ WORK13, LDWORK ) 00290 * 00291 IF( I2.GT.0 ) THEN 00292 * 00293 * Update A23 00294 * 00295 CALL DGEMM( 'No transpose', 'No transpose', I2, J3, 00296 $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, 00297 $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ), 00298 $ LDAB-1 ) 00299 END IF 00300 * 00301 IF( I3.GT.0 ) THEN 00302 * 00303 * Update A33 00304 * 00305 CALL DGEMM( 'No transpose', 'No transpose', I3, J3, 00306 $ JB, -ONE, WORK31, LDWORK, WORK13, 00307 $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 ) 00308 END IF 00309 * 00310 * Copy the lower triangle of A13 back into place 00311 * 00312 DO 150 JJ = 1, J3 00313 DO 140 II = JJ, JB 00314 AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ ) 00315 140 CONTINUE 00316 150 CONTINUE 00317 END IF 00318 ELSE 00319 END IF 00320 * 00321 * copy the upper triangle of A31 back into place 00322 * 00323 DO 170 JJ = J + JB - 1, J, -1 00324 * 00325 * Copy the current column of A31 back into place 00326 * 00327 NW = MIN( I3, JJ-J+1 ) 00328 IF( NW.GT.0 ) 00329 $ CALL DCOPY( NW, WORK31( 1, JJ-J+1 ), 1, 00330 $ AB( KV+KL+1-JJ+J, JJ ), 1 ) 00331 170 CONTINUE 00332 180 CONTINUE 00333 END IF 00334 * 00335 RETURN 00336 * 00337 * End of DDBTRF 00338 * 00339 END