SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO ) IMPLICIT NONE * * -- LAPACK routine (version 3.X) -- * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. * May 2008 * * .. Scalar Arguments .. INTEGER INFO, LDA, M, N * .. * .. Array Arguments .. INTEGER IPIV( * ) DOUBLE PRECISION A( LDA, * ) * .. * * Purpose * ======= * * DGETRF computes an LU factorization of a general M-by-N matrix A * using partial pivoting with row interchanges. * * The factorization has the form * A = P * L * U * where P is a permutation matrix, L is lower triangular with unit * diagonal elements (lower trapezoidal if m > n), and U is upper * triangular (upper trapezoidal if m < n). * * This code implements an iterative version of Sivan Toledo's recursive * LU algorithm[1]. For square matrices, this iterative versions should * be within a factor of two of the optimum number of memory transfers. * * The pattern is as follows, with the large blocks of U being updated * in one call to DTRSM, and the dotted lines denoting sections that * have had all pending permutations applied: * * 1 2 3 4 5 6 7 8 * +-+-+---+-------+------ * | |1| | | * |.+-+ 2 | | * | | | | | * |.|.+-+-+ 4 | * | | | |1| | * | | |.+-+ | * | | | | | | * |.|.|.|.+-+-+---+ 8 * | | | | | |1| | * | | | | |.+-+ 2 | * | | | | | | | | * | | | | |.|.+-+-+ * | | | | | | | |1| * | | | | | | |.+-+ * | | | | | | | | | * |.|.|.|.|.|.|.|.+----- * | | | | | | | | | * * The 1-2-1-4-1-2-1-8-... pattern is the position of the last 1 bit in * the binary expansion of the current column. Each Schur update is * applied as soon as the necessary portion of U is available. * * [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with * Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997), * 1065-1081. http://dx.doi.org/10.1137/S0895479896297744 * * Arguments * ========= * * M (input) INTEGER * The number of rows of the matrix A. M >= 0. * * N (input) INTEGER * The number of columns of the matrix A. N >= 0. * * A (input/output) DOUBLE PRECISION array, dimension (LDA,N) * On entry, the M-by-N matrix to be factored. * On exit, the factors L and U from the factorization * A = P*L*U; the unit diagonal elements of L are not stored. * * LDA (input) INTEGER * The leading dimension of the array A. LDA >= max(1,M). * * IPIV (output) INTEGER array, dimension (min(M,N)) * The pivot indices; for 1 <= i <= min(M,N), row i of the * matrix was interchanged with row IPIV(i). * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * > 0: if INFO = i, U(i,i) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. * * ===================================================================== * * .. Parameters .. DOUBLE PRECISION ONE, ZERO, NEGONE PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) PARAMETER ( NEGONE = -1.0D+0 ) * .. * .. Local Scalars .. DOUBLE PRECISION SFMIN, TMP INTEGER I, J, JP, NSTEP, NTOPIV, NPIVED, KAHEAD INTEGER KSTART, IPIVSTART, JPIVSTART, KCOLS * .. * .. External Functions .. DOUBLE PRECISION DLAMCH INTEGER IDAMAX LOGICAL DISNAN EXTERNAL DLAMCH, IDAMAX, DISNAN * .. * .. External Subroutines .. EXTERNAL DTRSM, DSCAL, XERBLA, DLASWP * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN, IAND * .. * .. Executable Statements .. * * Test the input parameters. * INFO = 0 IF( M.LT.0 ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 ELSE IF( LDA.LT.MAX( 1, M ) ) THEN INFO = -4 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'DGETRF', -INFO ) RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN * * Compute machine safe minimum * SFMIN = DLAMCH( 'S' ) * NSTEP = MIN( M, N ) DO J = 1, NSTEP KAHEAD = IAND( J, -J ) KSTART = J + 1 - KAHEAD KCOLS = MIN( KAHEAD, M-J ) * * Find pivot. * JP = J - 1 + IDAMAX( M-J+1, A( J, J ), 1 ) IPIV( J ) = JP ! Permute just this column. IF (JP .NE. J) THEN TMP = A( J, J ) A( J, J ) = A( JP, J ) A( JP, J ) = TMP END IF ! Apply pending permutations to L NTOPIV = 1 IPIVSTART = J JPIVSTART = J - NTOPIV DO WHILE ( NTOPIV .LT. KAHEAD ) CALL DLASWP( NTOPIV, A( 1, JPIVSTART ), LDA, IPIVSTART, J, $ IPIV, 1 ) IPIVSTART = IPIVSTART - NTOPIV; NTOPIV = NTOPIV * 2; JPIVSTART = JPIVSTART - NTOPIV; END DO ! Permute U block to match L CALL DLASWP( KCOLS, A( 1,J+1 ), LDA, KSTART, J, IPIV, 1 ) ! Factor the current column IF( A( J, J ).NE.ZERO .AND. .NOT.DISNAN( A( J, J ) ) ) THEN IF( ABS(A( J, J )) .GE. SFMIN ) THEN CALL DSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 ) ELSE DO I = 1, M-J A( J+I, J ) = A( J+I, J ) / A( J, J ) END DO END IF ELSE IF( A( J,J ) .EQ. ZERO .AND. INFO .EQ. 0 ) THEN INFO = J END IF ! Solve for U block. CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', KAHEAD, $ KCOLS, ONE, A( KSTART, KSTART ), LDA, $ A( KSTART, J+1 ), LDA ) ! Schur complement. CALL DGEMM( 'No transpose', 'No transpose', M-J, $ KCOLS, KAHEAD, NEGONE, A( J+1, KSTART ), LDA, $ A( KSTART, J+1 ), LDA, ONE, A( J+1, J+1 ), LDA ) END DO ! Handle pivot permutations on the way out of the recursion NPIVED = IAND( NSTEP, -NSTEP ) J = NSTEP - NPIVED DO WHILE ( J .GT. 0 ) NTOPIV = IAND( J, -J ) CALL DLASWP( NTOPIV, A( 1, J-NTOPIV+1 ), LDA, J+1, NSTEP, $ IPIV, 1 ) J = J - NTOPIV END DO ! If short and wide, handle the rest of the columns. IF ( M .LT. N ) THEN CALL DLASWP( N-M, A( 1, M+KCOLS+1 ), LDA, 1, M, IPIV, 1 ) CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', M, $ N-M, ONE, A, LDA, A( 1,M+KCOLS+1 ), LDA ) END IF RETURN * * End of DGETRF * END