subroutine dgetrf	(	integer	M,
		integer	N,
		double precision, dimension( lda, * )	A,
		integer	LDA,
		integer, dimension( * )	IPIV,
		integer	INFO
	)

DGETRF VARIANT: iterative version of Sivan Toledo's recursive LU algorithm

Purpose:

 DGETRF computes an LU factorization of a general M-by-N matrix A
 using partial pivoting with row interchanges.

 The factorization has the form
    A = P * L * U
 where P is a permutation matrix, L is lower triangular with unit
 diagonal elements (lower trapezoidal if m > n), and U is upper
 triangular (upper trapezoidal if m < n).

 This code implements an iterative version of Sivan Toledo's recursive
 LU algorithm[1].  For square matrices, this iterative version should
 be within a factor of two of the optimum number of memory transfers.

 The pattern is as follows, with the large blocks of U being updated
 in one call to DTRSM, and the dotted lines denoting sections that
 have had all pending permutations applied:

  1 2 3 4 5 6 7 8
 +-+-+---+-------+------
 | |1|   |       |
 |.+-+ 2 |       |
 | | |   |       |
 |.|.+-+-+   4   |
 | | | |1|       |
 | | |.+-+       |
 | | | | |       |
 |.|.|.|.+-+-+---+  8
 | | | | | |1|   |
 | | | | |.+-+ 2 |
 | | | | | | |   |
 | | | | |.|.+-+-+
 | | | | | | | |1|
 | | | | | | |.+-+
 | | | | | | | | |
 |.|.|.|.|.|.|.|.+-----
 | | | | | | | | |

 The 1-2-1-4-1-2-1-8-... pattern is the position of the last 1 bit in
 the binary expansion of the current column.  Each Schur update is
 applied as soon as the necessary portion of U is available.

 [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with
 Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997),
 1065-1081. http://dx.doi.org/10.1137/S0895479896297744

Parameters

[in]	M	M is INTEGER The number of rows of the matrix A. M >= 0.
[in]	N	N is INTEGER The number of columns of the matrix A. N >= 0.
[in,out]	A	A is DOUBLE PRECISION array, dimension (LDA,N). On entry, the M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored.
[in]	LDA	LDA is INTEGER The leading dimension of the array A. LDA >= max(1,M).
[out]	IPIV	IPIV is INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).
[out]	INFO	INFO is INTEGER. = 0: successful exit; < 0: if INFO = -i, the i-th argument had an illegal value; > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.

Author: Univ. of Tennessee; Univ. of California Berkeley; Univ. of Colorado Denver; NAG Ltd.

Date: November 2011

Definition at line 136 of file dgetrf.f.

 *
 *  -- LAPACK computational routine (version 3.X) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     November 2011
 *
 *     .. Scalar Arguments ..
       INTEGER            info, lda, m, n
 *     ..
 *     .. Array Arguments ..
       INTEGER            ipiv( * )
       DOUBLE PRECISION   a( lda, * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
       DOUBLE PRECISION   one, zero, negone
       parameter                ( one = 1.0d+0, zero = 0.0d+0 )
       parameter                ( negone = -1.0d+0 )
 *     ..
 *     .. Local Scalars ..
       DOUBLE PRECISION   sfmin, tmp
       INTEGER            i, j, jp, nstep, ntopiv, npived, kahead
       INTEGER            kstart, ipivstart, jpivstart, kcols
 *     ..
 *     .. External Functions ..
       DOUBLE PRECISION   dlamch
       INTEGER            idamax
       LOGICAL            disnan
       EXTERNAL           dlamch, idamax, disnan
 *     ..
 *     .. External Subroutines ..
       EXTERNAL           dtrsm, dscal, xerbla, dlaswp
 *     ..
 *     .. Intrinsic Functions ..
       INTRINSIC          max, min, iand
 *     ..
 *     .. Executable Statements ..
 *
 *     Test the input parameters.
 *
       info = 0
       IF( m.LT.0 ) THEN
          info = -1
       ELSE IF( n.LT.0 ) THEN
          info = -2
       ELSE IF( lda.LT.max( 1, m ) ) THEN
          info = -4
       END IF
       IF( info.NE.0 ) THEN
          CALL xerbla( 'DGETRF', -info )
          RETURN
       END IF
 *
 *     Quick return if possible
 *
       IF( m.EQ.0 .OR. n.EQ.0 )
      $   RETURN
 *
 *     Compute machine safe minimum
 *
       sfmin = dlamch( 'S' )
 *
       nstep = min( m, n )
       DO j = 1, nstep
          kahead = iand( j, -j )
          kstart = j + 1 - kahead
          kcols = min( kahead, m-j )
 *
 *        Find pivot.
 *
          jp = j - 1 + idamax( m-j+1, a( j, j ), 1 )
          ipiv( j ) = jp
 
 *        Permute just this column.
          IF (jp .NE. j) THEN
             tmp = a( j, j )
             a( j, j ) = a( jp, j )
             a( jp, j ) = tmp
          END IF
 
 *        Apply pending permutations to L
          ntopiv = 1
          ipivstart = j
          jpivstart = j - ntopiv
          DO WHILE ( ntopiv .LT. kahead )
             CALL dlaswp( ntopiv, a( 1, jpivstart ), lda, ipivstart, j,
      $           ipiv, 1 )
             ipivstart = ipivstart - ntopiv;
             ntopiv = ntopiv * 2;
             jpivstart = jpivstart - ntopiv;
          END DO
 
 *        Permute U block to match L
          CALL dlaswp( kcols, a( 1,j+1 ), lda, kstart, j, ipiv, 1 )
 
 *        Factor the current column
          IF( a( j, j ).NE.zero .AND. .NOT.disnan( a( j, j ) ) ) THEN
                IF( abs(a( j, j )) .GE. sfmin ) THEN
                   CALL dscal( m-j, one / a( j, j ), a( j+1, j ), 1 )
                ELSE
                  DO i = 1, m-j
                     a( j+i, j ) = a( j+i, j ) / a( j, j )
                  END DO
                END IF
          ELSE IF( a( j,j ) .EQ. zero .AND. info .EQ. 0 ) THEN
             info = j
          END IF
 
 *        Solve for U block.
          CALL dtrsm( 'Left', 'Lower', 'No transpose', 'Unit', kahead,
      $        kcols, one, a( kstart, kstart ), lda,
      $        a( kstart, j+1 ), lda )
 *        Schur complement.
          CALL dgemm( 'No transpose', 'No transpose', m-j,
      $        kcols, kahead, negone, a( j+1, kstart ), lda,
      $        a( kstart, j+1 ), lda, one, a( j+1, j+1 ), lda )
       END DO
 
 *     Handle pivot permutations on the way out of the recursion
       npived = iand( nstep, -nstep )
       j = nstep - npived
       DO WHILE ( j .GT. 0 )
          ntopiv = iand( j, -j )
          CALL dlaswp( ntopiv, a( 1, j-ntopiv+1 ), lda, j+1, nstep,
      $        ipiv, 1 )
          j = j - ntopiv
       END DO
 
 *     If short and wide, handle the rest of the columns.
       IF ( m .LT. n ) THEN
          CALL dlaswp( n-m, a( 1, m+kcols+1 ), lda, 1, m, ipiv, 1 )
          CALL dtrsm( 'Left', 'Lower', 'No transpose', 'Unit', m,
      $        n-m, one, a, lda, a( 1,m+kcols+1 ), lda )
       END IF
 
       RETURN
 *
 *     End of DGETRF
 *

Here is the call graph for this function: