SUBROUTINE DGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO ) * * -- LAPACK routine (version 3.1) -- * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. * March 2008 * * .. Scalar Arguments .. INTEGER INFO, LDA, LWORK, M, N * .. * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) * .. * * Purpose * ======= * * DGEQRF computes a QR factorization of a real M-by-N matrix A: * A = Q * R. * * This is the left-looking Level 3 BLAS version of the algorithm. * * Arguments * ========= * * M (input) INTEGER * The number of rows of the matrix A. M >= 0. * * N (input) INTEGER * The number of columns of the matrix A. N >= 0. * * A (input/output) DOUBLE PRECISION array, dimension (LDA,N) * On entry, the M-by-N matrix A. * On exit, the elements on and above the diagonal of the array * contain the min(M,N)-by-N upper trapezoidal matrix R (R is * upper triangular if m >= n); the elements below the diagonal, * with the array TAU, represent the orthogonal matrix Q as a * product of min(m,n) elementary reflectors (see Further * Details). * * LDA (input) INTEGER * The leading dimension of the array A. LDA >= max(1,M). * * TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) * The scalar factors of the elementary reflectors (see Further * Details). * * WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) * On exit, if INFO = 0, WORK(1) returns the optimal LWORK. * * LWORK (input) INTEGER * * The dimension of the array WORK. The dimension can be divided into three parts. * * 1) The part for the triangular factor T. If the very last T is not bigger * than any of the rest, then this part is NB x ceiling(K/NB), otherwise, * NB x (K-NT), where K = min(M,N) and NT is the dimension of the very last T * * 2) The part for the very last T when T is bigger than any of the rest T. * The size of this part is NT x NT, where NT = K - ceiling ((K-NX)/NB) x NB, * where K = min(M,N), NX is calculated by * NX = MAX( 0, ILAENV( 3, 'DGEQRF', ' ', M, N, -1, -1 ) ) * * 3) The part for dlarfb is of size max((N-M)*K, (N-M)*NB, K*NB, NB*NB) * * So LWORK = part1 + part2 + part3 * * If LWORK = -1, then a workspace query is assumed; the routine * only calculates the optimal size of the WORK array, returns * this value as the first entry of the WORK array, and no error * message related to LWORK is issued by XERBLA. * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -i, the i-th argument had an illegal value * * Further Details * =============== * * The matrix Q is represented as a product of elementary reflectors * * Q = H(1) H(2) . . . H(k), where k = min(m,n). * * Each H(i) has the form * * H(i) = I - tau * v * v' * * where tau is a real scalar, and v is a real vector with * v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), * and tau in TAU(i). * * ===================================================================== * * .. Local Scalars .. LOGICAL LQUERY INTEGER I, IB, IINFO, IWS, J, K, LWKOPT, NB, $ NBMIN, NX, LBWORK, NT, LLWORK * .. * .. External Subroutines .. EXTERNAL DGEQR2, DLARFB, DLARFT, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. External Functions .. INTEGER ILAENV REAL SCEIL EXTERNAL ILAENV, SCEIL * .. * .. Executable Statements .. INFO = 0 NBMIN = 2 NX = 0 IWS = N K = MIN( M, N ) NB = ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 ) IF( NB.GT.1 .AND. NB.LT.K ) THEN * * Determine when to cross over from blocked to unblocked code. * NX = MAX( 0, ILAENV( 3, 'DGEQRF', ' ', M, N, -1, -1 ) ) END IF * * Get NT, the size of the very last T, which is the left-over from in-between K-NX and K to K, eg.: * * NB=3 2NB=6 K=10 * | | | * 1--2--3--4--5--6--7--8--9--10 * | \________/ * K-NX=5 NT=4 * * So here 4 x 4 is the last T stored in the workspace * NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB * * optimal workspace = space for dlarfb + space for normal T's + space for the last T * LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB)) LLWORK = SCEIL(REAL(LLWORK)/REAL(NB)) IF ( NT.GT.NB ) THEN LBWORK = K-NT * * Optimal workspace for dlarfb = MAX(1,N)*NT * LWKOPT = (LBWORK+LLWORK)*NB WORK( 1 ) = (LWKOPT+NT*NT) ELSE LBWORK = SCEIL(REAL(K)/REAL(NB))*NB LWKOPT = (LBWORK+LLWORK-NB)*NB WORK( 1 ) = LWKOPT END IF * * Test the input arguments * LQUERY = ( LWORK.EQ.-1 ) IF( M.LT.0 ) THEN INFO = -1 ELSE IF( N.LT.0 ) THEN INFO = -2 ELSE IF( LDA.LT.MAX( 1, M ) ) THEN INFO = -4 ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN INFO = -7 END IF IF( INFO.NE.0 ) THEN CALL XERBLA( 'DGEQRF', -INFO ) RETURN ELSE IF( LQUERY ) THEN RETURN END IF * * Quick return if possible * IF( K.EQ.0 ) THEN WORK( 1 ) = 1 RETURN END IF * IF( NB.GT.1 .AND. NB.LT.K ) THEN IF( NX.LT.K ) THEN * * Determine if workspace is large enough for blocked code. * IF ( NT.LE.NB ) THEN IWS = (LBWORK+LLWORK-NB)*NB ELSE IWS = (LBWORK+LLWORK)*NB+NT*NT END IF IF( LWORK.LT.IWS ) THEN * * Not enough workspace to use optimal NB: reduce NB and * determine the minimum value of NB. * IF ( NT.LE.NB ) THEN NB = LWORK / (LLWORK+(LBWORK-NB)) ELSE NB = (LWORK-NT*NT)/(LBWORK+LLWORK) END IF NBMIN = MAX( 2, ILAENV( 2, 'DGEQRF', ' ', M, N, -1, $ -1 ) ) END IF END IF END IF * IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN * * Use blocked code initially * DO 10 I = 1, K - NX, NB IB = MIN( K-I+1, NB ) * * Update the current column using old T's * DO 20 J = 1, I - NB, NB * * Apply H' to A(J:M,I:I+IB-1) from the left * CALL DLARFB( 'Left', 'Transpose', 'Forward', $ 'Columnwise', M-J+1, IB, NB, $ A( J, J ), LDA, WORK(J), LBWORK, $ A( J, I ), LDA, WORK(LBWORK*NB+NT*NT+1), $ IB) 20 CONTINUE * * Compute the QR factorization of the current block * A(I:M,I:I+IB-1) * CALL DGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ), $ WORK(LBWORK*NB+NT*NT+1), IINFO ) IF( I+IB.LE.N ) THEN * * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * CALL DLARFT( 'Forward', 'Columnwise', M-I+1, IB, $ A( I, I ), LDA, TAU( I ), $ WORK(I), LBWORK ) * END IF 10 CONTINUE ELSE I = 1 END IF * * Use unblocked code to factor the last or only block. * IF( I.LE.K ) THEN IF ( I .NE. 1 ) THEN DO 30 J = 1, I - NB, NB * * Apply H' to A(J:M,I:K) from the left * CALL DLARFB( 'Left', 'Transpose', 'Forward', $ 'Columnwise', M-J+1, K-I+1, NB, $ A( J, J ), LDA, WORK(J), LBWORK, $ A( J, I ), LDA, WORK(LBWORK*NB+NT*NT+1), $ K-I+1) 30 CONTINUE CALL DGEQR2( M-I+1, K-I+1, A( I, I ), LDA, TAU( I ), $ WORK(LBWORK*NB+NT*NT+1),IINFO ) ELSE * * Use unblocked code to factor the last or only block. * CALL DGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), $ WORK,IINFO ) END IF END IF * * Apply update to the column M+1:N when N > M * IF ( M.LT.N .AND. I.NE.1) THEN * * Form the last triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * IF ( NT .LE. NB ) THEN CALL DLARFT( 'Forward', 'Columnwise', M-I+1, K-I+1, $ A( I, I ), LDA, TAU( I ), WORK(I), LBWORK ) ELSE CALL DLARFT( 'Forward', 'Columnwise', M-I+1, K-I+1, $ A( I, I ), LDA, TAU( I ), $ WORK(LBWORK*NB+1), NT ) END IF * * Apply H' to A(1:M,M+1:N) from the left * DO 40 J = 1, K-NX, NB IB = MIN( K-J+1, NB ) CALL DLARFB( 'Left', 'Transpose', 'Forward', $ 'Columnwise', M-J+1, N-M, IB, $ A( J, J ), LDA, WORK(J), LBWORK, $ A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1), $ N-M) 40 CONTINUE IF ( NT.LE.NB ) THEN CALL DLARFB( 'Left', 'Transpose', 'Forward', $ 'Columnwise', M-J+1, N-M, K-J+1, $ A( J, J ), LDA, WORK(J), LBWORK, $ A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1), $ N-M) ELSE CALL DLARFB( 'Left', 'Transpose', 'Forward', $ 'Columnwise', M-J+1, N-M, K-J+1, $ A( J, J ), LDA, $ WORK(LBWORK*NB+1), $ NT, A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1), $ N-M) END IF END IF WORK( 1 ) = IWS RETURN * * End of DGEQRF * END