d3/d8c/dsgesv_8f_source.html

*> \brief <b> DSGESV computes the solution to system of linear equations A * X = B for GE matrices</b> (mixed precision with iterative refinement)

*

*  =========== DOCUMENTATION ===========

*

* Online html documentation available at

*            http://www.netlib.org/lapack/explore-html/

*

*> Download DSGESV + dependencies

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsgesv.f">

*> [TGZ]</a>

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsgesv.f">

*> [ZIP]</a>

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsgesv.f">

*> [TXT]</a>

*

*  Definition:

*  ===========

*

*       SUBROUTINE DSGESV( N, NRHS, A, LDA, IPIV, B, LDB, X, LDX, WORK,

*                          SWORK, ITER, INFO )

*

*       .. Scalar Arguments ..

*       INTEGER            INFO, ITER, LDA, LDB, LDX, N, NRHS

*       ..

*       .. Array Arguments ..

*       INTEGER            IPIV( * )

*       REAL               SWORK( * )

*       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), WORK( N, * ),

*      $                   X( LDX, * )

*       ..

*

*

*> \par Purpose:

*  =============

*>

*> \verbatim

*>

*> DSGESV computes the solution to a real system of linear equations

*>    A * X = B,

*> where A is an N-by-N matrix and X and B are N-by-NRHS matrices.

*>

*> DSGESV first attempts to factorize the matrix in SINGLE PRECISION

*> and use this factorization within an iterative refinement procedure

*> to produce a solution with DOUBLE PRECISION normwise backward error

*> quality (see below). If the approach fails the method switches to a

*> DOUBLE PRECISION factorization and solve.

*>

*> The iterative refinement is not going to be a winning strategy if

*> the ratio SINGLE PRECISION performance over DOUBLE PRECISION

*> performance is too small. A reasonable strategy should take the

*> number of right-hand sides and the size of the matrix into account.

*> This might be done with a call to ILAENV in the future. Up to now, we

*> always try iterative refinement.

*>

*> The iterative refinement process is stopped if

*>     ITER > ITERMAX

*> or for all the RHS we have:

*>     RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX

*> where

*>     o ITER is the number of the current iteration in the iterative

*>       refinement process

*>     o RNRM is the infinity-norm of the residual

*>     o XNRM is the infinity-norm of the solution

*>     o ANRM is the infinity-operator-norm of the matrix A

*>     o EPS is the machine epsilon returned by DLAMCH('Epsilon')

*> The values of ITERMAX and BWDMAX are fixed at 30 and 1.0D+00,

*> respectively.

*> \endverbatim

*

*  Arguments:

*  ==========

*

*> \param[in] N

*> \verbatim

*>          N is INTEGER

*>          The number of linear equations, i.e., the order of the

*>          matrix A.  N >= 0.

*> \endverbatim

*>

*> \param[in] NRHS

*> \verbatim

*>          NRHS is INTEGER

*>          The number of right hand sides, i.e., the number of columns

*>          of the matrix B.  NRHS >= 0.

*> \endverbatim

*>

*> \param[in,out] A

*> \verbatim

*>          A is DOUBLE PRECISION array,

*>          dimension (LDA,N)

*>          On entry, the N-by-N coefficient matrix A.

*>          On exit, if iterative refinement has been successfully used

*>          (INFO = 0 and ITER >= 0, see description below), then A is

*>          unchanged, if double precision factorization has been used

*>          (INFO = 0 and ITER < 0, see description below), then the

*>          array A contains the factors L and U from the factorization

*>          A = P*L*U; the unit diagonal elements of L are not stored.

*> \endverbatim

*>

*> \param[in] LDA

*> \verbatim

*>          LDA is INTEGER

*>          The leading dimension of the array A.  LDA >= max(1,N).

*> \endverbatim

*>

*> \param[out] IPIV

*> \verbatim

*>          IPIV is INTEGER array, dimension (N)

*>          The pivot indices that define the permutation matrix P;

*>          row i of the matrix was interchanged with row IPIV(i).

*>          Corresponds either to the single precision factorization

*>          (if INFO = 0 and ITER >= 0) or the double precision

*>          factorization (if INFO = 0 and ITER < 0).

*> \endverbatim

*>

*> \param[in] B

*> \verbatim

*>          B is DOUBLE PRECISION array, dimension (LDB,NRHS)

*>          The N-by-NRHS right hand side matrix B.

*> \endverbatim

*>

*> \param[in] LDB

*> \verbatim

*>          LDB is INTEGER

*>          The leading dimension of the array B.  LDB >= max(1,N).

*> \endverbatim

*>

*> \param[out] X

*> \verbatim

*>          X is DOUBLE PRECISION array, dimension (LDX,NRHS)

*>          If INFO = 0, the N-by-NRHS solution matrix X.

*> \endverbatim

*>

*> \param[in] LDX

*> \verbatim

*>          LDX is INTEGER

*>          The leading dimension of the array X.  LDX >= max(1,N).

*> \endverbatim

*>

*> \param[out] WORK

*> \verbatim

*>          WORK is DOUBLE PRECISION array, dimension (N,NRHS)

*>          This array is used to hold the residual vectors.

*> \endverbatim

*>

*> \param[out] SWORK

*> \verbatim

*>          SWORK is REAL array, dimension (N*(N+NRHS))

*>          This array is used to store the single precision matrix and

*>          the right-hand sides or solutions in single precision.

*> \endverbatim

*>

*> \param[out] ITER

*> \verbatim

*>          ITER is INTEGER

*>          < 0: iterative refinement has failed, double precision

*>               factorization has been performed

*>               -1 : the routine fell back to full precision for

*>                    implementation- or machine-specific reasons

*>               -2 : narrowing the precision induced an overflow,

*>                    the routine fell back to full precision

*>               -3 : failure of SGETRF

*>               -31: stop the iterative refinement after the 30th

*>                    iteration

*>          >= 0: iterative refinement has been successfully used.

*>               Returns the number of refinement iterations (0 if the

*>               initial solve already met the stopping criterion)

*> \endverbatim

*>

*> \param[out] INFO

*> \verbatim

*>          INFO is INTEGER

*>          = 0:  successful exit

*>          < 0:  if INFO = -i, the i-th argument had an illegal value

*>          > 0:  if INFO = i, U(i,i) computed in DOUBLE PRECISION is

*>                exactly zero.  The factorization has been completed,

*>                but the factor U is exactly singular, so the solution

*>                could not be computed.

*> \endverbatim

*

*  Authors:

*  ========

*

*> \author Univ. of Tennessee

*> \author Univ. of California Berkeley

*> \author Univ. of Colorado Denver

*> \author NAG Ltd.

*

*> \ingroup gesv_mixed

*

*  =====================================================================


      SUBROUTINE dsgesv( N, NRHS, A, LDA, IPIV, B, LDB, X, LDX, WORK,
     $                   SWORK, ITER, INFO )
*
*  -- LAPACK driver routine --
*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
*     Solves A * X = B by factorizing a single precision copy of A,
*     refining the solution in double precision, and falling back to
*     a double precision factorization (DGETRF/DGETRS) when the single
*     precision path cannot be used or does not converge.
*
*     On a return that did not fail argument checking, ITER tells
*     which path was taken:
*        ITER >= 0 : single precision factorization followed by ITER
*                    refinement steps; A is not modified.
*        ITER <  0 : double precision fallback; A is overwritten by
*                    DGETRF and INFO is the DGETRF/DGETRS status.
*
      IMPLICIT NONE
*
*     .. Scalar Arguments ..
      INTEGER            INFO, ITER, LDA, LDB, LDX, N, NRHS
*     ..
*     .. Array Arguments ..
      INTEGER            IPIV( * )
      REAL               SWORK( * )
      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), WORK( N, * ),
     $                   x( ldx, * )
*     ..
*
*  =====================================================================
*
*     .. Parameters ..
*
*     DOITREF = .TRUE. means the single precision path is always
*     attempted; the .NOT.DOITREF branch below is the ITER = -1 exit.
*
      LOGICAL            DOITREF
      parameter( doitref = .true. )
*
*     Maximum number of refinement steps.
*
      INTEGER            ITERMAX
      parameter( itermax = 30 )
*
*     Factor applied to the backward error threshold CTE. Written as
*     a double precision literal to match its declared type.
*
      DOUBLE PRECISION   BWDMAX
      parameter( bwdmax = 1.0d+00 )
*
      DOUBLE PRECISION   NEGONE, ONE
      parameter( negone = -1.0d+0, one = 1.0d+0 )
*
*     .. Local Scalars ..
      INTEGER            I, IITER, PTSA, PTSX
      DOUBLE PRECISION   ANRM, CTE, EPS, RNRM, XNRM
*
*     .. External Subroutines ..
      EXTERNAL           daxpy, dgemm, dlacpy, dlag2s, dgetrf,
     $                   dgetrs,
     $                   sgetrf, sgetrs, slag2d, xerbla
*     ..
*     .. External Functions ..
      INTEGER            IDAMAX
      DOUBLE PRECISION   DLAMCH, DLANGE
      EXTERNAL           idamax, dlamch, dlange
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          abs, dble, max, sqrt
*     ..
*     .. Executable Statements ..
*
      info = 0
      iter = 0
*
*     Test the input parameters. INFO = -k flags the k-th argument.
*
      IF( n.LT.0 ) THEN
         info = -1
      ELSE IF( nrhs.LT.0 ) THEN
         info = -2
      ELSE IF( lda.LT.max( 1, n ) ) THEN
         info = -4
      ELSE IF( ldb.LT.max( 1, n ) ) THEN
         info = -7
      ELSE IF( ldx.LT.max( 1, n ) ) THEN
         info = -9
      END IF
      IF( info.NE.0 ) THEN
         CALL xerbla( 'DSGESV', -info )
         RETURN
      END IF
*
*     Quick return if (N.EQ.0).
*
      IF( n.EQ.0 )
     $   RETURN
*
*     Skip single precision iterative refinement if a priori slower
*     than double precision factorization.
*
      IF( .NOT.doitref ) THEN
         iter = -1
         GO TO 40
      END IF
*
*     Compute some constants. CTE is the per-column threshold factor:
*     a column is accepted when RNRM <= XNRM*CTE. WORK is only
*     scratch space for DLANGE here.
*
      anrm = dlange( 'I', n, n, a, lda, work )
      eps = dlamch( 'Epsilon' )
      cte = anrm*eps*sqrt( dble( n ) )*bwdmax
*
*     Set the indices PTSA, PTSX for referencing SA and SX in SWORK.
*     SA (N-by-N) comes first, SX (N-by-NRHS) follows it; both use
*     leading dimension N.
*
      ptsa = 1
      ptsx = ptsa + n*n
*
*     Convert B from double precision to single precision and store the
*     result in SX.
*
      CALL dlag2s( n, nrhs, b, ldb, swork( ptsx ), n, info )
*
*     A nonzero INFO from DLAG2S means the conversion failed.
*
      IF( info.NE.0 ) THEN
         iter = -2
         GO TO 40
      END IF
*
*     Convert A from double precision to single precision and store the
*     result in SA.
*
      CALL dlag2s( n, n, a, lda, swork( ptsa ), n, info )
*
      IF( info.NE.0 ) THEN
         iter = -2
         GO TO 40
      END IF
*
*     Compute the LU factorization of SA.
*
      CALL sgetrf( n, n, swork( ptsa ), n, ipiv, info )
*
      IF( info.NE.0 ) THEN
         iter = -3
         GO TO 40
      END IF
*
*     Solve the system SA*SX = SB.
*
      CALL sgetrs( 'No transpose', n, nrhs, swork( ptsa ), n, ipiv,
     $             swork( ptsx ), n, info )
*
*     Convert SX back to double precision
*
      CALL slag2d( n, nrhs, swork( ptsx ), n, x, ldx, info )
*
*     Compute R = B - AX (R is WORK).
*
      CALL dlacpy( 'All', n, nrhs, b, ldb, work, n )
*
      CALL dgemm( 'No Transpose', 'No Transpose', n, nrhs, n, negone,
     $            a,
     $            lda, x, ldx, one, work, n )
*
*     Check whether the NRHS normwise backward errors satisfy the
*     stopping criterion. If yes, set ITER=0 and return.
*     XNRM and RNRM are the largest absolute entries of column I of
*     X and of the residual; the first column that fails the test
*     sends control to the refinement loop.
*
      DO i = 1, nrhs
         xnrm = abs( x( idamax( n, x( 1, i ), 1 ), i ) )
         rnrm = abs( work( idamax( n, work( 1, i ), 1 ), i ) )
         IF( rnrm.GT.xnrm*cte )
     $      GO TO 10
      END DO
*
*     If we are here, the NRHS normwise backward errors satisfy the
*     stopping criterion. We are good to exit.
*
      iter = 0
      RETURN
*
   10 CONTINUE
*
      DO 30 iiter = 1, itermax
*
*        Convert R (in WORK) from double precision to single precision
*        and store the result in SX.
*
         CALL dlag2s( n, nrhs, work, n, swork( ptsx ), n, info )
*
         IF( info.NE.0 ) THEN
            iter = -2
            GO TO 40
         END IF
*
*        Solve the system SA*SX = SR, reusing the factors of SA.
*
         CALL sgetrs( 'No transpose', n, nrhs, swork( ptsa ), n,
     $                ipiv,
     $                swork( ptsx ), n, info )
*
*        Convert SX back to double precision and update the current
*        iterate: X := X + correction, one column at a time.
*
         CALL slag2d( n, nrhs, swork( ptsx ), n, work, n, info )
*
         DO i = 1, nrhs
            CALL daxpy( n, one, work( 1, i ), 1, x( 1, i ), 1 )
         END DO
*
*        Compute R = B - AX (R is WORK).
*
         CALL dlacpy( 'All', n, nrhs, b, ldb, work, n )
*
         CALL dgemm( 'No Transpose', 'No Transpose', n, nrhs, n,
     $               negone,
     $               a, lda, x, ldx, one, work, n )
*
*        Check whether the NRHS normwise backward errors satisfy the
*        stopping criterion. If yes, set ITER=IITER>0 and return.
*
         DO i = 1, nrhs
            xnrm = abs( x( idamax( n, x( 1, i ), 1 ), i ) )
            rnrm = abs( work( idamax( n, work( 1, i ), 1 ), i ) )
            IF( rnrm.GT.xnrm*cte )
     $         GO TO 20
         END DO
*
*        If we are here, the NRHS normwise backward errors satisfy the
*        stopping criterion, we are good to exit.
*
         iter = iiter
*
         RETURN
*
   20    CONTINUE
*
   30 CONTINUE
*
*     If we are at this place of the code, this is because we have
*     performed ITERMAX iterations and never satisfied the stopping
*     criterion, set up the ITER flag accordingly and follow up
*     on double precision routine.
*
      iter = -itermax - 1
*
   40 CONTINUE
*
*     Double precision fallback. Control arrives here when the single
*     precision path was skipped (ITER = -1), when a conversion to
*     single precision failed (ITER = -2), when SGETRF failed
*     (ITER = -3), or when refinement did not converge within ITERMAX
*     steps (ITER = -ITERMAX-1). A is overwritten by its factors.
*
      CALL dgetrf( n, n, a, lda, ipiv, info )
*
*     A nonzero INFO from DGETRF is returned to the caller as is;
*     X is not computed in that case.
*
      IF( info.NE.0 )
     $   RETURN
*
      CALL dlacpy( 'All', n, nrhs, b, ldb, x, ldx )
      CALL dgetrs( 'No transpose', n, nrhs, a, lda, ipiv, x, ldx,
     $             info )
*
      RETURN
*
*     End of DSGESV
*
      END

xerbla
subroutine xerbla(srname, info)
Definition cblat2.f:3285

dlag2s
subroutine dlag2s(m, n, a, lda, sa, ldsa, info)
DLAG2S converts a double precision matrix to a single precision matrix.
Definition dlag2s.f:106

slag2d
subroutine slag2d(m, n, sa, ldsa, a, lda, info)
SLAG2D converts a single precision matrix to a double precision matrix.
Definition slag2d.f:102

daxpy
subroutine daxpy(n, da, dx, incx, dy, incy)
DAXPY
Definition daxpy.f:89

dgemm
subroutine dgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
DGEMM
Definition dgemm.f:188

dsgesv
subroutine dsgesv(n, nrhs, a, lda, ipiv, b, ldb, x, ldx, work, swork, iter, info)
DSGESV computes the solution to system of linear equations A * X = B for GE matrices (mixed precision...
Definition dsgesv.f:193

sgetrf
subroutine sgetrf(m, n, a, lda, ipiv, info)
SGETRF
Definition sgetrf.f:106

dgetrf
subroutine dgetrf(m, n, a, lda, ipiv, info)
DGETRF
Definition dgetrf.f:106

sgetrs
subroutine sgetrs(trans, n, nrhs, a, lda, ipiv, b, ldb, info)
SGETRS
Definition sgetrs.f:119

dgetrs
subroutine dgetrs(trans, n, nrhs, a, lda, ipiv, b, ldb, info)
DGETRS
Definition dgetrs.f:119

dlacpy
subroutine dlacpy(uplo, m, n, a, lda, b, ldb)
DLACPY copies all or part of one two-dimensional array to another.
Definition dlacpy.f:101