LAPACK  3.8.0
LAPACK: Linear Algebra PACKage

◆ dsgesv()

subroutine dsgesv ( integer  N,
integer  NRHS,
double precision, dimension( lda, * )  A,
integer  LDA,
integer, dimension( * )  IPIV,
double precision, dimension( ldb, * )  B,
integer  LDB,
double precision, dimension( ldx, * )  X,
integer  LDX,
double precision, dimension( n, * )  WORK,
real, dimension( * )  SWORK,
integer  ITER,
integer  INFO 
)

DSGESV computes the solution to system of linear equations A * X = B for GE matrices (mixed precision with iterative refinement)

Download DSGESV + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 DSGESV computes the solution to a real system of linear equations
    A * X = B,
 where A is an N-by-N matrix and X and B are N-by-NRHS matrices.

 DSGESV first attempts to factorize the matrix in SINGLE PRECISION
 and use this factorization within an iterative refinement procedure
 to produce a solution with DOUBLE PRECISION normwise backward error
 quality (see below). If the approach fails the method switches to a
 DOUBLE PRECISION factorization and solve.

 The iterative refinement is not going to be a winning strategy if
 the ratio SINGLE PRECISION performance over DOUBLE PRECISION
 performance is too small. A reasonable strategy should take the
 number of right-hand sides and the size of the matrix into account.
 This might be done with a call to ILAENV in the future. Up to now, we
 always try iterative refinement.

 The iterative refinement process is stopped if
     ITER > ITERMAX
 or for all the RHS we have:
     RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX
 where
     o ITER is the number of the current iteration in the iterative
       refinement process
     o RNRM is the infinity-norm of the residual
     o XNRM is the infinity-norm of the solution
     o ANRM is the infinity-operator-norm of the matrix A
     o EPS is the machine epsilon returned by DLAMCH('Epsilon')
 The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00
 respectively.
Parameters
[in]N
          N is INTEGER
          The number of linear equations, i.e., the order of the
          matrix A.  N >= 0.
[in]NRHS
          NRHS is INTEGER
          The number of right hand sides, i.e., the number of columns
          of the matrix B.  NRHS >= 0.
[in,out]A
          A is DOUBLE PRECISION array,
          dimension (LDA,N)
          On entry, the N-by-N coefficient matrix A.
          On exit, if iterative refinement has been successfully used
          (INFO.EQ.0 and ITER.GE.0, see description below), then A is
          unchanged, if double precision factorization has been used
          (INFO.EQ.0 and ITER.LT.0, see description below), then the
          array A contains the factors L and U from the factorization
          A = P*L*U; the unit diagonal elements of L are not stored.
[in]LDA
          LDA is INTEGER
          The leading dimension of the array A.  LDA >= max(1,N).
[out]IPIV
          IPIV is INTEGER array, dimension (N)
          The pivot indices that define the permutation matrix P;
          row i of the matrix was interchanged with row IPIV(i).
          Corresponds either to the single precision factorization
          (if INFO.EQ.0 and ITER.GE.0) or the double precision
          factorization (if INFO.EQ.0 and ITER.LT.0).
[in]B
          B is DOUBLE PRECISION array, dimension (LDB,NRHS)
          The N-by-NRHS right hand side matrix B.
[in]LDB
          LDB is INTEGER
          The leading dimension of the array B.  LDB >= max(1,N).
[out]X
          X is DOUBLE PRECISION array, dimension (LDX,NRHS)
          If INFO = 0, the N-by-NRHS solution matrix X.
[in]LDX
          LDX is INTEGER
          The leading dimension of the array X.  LDX >= max(1,N).
[out]WORK
          WORK is DOUBLE PRECISION array, dimension (N,NRHS)
          This array is used to hold the residual vectors.
[out]SWORK
          SWORK is REAL array, dimension (N*(N+NRHS))
          This array is used to use the single precision matrix and the
          right-hand sides or solutions in single precision.
[out]ITER
          ITER is INTEGER
          < 0: iterative refinement has failed, double precision
               factorization has been performed
               -1 : the routine fell back to full precision for
                    implementation- or machine-specific reasons
               -2 : narrowing the precision induced an overflow,
                    the routine fell back to full precision
               -3 : failure of SGETRF
               -31: stop the iterative refinement after the 30th
                    iterations
          > 0: iterative refinement has been successfully used.
               Returns the number of iterations
[out]INFO
          INFO is INTEGER
          = 0:  successful exit
          < 0:  if INFO = -i, the i-th argument had an illegal value
          > 0:  if INFO = i, U(i,i) computed in DOUBLE PRECISION is
                exactly zero.  The factorization has been completed,
                but the factor U is exactly singular, so the solution
                could not be computed.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Date
June 2016

Definition at line 197 of file dsgesv.f.

197 *
198 * -- LAPACK driver routine (version 3.8.0) --
199 * -- LAPACK is a software package provided by Univ. of Tennessee, --
200 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
201 * June 2016
202 *
203 * .. Scalar Arguments ..
204  INTEGER info, iter, lda, ldb, ldx, n, nrhs
205 * ..
206 * .. Array Arguments ..
207  INTEGER ipiv( * )
208  REAL swork( * )
209  DOUBLE PRECISION a( lda, * ), b( ldb, * ), work( n, * ),
210  $ x( ldx, * )
211 * ..
212 *
213 * =====================================================================
214 *
215 * .. Parameters ..
216  LOGICAL doitref
217  parameter( doitref = .true. )
218 *
219  INTEGER itermax
220  parameter( itermax = 30 )
221 *
222  DOUBLE PRECISION bwdmax
223  parameter( bwdmax = 1.0e+00 )
224 *
225  DOUBLE PRECISION negone, one
226  parameter( negone = -1.0d+0, one = 1.0d+0 )
227 *
228 * .. Local Scalars ..
229  INTEGER i, iiter, ptsa, ptsx
230  DOUBLE PRECISION anrm, cte, eps, rnrm, xnrm
231 *
232 * .. External Subroutines ..
233  EXTERNAL daxpy, dgemm, dlacpy, dlag2s, dgetrf, dgetrs,
235 * ..
236 * .. External Functions ..
237  INTEGER idamax
238  DOUBLE PRECISION dlamch, dlange
239  EXTERNAL idamax, dlamch, dlange
240 * ..
241 * .. Intrinsic Functions ..
242  INTRINSIC abs, dble, max, sqrt
243 * ..
244 * .. Executable Statements ..
245 *
246  info = 0
247  iter = 0
248 *
249 * Test the input parameters.
250 *
251  IF( n.LT.0 ) THEN
252  info = -1
253  ELSE IF( nrhs.LT.0 ) THEN
254  info = -2
255  ELSE IF( lda.LT.max( 1, n ) ) THEN
256  info = -4
257  ELSE IF( ldb.LT.max( 1, n ) ) THEN
258  info = -7
259  ELSE IF( ldx.LT.max( 1, n ) ) THEN
260  info = -9
261  END IF
262  IF( info.NE.0 ) THEN
263  CALL xerbla( 'DSGESV', -info )
264  RETURN
265  END IF
266 *
267 * Quick return if (N.EQ.0).
268 *
269  IF( n.EQ.0 )
270  $ RETURN
271 *
272 * Skip single precision iterative refinement if a priori slower
273 * than double precision factorization.
274 *
275  IF( .NOT.doitref ) THEN
276  iter = -1
277  GO TO 40
278  END IF
279 *
280 * Compute some constants.
281 *
282  anrm = dlange( 'I', n, n, a, lda, work )
283  eps = dlamch( 'Epsilon' )
284  cte = anrm*eps*sqrt( dble( n ) )*bwdmax
285 *
286 * Set the indices PTSA, PTSX for referencing SA and SX in SWORK.
287 *
288  ptsa = 1
289  ptsx = ptsa + n*n
290 *
291 * Convert B from double precision to single precision and store the
292 * result in SX.
293 *
294  CALL dlag2s( n, nrhs, b, ldb, swork( ptsx ), n, info )
295 *
296  IF( info.NE.0 ) THEN
297  iter = -2
298  GO TO 40
299  END IF
300 *
301 * Convert A from double precision to single precision and store the
302 * result in SA.
303 *
304  CALL dlag2s( n, n, a, lda, swork( ptsa ), n, info )
305 *
306  IF( info.NE.0 ) THEN
307  iter = -2
308  GO TO 40
309  END IF
310 *
311 * Compute the LU factorization of SA.
312 *
313  CALL sgetrf( n, n, swork( ptsa ), n, ipiv, info )
314 *
315  IF( info.NE.0 ) THEN
316  iter = -3
317  GO TO 40
318  END IF
319 *
320 * Solve the system SA*SX = SB.
321 *
322  CALL sgetrs( 'No transpose', n, nrhs, swork( ptsa ), n, ipiv,
323  $ swork( ptsx ), n, info )
324 *
325 * Convert SX back to double precision
326 *
327  CALL slag2d( n, nrhs, swork( ptsx ), n, x, ldx, info )
328 *
329 * Compute R = B - AX (R is WORK).
330 *
331  CALL dlacpy( 'All', n, nrhs, b, ldb, work, n )
332 *
333  CALL dgemm( 'No Transpose', 'No Transpose', n, nrhs, n, negone, a,
334  $ lda, x, ldx, one, work, n )
335 *
336 * Check whether the NRHS normwise backward errors satisfy the
337 * stopping criterion. If yes, set ITER=0 and return.
338 *
339  DO i = 1, nrhs
340  xnrm = abs( x( idamax( n, x( 1, i ), 1 ), i ) )
341  rnrm = abs( work( idamax( n, work( 1, i ), 1 ), i ) )
342  IF( rnrm.GT.xnrm*cte )
343  $ GO TO 10
344  END DO
345 *
346 * If we are here, the NRHS normwise backward errors satisfy the
347 * stopping criterion. We are good to exit.
348 *
349  iter = 0
350  RETURN
351 *
352  10 CONTINUE
353 *
354  DO 30 iiter = 1, itermax
355 *
356 * Convert R (in WORK) from double precision to single precision
357 * and store the result in SX.
358 *
359  CALL dlag2s( n, nrhs, work, n, swork( ptsx ), n, info )
360 *
361  IF( info.NE.0 ) THEN
362  iter = -2
363  GO TO 40
364  END IF
365 *
366 * Solve the system SA*SX = SR.
367 *
368  CALL sgetrs( 'No transpose', n, nrhs, swork( ptsa ), n, ipiv,
369  $ swork( ptsx ), n, info )
370 *
371 * Convert SX back to double precision and update the current
372 * iterate.
373 *
374  CALL slag2d( n, nrhs, swork( ptsx ), n, work, n, info )
375 *
376  DO i = 1, nrhs
377  CALL daxpy( n, one, work( 1, i ), 1, x( 1, i ), 1 )
378  END DO
379 *
380 * Compute R = B - AX (R is WORK).
381 *
382  CALL dlacpy( 'All', n, nrhs, b, ldb, work, n )
383 *
384  CALL dgemm( 'No Transpose', 'No Transpose', n, nrhs, n, negone,
385  $ a, lda, x, ldx, one, work, n )
386 *
387 * Check whether the NRHS normwise backward errors satisfy the
388 * stopping criterion. If yes, set ITER=IITER>0 and return.
389 *
390  DO i = 1, nrhs
391  xnrm = abs( x( idamax( n, x( 1, i ), 1 ), i ) )
392  rnrm = abs( work( idamax( n, work( 1, i ), 1 ), i ) )
393  IF( rnrm.GT.xnrm*cte )
394  $ GO TO 20
395  END DO
396 *
397 * If we are here, the NRHS normwise backward errors satisfy the
398 * stopping criterion, we are good to exit.
399 *
400  iter = iiter
401 *
402  RETURN
403 *
404  20 CONTINUE
405 *
406  30 CONTINUE
407 *
408 * If we are at this place of the code, this is because we have
409 * performed ITER=ITERMAX iterations and never satisified the
410 * stopping criterion, set up the ITER flag accordingly and follow up
411 * on double precision routine.
412 *
413  iter = -itermax - 1
414 *
415  40 CONTINUE
416 *
417 * Single-precision iterative refinement failed to converge to a
418 * satisfactory solution, so we resort to double precision.
419 *
420  CALL dgetrf( n, n, a, lda, ipiv, info )
421 *
422  IF( info.NE.0 )
423  $ RETURN
424 *
425  CALL dlacpy( 'All', n, nrhs, b, ldb, x, ldx )
426  CALL dgetrs( 'No transpose', n, nrhs, a, lda, ipiv, x, ldx,
427  $ info )
428 *
429  RETURN
430 *
431 * End of DSGESV.
432 *
subroutine dlacpy(UPLO, M, N, A, LDA, B, LDB)
DLACPY copies all or part of one two-dimensional array to another.
Definition: dlacpy.f:105
double precision function dlamch(CMACH)
DLAMCH
Definition: dlamch.f:65
subroutine sgetrs(TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO)
SGETRS
Definition: sgetrs.f:123
subroutine dgetrf(M, N, A, LDA, IPIV, INFO)
DGETRF
Definition: dgetrf.f:110
integer function idamax(N, DX, INCX)
IDAMAX
Definition: idamax.f:73
subroutine daxpy(N, DA, DX, INCX, DY, INCY)
DAXPY
Definition: daxpy.f:91
subroutine dgetrs(TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO)
DGETRS
Definition: dgetrs.f:123
subroutine dgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
DGEMM
Definition: dgemm.f:189
subroutine dlag2s(M, N, A, LDA, SA, LDSA, INFO)
DLAG2S converts a double precision matrix to a single precision matrix.
Definition: dlag2s.f:110
subroutine slag2d(M, N, SA, LDSA, A, LDA, INFO)
SLAG2D converts a single precision matrix to a double precision matrix.
Definition: slag2d.f:106
double precision function dlange(NORM, M, N, A, LDA, WORK)
DLANGE returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value ...
Definition: dlange.f:116
subroutine sgetrf(M, N, A, LDA, IPIV, INFO)
SGETRF
Definition: sgetrf.f:110
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
Here is the call graph for this function:
Here is the caller graph for this function: