LAPACK 3.12.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ dsgesv()

subroutine dsgesv ( integer  n,
integer  nrhs,
double precision, dimension( lda, * )  a,
integer  lda,
integer, dimension( * )  ipiv,
double precision, dimension( ldb, * )  b,
integer  ldb,
double precision, dimension( ldx, * )  x,
integer  ldx,
double precision, dimension( n, * )  work,
real, dimension( * )  swork,
integer  iter,
integer  info 
)

DSGESV computes the solution to system of linear equations A * X = B for GE matrices (mixed precision with iterative refinement)

Download DSGESV + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 DSGESV computes the solution to a real system of linear equations
    A * X = B,
 where A is an N-by-N matrix and X and B are N-by-NRHS matrices.

 DSGESV first attempts to factorize the matrix in SINGLE PRECISION
 and use this factorization within an iterative refinement procedure
 to produce a solution with DOUBLE PRECISION normwise backward error
 quality (see below). If the approach fails the method switches to a
 DOUBLE PRECISION factorization and solve.

 The iterative refinement is not going to be a winning strategy if
 the ratio SINGLE PRECISION performance over DOUBLE PRECISION
 performance is too small. A reasonable strategy should take the
 number of right-hand sides and the size of the matrix into account.
 This might be done with a call to ILAENV in the future. Up to now, we
 always try iterative refinement.

 The iterative refinement process is stopped if
     ITER > ITERMAX
 or for all the RHS we have:
     RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX
 where
     o ITER is the number of the current iteration in the iterative
       refinement process
     o RNRM is the infinity-norm of the residual
     o XNRM is the infinity-norm of the solution
     o ANRM is the infinity-operator-norm of the matrix A
     o EPS is the machine epsilon returned by DLAMCH('Epsilon')
 The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00
 respectively.
Parameters
[in]N
          N is INTEGER
          The number of linear equations, i.e., the order of the
          matrix A.  N >= 0.
[in]NRHS
          NRHS is INTEGER
          The number of right hand sides, i.e., the number of columns
          of the matrix B.  NRHS >= 0.
[in,out]A
          A is DOUBLE PRECISION array,
          dimension (LDA,N)
          On entry, the N-by-N coefficient matrix A.
          On exit, if iterative refinement has been successfully used
          (INFO = 0 and ITER >= 0, see description below), then A is
          unchanged, if double precision factorization has been used
          (INFO = 0 and ITER < 0, see description below), then the
          array A contains the factors L and U from the factorization
          A = P*L*U; the unit diagonal elements of L are not stored.
[in]LDA
          LDA is INTEGER
          The leading dimension of the array A.  LDA >= max(1,N).
[out]IPIV
          IPIV is INTEGER array, dimension (N)
          The pivot indices that define the permutation matrix P;
          row i of the matrix was interchanged with row IPIV(i).
          Corresponds either to the single precision factorization
          (if INFO = 0 and ITER >= 0) or the double precision
          factorization (if INFO = 0 and ITER < 0).
[in]B
          B is DOUBLE PRECISION array, dimension (LDB,NRHS)
          The N-by-NRHS right hand side matrix B.
[in]LDB
          LDB is INTEGER
          The leading dimension of the array B.  LDB >= max(1,N).
[out]X
          X is DOUBLE PRECISION array, dimension (LDX,NRHS)
          If INFO = 0, the N-by-NRHS solution matrix X.
[in]LDX
          LDX is INTEGER
          The leading dimension of the array X.  LDX >= max(1,N).
[out]WORK
          WORK is DOUBLE PRECISION array, dimension (N,NRHS)
          This array is used to hold the residual vectors.
[out]SWORK
          SWORK is REAL array, dimension (N*(N+NRHS))
          This array is used to use the single precision matrix and the
          right-hand sides or solutions in single precision.
[out]ITER
          ITER is INTEGER
          < 0: iterative refinement has failed, double precision
               factorization has been performed
               -1 : the routine fell back to full precision for
                    implementation- or machine-specific reasons
               -2 : narrowing the precision induced an overflow,
                    the routine fell back to full precision
               -3 : failure of SGETRF
               -31: stop the iterative refinement after the 30th
                    iterations
          > 0: iterative refinement has been successfully used.
               Returns the number of iterations
[out]INFO
          INFO is INTEGER
          = 0:  successful exit
          < 0:  if INFO = -i, the i-th argument had an illegal value
          > 0:  if INFO = i, U(i,i) computed in DOUBLE PRECISION is
                exactly zero.  The factorization has been completed,
                but the factor U is exactly singular, so the solution
                could not be computed.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.

Definition at line 193 of file dsgesv.f.

195*
196* -- LAPACK driver routine --
197* -- LAPACK is a software package provided by Univ. of Tennessee, --
198* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
199*
200* .. Scalar Arguments ..
201 INTEGER INFO, ITER, LDA, LDB, LDX, N, NRHS
202* ..
203* .. Array Arguments ..
204 INTEGER IPIV( * )
205 REAL SWORK( * )
206 DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( N, * ),
207 $ X( LDX, * )
208* ..
209*
210* =====================================================================
211*
212* .. Parameters ..
213 LOGICAL DOITREF
214 parameter( doitref = .true. )
215*
216 INTEGER ITERMAX
217 parameter( itermax = 30 )
218*
219 DOUBLE PRECISION BWDMAX
220 parameter( bwdmax = 1.0e+00 )
221*
222 DOUBLE PRECISION NEGONE, ONE
223 parameter( negone = -1.0d+0, one = 1.0d+0 )
224*
225* .. Local Scalars ..
226 INTEGER I, IITER, PTSA, PTSX
227 DOUBLE PRECISION ANRM, CTE, EPS, RNRM, XNRM
228*
229* .. External Subroutines ..
230 EXTERNAL daxpy, dgemm, dlacpy, dlag2s, dgetrf, dgetrs,
232* ..
233* .. External Functions ..
234 INTEGER IDAMAX
235 DOUBLE PRECISION DLAMCH, DLANGE
236 EXTERNAL idamax, dlamch, dlange
237* ..
238* .. Intrinsic Functions ..
239 INTRINSIC abs, dble, max, sqrt
240* ..
241* .. Executable Statements ..
242*
243 info = 0
244 iter = 0
245*
246* Test the input parameters.
247*
248 IF( n.LT.0 ) THEN
249 info = -1
250 ELSE IF( nrhs.LT.0 ) THEN
251 info = -2
252 ELSE IF( lda.LT.max( 1, n ) ) THEN
253 info = -4
254 ELSE IF( ldb.LT.max( 1, n ) ) THEN
255 info = -7
256 ELSE IF( ldx.LT.max( 1, n ) ) THEN
257 info = -9
258 END IF
259 IF( info.NE.0 ) THEN
260 CALL xerbla( 'DSGESV', -info )
261 RETURN
262 END IF
263*
264* Quick return if (N.EQ.0).
265*
266 IF( n.EQ.0 )
267 $ RETURN
268*
269* Skip single precision iterative refinement if a priori slower
270* than double precision factorization.
271*
272 IF( .NOT.doitref ) THEN
273 iter = -1
274 GO TO 40
275 END IF
276*
277* Compute some constants.
278*
279 anrm = dlange( 'I', n, n, a, lda, work )
280 eps = dlamch( 'Epsilon' )
281 cte = anrm*eps*sqrt( dble( n ) )*bwdmax
282*
283* Set the indices PTSA, PTSX for referencing SA and SX in SWORK.
284*
285 ptsa = 1
286 ptsx = ptsa + n*n
287*
288* Convert B from double precision to single precision and store the
289* result in SX.
290*
291 CALL dlag2s( n, nrhs, b, ldb, swork( ptsx ), n, info )
292*
293 IF( info.NE.0 ) THEN
294 iter = -2
295 GO TO 40
296 END IF
297*
298* Convert A from double precision to single precision and store the
299* result in SA.
300*
301 CALL dlag2s( n, n, a, lda, swork( ptsa ), n, info )
302*
303 IF( info.NE.0 ) THEN
304 iter = -2
305 GO TO 40
306 END IF
307*
308* Compute the LU factorization of SA.
309*
310 CALL sgetrf( n, n, swork( ptsa ), n, ipiv, info )
311*
312 IF( info.NE.0 ) THEN
313 iter = -3
314 GO TO 40
315 END IF
316*
317* Solve the system SA*SX = SB.
318*
319 CALL sgetrs( 'No transpose', n, nrhs, swork( ptsa ), n, ipiv,
320 $ swork( ptsx ), n, info )
321*
322* Convert SX back to double precision
323*
324 CALL slag2d( n, nrhs, swork( ptsx ), n, x, ldx, info )
325*
326* Compute R = B - AX (R is WORK).
327*
328 CALL dlacpy( 'All', n, nrhs, b, ldb, work, n )
329*
330 CALL dgemm( 'No Transpose', 'No Transpose', n, nrhs, n, negone, a,
331 $ lda, x, ldx, one, work, n )
332*
333* Check whether the NRHS normwise backward errors satisfy the
334* stopping criterion. If yes, set ITER=0 and return.
335*
336 DO i = 1, nrhs
337 xnrm = abs( x( idamax( n, x( 1, i ), 1 ), i ) )
338 rnrm = abs( work( idamax( n, work( 1, i ), 1 ), i ) )
339 IF( rnrm.GT.xnrm*cte )
340 $ GO TO 10
341 END DO
342*
343* If we are here, the NRHS normwise backward errors satisfy the
344* stopping criterion. We are good to exit.
345*
346 iter = 0
347 RETURN
348*
349 10 CONTINUE
350*
351 DO 30 iiter = 1, itermax
352*
353* Convert R (in WORK) from double precision to single precision
354* and store the result in SX.
355*
356 CALL dlag2s( n, nrhs, work, n, swork( ptsx ), n, info )
357*
358 IF( info.NE.0 ) THEN
359 iter = -2
360 GO TO 40
361 END IF
362*
363* Solve the system SA*SX = SR.
364*
365 CALL sgetrs( 'No transpose', n, nrhs, swork( ptsa ), n, ipiv,
366 $ swork( ptsx ), n, info )
367*
368* Convert SX back to double precision and update the current
369* iterate.
370*
371 CALL slag2d( n, nrhs, swork( ptsx ), n, work, n, info )
372*
373 DO i = 1, nrhs
374 CALL daxpy( n, one, work( 1, i ), 1, x( 1, i ), 1 )
375 END DO
376*
377* Compute R = B - AX (R is WORK).
378*
379 CALL dlacpy( 'All', n, nrhs, b, ldb, work, n )
380*
381 CALL dgemm( 'No Transpose', 'No Transpose', n, nrhs, n, negone,
382 $ a, lda, x, ldx, one, work, n )
383*
384* Check whether the NRHS normwise backward errors satisfy the
385* stopping criterion. If yes, set ITER=IITER>0 and return.
386*
387 DO i = 1, nrhs
388 xnrm = abs( x( idamax( n, x( 1, i ), 1 ), i ) )
389 rnrm = abs( work( idamax( n, work( 1, i ), 1 ), i ) )
390 IF( rnrm.GT.xnrm*cte )
391 $ GO TO 20
392 END DO
393*
394* If we are here, the NRHS normwise backward errors satisfy the
395* stopping criterion, we are good to exit.
396*
397 iter = iiter
398*
399 RETURN
400*
401 20 CONTINUE
402*
403 30 CONTINUE
404*
405* If we are at this place of the code, this is because we have
406* performed ITER=ITERMAX iterations and never satisfied the
407* stopping criterion, set up the ITER flag accordingly and follow up
408* on double precision routine.
409*
410 iter = -itermax - 1
411*
412 40 CONTINUE
413*
414* Single-precision iterative refinement failed to converge to a
415* satisfactory solution, so we resort to double precision.
416*
417 CALL dgetrf( n, n, a, lda, ipiv, info )
418*
419 IF( info.NE.0 )
420 $ RETURN
421*
422 CALL dlacpy( 'All', n, nrhs, b, ldb, x, ldx )
423 CALL dgetrs( 'No transpose', n, nrhs, a, lda, ipiv, x, ldx,
424 $ info )
425*
426 RETURN
427*
428* End of DSGESV
429*
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine dlag2s(m, n, a, lda, sa, ldsa, info)
DLAG2S converts a double precision matrix to a single precision matrix.
Definition dlag2s.f:108
subroutine slag2d(m, n, sa, ldsa, a, lda, info)
SLAG2D converts a single precision matrix to a double precision matrix.
Definition slag2d.f:104
subroutine daxpy(n, da, dx, incx, dy, incy)
DAXPY
Definition daxpy.f:89
subroutine dgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
DGEMM
Definition dgemm.f:188
subroutine sgetrf(m, n, a, lda, ipiv, info)
SGETRF
Definition sgetrf.f:108
subroutine dgetrf(m, n, a, lda, ipiv, info)
DGETRF
Definition dgetrf.f:108
subroutine sgetrs(trans, n, nrhs, a, lda, ipiv, b, ldb, info)
SGETRS
Definition sgetrs.f:121
subroutine dgetrs(trans, n, nrhs, a, lda, ipiv, b, ldb, info)
DGETRS
Definition dgetrs.f:121
integer function idamax(n, dx, incx)
IDAMAX
Definition idamax.f:71
subroutine dlacpy(uplo, m, n, a, lda, b, ldb)
DLACPY copies all or part of one two-dimensional array to another.
Definition dlacpy.f:103
double precision function dlamch(cmach)
DLAMCH
Definition dlamch.f:69
double precision function dlange(norm, m, n, a, lda, work)
DLANGE returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value ...
Definition dlange.f:114
Here is the call graph for this function:
Here is the caller graph for this function: