001:       SUBROUTINE DSGESV( N, NRHS, A, LDA, IPIV, B, LDB, X, LDX, WORK,
002:      +                   SWORK, ITER, INFO )
003: *
004: *  -- LAPACK PROTOTYPE driver routine (version 3.2) --
005: *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
006: *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
007: *     February 2007
008: *
009: *     ..
010: *     .. Scalar Arguments ..
011:       INTEGER            INFO, ITER, LDA, LDB, LDX, N, NRHS
012: *     ..
013: *     .. Array Arguments ..
014:       INTEGER            IPIV( * )
015:       REAL               SWORK( * )
016:       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), WORK( N, * ),
017:      +                   X( LDX, * )
018: *     ..
019: *
020: *  Purpose
021: *  =======
022: *
023: *  DSGESV computes the solution to a real system of linear equations
024: *     A * X = B,
025: *  where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
026: *
027: *  DSGESV first attempts to factorize the matrix in SINGLE PRECISION
028: *  and use this factorization within an iterative refinement procedure
029: *  to produce a solution with DOUBLE PRECISION normwise backward error
030: *  quality (see below). If the approach fails the method switches to a
031: *  DOUBLE PRECISION factorization and solve.
032: *
033: *  The iterative refinement is not going to be a winning strategy if
034: *  the ratio SINGLE PRECISION performance over DOUBLE PRECISION
035: *  performance is too small. A reasonable strategy should take the
036: *  number of right-hand sides and the size of the matrix into account.
037: *  This might be done with a call to ILAENV in the future. Up to now, we
038: *  always try iterative refinement.
039: *
040: *  The iterative refinement process is stopped if
041: *      ITER > ITERMAX
042: *  or for all the RHS we have:
043: *      RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX
044: *  where
045: *      o ITER is the number of the current iteration in the iterative
046: *        refinement process
047: *      o RNRM is the infinity-norm of the residual
048: *      o XNRM is the infinity-norm of the solution
049: *      o ANRM is the infinity-operator-norm of the matrix A
050: *      o EPS is the machine epsilon returned by DLAMCH('Epsilon')
051: *  The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00
052: *  respectively.
053: *
054: *  Arguments
055: *  =========
056: *
057: *  N       (input) INTEGER
058: *          The number of linear equations, i.e., the order of the
059: *          matrix A.  N >= 0.
060: *
061: *  NRHS    (input) INTEGER
062: *          The number of right hand sides, i.e., the number of columns
063: *          of the matrix B.  NRHS >= 0.
064: *
065: *  A       (input or input/ouptut) DOUBLE PRECISION array,
066: *          dimension (LDA,N)
067: *          On entry, the N-by-N coefficient matrix A.
068: *          On exit, if iterative refinement has been successfully used
069: *          (INFO.EQ.0 and ITER.GE.0, see description below), then A is
070: *          unchanged, if double precision factorization has been used
071: *          (INFO.EQ.0 and ITER.LT.0, see description below), then the
072: *          array A contains the factors L and U from the factorization
073: *          A = P*L*U; the unit diagonal elements of L are not stored.
074: *
075: *  LDA     (input) INTEGER
076: *          The leading dimension of the array A.  LDA >= max(1,N).
077: *
078: *  IPIV    (output) INTEGER array, dimension (N)
079: *          The pivot indices that define the permutation matrix P;
080: *          row i of the matrix was interchanged with row IPIV(i).
081: *          Corresponds either to the single precision factorization
082: *          (if INFO.EQ.0 and ITER.GE.0) or the double precision
083: *          factorization (if INFO.EQ.0 and ITER.LT.0).
084: *
085: *  B       (input) DOUBLE PRECISION array, dimension (LDB,NRHS)
086: *          The N-by-NRHS right hand side matrix B.
087: *
088: *  LDB     (input) INTEGER
089: *          The leading dimension of the array B.  LDB >= max(1,N).
090: *
091: *  X       (output) DOUBLE PRECISION array, dimension (LDX,NRHS)
092: *          If INFO = 0, the N-by-NRHS solution matrix X.
093: *
094: *  LDX     (input) INTEGER
095: *          The leading dimension of the array X.  LDX >= max(1,N).
096: *
097: *  WORK    (workspace) DOUBLE PRECISION array, dimension (N*NRHS)
098: *          This array is used to hold the residual vectors.
099: *
100: *  SWORK   (workspace) REAL array, dimension (N*(N+NRHS))
101: *          This array is used to use the single precision matrix and the
102: *          right-hand sides or solutions in single precision.
103: *
104: *  ITER    (output) INTEGER
105: *          < 0: iterative refinement has failed, double precision
106: *               factorization has been performed
107: *               -1 : the routine fell back to full precision for
108: *                    implementation- or machine-specific reasons
109: *               -2 : narrowing the precision induced an overflow,
110: *                    the routine fell back to full precision
111: *               -3 : failure of SGETRF
112: *               -31: stop the iterative refinement after the 30th
113: *                    iterations
114: *          > 0: iterative refinement has been sucessfully used.
115: *               Returns the number of iterations
116: *
117: *  INFO    (output) INTEGER
118: *          = 0:  successful exit
119: *          < 0:  if INFO = -i, the i-th argument had an illegal value
120: *          > 0:  if INFO = i, U(i,i) computed in DOUBLE PRECISION is
121: *                exactly zero.  The factorization has been completed,
122: *                but the factor U is exactly singular, so the solution
123: *                could not be computed.
124: *
125: *  =========
126: *
127: *     .. Parameters ..
128:       LOGICAL            DOITREF
129:       PARAMETER          ( DOITREF = .TRUE. )
130: *
131:       INTEGER            ITERMAX
132:       PARAMETER          ( ITERMAX = 30 )
133: *
134:       DOUBLE PRECISION   BWDMAX
135:       PARAMETER          ( BWDMAX = 1.0E+00 )
136: *
137:       DOUBLE PRECISION   NEGONE, ONE
138:       PARAMETER          ( NEGONE = -1.0D+0, ONE = 1.0D+0 )
139: *
140: *     .. Local Scalars ..
141:       INTEGER            I, IITER, PTSA, PTSX
142:       DOUBLE PRECISION   ANRM, CTE, EPS, RNRM, XNRM
143: *
144: *     .. External Subroutines ..
145:       EXTERNAL           DAXPY, DGEMM, DLACPY, DLAG2S, SLAG2D, SGETRF,
146:      +                   SGETRS, XERBLA
147: *     ..
148: *     .. External Functions ..
149:       INTEGER            IDAMAX
150:       DOUBLE PRECISION   DLAMCH, DLANGE
151:       EXTERNAL           IDAMAX, DLAMCH, DLANGE
152: *     ..
153: *     .. Intrinsic Functions ..
154:       INTRINSIC          ABS, DBLE, MAX, SQRT
155: *     ..
156: *     .. Executable Statements ..
157: *
158:       INFO = 0
159:       ITER = 0
160: *
161: *     Test the input parameters.
162: *
163:       IF( N.LT.0 ) THEN
164:          INFO = -1
165:       ELSE IF( NRHS.LT.0 ) THEN
166:          INFO = -2
167:       ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
168:          INFO = -4
169:       ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
170:          INFO = -7
171:       ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
172:          INFO = -9
173:       END IF
174:       IF( INFO.NE.0 ) THEN
175:          CALL XERBLA( 'DSGESV', -INFO )
176:          RETURN
177:       END IF
178: *
179: *     Quick return if (N.EQ.0).
180: *
181:       IF( N.EQ.0 )
182:      +   RETURN
183: *
184: *     Skip single precision iterative refinement if a priori slower
185: *     than double precision factorization.
186: *
187:       IF( .NOT.DOITREF ) THEN
188:          ITER = -1
189:          GO TO 40
190:       END IF
191: *
192: *     Compute some constants.
193: *
194:       ANRM = DLANGE( 'I', N, N, A, LDA, WORK )
195:       EPS = DLAMCH( 'Epsilon' )
196:       CTE = ANRM*EPS*SQRT( DBLE( N ) )*BWDMAX
197: *
198: *     Set the indices PTSA, PTSX for referencing SA and SX in SWORK.
199: *
200:       PTSA = 1
201:       PTSX = PTSA + N*N
202: *
203: *     Convert B from double precision to single precision and store the
204: *     result in SX.
205: *
206:       CALL DLAG2S( N, NRHS, B, LDB, SWORK( PTSX ), N, INFO )
207: *
208:       IF( INFO.NE.0 ) THEN
209:          ITER = -2
210:          GO TO 40
211:       END IF
212: *
213: *     Convert A from double precision to single precision and store the
214: *     result in SA.
215: *
216:       CALL DLAG2S( N, N, A, LDA, SWORK( PTSA ), N, INFO )
217: *
218:       IF( INFO.NE.0 ) THEN
219:          ITER = -2
220:          GO TO 40
221:       END IF
222: *
223: *     Compute the LU factorization of SA.
224: *
225:       CALL SGETRF( N, N, SWORK( PTSA ), N, IPIV, INFO )
226: *
227:       IF( INFO.NE.0 ) THEN
228:          ITER = -3
229:          GO TO 40
230:       END IF
231: *
232: *     Solve the system SA*SX = SB.
233: *
234:       CALL SGETRS( 'No transpose', N, NRHS, SWORK( PTSA ), N, IPIV,
235:      +             SWORK( PTSX ), N, INFO )
236: *
237: *     Convert SX back to double precision
238: *
239:       CALL SLAG2D( N, NRHS, SWORK( PTSX ), N, X, LDX, INFO )
240: *
241: *     Compute R = B - AX (R is WORK).
242: *
243:       CALL DLACPY( 'All', N, NRHS, B, LDB, WORK, N )
244: *
245:       CALL DGEMM( 'No Transpose', 'No Transpose', N, NRHS, N, NEGONE, A,
246:      +            LDA, X, LDX, ONE, WORK, N )
247: *
248: *     Check whether the NRHS normwise backward errors satisfy the
249: *     stopping criterion. If yes, set ITER=0 and return.
250: *
251:       DO I = 1, NRHS
252:          XNRM = ABS( X( IDAMAX( N, X( 1, I ), 1 ), I ) )
253:          RNRM = ABS( WORK( IDAMAX( N, WORK( 1, I ), 1 ), I ) )
254:          IF( RNRM.GT.XNRM*CTE )
255:      +      GO TO 10
256:       END DO
257: *
258: *     If we are here, the NRHS normwise backward errors satisfy the
259: *     stopping criterion. We are good to exit.
260: *
261:       ITER = 0
262:       RETURN
263: *
264:    10 CONTINUE
265: *
266:       DO 30 IITER = 1, ITERMAX
267: *
268: *        Convert R (in WORK) from double precision to single precision
269: *        and store the result in SX.
270: *
271:          CALL DLAG2S( N, NRHS, WORK, N, SWORK( PTSX ), N, INFO )
272: *
273:          IF( INFO.NE.0 ) THEN
274:             ITER = -2
275:             GO TO 40
276:          END IF
277: *
278: *        Solve the system SA*SX = SR.
279: *
280:          CALL SGETRS( 'No transpose', N, NRHS, SWORK( PTSA ), N, IPIV,
281:      +                SWORK( PTSX ), N, INFO )
282: *
283: *        Convert SX back to double precision and update the current
284: *        iterate.
285: *
286:          CALL SLAG2D( N, NRHS, SWORK( PTSX ), N, WORK, N, INFO )
287: *
288:          DO I = 1, NRHS
289:             CALL DAXPY( N, ONE, WORK( 1, I ), 1, X( 1, I ), 1 )
290:          END DO
291: *
292: *        Compute R = B - AX (R is WORK).
293: *
294:          CALL DLACPY( 'All', N, NRHS, B, LDB, WORK, N )
295: *
296:          CALL DGEMM( 'No Transpose', 'No Transpose', N, NRHS, N, NEGONE,
297:      +               A, LDA, X, LDX, ONE, WORK, N )
298: *
299: *        Check whether the NRHS normwise backward errors satisfy the
300: *        stopping criterion. If yes, set ITER=IITER>0 and return.
301: *
302:          DO I = 1, NRHS
303:             XNRM = ABS( X( IDAMAX( N, X( 1, I ), 1 ), I ) )
304:             RNRM = ABS( WORK( IDAMAX( N, WORK( 1, I ), 1 ), I ) )
305:             IF( RNRM.GT.XNRM*CTE )
306:      +         GO TO 20
307:          END DO
308: *
309: *        If we are here, the NRHS normwise backward errors satisfy the
310: *        stopping criterion, we are good to exit.
311: *
312:          ITER = IITER
313: *
314:          RETURN
315: *
316:    20    CONTINUE
317: *
318:    30 CONTINUE
319: *
320: *     If we are at this place of the code, this is because we have
321: *     performed ITER=ITERMAX iterations and never satisified the
322: *     stopping criterion, set up the ITER flag accordingly and follow up
323: *     on double precision routine.
324: *
325:       ITER = -ITERMAX - 1
326: *
327:    40 CONTINUE
328: *
329: *     Single-precision iterative refinement failed to converge to a
330: *     satisfactory solution, so we resort to double precision.
331: *
332:       CALL DGETRF( N, N, A, LDA, IPIV, INFO )
333: *
334:       IF( INFO.NE.0 )
335:      +   RETURN
336: *
337:       CALL DLACPY( 'All', N, NRHS, B, LDB, X, LDX )
338:       CALL DGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, X, LDX,
339:      +             INFO )
340: *
341:       RETURN
342: *
343: *     End of DSGESV.
344: *
345:       END
346: