001:       SUBROUTINE DSGESV( N, NRHS, A, LDA, IPIV, B, LDB, X, LDX, WORK,
002:      +                   SWORK, ITER, INFO )
003: *
004: *  -- LAPACK PROTOTYPE driver routine (version 3.2) --
005: *     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
006: *     February 2007
007: *
008: *     ..
009: *     .. Scalar Arguments ..
010:       INTEGER            INFO, ITER, LDA, LDB, LDX, N, NRHS
011: *     ..
012: *     .. Array Arguments ..
013:       INTEGER            IPIV( * )
014:       REAL               SWORK( * )
015:       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), WORK( N, * ),
016:      +                   X( LDX, * )
017: *     ..
018: *
019: *  Purpose
020: *  =======
021: *
022: *  DSGESV computes the solution to a real system of linear equations
023: *     A * X = B,
024: *  where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
025: *
026: *  DSGESV first attempts to factorize the matrix in SINGLE PRECISION
027: *  and use this factorization within an iterative refinement procedure
028: *  to produce a solution with DOUBLE PRECISION normwise backward error
029: *  quality (see below). If the approach fails the method switches to a
030: *  DOUBLE PRECISION factorization and solve.
031: *
032: *  The iterative refinement is not going to be a winning strategy if
033: *  the ratio SINGLE PRECISION performance over DOUBLE PRECISION
034: *  performance is too small. A reasonable strategy should take the
035: *  number of right-hand sides and the size of the matrix into account.
036: *  This might be done with a call to ILAENV in the future. Up to now, we
037: *  always try iterative refinement.
038: *
039: *  The iterative refinement process is stopped if
040: *      ITER > ITERMAX
041: *  or for all the RHS we have:
042: *      RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX
043: *  where
044: *      o ITER is the number of the current iteration in the iterative
045: *        refinement process
046: *      o RNRM is the infinity-norm of the residual
047: *      o XNRM is the infinity-norm of the solution
048: *      o ANRM is the infinity-operator-norm of the matrix A
049: *      o EPS is the machine epsilon returned by DLAMCH('Epsilon')
050: *  The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00
051: *  respectively.
052: *
053: *  Arguments
054: *  =========
055: *
056: *  N       (input) INTEGER
057: *          The number of linear equations, i.e., the order of the
058: *          matrix A.  N >= 0.
059: *
060: *  NRHS    (input) INTEGER
061: *          The number of right hand sides, i.e., the number of columns
062: *          of the matrix B.  NRHS >= 0.
063: *
064: *  A       (input or input/ouptut) DOUBLE PRECISION array,
065: *          dimension (LDA,N)
066: *          On entry, the N-by-N coefficient matrix A.
067: *          On exit, if iterative refinement has been successfully used
068: *          (INFO.EQ.0 and ITER.GE.0, see description below), then A is
069: *          unchanged, if double precision factorization has been used
070: *          (INFO.EQ.0 and ITER.LT.0, see description below), then the
071: *          array A contains the factors L and U from the factorization
072: *          A = P*L*U; the unit diagonal elements of L are not stored.
073: *
074: *  LDA     (input) INTEGER
075: *          The leading dimension of the array A.  LDA >= max(1,N).
076: *
077: *  IPIV    (output) INTEGER array, dimension (N)
078: *          The pivot indices that define the permutation matrix P;
079: *          row i of the matrix was interchanged with row IPIV(i).
080: *          Corresponds either to the single precision factorization
081: *          (if INFO.EQ.0 and ITER.GE.0) or the double precision
082: *          factorization (if INFO.EQ.0 and ITER.LT.0).
083: *
084: *  B       (input) DOUBLE PRECISION array, dimension (LDB,NRHS)
085: *          The N-by-NRHS right hand side matrix B.
086: *
087: *  LDB     (input) INTEGER
088: *          The leading dimension of the array B.  LDB >= max(1,N).
089: *
090: *  X       (output) DOUBLE PRECISION array, dimension (LDX,NRHS)
091: *          If INFO = 0, the N-by-NRHS solution matrix X.
092: *
093: *  LDX     (input) INTEGER
094: *          The leading dimension of the array X.  LDX >= max(1,N).
095: *
096: *  WORK    (workspace) DOUBLE PRECISION array, dimension (N*NRHS)
097: *          This array is used to hold the residual vectors.
098: *
099: *  SWORK   (workspace) REAL array, dimension (N*(N+NRHS))
100: *          This array is used to use the single precision matrix and the
101: *          right-hand sides or solutions in single precision.
102: *
103: *  ITER    (output) INTEGER
104: *          < 0: iterative refinement has failed, double precision
105: *               factorization has been performed
106: *               -1 : the routine fell back to full precision for
107: *                    implementation- or machine-specific reasons
108: *               -2 : narrowing the precision induced an overflow,
109: *                    the routine fell back to full precision
110: *               -3 : failure of SGETRF
111: *               -31: stop the iterative refinement after the 30th
112: *                    iterations
113: *          > 0: iterative refinement has been sucessfully used.
114: *               Returns the number of iterations
115: *
116: *  INFO    (output) INTEGER
117: *          = 0:  successful exit
118: *          < 0:  if INFO = -i, the i-th argument had an illegal value
119: *          > 0:  if INFO = i, U(i,i) computed in DOUBLE PRECISION is
120: *                exactly zero.  The factorization has been completed,
121: *                but the factor U is exactly singular, so the solution
122: *                could not be computed.
123: *
124: *  =========
125: *
126: *     .. Parameters ..
127:       LOGICAL            DOITREF
128:       PARAMETER          ( DOITREF = .TRUE. )
129: *
130:       INTEGER            ITERMAX
131:       PARAMETER          ( ITERMAX = 30 )
132: *
133:       DOUBLE PRECISION   BWDMAX
134:       PARAMETER          ( BWDMAX = 1.0E+00 )
135: *
136:       DOUBLE PRECISION   NEGONE, ONE
137:       PARAMETER          ( NEGONE = -1.0D+0, ONE = 1.0D+0 )
138: *
139: *     .. Local Scalars ..
140:       INTEGER            I, IITER, PTSA, PTSX
141:       DOUBLE PRECISION   ANRM, CTE, EPS, RNRM, XNRM
142: *
143: *     .. External Subroutines ..
144:       EXTERNAL           DAXPY, DGEMM, DLACPY, DLAG2S, SLAG2D, SGETRF,
145:      +                   SGETRS, XERBLA
146: *     ..
147: *     .. External Functions ..
148:       INTEGER            IDAMAX
149:       DOUBLE PRECISION   DLAMCH, DLANGE
150:       EXTERNAL           IDAMAX, DLAMCH, DLANGE
151: *     ..
152: *     .. Intrinsic Functions ..
153:       INTRINSIC          ABS, DBLE, MAX, SQRT
154: *     ..
155: *     .. Executable Statements ..
156: *
157:       INFO = 0
158:       ITER = 0
159: *
160: *     Test the input parameters.
161: *
162:       IF( N.LT.0 ) THEN
163:          INFO = -1
164:       ELSE IF( NRHS.LT.0 ) THEN
165:          INFO = -2
166:       ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
167:          INFO = -4
168:       ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
169:          INFO = -7
170:       ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
171:          INFO = -9
172:       END IF
173:       IF( INFO.NE.0 ) THEN
174:          CALL XERBLA( 'DSGESV', -INFO )
175:          RETURN
176:       END IF
177: *
178: *     Quick return if (N.EQ.0).
179: *
180:       IF( N.EQ.0 )
181:      +   RETURN
182: *
183: *     Skip single precision iterative refinement if a priori slower
184: *     than double precision factorization.
185: *
186:       IF( .NOT.DOITREF ) THEN
187:          ITER = -1
188:          GO TO 40
189:       END IF
190: *
191: *     Compute some constants.
192: *
193:       ANRM = DLANGE( 'I', N, N, A, LDA, WORK )
194:       EPS = DLAMCH( 'Epsilon' )
195:       CTE = ANRM*EPS*SQRT( DBLE( N ) )*BWDMAX
196: *
197: *     Set the indices PTSA, PTSX for referencing SA and SX in SWORK.
198: *
199:       PTSA = 1
200:       PTSX = PTSA + N*N
201: *
202: *     Convert B from double precision to single precision and store the
203: *     result in SX.
204: *
205:       CALL DLAG2S( N, NRHS, B, LDB, SWORK( PTSX ), N, INFO )
206: *
207:       IF( INFO.NE.0 ) THEN
208:          ITER = -2
209:          GO TO 40
210:       END IF
211: *
212: *     Convert A from double precision to single precision and store the
213: *     result in SA.
214: *
215:       CALL DLAG2S( N, N, A, LDA, SWORK( PTSA ), N, INFO )
216: *
217:       IF( INFO.NE.0 ) THEN
218:          ITER = -2
219:          GO TO 40
220:       END IF
221: *
222: *     Compute the LU factorization of SA.
223: *
224:       CALL SGETRF( N, N, SWORK( PTSA ), N, IPIV, INFO )
225: *
226:       IF( INFO.NE.0 ) THEN
227:          ITER = -3
228:          GO TO 40
229:       END IF
230: *
231: *     Solve the system SA*SX = SB.
232: *
233:       CALL SGETRS( 'No transpose', N, NRHS, SWORK( PTSA ), N, IPIV,
234:      +             SWORK( PTSX ), N, INFO )
235: *
236: *     Convert SX back to double precision
237: *
238:       CALL SLAG2D( N, NRHS, SWORK( PTSX ), N, X, LDX, INFO )
239: *
240: *     Compute R = B - AX (R is WORK).
241: *
242:       CALL DLACPY( 'All', N, NRHS, B, LDB, WORK, N )
243: *
244:       CALL DGEMM( 'No Transpose', 'No Transpose', N, NRHS, N, NEGONE, A,
245:      +            LDA, X, LDX, ONE, WORK, N )
246: *
247: *     Check whether the NRHS normwise backward errors satisfy the
248: *     stopping criterion. If yes, set ITER=0 and return.
249: *
250:       DO I = 1, NRHS
251:          XNRM = ABS( X( IDAMAX( N, X( 1, I ), 1 ), I ) )
252:          RNRM = ABS( WORK( IDAMAX( N, WORK( 1, I ), 1 ), I ) )
253:          IF( RNRM.GT.XNRM*CTE )
254:      +      GO TO 10
255:       END DO
256: *
257: *     If we are here, the NRHS normwise backward errors satisfy the
258: *     stopping criterion. We are good to exit.
259: *
260:       ITER = 0
261:       RETURN
262: *
263:    10 CONTINUE
264: *
265:       DO 30 IITER = 1, ITERMAX
266: *
267: *        Convert R (in WORK) from double precision to single precision
268: *        and store the result in SX.
269: *
270:          CALL DLAG2S( N, NRHS, WORK, N, SWORK( PTSX ), N, INFO )
271: *
272:          IF( INFO.NE.0 ) THEN
273:             ITER = -2
274:             GO TO 40
275:          END IF
276: *
277: *        Solve the system SA*SX = SR.
278: *
279:          CALL SGETRS( 'No transpose', N, NRHS, SWORK( PTSA ), N, IPIV,
280:      +                SWORK( PTSX ), N, INFO )
281: *
282: *        Convert SX back to double precision and update the current
283: *        iterate.
284: *
285:          CALL SLAG2D( N, NRHS, SWORK( PTSX ), N, WORK, N, INFO )
286: *
287:          DO I = 1, NRHS
288:             CALL DAXPY( N, ONE, WORK( 1, I ), 1, X( 1, I ), 1 )
289:          END DO
290: *
291: *        Compute R = B - AX (R is WORK).
292: *
293:          CALL DLACPY( 'All', N, NRHS, B, LDB, WORK, N )
294: *
295:          CALL DGEMM( 'No Transpose', 'No Transpose', N, NRHS, N, NEGONE,
296:      +               A, LDA, X, LDX, ONE, WORK, N )
297: *
298: *        Check whether the NRHS normwise backward errors satisfy the
299: *        stopping criterion. If yes, set ITER=IITER>0 and return.
300: *
301:          DO I = 1, NRHS
302:             XNRM = ABS( X( IDAMAX( N, X( 1, I ), 1 ), I ) )
303:             RNRM = ABS( WORK( IDAMAX( N, WORK( 1, I ), 1 ), I ) )
304:             IF( RNRM.GT.XNRM*CTE )
305:      +         GO TO 20
306:          END DO
307: *
308: *        If we are here, the NRHS normwise backward errors satisfy the
309: *        stopping criterion, we are good to exit.
310: *
311:          ITER = IITER
312: *
313:          RETURN
314: *
315:    20    CONTINUE
316: *
317:    30 CONTINUE
318: *
319: *     If we are at this place of the code, this is because we have
320: *     performed ITER=ITERMAX iterations and never satisified the
321: *     stopping criterion, set up the ITER flag accordingly and follow up
322: *     on double precision routine.
323: *
324:       ITER = -ITERMAX - 1
325: *
326:    40 CONTINUE
327: *
328: *     Single-precision iterative refinement failed to converge to a
329: *     satisfactory solution, so we resort to double precision.
330: *
331:       CALL DGETRF( N, N, A, LDA, IPIV, INFO )
332: *
333:       IF( INFO.NE.0 )
334:      +   RETURN
335: *
336:       CALL DLACPY( 'All', N, NRHS, B, LDB, X, LDX )
337:       CALL DGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, X, LDX,
338:      +             INFO )
339: *
340:       RETURN
341: *
342: *     End of DSGESV.
343: *
344:       END
345: