001:       SUBROUTINE DLA_GERFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, NRHS, A,
002:      $                                LDA, AF, LDAF, IPIV, COLEQU, C, B,
003:      $                                LDB, Y, LDY, BERR_OUT, N_NORMS,
004:      $                                ERRS_N, ERRS_C, RES, AYB, DY,
005:      $                                Y_TAIL, RCOND, ITHRESH, RTHRESH,
006:      $                                DZ_UB, IGNORE_CWISE, INFO )
007: *
008: *     -- LAPACK routine (version 3.2)                                 --
009: *     -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and --
010: *     -- Jason Riedy of Univ. of California Berkeley.                 --
011: *     -- November 2008                                                --
012: *
013: *     -- LAPACK is a software package provided by Univ. of Tennessee, --
014: *     -- Univ. of California Berkeley and NAG Ltd.                    --
015: *
016:       IMPLICIT NONE
017: *     ..
018: *     .. Scalar Arguments ..
019:       INTEGER            INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE,
020:      $                   TRANS_TYPE, N_NORMS, ITHRESH
021:       LOGICAL            COLEQU, IGNORE_CWISE
022:       DOUBLE PRECISION   RTHRESH, DZ_UB
023: *     ..
024: *     .. Array Arguments ..
025:       INTEGER            IPIV( * )
026:       DOUBLE PRECISION   A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
027:      $                   Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * )
028:       DOUBLE PRECISION   C( * ), AYB( * ), RCOND, BERR_OUT( * ),
029:      $                   ERRS_N( NRHS, * ), ERRS_C( NRHS, * )
030: *     ..
031: *     .. Local Scalars ..
032:       CHARACTER          TRANS
033:       INTEGER            CNT, I, J, X_STATE, Z_STATE, Y_PREC_STATE
034:       DOUBLE PRECISION   YK, DYK, YMIN, NORMY, NORMX, NORMDX, DXRAT,
035:      $                   DZRAT, PREVNORMDX, PREV_DZ_Z, DXRATMAX,
036:      $                   DZRATMAX, DX_X, DZ_Z, FINAL_DX_X, FINAL_DZ_Z,
037:      $                   EPS, HUGEVAL, INCR_THRESH
038:       LOGICAL            INCR_PREC
039: *     ..
040: *     .. Parameters ..
041:       INTEGER            UNSTABLE_STATE, WORKING_STATE, CONV_STATE,
042:      $                   NOPROG_STATE, BASE_RESIDUAL, EXTRA_RESIDUAL,
043:      $                   EXTRA_Y
044:       PARAMETER          ( UNSTABLE_STATE = 0, WORKING_STATE = 1,
045:      $                   CONV_STATE = 2, NOPROG_STATE = 3 )
046:       PARAMETER          ( BASE_RESIDUAL = 0, EXTRA_RESIDUAL = 1,
047:      $                   EXTRA_Y = 2 )
048:       INTEGER            FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I
049:       INTEGER            RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I
050:       INTEGER            CMP_ERR_I, PIV_GROWTH_I
051:       PARAMETER          ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2,
052:      $                   BERR_I = 3 )
053:       PARAMETER          ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 )
054:       PARAMETER          ( CMP_RCOND_I = 7, CMP_ERR_I = 8,
055:      $                   PIV_GROWTH_I = 9 )
056:       INTEGER            LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I,
057:      $                   LA_LINRX_CWISE_I
058:       PARAMETER          ( LA_LINRX_ITREF_I = 1,
059:      $                   LA_LINRX_ITHRESH_I = 2 )
060:       PARAMETER          ( LA_LINRX_CWISE_I = 3 )
061:       INTEGER            LA_LINRX_TRUST_I, LA_LINRX_ERR_I,
062:      $                   LA_LINRX_RCOND_I
063:       PARAMETER          ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 )
064:       PARAMETER          ( LA_LINRX_RCOND_I = 3 )
065:       INTEGER            LA_LINRX_MAX_N_ERRS
066:       PARAMETER          ( LA_LINRX_MAX_N_ERRS = 3 )
067: *     ..
068: *     .. External Subroutines ..
069:       EXTERNAL           DAXPY, DCOPY, DGETRS, DGEMV, BLAS_DGEMV_X,
070:      $                   BLAS_DGEMV2_X, DLA_GEAMV, DLA_WWADDW, DLAMCH,
071:      $                   CHLA_TRANSTYPE, DLA_LIN_BERR
072:       DOUBLE PRECISION   DLAMCH
073:       CHARACTER          CHLA_TRANSTYPE
074: *     ..
075: *     .. Intrinsic Functions ..
076:       INTRINSIC          ABS, MAX, MIN
077: *     ..
078: *     .. Executable Statements ..
079: *
080:       IF ( INFO.NE.0 ) RETURN
081:       TRANS = CHLA_TRANSTYPE(TRANS_TYPE)
082:       EPS = DLAMCH( 'Epsilon' )
083:       HUGEVAL = DLAMCH( 'Overflow' )
084: *     Force HUGEVAL to Inf
085:       HUGEVAL = HUGEVAL * HUGEVAL
086: *     Using HUGEVAL may lead to spurious underflows.
087:       INCR_THRESH = DBLE( N ) * EPS
088: *
089:       DO J = 1, NRHS
090:          Y_PREC_STATE = EXTRA_RESIDUAL
091:          IF ( Y_PREC_STATE .EQ. EXTRA_Y ) THEN
092:             DO I = 1, N
093:                Y_TAIL( I ) = 0.0D+0
094:             END DO
095:          END IF
096: 
097:          DXRAT = 0.0D+0
098:          DXRATMAX = 0.0D+0
099:          DZRAT = 0.0D+0
100:          DZRATMAX = 0.0D+0
101:          FINAL_DX_X = HUGEVAL
102:          FINAL_DZ_Z = HUGEVAL
103:          PREVNORMDX = HUGEVAL
104:          PREV_DZ_Z = HUGEVAL
105:          DZ_Z = HUGEVAL
106:          DX_X = HUGEVAL
107: 
108:          X_STATE = WORKING_STATE
109:          Z_STATE = UNSTABLE_STATE
110:          INCR_PREC = .FALSE.
111: 
112:          DO CNT = 1, ITHRESH
113: *
114: *         Compute residual RES = B_s - op(A_s) * Y,
115: *             op(A) = A, A**T, or A**H depending on TRANS (and type).
116: *
117:             CALL DCOPY( N, B( 1, J ), 1, RES, 1 )
118:             IF ( Y_PREC_STATE .EQ. BASE_RESIDUAL ) THEN
119:                CALL DGEMV( TRANS, N, N, -1.0D+0, A, LDA, Y( 1, J ), 1,
120:      $              1.0D+0, RES, 1 )
121:             ELSE IF ( Y_PREC_STATE .EQ. EXTRA_RESIDUAL ) THEN
122:                CALL BLAS_DGEMV_X( TRANS_TYPE, N, N, -1.0D+0, A, LDA,
123:      $              Y( 1, J ), 1, 1.0D+0, RES, 1, PREC_TYPE )
124:             ELSE
125:                CALL BLAS_DGEMV2_X( TRANS_TYPE, N, N, -1.0D+0, A, LDA,
126:      $              Y( 1, J ), Y_TAIL, 1, 1.0D+0, RES, 1, PREC_TYPE )
127:             END IF
128: 
129: !        XXX: RES is no longer needed.
130:             CALL DCOPY( N, RES, 1, DY, 1 )
131:             CALL DGETRS( TRANS, N, 1, AF, LDAF, IPIV, DY, N, INFO )
132: *
133: *         Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT.
134: *
135:             NORMX = 0.0D+0
136:             NORMY = 0.0D+0
137:             NORMDX = 0.0D+0
138:             DZ_Z = 0.0D+0
139:             YMIN = HUGEVAL
140: *
141:             DO I = 1, N
142:                YK = ABS( Y( I, J ) )
143:                DYK = ABS( DY( I ) )
144: 
145:                IF ( YK .NE. 0.0D+0 ) THEN
146:                   DZ_Z = MAX( DZ_Z, DYK / YK )
147:                ELSE IF ( DYK .NE. 0.0D+0 ) THEN
148:                   DZ_Z = HUGEVAL
149:                END IF
150: 
151:                YMIN = MIN( YMIN, YK )
152: 
153:                NORMY = MAX( NORMY, YK )
154: 
155:                IF ( COLEQU ) THEN
156:                   NORMX = MAX( NORMX, YK * C( I ) )
157:                   NORMDX = MAX( NORMDX, DYK * C( I ) )
158:                ELSE
159:                   NORMX = NORMY
160:                   NORMDX = MAX( NORMDX, DYK )
161:                END IF
162:             END DO
163: 
164:             IF ( NORMX .NE. 0.0D+0 ) THEN
165:                DX_X = NORMDX / NORMX
166:             ELSE IF ( NORMDX .EQ. 0.0D+0 ) THEN
167:                DX_X = 0.0D+0
168:             ELSE
169:                DX_X = HUGEVAL
170:             END IF
171: 
172:             DXRAT = NORMDX / PREVNORMDX
173:             DZRAT = DZ_Z / PREV_DZ_Z
174: *
175: *         Check termination criteria
176: *
177:             IF (.NOT.IGNORE_CWISE
178:      $           .AND. YMIN*RCOND .LT. INCR_THRESH*NORMY
179:      $           .AND. Y_PREC_STATE .LT. EXTRA_Y)
180:      $           INCR_PREC = .TRUE.
181: 
182:             IF ( X_STATE .EQ. NOPROG_STATE .AND. DXRAT .LE. RTHRESH )
183:      $           X_STATE = WORKING_STATE
184:             IF ( X_STATE .EQ. WORKING_STATE ) THEN
185:                IF ( DX_X .LE. EPS ) THEN
186:                   X_STATE = CONV_STATE
187:                ELSE IF ( DXRAT .GT. RTHRESH ) THEN
188:                   IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN
189:                      INCR_PREC = .TRUE.
190:                   ELSE
191:                      X_STATE = NOPROG_STATE
192:                   END IF
193:                ELSE
194:                   IF ( DXRAT .GT. DXRATMAX ) DXRATMAX = DXRAT
195:                END IF
196:                IF ( X_STATE .GT. WORKING_STATE ) FINAL_DX_X = DX_X
197:             END IF
198: 
199:             IF ( Z_STATE .EQ. UNSTABLE_STATE .AND. DZ_Z .LE. DZ_UB )
200:      $           Z_STATE = WORKING_STATE
201:             IF ( Z_STATE .EQ. NOPROG_STATE .AND. DZRAT .LE. RTHRESH )
202:      $           Z_STATE = WORKING_STATE
203:             IF ( Z_STATE .EQ. WORKING_STATE ) THEN
204:                IF ( DZ_Z .LE. EPS ) THEN
205:                   Z_STATE = CONV_STATE
206:                ELSE IF ( DZ_Z .GT. DZ_UB ) THEN
207:                   Z_STATE = UNSTABLE_STATE
208:                   DZRATMAX = 0.0D+0
209:                   FINAL_DZ_Z = HUGEVAL
210:                ELSE IF ( DZRAT .GT. RTHRESH ) THEN
211:                   IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN
212:                      INCR_PREC = .TRUE.
213:                   ELSE
214:                      Z_STATE = NOPROG_STATE
215:                   END IF
216:                ELSE
217:                   IF ( DZRAT .GT. DZRATMAX ) DZRATMAX = DZRAT
218:                END IF
219:                IF ( Z_STATE .GT. WORKING_STATE ) FINAL_DZ_Z = DZ_Z
220:             END IF
221: *
222: *           Exit if both normwise and componentwise stopped working,
223: *           but if componentwise is unstable, let it go at least two
224: *           iterations.
225: *
226:             IF ( X_STATE.NE.WORKING_STATE ) THEN
227:                IF ( IGNORE_CWISE) GOTO 666
228:                IF ( Z_STATE.EQ.NOPROG_STATE .OR. Z_STATE.EQ.CONV_STATE )
229:      $              GOTO 666
230:                IF ( Z_STATE.EQ.UNSTABLE_STATE .AND. CNT.GT.1 ) GOTO 666
231:             END IF
232: 
233:             IF ( INCR_PREC ) THEN
234:                INCR_PREC = .FALSE.
235:                Y_PREC_STATE = Y_PREC_STATE + 1
236:                DO I = 1, N
237:                   Y_TAIL( I ) = 0.0D+0
238:                END DO
239:             END IF
240: 
241:             PREVNORMDX = NORMDX
242:             PREV_DZ_Z = DZ_Z
243: *
244: *           Update soluton.
245: *
246:             IF ( Y_PREC_STATE .LT. EXTRA_Y ) THEN
247:                CALL DAXPY( N, 1.0D+0, DY, 1, Y( 1, J ), 1 )
248:             ELSE
249:                CALL DLA_WWADDW( N, Y( 1, J ), Y_TAIL, DY )
250:             END IF
251: 
252:          END DO
253: *        Target of "IF (Z_STOP .AND. X_STOP)".  Sun's f77 won't EXIT.
254:  666     CONTINUE
255: *
256: *     Set final_* when cnt hits ithresh.
257: *
258:          IF ( X_STATE .EQ. WORKING_STATE ) FINAL_DX_X = DX_X
259:          IF ( Z_STATE .EQ. WORKING_STATE ) FINAL_DZ_Z = DZ_Z
260: *
261: *     Compute error bounds
262: *
263:          IF (N_NORMS .GE. 1) THEN
264:             ERRS_N( J, LA_LINRX_ERR_I ) = FINAL_DX_X / (1 - DXRATMAX)
265:          END IF
266:          IF ( N_NORMS .GE. 2 ) THEN
267:             ERRS_C( J, LA_LINRX_ERR_I ) = FINAL_DZ_Z / (1 - DZRATMAX)
268:          END IF
269: *
270: *     Compute componentwise relative backward error from formula
271: *         max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) )
272: *     where abs(Z) is the componentwise absolute value of the matrix
273: *     or vector Z.
274: *
275: *         Compute residual RES = B_s - op(A_s) * Y,
276: *             op(A) = A, A**T, or A**H depending on TRANS (and type).
277: *
278:          CALL DCOPY( N, B( 1, J ), 1, RES, 1 )
279:          CALL DGEMV( TRANS, N, N, -1.0D+0, A, LDA, Y(1,J), 1, 1.0D+0, 
280:      $     RES, 1 )
281: 
282:          DO I = 1, N
283:             AYB( I ) = ABS( B( I, J ) )
284:          END DO
285: *
286: *     Compute abs(op(A_s))*abs(Y) + abs(B_s).
287: *
288:          CALL DLA_GEAMV ( TRANS_TYPE, N, N, 1.0D+0,
289:      $        A, LDA, Y(1, J), 1, 1.0D+0, AYB, 1 )
290: 
291:          CALL DLA_LIN_BERR ( N, N, 1, RES, AYB, BERR_OUT( J ) )
292: *
293: *     End of loop for each RHS.
294: *
295:       END DO
296: *
297:       RETURN
298:       END
299: