#include "blaswrap.h"
#include "f2c.h"

/*  -- LAPACK PROTOTYPE driver routine (version 3.1.1) --
       Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
       January 2007

    This is an LAPACK PROTOTYPE routine, which means that its interface is
    likely to change in the future based on community feedback.

    Purpose
    =======

    ZCGESV computes the solution to a complex system of linear equations
       A * X = B,
    where A is an N-by-N matrix and X and B are N-by-NRHS matrices.

    ZCGESV first attempts to factorize the matrix in SINGLE COMPLEX
    PRECISION and uses this factorization within an iterative refinement
    procedure to produce a solution with DOUBLE COMPLEX PRECISION normwise
    backward error quality (see below).  If this approach fails, the method
    switches to a DOUBLE COMPLEX PRECISION factorization and solve.

    Iterative refinement is always attempted; the switch that would skip it
    (ITER = -1) is hard-wired to "try refinement" in this version.

    The refinement stops when ITER > ITERMAX or when, for every right-hand
    side,

        RNRM <= SQRT(N) * XNRM * ANRM * EPS * BWDMAX

    where
        o ITER is the number of the current refinement iteration
        o RNRM is the infinity-norm of the residual
        o XNRM is the infinity-norm of the solution
        o ANRM is the infinity-operator-norm of the matrix A
        o EPS  is the machine epsilon returned by DLAMCH('Epsilon')
    ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 respectively.

    Arguments
    =========

    N       (input) INTEGER
            The number of linear equations, i.e., the order of A.  N >= 0.

    NRHS    (input) INTEGER
            The number of right-hand sides, i.e., the number of columns of
            B.  NRHS >= 0.

    A       (input or input/output) COMPLEX*16 array, dimension (LDA,N)
            On entry, the N-by-N coefficient matrix A.
            On exit, if iterative refinement succeeded (INFO.EQ.0 and
            ITER.GE.0) A is unchanged; if the double precision
            factorization was used (INFO.EQ.0 and ITER.LT.0) A contains the
            factors L and U from A = P*L*U (the unit diagonal of L is not
            stored).

    LDA     (input) INTEGER
            The leading dimension of A.  LDA >= max(1,N).

    IPIV    (output) INTEGER array, dimension (N)
            The pivot indices defining the permutation matrix P; row i was
            interchanged with row IPIV(i).  They belong to the single
            precision factorization if ITER.GE.0 and to the double
            precision factorization if ITER.LT.0 (INFO.EQ.0 in both cases).

    B       (input) COMPLEX*16 array, dimension (LDB,NRHS)
            The N-by-NRHS right-hand side matrix B.

    LDB     (input) INTEGER
            The leading dimension of B.  LDB >= max(1,N).

    X       (output) COMPLEX*16 array, dimension (LDX,NRHS)
            If INFO = 0, the N-by-NRHS solution matrix X.

    LDX     (input) INTEGER
            The leading dimension of X.  LDX >= max(1,N).

    WORK    (workspace) COMPLEX*16 array, dimension (N*NRHS)
            Holds the residual vectors (leading dimension N).

    SWORK   (workspace) COMPLEX array, dimension (N*(N+NRHS))
            Holds the single precision copy of A (first N*N entries)
            followed by the single precision right-hand sides/solutions.

    ITER    (output) INTEGER
            <  0: iterative refinement failed and the double precision
                  factorization was performed
                  -1 : a priori not worth working in SINGLE PRECISION
                  -2 : overflow of an entry when converting from double to
                       SINGLE PRECISION
                  -3 : failure of CGETRF
                  -31: refinement stopped after the 30th iteration
            >= 0: iterative refinement succeeded; ITER is the number of
                  refinement iterations performed.

    INFO    (output) INTEGER
            = 0:  successful exit
            < 0:  if INFO = -i, the i-th argument had an illegal value
            > 0:  if INFO = i, U(i,i) computed in DOUBLE PRECISION is
                  exactly zero.  The factorization has been completed, but
                  U is exactly singular, so the solution could not be
                  computed.

    =====================================================================  */

/* Builtin functions */
double sqrt(doublereal), d_imag(doublecomplex *);

/* External BLAS / LAPACK routines used below. */
extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *,
        integer *, doublecomplex *, doublecomplex *, integer *,
        doublecomplex *, integer *, doublecomplex *, doublecomplex *,
        integer *);
extern /* Subroutine */ int zaxpy_(integer *, doublecomplex *,
        doublecomplex *, integer *, doublecomplex *, integer *);
extern /* Subroutine */ int clag2z_(integer *, integer *, complex *,
        integer *, doublecomplex *, integer *, integer *);
extern /* Subroutine */ int zlag2c_(integer *, integer *, doublecomplex *,
        integer *, complex *, integer *, integer *);
extern doublereal dlamch_(char *);
extern /* Subroutine */ int cgetrf_(integer *, integer *, complex *,
        integer *, integer *, integer *);
extern /* Subroutine */ int xerbla_(char *, integer *);
extern doublereal zlange_(char *, integer *, integer *, doublecomplex *,
        integer *, doublecomplex *);
extern /* Subroutine */ int cgetrs_(char *, integer *, integer *, complex *,
        integer *, integer *, complex *, integer *, integer *);
extern integer izamax_(integer *, doublecomplex *, integer *);
extern /* Subroutine */ int zlacpy_(char *, integer *, integer *,
        doublecomplex *, integer *, doublecomplex *, integer *);
extern /* Subroutine */ int zgetrf_(integer *, integer *, doublecomplex *,
        integer *, integer *, integer *);
extern /* Subroutine */ int zgetrs_(char *, integer *, integer *,
        doublecomplex *, integer *, integer *, doublecomplex *, integer *,
        integer *);

/* Return |re| + |im| of the entry of the n-vector v selected by IZAMAX.
   IZAMAX is called once; its result is a 1-based index. */
static doublereal zcgesv_col_norm(integer *n, doublecomplex *v)
{
    integer unit = 1;
    integer imax = izamax_(n, v, &unit);
    doublereal re = v[imax - 1].r;
    doublereal im = d_imag(&v[imax - 1]);

    return abs(re) + abs(im);
}

/* Return 1 when, for every right-hand side, the residual column norm does
   not exceed xnrm * cte; return 0 as soon as one column violates it.
   x has leading dimension *ldx, r (the residual) has leading dimension *n. */
static int zcgesv_converged(integer *n, integer *nrhs, doublecomplex *x,
                            integer *ldx, doublecomplex *r, doublereal cte)
{
    integer j;

    for (j = 0; j < *nrhs; ++j) {
        doublereal xnrm = zcgesv_col_norm(n, &x[j * *ldx]);
        doublereal rnrm = zcgesv_col_norm(n, &r[j * *n]);

        if (rnrm > xnrm * cte) {
            return 0;
        }
    }
    return 1;
}

/* Compute the residual R = B - A*X into r (leading dimension *n). */
static void zcgesv_residual(integer *n, integer *nrhs, doublecomplex *a,
                            integer *lda, doublecomplex *b, integer *ldb,
                            doublecomplex *x, integer *ldx, doublecomplex *r)
{
    doublecomplex minus_one = {-1., 0.};
    doublecomplex plus_one = {1., 0.};

    zlacpy_("All", n, nrhs, b, ldb, r, n);
    zgemm_("No Transpose", "No Transpose", n, nrhs, n, &minus_one, a, lda,
           x, ldx, &plus_one, r, n);
}

/* Subroutine */ int zcgesv_(integer *n, integer *nrhs, doublecomplex *a,
        integer *lda, integer *ipiv, doublecomplex *b, integer *ldb,
        doublecomplex *x, integer *ldx, doublecomplex *work, complex *swork,
        integer *iter, integer *info)
{
    /* All locals are automatic: nothing persists between calls, so separate
       calls do not share any state through this routine. */
    integer itermax = 30;
    doublereal bwdmax = 1.;
    logical doitref = TRUE_;
    doublecomplex plus_one = {1., 0.};
    integer unit = 1;
    doublereal anrm, eps, cte;
    complex *sa, *sx;
    integer iiter, j, neg_info;

    *info = 0;
    *iter = 0;

    /* Test the input parameters. */
    if (*n < 0) {
        *info = -1;
    } else if (*nrhs < 0) {
        *info = -2;
    } else if (*lda < max(1,*n)) {
        *info = -4;
    } else if (*ldb < max(1,*n)) {
        *info = -7;
    } else if (*ldx < max(1,*n)) {
        *info = -9;
    }
    if (*info != 0) {
        neg_info = -(*info);
        xerbla_("ZCGESV", &neg_info);
        return 0;
    }

    /* Quick return if (N.EQ.0). */
    if (*n == 0) {
        return 0;
    }

    /* Skip single precision iterative refinement if a priori slower than
       double precision factorization. */
    if (! doitref) {
        *iter = -1;
        goto fallback;
    }

    /* Compute the constant factor of the stopping criterion.
       NOTE(review): WORK is also handed to ZLANGE as its scratch array,
       exactly as in the original code; confirm WORK is large enough for
       that use when NRHS is 0. */
    anrm = zlange_("I", n, n, a, lda, work);
    eps = dlamch_("Epsilon");
    cte = anrm * eps * sqrt((doublereal) (*n)) * bwdmax;

    /* SA (N-by-N) occupies the start of SWORK; SX (N-by-NRHS) follows it. */
    sa = swork;
    sx = swork + *n * *n;

    /* Convert B from double precision to single precision into SX. */
    zlag2c_(n, nrhs, b, ldb, sx, n, info);
    if (*info != 0) {
        *iter = -2;
        goto fallback;
    }

    /* Convert A from double precision to single precision into SA. */
    zlag2c_(n, n, a, lda, sa, n, info);
    if (*info != 0) {
        *iter = -2;
        goto fallback;
    }

    /* Compute the LU factorization of SA. */
    cgetrf_(n, n, sa, n, ipiv, info);
    if (*info != 0) {
        *iter = -3;
        goto fallback;
    }

    /* Solve the system SA*SX = SB and convert SX back to double precision. */
    cgetrs_("No transpose", n, nrhs, sa, n, ipiv, sx, n, info);
    clag2z_(n, nrhs, sx, n, x, ldx, info);

    /* Compute R = B - AX (R is WORK) and test the stopping criterion. */
    zcgesv_residual(n, nrhs, a, lda, b, ldb, x, ldx, work);
    if (zcgesv_converged(n, nrhs, x, ldx, work, cte)) {
        /* The initial single precision solve is already good enough. */
        *iter = 0;
        return 0;
    }

    for (iiter = 1; iiter <= itermax; ++iiter) {

        /* Convert R (in WORK) to single precision and store it in SX. */
        zlag2c_(n, nrhs, work, n, sx, n, info);
        if (*info != 0) {
            *iter = -2;
            goto fallback;
        }

        /* Solve the system SA*SX = SR. */
        cgetrs_("No transpose", n, nrhs, sa, n, ipiv, sx, n, info);

        /* Convert the correction back to double precision (into WORK) and
           add it to the current iterate.  The update is done column by
           column because WORK has leading dimension N while X has leading
           dimension LDX; a single length-N*NRHS AXPY is only correct when
           LDX equals N. */
        clag2z_(n, nrhs, sx, n, work, n, info);
        for (j = 0; j < *nrhs; ++j) {
            zaxpy_(n, &plus_one, &work[j * *n], &unit, &x[j * *ldx], &unit);
        }

        /* Compute R = B - AX (R is WORK) and test the stopping criterion. */
        zcgesv_residual(n, nrhs, a, lda, b, ldb, x, ldx, work);
        if (zcgesv_converged(n, nrhs, x, ldx, work, cte)) {
            *iter = iiter;
            return 0;
        }
    }

    /* ITERMAX iterations were performed without ever satisfying the
       stopping criterion: flag it and fall back to double precision. */
    *iter = -itermax - 1;

fallback:

    /* Single precision iterative refinement was skipped or failed, so
       resort to a double precision factorization and solve.  X receives a
       copy of B even when the factorization reports a singular U. */
    zgetrf_(n, n, a, lda, ipiv, info);

    zlacpy_("All", n, nrhs, b, ldb, x, ldx);

    if (*info == 0) {
        zgetrs_("No transpose", n, nrhs, a, lda, ipiv, x, ldx, info);
    }

    return 0;

/*     End of ZCGESV. */

} /* zcgesv_ */