subroutine cg (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp, a iparm,rparm,ier) external suba, subat, subql, subqlt, subqr, subqrt, subadp integer iparm(30), jcoef(2), iwksp(1) dimension rhs(1), u(1), ubar(1), rparm(30), coef(1), wksp(1) c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c ier = 0 call needw ('cg',0,irpnt,3*n+2*itmax,ier) if (ier .lt. 0) return nw = lenr - irpnt + 1 call cgw (suba,subql,coef,jcoef,wksp,iwksp, a n,u,ubar,rhs,wksp(irpnt),nw,iparm,rparm,ier) irmax = irpnt + nw - 1 return end subroutine si (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp, a iparm,rparm,ier) external suba, subat, subql, subqlt, subqr, subqrt, subadp integer iparm(30), jcoef(2), iwksp(1) dimension rhs(1), u(1), ubar(1), rparm(30), coef(1), wksp(1) c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c ier = 0 call needw ('si',0,irpnt,4*n,ier) if (ier .lt. 0) return nw = lenr - irpnt + 1 call siw (suba,subql,coef,jcoef,wksp,iwksp, a n,u,ubar,rhs,wksp(irpnt),nw,iparm,rparm,ier) irmax = irpnt + nw - 1 return end subroutine srcg (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp, a iparm,rparm,ier) external suba, subat, subql, subqlt, subqr, subqrt, subadp integer iparm(30), jcoef(2), iwksp(1) dimension rhs(1), u(1), ubar(1), rparm(30), coef(1), wksp(1) c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c ier = 0 call needw ('srcg',0,irpnt,3*n+2*itmax,ier) if (ier .lt. 0) return nw = lenr - irpnt + 1 call srcgw (suba,subql,subadp,coef,jcoef,wksp,iwksp, a n,u,ubar,rhs,wksp(irpnt),nw,iparm,rparm,ier) irmax = irpnt + nw - 1 return end subroutine srsi (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp, a iparm,rparm,ier) external suba, subat, subql, subqlt, subqr, subqrt, subadp integer iparm(30), jcoef(2), iwksp(1) dimension rhs(1), u(1), ubar(1), rparm(30), coef(1), wksp(1) c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c ier = 0 call needw ('srsi',0,irpnt,4*n,ier) if (ier .lt. 0) return nw = lenr - irpnt + 1 call srsiw (suba,subql,subadp,coef,jcoef,wksp,iwksp, a n,u,ubar,rhs,wksp(irpnt),nw,iparm,rparm,ier) irmax = irpnt + nw - 1 return end subroutine sor (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp, a iparm,rparm,ier) external suba, subat, subql, subqlt, subqr, subqrt, subadp integer iparm(30), jcoef(2), iwksp(1) dimension rhs(1), u(1), ubar(1), rparm(30), coef(1), wksp(1) c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c ier = 0 call needw ('sor',0,irpnt,2*n,ier) if (ier .lt. 0) return nw = lenr - irpnt + 1 call sorw (suba,subql,coef,jcoef,wksp,iwksp, a n,u,ubar,rhs,wksp(irpnt),nw,iparm,rparm,ier) irmax = irpnt + nw - 1 return end subroutine basic (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp,iparm,rparm,ier) c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wksp(1), iwksp(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp c c ... data common blocks c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c nw = lenr - irpnt + 1 call basicw (suba,subql,subqr,coef,jcoef, a wksp,iwksp,n,u,ubar,rhs,wksp(irpnt),nw, a iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) return end subroutine me (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp,iparm,rparm,ier) c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wksp(1), iwksp(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp c c ... data common blocks c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c nw = lenr - irpnt + 1 call mew (suba,subql,subqr,coef,jcoef, a wksp,iwksp,n,u,ubar,rhs,wksp(irpnt),nw, a iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) return end subroutine odir (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp,iparm,rparm,ier) c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wksp(1), iwksp(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp c c ... data common blocks c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c nw = lenr - irpnt + 1 call odirw (suba,subql,subqr,coef,jcoef, a wksp,iwksp,n,u,ubar,rhs,wksp(irpnt),nw, a iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) return end subroutine omin (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp,iparm,rparm,ier) c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wksp(1), iwksp(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp c c ... data common blocks c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c nw = lenr - irpnt + 1 call ominw (suba,subql,subqr,coef,jcoef, a wksp,iwksp,n,u,ubar,rhs,wksp(irpnt),nw, a iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) return end subroutine ores (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp,iparm,rparm,ier) c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wksp(1), iwksp(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp c c ... data common blocks c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c nw = lenr - irpnt + 1 call oresw (suba,subql,subqr,coef,jcoef, a wksp,iwksp,n,u,ubar,rhs,wksp(irpnt),nw, a iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) return end subroutine iom (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp,iparm,rparm,ier) c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wksp(1), iwksp(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp c c ... data common blocks c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c nw = lenr - irpnt + 1 call iomw (suba,subql,subqr,coef,jcoef, a wksp,iwksp,n,u,ubar,rhs,wksp(irpnt),nw, a iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) return end subroutine gmres (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp,iparm,rparm,ier) c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wksp(1), iwksp(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp c c ... data common blocks c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c nw = lenr - irpnt + 1 call gmresw (suba,subql,subqr,coef,jcoef, a wksp,iwksp,n,u,ubar,rhs,wksp(irpnt),nw, a iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) return end subroutine cgnr (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp,iparm,rparm,ier) c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wksp(1), iwksp(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp c c ... data common blocks c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c nw = lenr - irpnt + 1 call cgnrw (suba,subat,subql,subqlt,subqr,subqrt, a coef,jcoef, a wksp,iwksp,n,u,ubar,rhs,wksp(irpnt),nw, a iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) return end subroutine lsqr (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp,iparm,rparm,ier) c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wksp(1), iwksp(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp c c ... data common blocks c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c nw = lenr - irpnt + 1 call lsqrw (suba,subat,subql,subqlt,subqr,subqrt, a coef,jcoef, a wksp,iwksp,n,u,ubar,rhs,wksp(irpnt),nw, a iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) return end subroutine usymlq (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp,iparm,rparm,ier) c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wksp(1), iwksp(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp c c ... data common blocks c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c nw = lenr - irpnt + 1 call uslqw (suba,subat,subql,subqlt,subqr,subqrt, a coef,jcoef, a wksp,iwksp,n,u,ubar,rhs,wksp(irpnt),nw, a iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) return end subroutine usymqr (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp,iparm,rparm,ier) c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wksp(1), iwksp(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp c c ... data common blocks c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c nw = lenr - irpnt + 1 call usqrw (suba,subat,subql,subqlt,subqr,subqrt, a coef,jcoef, a wksp,iwksp,n,u,ubar,rhs,wksp(irpnt),nw, a iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) return end subroutine landir (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp,iparm,rparm,ier) c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wksp(1), iwksp(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp c c ... data common blocks c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c nw = lenr - irpnt + 1 call ldirw (suba,subat,subql,subqlt,subqr,subqrt, a coef,jcoef, a wksp,iwksp,n,u,ubar,rhs,wksp(irpnt),nw, a iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) return end subroutine lanmin (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp,iparm,rparm,ier) c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wksp(1), iwksp(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp c c ... data common blocks c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c nw = lenr - irpnt + 1 call lminw (suba,subat,subql,subqlt,subqr,subqrt, a coef,jcoef, a wksp,iwksp,n,u,ubar,rhs,wksp(irpnt),nw, a iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) return end subroutine lanres (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp,iparm,rparm,ier) c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wksp(1), iwksp(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp c c ... data common blocks c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c nw = lenr - irpnt + 1 call lresw (suba,subat,subql,subqlt,subqr,subqrt, a coef,jcoef, a wksp,iwksp,n,u,ubar,rhs,wksp(irpnt),nw, a iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) return end subroutine cgcr (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wk,iwk,iparm,rparm,ier) c c this routine implements the constrained residual method of c j. r. wallis, coupled with truncated/restarted orthomin. for c further information about the algorithm, see "constrained residual c acceleration of conjugate residual methods", by j. r. wallis, c r. p. kendall and t. e. little of j. s. nolen and assocs. inc.; c report spe 13536, society of petroleum engineers, 1985. c c right preconditioning only is allowed in this algorithm. c c unfortunately, this routine is limited -- all blocks must be the c same size. but the idea can be easily generalized. c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wk(1), iwk(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp external nullpl, cgcrpr logical ipl, ipr, iql, iqr c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d common / itcom6 / method, iscale, iperm, nstore, a ifact, kblsz, lvfill, ltrunc, ndeg, a ipropa, isymm, ifctv c c ... data common blocks common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax common / ccgcr / nblk, nband, ictac, ieta, ivcgcr c c time to proceed ... c if (nstore.ne.2 .and. nstore.ne.3) go to 998 c irpsav = irpnt iql = iqlr.eq.1 .or. iqlr.eq.3 iqr = iqlr.eq.2 .or. iqlr.eq.3 if (iql) go to 998 c ipl = .false. ipr = .true. iplr = 0 if (ipl) iplr = iplr + 1 if (ipr) iplr = iplr + 2 c c form the c**(t)*a*c matrix c 1 if (nbl1d.le.0 .or. nbl2d.le.0) go to 998 nbl0d = 1 if (mod(nbl2d,nbl1d).ne.0 .or. mod(nbl1d,nbl0d).ne.0) go to 998 nblk = n / nbl2d if (nblk .eq. 1) nblk = n / nbl1d ictac = irpnt nwgb = lenr - ictac + 1 ierpp = 0 call getblk (coef,jcoef,n,nblk,nband,wk(ictac),nwgb,ierpp) irmax = max0 (irmax,ictac-1+nwgb) if (ierpp .lt. 0) go to 999 irpnt = ictac + nblk*nband c c perform first-iterate calculations c ieta = irpnt ivcgcr = ieta + nblk iv2 = ivcgcr + n irmax = max0(irmax,iv2-1+n) if (irmax .gt. lenr) go to 997 c call suba (coef,jcoef,wk,iwk,n,u,wk(ivcgcr)) call vexopy (n,wk(ivcgcr),rhs,wk(ivcgcr),2) call tmult (n,nblk,nband,wk(ictac),wk(ieta),wk(ivcgcr), a wk(ivcgcr)) call vexopy (n,u,u,wk(ivcgcr),1) c c pass it on to orthomin ... c irpnt = iv2 nw = lenr - irpnt + 1 call omingw (suba,subql,subqr,nullpl,cgcrpr,coef,jcoef, a wk,iwk,n,u,ubar,rhs,wk(irpnt),nw,iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) c irpnt = irpsav return c c error returns ... c c insuff. real workspace ... 997 ier = -2 call ershow (ier,'cgcr') return c c unimplemented option ... 998 ier = -16 call ershow (ier,'cgcr') return c c generic handler ... 999 ier = ierpp return end subroutine bcgs (suba,subat,subql,subqlt,subqr,subqrt,subadp, a coef,jcoef,n,u,ubar,rhs,wksp,iwksp,iparm,rparm,ier) c dimension u(1), ubar(1), rhs(1), coef(1), jcoef(2), a wksp(1), iwksp(1) dimension iparm(30), rparm(30) external suba, subql, subqr external subat, subqlt, subqrt external subadp c c ... data common blocks c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax c nw = lenr - irpnt + 1 call bcgsw (suba,subql,subqr,coef,jcoef, a wksp,iwksp,n,u,ubar,rhs,wksp(irpnt),nw, a iparm,rparm,ier) irmax = max0 (irmax,irpnt-1+nw) iimax = max0 (iimax,iipnt-1) return end subroutine cgw (suba,subq,coef,jcoef,wfac,jwfac,nn,u,ubar,rhs, a wksp,nw,iparm,rparm,ier) c c cgw drives the conjugate gradient algorithm. c c ... parameters -- c c suba matrix-vector multiplication routine c subq preconditioning routine c n input integer. order of the system (= nn) c u input/output vector. on input, u contains the c initial guess to the solution. on output, it c contains the latest estimate to the solution. c ubar input vector containing the true solution c (optional) c rhs input vector. contains the right hand side c of the matrix problem. c wksp vector used for working space. c nw length of wksp array. if this length is less than c the amount needed, nw will give the needed amount c upon output. c iparm integer vector of length 30. allows user to c specify some integer parameters which affect c the method. c rparm real vector of length 30. allows user to c specify some real parameters which affect c the method. c ier output integer. error flag. c c ... specifications for parameters c external suba, subq integer iparm(30), jcoef(2), jwfac(1) dimension rhs(1), u(1), ubar(1), wksp(1), rparm(30), coef(1), a wfac(1) c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 logical omgadp common / itcom5 / omgadp, omega, alphab, betab, fff, specr c c *** end -- package common c c ... initialize common blocks c ier = 0 n = nn t1 = timer (dummy) iacel = 1 timit = 0.0 digit1 = 0.0 digit2 = 0.0 call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 35 if (level .ge. 2) write (nout,10) 10 format (1x,'cg') c c ... compute workspace base addresses and check for sufficient c ... workspace. c iw1 = 1 iw2 = iw1 + n iw3 = iw2 + n iw4 = iw3 + n nwksp = 3*n + 2*itmax if (nw .ge. nwksp) go to 15 ier = -2 call ershow (ier,'cgw') go to 30 15 continue call nmcalc (coef,jcoef,wfac,jwfac,1,subq,n,rhs,ubar,wksp,ier) if (ier .lt. 0) go to 30 c c ... zero out workspace c call vfill (nwksp,wksp,0.0) c c ... iteration sequence c call itcg (suba,subq,coef,jcoef,wfac,jwfac,n,u,ubar,rhs, a wksp(iw1),wksp(iw2),wksp(iw3),wksp(iw4),ier) c if (ier .lt. 0 .or. ier .eq. 1) go to 25 c c ... method has converged c if (level .ge. 1) write (nout,20) in 20 format (/1x,'cg has converged in ',i5,' iterations' ) c c ... optional error analysis c 25 if (idgts .lt. 0) go to 30 call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wksp,digit1, a digit2,idgts) c c ... set return parameters in iparm and rparm c 30 t2 = timer (dummy) nw = 3*n + 2*in timit = t2 - t1 iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 rparm(9) = omega rparm(10) = alphab rparm(11) = betab rparm(12) = specr c 35 continue if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) c return end subroutine siw (suba,subq,coef,jcoef,wfac,jwfac,nn,u,ubar,rhs, a wksp,nw,iparm,rparm,ier) c c siw drives the chebyshev acceleration algorithm. c c ... parameters -- c c suba matrix-vector multiplication routine c subq preconditioning routine c n input integer. order of the system (= nn) c u input/output vector. on input, u contains the c initial guess to the solution. on output, it c contains the latest estimate to the solution. c ubar input vector containing the true solution c (optional) c rhs input vector. contains the right hand side c of the matrix problem. c wksp vector used for working space. c nw length of wksp array. if this length is less than c the amount needed, nw will give the needed amount c upon output. c iparm integer vector of length 30. allows user to c specify some integer parameters which affect c the method. c rparm real vector of length 30. allows user to c specify some real parameters which affect c the method. c ier output integer. error flag. c c ... specifications for parameters c external suba, subq integer iparm(30), jcoef(2), jwfac(1) dimension rhs(1), u(1), ubar(1), wksp(1), rparm(30), coef(1), a wfac(1) c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 logical omgadp common / itcom5 / omgadp, omega, alphab, betab, fff, specr c c *** end -- package common c c ... initialize common blocks c ier = 0 n = nn t1 = timer (dummy) iacel = 2 timit = 0.0 digit1 = 0.0 digit2 = 0.0 call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 35 if (level .ge. 2) write (nout,10) 10 format (1x,'si') c c ... compute workspace base addresses and check for sufficient c ... workspace. c iw1 = 1 iw2 = iw1 + n iw3 = iw2 + n iw4 = iw3 + n nwksp = 4*n if (nw .ge. nwksp) go to 15 ier = -2 call ershow (ier,'siw') go to 30 15 continue call nmcalc (coef,jcoef,wfac,jwfac,1,subq,n,rhs,ubar,wksp,ier) if (ier .lt. 0) go to 30 c c ... compute an initial rayleigh quotient and adjust emax, emin. c call vfill (n,wksp,1.0) call subq (coef,jcoef,wfac,jwfac,n,wksp,wksp(iw2)) call suba (coef,jcoef,wfac,jwfac,n,wksp(iw2),wksp(iw3)) rq = vdot (n,wksp(iw2),wksp(iw3)) / a vdot (n,wksp(iw2),wksp) rqmax = rq rqmin = rq if (maxadd) emax = amax1 (emax,rqmax) if (minadd) emin = amin1 (emin,rqmin) if (minadd) emin = amax1 (emin,0.0) c c ... zero out workspace c call vfill (nwksp,wksp,0.0) c c ... iteration sequence c call itsi (suba,subq,coef,jcoef,wfac,jwfac,n,u,ubar,rhs, a wksp(iw1),wksp(iw2),wksp(iw3),wksp(iw4),ier) c if (ier .lt. 0 .or. ier .eq. 1) go to 25 c c ... method has converged c if (level .ge. 1) write (nout,20) in 20 format (/1x,'si has converged in ',i5,' iterations ') c c ... optional error analysis c 25 if (idgts .lt. 0) go to 30 call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wksp,digit1, a digit2,idgts) c c ... set return parameters in iparm and rparm c 30 t2 = timer (dummy) nw = 4*n timit = t2 - t1 iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 rparm(9) = omega rparm(10) = alphab rparm(11) = betab rparm(12) = specr c 35 continue if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) c return end subroutine srcgw (suba,subq,subadp,coef,jcoef,wfac,jwfac, a nn,u,ubar,rhs,wksp,nw,iparm,rparm,ier) c c srcgw drives the ssor conjugate gradient algorithm. c c ... parameters -- c c suba matrix-vector multiplication routine c subq preconditioning routine c subadp adpation routine c n input integer. order of the system (= nn) c u input/output vector. on input, u contains the c initial guess to the solution. on output, it c contains the latest estimate to the solution. c ubar input vector containing the true solution c (optional) c rhs input vector. contains the right hand side c of the matrix problem. c wksp vector used for working space. c nw length of wksp array. if this length is less than c the amount needed, nw will give the needed amount c upon output. c iparm integer vector of length 30. allows user to c specify some integer parameters which affect c the method. c rparm real vector of length 30. allows user to c specify some real parameters which affect c the method. c ier output integer. error flag. c c ... specifications for parameters c external suba, subq, subadp integer iparm(30), jcoef(2), jwfac(1) dimension rhs(1), u(1), ubar(1), wksp(1), rparm(30), coef(1), a wfac(1) c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 logical omgadp common / itcom5 / omgadp, omega, alphab, betab, fff, specr c c *** end -- package common c c ... initialize common blocks c ier = 0 n = nn t1 = timer (dummy) iacel = 1 timit = 0.0 digit1 = 0.0 digit2 = 0.0 call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 35 if (level .ge. 2) write (nout,10) 10 format (1x,'srcg') c c ... compute workspace base addresses and check for sufficient c ... workspace. c iw1 = 1 iw2 = iw1 + n iw3 = iw2 + n iw4 = iw3 + n nwksp = 3*n + 2*itmax if (nw .ge. nwksp) go to 15 ier = -2 call ershow (ier,'srcgw') go to 30 15 continue c c ... zero out workspace c call vfill (nwksp,wksp,0.0) c c ... iteration sequence c call itsrcg (suba,subq,subadp,coef,jcoef,wfac,jwfac,n,u,ubar, a rhs,wksp(iw1),wksp(iw2),wksp(iw3),wksp(iw4),ier) c if (ier .lt. 0 .or. ier .eq. 1) go to 25 c c ... method has converged c if (level .ge. 1) write (nout,20) in 20 format (/1x,'srcg has converged in ',i5,' iterations' ) c c ... optional error analysis c 25 if (idgts .lt. 0) go to 30 call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wksp,digit1, a digit2,idgts) c c ... set return parameters in iparm and rparm c 30 t2 = timer (dummy) timit = t2 - t1 nw = 3*n + 2*in iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 rparm(9) = omega rparm(10) = alphab rparm(11) = betab rparm(12) = specr c 35 continue if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) c return end subroutine srsiw (suba,subq,subadp,coef,jcoef,wfac,jwfac, a nn,u,ubar,rhs,wksp,nw,iparm,rparm,ier) c c srsiw drives the ssor chebyshev acceleration algorithm. c c ... parameters -- c c suba matrix-vector multiplication routine c subq preconditioning routine c subadp adpation routine c n input integer. order of the system (= nn) c u input/output vector. on input, u contains the c initial guess to the solution. on output, it c contains the latest estimate to the solution. c ubar input vector containing the true solution c (optional) c rhs input vector. contains the right hand side c of the matrix problem. c wksp vector used for working space. c nw length of wksp array. if this length is less than c the amount needed, nw will give the needed amount c upon output. c iparm integer vector of length 30. allows user to c specify some integer parameters which affect c the method. c rparm real vector of length 30. allows user to c specify some real parameters which affect c the method. c ier output integer. error flag. c c ... specifications for parameters c external suba, subq, subadp integer iparm(30), jcoef(2), jwfac(1) dimension rhs(1), u(1), ubar(1), wksp(1), rparm(30), coef(1), a wfac(1) c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 logical omgadp common / itcom5 / omgadp, omega, alphab, betab, fff, specr c c *** end -- package common c c ... initialize common blocks c ier = 0 n = nn t1 = timer (dummy) iacel = 2 timit = 0.0 digit1 = 0.0 digit2 = 0.0 call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 35 if (level .ge. 2) write (nout,10) 10 format (1x,'srsi') c c ... compute workspace base addresses and check for sufficient c ... workspace. c iw1 = 1 iw2 = iw1 + n iw3 = iw2 + n iw4 = iw3 + n nwksp = 4*n if (nw .ge. nwksp) go to 15 ier = -2 call ershow (ier,'srsiw') go to 30 15 continue c c ... compute an initial rayleigh quotient and adjust emax, emin. c call vfill (n,wksp,1.0) call subq (coef,jcoef,wfac,jwfac,n,wksp,wksp(iw2)) call suba (coef,jcoef,wfac,jwfac,n,wksp(iw2),wksp(iw3)) rq = vdot (n,wksp(iw2),wksp(iw3)) / a vdot (n,wksp(iw2),wksp) rqmax = 1.0 rqmin = rq c c ... adjust emax, emin. c emax = 1.0 maxadd = .false. if (minadd) emin = amin1 (emin,rqmin) if (minadd) emin = amax1 (emin,0.0) c c ... zero out workspace c call vfill (nwksp,wksp,0.0) c c ... iteration sequence c call itsrsi (suba,subq,subadp,coef,jcoef,wfac,jwfac,n,u,ubar, a rhs,wksp(iw1),wksp(iw2),wksp(iw3),wksp(iw4),ier) c if (ier .lt. 0 .or. ier .eq. 1) go to 25 c c ... method has converged c if (level .ge. 1) write (nout,20) in 20 format (/1x,'srsi has converged in ',i5,' iterations ') c c ... optional error analysis c 25 if (idgts .lt. 0) go to 30 call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wksp,digit1, a digit2,idgts) c c ... set return parameters in iparm and rparm c 30 t2 = timer (dummy) timit = t2 - t1 nw = 4*n iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 rparm(9) = omega rparm(10) = alphab rparm(11) = betab rparm(12) = specr c 35 continue if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) c return end subroutine sorw (suba,subq,coef,jcoef,wfac,jwfac,nn, a u,ubar,rhs,wksp,nw,iparm,rparm,ier) c c sorw drives the successive over-relaxation algorithm. c c ... parameters -- c c suba matrix-vector multiplication routine c subq routine to do an sor pass c n input integer. order of the system (= nn) c u input/output vector. on input, u contains the c initial guess to the solution. on output, it c contains the latest estimate to the solution. c ubar input vector containing the true solution c (optional) c rhs input vector. contains the right hand side c of the matrix problem. c wksp vector used for working space. c nw length of wksp array. if this length is less than c the amount needed, nw will give the needed amount c upon output. c iparm integer vector of length 30. allows user to c specify some integer parameters which affect c the method. c rparm real vector of length 30. allows user to c specify some real parameters which affect c the method. c ier output integer. error flag. c c ... specifications for parameters c external suba, subq integer iparm(30), jcoef(2), jwfac(1) dimension rhs(1), u(1), ubar(1), wksp(1), rparm(30), coef(1), a wfac(1) c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 logical omgadp common / itcom5 / omgadp, omega, alphab, betab, fff, specr c c *** end -- package common c c ... initialize common blocks c ier = 0 n = nn t1 = timer (dummy) iacel = 3 timit = 0.0 digit1 = 0.0 digit2 = 0.0 call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 35 if (level .ge. 2) write (nout,10) 10 format (1x,'sor') c c ... compute workspace base addresses and check for sufficient c ... workspace. c nwksp = 2*n if (nw .ge. nwksp) go to 15 ier = -2 call ershow (ier,'sorw') go to 30 c c ... zero out workspace c 15 call vfill (nwksp,wksp,0.0) c c ... iteration sequence c call itsor (subq,coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wksp,ier) c if (ier .lt. 0 .or. ier .eq. 1) go to 25 c c ... method has converged c if (level .ge. 1) write (nout,20) in 20 format (/1x,'sor has converged in ',i5,' iterations' ) c c ... optional error analysis c 25 if (idgts .lt. 0) go to 30 call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wksp,digit1, a digit2,idgts) c c ... set return parameters in iparm and rparm c 30 t2 = timer (dummy) timit = t2 - t1 nw = 2*n iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 rparm(9) = omega rparm(10) = alphab rparm(11) = betab rparm(12) = specr c 35 continue if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) c return end subroutine itcg (suba,subq,coef,jcoef,wfac,jwfac,nn,u,ubar, a rhs,r,p,z,tri,ier) c c itcg does the conjugate gradient iterations. c c ... parameters -- c c suba matrix-vector multiplication routine c subq preconditioning routine c n order of system (= nn) c u current solution c ubar known solution (optional) c rhs right hand side vector c r,p,z workspace vectors of length n each c tri tridiagonal matrix associated with the c eigenvalues of the tridiagonal matrix. c ier error code c c ... specifications for parameters c external suba, subq integer jcoef(2), jwfac(1) dimension coef(1), wfac(1) dimension u(1), ubar(1), rhs(1), r(1), p(1), z(1), tri(1) c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c n = nn in = 0 is = 0 rzdot = 0.0 alpha = 0.0 beta = 0.0 alphao = 0.0 maxadp = maxadd minadp = minadd c c compute r = residual c call suba (coef,jcoef,wfac,jwfac,n,u,r) do 10 i = 1,n 10 r(i) = rhs(i) - r(i) go to 25 c c***** begin iteration loop ***** c 15 do 20 i = 1,n 20 r(i) = r(i) - alpha*z(i) c c ... do preconditioning step -- solve q*z = r for z. c 25 call subq (coef,jcoef,wfac,jwfac,n,r,z) c c ... compute rzdot = (r,z) c dkm1 = rzdot rzdot = 0.0 do 30 i = 1,n 30 rzdot = rzdot + r(i)*z(i) if (rzdot .gt. 0.0) go to 35 ier = -7 call ershow (ier,'itcg') return c c ... determine whether or not to stop. c 35 call pstops (n,r,z,u,ubar,ier) if (level .ge. 2) call iterm (n,u) if (halt .or. ier .lt. 0) return if (in .lt. itmax) go to 40 ier = 1 call ershow (ier,'itcg') zeta = stptst return c c ... compute beta = rzdot/dkm1 c 40 if (in .eq. 0) go to 45 beta = rzdot/dkm1 c c ... compute p = z + beta*p c 45 do 50 i = 1,n 50 p(i) = z(i) + beta*p(i) c c ... compute alpha = rzdot / (p,a*p) c call suba (coef,jcoef,wfac,jwfac,n,p,z) alphao = alpha pap = 0.0 do 55 i = 1,n 55 pap = pap + p(i)*z(i) alpha = rzdot / pap if (pap .gt. 0.0) go to 60 ier = -6 call ershow (ier,'itcg') return c c ... compute latest eigenvalue estimates. c 60 if (maxadp .or. minadp) call chgcon (tri,ier) c c ... compute new solution u = u + alpha*p c do 65 i = 1,n 65 u(i) = u(i) + alpha*p(i) in = in + 1 is = is + 1 go to 15 end subroutine itsi (suba,subq,coef,jcoef,wfac,jwfac,nn,u,ubar, a rhs,r,p,z,wksp,ier) c c itsi does the semi-iterative iterations. c c ... parameters -- c c suba matrix-vector multiplication routine c subq preconditioning routine c n order of system (= nn) c u current solution c ubar known solution (optional) c rhs right hand side vector c r,p,z, workspace vectors of length n each c wksp volatile workspace c ier error code c c ... specifications for parameters c external suba, subq integer jcoef(2), jwfac(1) dimension coef(1), wfac(1) dimension u(1), ubar(1), rhs(1), r(1), p(1), z(1), wksp(1) c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c n = nn in = 0 c c ... new chebychev sequence. c 10 is = 0 alpha = 0.0 beta = 0.0 rho = 1.0 rzdot = 0.0 gamma = 2.0/(emax + emin) sigma = (emax - emin)/(emax + emin) term = sqrt (1.0 - sigma*sigma) rr = (1.0 - term)/(1.0 + term) maxadp = maxadd minadp = minadd c c compute r = residual c call suba (coef,jcoef,wfac,jwfac,n,u,r) do 15 i = 1,n 15 r(i) = rhs(i) - r(i) go to 30 c c***** begin iteration loop ***** c 20 do 25 i = 1,n 25 r(i) = r(i) - alpha*z(i) c c ... do preconditioning step -- solve q*z = r for z. c 30 call subq (coef,jcoef,wfac,jwfac,n,r,z) c c ... compute rzdot = (r,z) c dkm1 = rzdot rzdot = 0.0 do 35 i = 1,n 35 rzdot = rzdot + r(i)*z(i) if (is .eq. 0) dkq = rzdot if (rzdot .ge. 0.0) go to 40 ier = -7 call ershow (ier,'itsi') return c c ... determine whether or not to stop. c 40 call pstops (n,r,z,u,ubar,ier) if (level .ge. 2) call iterm (n,u) if (halt .or. ier .lt. 0) return if (in .lt. itmax) go to 45 ier = 1 call ershow (ier,'itsi') zeta = stptst return c c ... compute iteration parameters. c 45 call parsi c c ... compute p = z + beta*p c ... u = u + alpha*p c do 50 i = 1,n p(i) = z(i) + beta*p(i) u(i) = u(i) + alpha*p(i) 50 continue c c ... adapt on emin and emax c in = in + 1 if (.not. maxadp .and. .not. minadp) go to 55 call chgsi (suba,coef,jcoef,wfac,jwfac,n,z,wksp,icode,ier) if (ier .lt. 0) return c c ... check if new estimates of emax, emin are to be used. c if (icode .eq. 1) go to 10 c c ... estimates of emax, emin are still good. c 55 is = is + 1 call suba (coef,jcoef,wfac,jwfac,n,p,z) go to 20 end subroutine itsrcg (suba,subq,subadp,coef,jcoef,wfac,jwfac, a nn,u,ubar,rhs,r,p,z,tri,ier) c c itsrcg does the ssor conjugate gradient iterations. c c ... parameters -- c c suba matrix-vector multiplication routine c subq preconditioning routine c subadp adpation routine c n order of system (= nn) c u current solution c ubar known solution (optional) c rhs right hand side vector c r,p,z workspace vectors of length n each c tri tridiagonal matrix associated with the c eigenvalues of the tridiagonal matrix. c ier error code c c ... specifications for parameters c external suba, subq, subadp integer jcoef(2), jwfac(1) dimension coef(1), wfac(1) dimension u(1), ubar(1), rhs(1), r(1), p(1), z(1), tri(1) c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c n = nn in = 0 isw = 1 5 is = 0 rzdot = 0.0 alpha = 0.0 beta = 0.0 alphao = 0.0 maxadp = maxadd minadp = minadd c c recompute bnorm c call nmcalc (coef,jcoef,wfac,jwfac,isw,subq,n,rhs,ubar,r,ier) if (ier .lt. 0) return isw = 2 c c compute r = residual c call suba (coef,jcoef,wfac,jwfac,n,u,r) do 10 i = 1,n 10 r(i) = rhs(i) - r(i) go to 25 c c***** begin iteration loop ***** c 15 do 20 i = 1,n 20 r(i) = r(i) - alpha*z(i) c c ... do preconditioning step -- solve q*z = r for z. c 25 call subq (coef,jcoef,wfac,jwfac,n,r,z) c c ... compute rzdot = (r,z) c dkm1 = rzdot rzdot = 0.0 do 30 i = 1,n 30 rzdot = rzdot + r(i)*z(i) if (rzdot .ge. 0.0) go to 35 ier = -7 call ershow (ier,'itsrcg') return c c ... determine whether or not to stop. c 35 call pstops (n,r,z,u,ubar,ier) if (level .ge. 2) call iterm (n,u) if (halt .or. ier .lt. 0) return if (in .lt. itmax) go to 40 ier = 1 call ershow (ier,'itsrcg') zeta = stptst return c c ... compute beta = rzdot/dkm1 c 40 if (is .eq. 0) go to 45 beta = rzdot/dkm1 c c ... compute p = z + beta*p c 45 do 50 i = 1,n 50 p(i) = z(i) + beta*p(i) c c ... compute alpha = rzdot / (p,a*p) c call suba (coef,jcoef,wfac,jwfac,n,p,z) alphao = alpha pap = 0.0 do 55 i = 1,n 55 pap = pap + p(i)*z(i) alpha = rzdot / pap if (pap .ge. 0.0) go to 60 ier = -6 call ershow (ier,'itsrcg') return c c ... compute latest eigenvalue estimates. c 60 if (minadp) call chgcon (tri,ier) c c ... compute new solution u = u + alpha*p c do 65 i = 1,n 65 u(i) = u(i) + alpha*p(i) is = is + 1 in = in + 1 call ssorad (subadp,coef,jcoef,wfac,jwfac,n,p,z,r,icode) if (icode .eq. 0) go to 15 go to 5 end subroutine itsrsi (suba,subq,subadp,coef,jcoef,wfac,jwfac, a nn,u,ubar,rhs,r,p,z,wksp,ier) c c itsrsi does the ssor semi-iterative iterations. c c ... parameters -- c c suba matrix-vector multiplication routine c subq preconditioning routine c subadp adpation routine c n order of system (= nn) c u current solution c ubar known solution (optional) c rhs right hand side vector c r,p,z, workspace vectors of length n each c wksp volatile workspace c ier error code c c ... specifications for parameters c external suba, subq, subadp integer jcoef(2), jwfac(1) dimension coef(1), wfac(1) dimension u(1), ubar(1), rhs(1), r(1), p(1), z(1), wksp(1) c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c n = nn c in = 0 isw = 1 c c recompute bnorm c 5 call nmcalc (coef,jcoef,wfac,jwfac,isw,subq,n,rhs,ubar,r,ier) if (ier .lt. 0) return isw = 2 c c ... update rayleigh quotient . c if (in .eq. 0) go to 10 call subq (coef,jcoef,wfac,jwfac,n,p,z) call suba (coef,jcoef,wfac,jwfac,n,z,r) rq = vdot (n,z,r) / vdot (n,z,p) rqmin = rq if (minadd) emin = rqmin c c ... new chebychev sequence. c 10 is = 0 alpha = 0.0 beta = 0.0 rho = 1.0 rzdot = 0.0 gamma = 2.0/(emax + emin) sigma = (emax - emin)/(emax + emin) term = sqrt (1.0 - sigma*sigma) rr = (1.0 - term)/(1.0 + term) minadp = minadd c c compute r = residual c call suba (coef,jcoef,wfac,jwfac,n,u,r) do 15 i = 1,n 15 r(i) = rhs(i) - r(i) go to 30 c c***** begin iteration loop ***** c 20 do 25 i = 1,n 25 r(i) = r(i) - alpha*z(i) c c ... do preconditioning step -- solve q*z = r for z. c 30 call subq (coef,jcoef,wfac,jwfac,n,r,z) c c ... compute rzdot = (r,z) c dkm1 = rzdot rzdot = 0.0 do 35 i = 1,n 35 rzdot = rzdot + r(i)*z(i) if (is .eq. 0) dkq = rzdot if (rzdot .ge. 0.0) go to 40 ier = -7 call ershow (ier,'itsrsi') return c c ... determine whether or not to stop. c 40 call pstops (n,r,z,u,ubar,ier) if (level .ge. 2) call iterm (n,u) if (halt .or. ier .lt. 0) return if (in .lt. itmax) go to 45 ier = 1 call ershow (ier,'itsrsi') zeta = stptst return c c ... compute iteration parameters. c 45 call parsi c c ... compute p = z + beta*p c ... u = u + alpha*p c do 50 i = 1,n p(i) = z(i) + beta*p(i) u(i) = u(i) + alpha*p(i) 50 continue c c ... adapt on emin and emax c in = in + 1 if (.not. minadp) go to 55 call chgsi (suba,coef,jcoef,wfac,jwfac,n,z,wksp,icode,ier) if (ier .lt. 0) return c c ... check if new estimates of emax, emin are to be used. c if (icode .eq. 1) go to 10 c c ... estimates of emax, emin are still good. c 55 is = is + 1 call suba (coef,jcoef,wfac,jwfac,n,p,z) call ssorad (subadp,coef,jcoef,wfac,jwfac,n,p,z,r,icode) if (icode .eq. 0) go to 20 go to 5 end subroutine itsor (subq,coef,jcoef,wfac,jwfac,nn,u,ubar, a rhs,wksp,ier) c c ... itsor does the sor iterations c c ... parameters -- c c subq routine to do an sor pass c n size of system c rhs right hand side c u solution vector c ubar known solution (optional) c wksp workspace vector of length 2*n c c ... specifications for parameters c integer jcoef(2), jwfac(1) dimension coef(1), wfac(1) dimension rhs(1), u(1), ubar(1), wksp(1) external subq logical change c c *** begin -- itpack common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 logical omgadp common / itcom5 / omgadp, omega, alphab, betab, fff, specr c c *** end -- itpack common c c c ... set initial parameters not already set c n = nn in = 0 is = 0 ip = 0 iss = 0 iphat = 2 delnnm = 0.0 delsnm = 0.0 call sorstp (n,u,ubar,0.0,0.0) change = omgadp ib2 = n + 1 if (.not. omgadp) go to 10 omegap = omega omega = 1.0 ipstar = 4 if (omegap .le. 1.0) change = .false. c c ... start iterating. c 10 do 55 iter = 1,itmax+1 c c ... output intermediate information c if (level .ge. 2) call iterm (n,u) if (halt) return if (.not. change) go to 15 change = .false. is = is + 1 ip = 0 iss = 0 omega = amin1 (omegap,tau(is)) iphat = max0 ( 3 , int ( (omega-1.0)/(2.0-omega) ) ) ipstar = ipstr (omega) c c ... compute u (in + 1) and norm of del(s,p) c 15 delsnm = delnnm spcrm1 = specr do 20 i = 1,n 20 wksp(i) = rhs(i) call subq (coef,jcoef,wfac,jwfac,n,u,wksp,wksp(ib2)) do 25 i = 1,n 25 wksp(i) = u(i) - wksp(n+i) sum = 0.0 do 28 i = 1,n 28 sum = sum + wksp(i)*wksp(i) delnnm = sqrt (sum) do 30 i = 1,n 30 u(i) = wksp(i+n) if (delnnm .eq. 0.0) go to 35 if (in .ne. 0) specr = delnnm / delsnm if (ip .lt. iphat) go to 50 c c ... stopping test, set h c if (specr .ge. 1.0) go to 50 if (.not. (specr .gt. (omega - 1.0))) go to 35 h = specr go to 40 35 iss = iss + 1 h = omega - 1.0 c c ... perform stopping test. c 40 continue dnrm = delnnm**2 call sorstp (n,u,ubar,dnrm,h) if (halt) go to 50 c c ... method has not converged yet, test for changing omega c if (.not. omgadp) go to 50 if (ip .lt. ipstar) go to 50 if (omega .gt. 1.0) go to 45 emax = sqrt (abs (specr)) omegap = 2.0 / (1.0 + sqrt (abs (1.0 - specr))) change = .true. go to 50 45 if (iss .ne. 0) go to 50 if (specr .le. (omega - 1.0)**fff) go to 50 if ((specr + 0.00005) .le. spcrm1) go to 50 c c ... change parameters c emax = (specr + omega - 1.0) / (sqrt (abs (specr))*omega) omegap = 2.0 / (1.0 + sqrt (abs (1.0 - emax*emax))) change = .true. c 50 ip = ip + 1 in = in + 1 55 continue ier = 1 in = in - 1 call ershow (ier,'itsor') zeta = stptst return end subroutine basicw (suba,subql,subqr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk,nw, a iparm,rparm,ier) c c code to run the basic (unaccelerated) iterative method, c with preconditioning. that is, it applies the fixed point method c to the preconditioned system. c two-sided preconditioning is efficiently implemented. c dimension u(1), ubar(1), rhs(1), wk(1), coef(1), jcoef(2), a wfac(1), jwfac(1) logical iql, iqr external suba, subql, subqr dimension iparm(30), rparm(30) c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom4 / keygs, srelpr, keyzer common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c c preliminary calculations c iacel = 0 ier = 0 nwusd = 0 t1 = timer (dummy) call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 996 if (level .ge. 2) write (nout,496) 496 format (' basic') c use knowledge about spectrum to optimally extrapolate ... extrap = (emax+emin)/2.0 iql = iqlr .eq. 1 .or. iqlr .eq. 3 iqr = iqlr .eq. 2 .or. iqlr .eq. 3 c c initialize the stopping test ... c call inithv (0) zthave = .true. nwpstp = nw call pstop (0,suba,subql,subqr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,xxx,xxx,xxx, a wk,nwpstp,ier) nwusd = max0(nwusd,nwpstp) if (ier .lt. 0) go to 735 c c bust up workspace ... c izt = 1 iv1 = izt + n iwfree = iv1 + n if (iqlr .eq. 0) iwfree = iv1 nwusd = max0(nwusd,iwfree-1) c c check the memory usage ... c if (nwusd .gt. nw) go to 999 c c do preliminary calculations ... c in = 0 is = 0 go to (151,152,153,154),iqlr + 1 c 151 call suba (coef,jcoef,wfac,jwfac,n,u,wk(izt)) call vexopy (n,wk(izt),rhs,wk(izt),2) go to 10 c 152 call suba (coef,jcoef,wfac,jwfac,n,u,wk(iv1)) call vexopy (n,wk(iv1),rhs,wk(iv1),2) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(izt)) go to 10 c 153 call suba (coef,jcoef,wfac,jwfac,n,u,wk(iv1)) call vexopy (n,wk(iv1),rhs,wk(iv1),2) call subqr (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(izt)) go to 10 c 154 call suba (coef,jcoef,wfac,jwfac,n,u,wk(izt)) call vexopy (n,wk(izt),rhs,wk(izt),2) call subql (coef,jcoef,wfac,jwfac,n,wk(izt),wk(iv1)) call subqr (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(izt)) go to 10 c c-----------------------begin iteration loop---------------------- c c determine whether or not to stop -- c 10 call inithv (1) nwpstp = nw - (iwfree-1) call pstop (1,suba,subql,subqr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,xxx,xxx,wk(izt), a wk(iwfree),nwpstp,ier) nwusd = max0(nwusd,nwpstp+iwfree-1) if (level .ge. 2) call iterm (n,u) if (halt .or. in .ge. itmax .or. ier .lt. 0) go to 900 c c form iterate ... c call vtriad (n,u,u,1e0/extrap,wk(izt),1) c c form residuals, as necessary ... c go to (161,162,163,164),iqlr + 1 c 161 call suba (coef,jcoef,wfac,jwfac,n,u,wk(izt)) call vexopy (n,wk(izt),rhs,wk(izt),2) go to 110 c 162 call suba (coef,jcoef,wfac,jwfac,n,u,wk(iv1)) call vexopy (n,wk(iv1),rhs,wk(iv1),2) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(izt)) go to 110 c 163 call suba (coef,jcoef,wfac,jwfac,n,u,wk(iv1)) call vexopy (n,wk(iv1),rhs,wk(iv1),2) call subqr (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(izt)) go to 110 c 164 call suba (coef,jcoef,wfac,jwfac,n,u,wk(izt)) call vexopy (n,wk(izt),rhs,wk(izt),2) call subql (coef,jcoef,wfac,jwfac,n,wk(izt),wk(iv1)) call subqr (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(izt)) go to 110 c c proceed to next iteration c 110 in = in + 1 is = is + 1 go to 10 c c--------------------------------finish up------------------------- c 900 if (halt) go to 715 ier = 1 call ershow (ier,'basicw') zeta = stptst go to 725 715 continue if (level .ge. 1) write (nout,720) in 720 format (/' basic method converged in ',i5,' iterations.') c 725 continue if (idgts .lt. 0) go to 730 call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wk, a digit1,digit2,idgts) 730 t2 = timer (dummy) timit = t2 - t1 iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 735 continue if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) nw = nwusd return c c error returns c 996 call ershow (ier,'basicw') go to 735 c c insuff. real wksp ... 999 ier = -2 call ershow (ier,'basicw') go to 735 c end subroutine mew (suba,subql,subqr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk,nw, a iparm,rparm,ier) c c this routine runs the minimal error algorithm of fridman. c the reference is: v. m. fridman, "the method of minimum iterations c ...", ussr computational math. and math. phys., vol. 2, 1962, c pp. 362-3. c c two-sided preconditioning is implemented. the iteration matrix c should be symmetric for this algorithm to work. c c we have introduced periodic scaling of the direction vectors, to c prevent overflow. c dimension u(1), ubar(1), rhs(1), wk(1), coef(1), jcoef(2), a wfac(1), jwfac(1) external suba, subql, subqr dimension iparm(30), rparm(30) logical iql, iqr c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom4 / keygs, srelpr, keyzer common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c c the following indexing functions are used to access the old c direction vectors -- c indp(i) = ip + mod(i,2)*n indpt(i) = ipt + mod(i,2)*n c c various preliminary calculations. c dot = 0e0 nwusd = 0 ier = 0 iacel = 4 t1 = timer (dummy) call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 997 if (level .ge. 2) write (nout,496) 496 format (' me') iql = iqlr .eq. 1 .or. iqlr .eq. 3 iqr = iqlr .eq. 2 .or. iqlr .eq. 3 c c initialize the stopping test ... c call inithv (0) zhave = .true. nwpstp = nw call pstop (0,suba,subql,subqr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,xxx,xxx,xxx, a wk,nwpstp,ier) nwusd = max0(nwusd,nwpstp) if (ier .lt. 0) go to 730 c c memory allocation, etc. c c nomenclature -- r -- residual of the original system. c z -- inv(ql)*r c zt -- inv(qr)*z c ip = 1 ipt = ip + 2*n iz = ipt + 2*n ir = iz + n iv1 = ir + n if (.not. rcalp) iv1 = ir izt = iv1 + n iv2 = izt + n if (.not. ztcalp) iv2 = izt iqlap = iv1 iqrlap = iv2 iwfree = iv2 + n c c note that memory usage has been overlapped whenever possible, c in order to save space. c nwusd = max0(nwusd,iwfree-1) c c check the memory usage -- c if (nwusd .gt. nw) go to 999 c in = 0 is = 0 rhave = rcalp zthave = ztcalp c c perform first-iterate calculations c call suba (coef,jcoef,wfac,jwfac,n,u,wk(ir)) call vexopy (n,wk(ir),rhs,wk(ir),2) call subql (coef,jcoef,wfac,jwfac,n,wk(ir),wk(iz)) call subqr (coef,jcoef,wfac,jwfac,n,wk(iz),wk(izt)) c c---------------------------- begin iteration loop ---------------------------- c c determine whether or not to stop -- c note that we have already done the calculations necessary so that suba c and subql are not actually used by pstop. c 10 call inithv (1) nwpstp = nw - (iwfree-1) call pstop (1,suba,subql,subqr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk(ir),wk(iz),wk(izt), a wk(iwfree),nwpstp,ier) nwusd = max0(nwusd,nwpstp+iwfree-1) if (level .ge. 2) call iterm (n,u) if (halt .or. in .ge. itmax .or. ier .lt. 0) go to 900 c c compute p(n), the direction vector, and inv(qr)*p(n) (=pt(n)). c scal = 1e0 c c first, case of in .eq. 0 c if (in .ne. 0) go to 100 toplam = vdot (n,wk(iz),wk(iz)) call suba (coef,jcoef,wfac,jwfac,n,wk(izt),wk(iv1)) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(indp(in))) call subqr (coef,jcoef,wfac,jwfac,n,wk(indp(in)),wk(indpt(in))) go to 120 c c case in .gt. 0 c 100 toplam = vdot (n,wk(indp(in-1)),wk(iz)) bet1 = - vdot (n,wk(indp(in-1)),wk(iqlap)) / dot if (in .ne. 1) go to 110 c c case in .eq. 1 c call vtriad (n,wk(indp(in)),wk(iqlap),bet1,wk(indp(in-1)),1) call vtriad (n,wk(indpt(in)),wk(iqrlap),bet1,wk(indpt(in-1)),1) go to 120 c c case in .gt. 1 c 110 bet2 = - vdot (n,wk(indp(in-2)),wk(iqlap)) / dotold call vtriad (n,wk(indp(in)), wk(iqlap), bet2,wk(indp(in-2)), 1) call vtriad (n,wk(indpt(in)),wk(iqrlap),bet2,wk(indpt(in-2)),1) call vtriad (n,wk(indp(in)), wk(indp(in)), bet1,wk(indp(in-1)), 1) call vtriad (n,wk(indpt(in)),wk(indpt(in)),bet1,wk(indpt(in-1)),1) c c at this point, we are finished forming the latest direction vector. c we proceed to calculate lambda and update the solution and the c residual. c 120 dotold = dot dot = vdot (n,wk(indp(in)),wk(indp(in))) c if (dot .lt. srelpr) go to 998 c c scale direction vector if necessary ... if (dot.lt.srelpr**2 .or. dot.gt.(1e0/srelpr**2)) then scal = sqrt(dot) call vtriad (n,wk(indp(in)), xxx,1e0/scal,wk(indp(in)), 2) call vtriad (n,wk(indpt(in)),xxx,1e0/scal,wk(indpt(in)),2) dot = 1e0 end if c 124 vlamda = toplam / dot / scal c c u -- c call vtriad (n,u,u,vlamda,wk(indpt(in)),1) c c r -- c call suba (coef,jcoef,wfac,jwfac,n,wk(indpt(in)),wk(iv2)) if (rhave) call vtriad (n,wk(ir),wk(ir),-vlamda,wk(iv2),1) c c z -- c call subql (coef,jcoef,wfac,jwfac,n,wk(iv2),wk(iqlap)) call vtriad (n,wk(iz),wk(iz),-vlamda,wk(iqlap),1) c c zt -- c call subqr (coef,jcoef,wfac,jwfac,n,wk(iqlap),wk(iqrlap)) if (zthave) call vtriad (n,wk(izt),wk(izt),-vlamda,wk(iqrlap),1) c c proceed to next iteration c in = in + 1 is = is + 1 go to 10 c c-------------------------------finish up----------------------------- c 900 if (halt) go to 715 ier = 1 call ershow (ier,'mew') zeta = stptst go to 725 715 continue if (level .ge. 1) write (nout,720) in 720 format (/' me converged in ',i5,' iterations.') c 725 continue if (idgts .lt. 0) go to 730 call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wk,digit1, a digit2,idgts) 730 t2 = timer (dummy) timit = t2 - t1 iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 735 continue if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) nw = nwusd return c c error returns c 997 call ershow (ier,'mew') go to 735 c 998 ier = -15 call ershow (ier,'mew') go to 725 c 999 ier = -2 call ershow (ier,'mew') go to 735 c end subroutine cgnrw (suba,subat,subql,subqlt,subqr,subqrt, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk,nw,iparm,rparm,ier) c c code to run the conjugate gradient algorithm on the normal equations. c in this variant, the residual of the original system is minimized c per iteration. currently, only left preconditioning is implemented. c dimension u(1), ubar(1), rhs(1), wk(1), coef(1), jcoef(2), a wfac(1), jwfac(1) external suba, subat, subql, subqlt, subqr, subqrt dimension iparm(30), rparm(30) logical iql, iqr c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom4 / keygs, srelpr, keyzer common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c c preliminary calculations. c nwusd = 0 ier = 0 iacel = 5 t1 = timer (dummy) call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 997 iql = iqlr .eq. 1 .or. iqlr .eq. 3 iqr = iqlr .eq. 2 .or. iqlr .eq. 3 if (iqr) go to 995 if (level .ge. 2) write (nout,496) 496 format (' cgnr') maxadp = maxadd minadp = minadd alphao = 0e0 alpha = 0e0 beta = 0e0 c c initialize the stopping test ... c call inithv (0) zthave = .true. nwpstp = nw call pstop (0,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,xxx,xxx, a wk,nwpstp,ier) nwusd = max0(nwusd,nwpstp) if (ier .lt. 0) go to 730 c itri = 1 ip = itri if ( .not. (maxadd .or. minadd) ) go to 850 ip = itri + 2*itmax call vfill (2*itmax,wk(itri),0e0) 850 ir = ip + n iv1 = ir + n iv2 = iv1 + n nwusd = max0(nwusd,iv2-1+n) c c check the memory usage -- c if (nwusd .gt. nw) go to 999 c in = 0 is = 0 call suba (coef,jcoef,wfac,jwfac,n,u,wk(iv1)) call vexopy (n,wk(iv1),rhs,wk(iv1),2) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(ir)) c c--------------------------begin iteration loop------------------------ c c determine whether or not to stop -- c 10 call inithv (1) nwpstp = nw - (iv1-1) call pstop (1,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,xxx,wk(ir), a wk(iv1),nwpstp,ier) nwusd = max0(nwusd,nwpstp+iv1-1) if (level .ge. 2) call iterm (n,u) if (halt .or. in .ge. itmax .or. ier .lt. 0) go to 900 c if (in .ne. 0) go to 110 c c perform first-iterate calculations c call subqlt (coef,jcoef,wfac,jwfac,n,wk(ir),wk(iv1)) call subat (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(ip)) ard = vdot (n,wk(ip),wk(ip)) go to 111 c c perform subsequent-iterate calculations c 110 ardold = ard c if (abs(ardold) .lt. srelpr) go to 996 call subqlt (coef,jcoef,wfac,jwfac,n,wk(ir),wk(iv1)) call subat (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iv2)) ard = vdot (n,wk(iv2),wk(iv2)) an = ard/ardold call vtriad (n,wk(ip),wk(iv2),an,wk(ip),1) beta = an c c proceed to form the iterate. c 111 call suba (coef,jcoef,wfac,jwfac,n,wk(ip),wk(iv1)) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iv2)) pap = vdot (n,wk(iv2),wk(iv2)) if (abs(pap) .lt. srelpr**2) go to 998 vlamda = ard/pap c call vtriad (n,u,u,vlamda,wk(ip),1) call vtriad (n,wk(ir),wk(ir),-vlamda,wk(iv2),1) c c update eigenvalue estimates c alphao = alpha alpha = vlamda if (maxadp .or. minadp) call chgcon (wk(itri),ier) if (ier .lt. 0) go to 725 c c proceed to next iteration c in = in + 1 is = is + 1 go to 10 c c-------------------------------finish up--------------------------- c 900 if (halt) go to 715 ier = 1 call ershow (ier,'cgnrw') zeta = stptst go to 725 715 continue if (level .ge. 1) write (nout,720) in 720 format (/' cgnr converged in ',i5,' iterations.') c 725 continue if (idgts .lt. 0) go to 730 call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wk,digit1, a digit2,idgts) 730 t2 = timer (dummy) timit = t2 - t1 iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 735 continue if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) nw = nwusd return c c error returns c 995 ier = -16 call ershow (ier,'cgnrw') return c 996 ier = -13 call ershow (ier,'cgnrw') go to 725 c 997 call ershow (ier,'cgnrw') go to 735 c 998 ier = -15 call ershow (ier,'cgnrw') go to 725 c 999 ier = -2 call ershow (ier,'cgnrw') go to 735 c end subroutine lsqrw (suba,subat,subql,subqlt,subqr,subqrt, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk,nw,iparm,rparm,ier) c c code to run the lsqr algorithm. the algorithm is taken from c the article 'lsqr -- an algorithm for sparse linear equations c and sparse least squares.' c by c. c. paige amd m. a. saunders, in acm transactions on c mathematical software, vol. 8, no. 1, march 1982, pp. 43-71. c the iterates produced are the same as those of cgnr, in exact c arithmetic, but this should be more stable. only left c preconditioning is currently implemented. c dimension u(1), ubar(1), rhs(1), wk(1), coef(1), jcoef(2), a wfac(1), jwfac(1) integer vect1, vect2, os external suba, subat, subql, subqlt, subqr, subqrt dimension iparm(30), rparm(30) logical iql, iqr c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom4 / keygs, srelpr, keyzer common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c c preliminary calculations. c nwusd = 0 ier = 0 iacel = 6 t1 = timer (dummy) call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 996 iql = iqlr .eq. 1 .or. iqlr .eq. 3 iqr = iqlr .eq. 2 .or. iqlr .eq. 3 if (iqr) go to 995 if (level .ge. 2) write (nout,496) 496 format (' lsqr') c c initialize the stopping test ... c call inithv (0) zdhav = .true. nwpstp = nw call pstop (0,suba,subql,subqr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,xxx,xxx,xxx, a wk,nwpstp,ier) nwusd = max0(nwusd,nwpstp) if (ier .lt. 0) go to 735 c c ... associated integer variables. c iu = 1 iv = iu + n iw = iv + n iv1 = iw + n iv2 = iv1 + n nwusd = max0(nwusd,iv2-1+n) c c check the memory usage -- c if (nwusd .gt. nw) go to 999 c in = 0 is = 0 c c now, perform first-iterate calculations c call suba (coef,jcoef,wfac,jwfac,n,u,wk(iv1)) call vexopy (n,wk(iv1),rhs,wk(iv1),2) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iv2)) beta = sqrt(vdot (n,wk(iv2),wk(iv2))) if (abs(beta) .lt. srelpr) go to 997 call vtriad (n,wk(iu),xxx,1.0/beta,wk(iv2),2) c call subqlt (coef,jcoef,wfac,jwfac,n,wk(iu),wk(iv1)) call subat (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iv2)) alpha = sqrt(vdot (n,wk(iv2),wk(iv2))) if (abs(alpha) .lt. srelpr) go to 997 call vtriad (n,wk(iv),xxx,1.0/alpha,wk(iv2),2) c call vcopy (n,wk(iv),wk(iw)) phibar = beta rhobar = alpha zdot = phibar**2 c if u(0) is zero, then the norm of u(n) can be calculated for free. c otherwise, i don't know. c c---------------------------begin iteration loop--------------------- c c determine whether or not to stop -- c 10 call inithv (1) zdhav = .true. nwpstp = nw - (iv1-1) call pstop (1,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,xxx,xxx, a wk(iv1),nwpstp,ier) nwusd = max0(nwusd,nwpstp+iv1-1) if (level .ge. 2) call iterm (n,u) if (halt .or. in .ge. itmax .or. ier .lt. 0) go to 900 c c ... compute the lanczos vectors. c call suba (coef,jcoef,wfac,jwfac,n,wk(iv),wk(iv1)) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iv2)) call vtriad (n,wk(iu),wk(iv2),-alpha,wk(iu),1) beta = sqrt(vdot (n,wk(iu),wk(iu))) if (abs(beta) .lt. srelpr) go to 997 call vtriad (n,wk(iu),xxx,1.0/beta,wk(iu),2) c call subqlt (coef,jcoef,wfac,jwfac,n,wk(iu),wk(iv1)) call subat (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iv2)) call vtriad (n,wk(iv),wk(iv2),-beta,wk(iv),1) alpha = sqrt(vdot (n,wk(iv),wk(iv))) if (abs(alpha) .lt. srelpr) go to 997 call vtriad (n,wk(iv),xxx,1.0/alpha,wk(iv),2) c c continue by calculating various scalars. c rho = sqrt(rhobar**2+beta**2) if (rho .lt. srelpr) go to 998 c = rhobar/rho s = beta/rho theta = s*alpha rhobar = -c*alpha phi = c*phibar phibar = s*phibar c c now generate the new u and w vectors. c call vtriad (n,u,u,phi/rho,wk(iw),1) call vtriad (n,wk(iw),wk(iv),-theta/rho,wk(iw),1) c c proceed to next iteration c zdot = phibar**2 in = in + 1 is = is + 1 go to 10 c c-----------------------------finish up------------------------- c 900 if (halt) go to 715 ier = 1 call ershow (ier,'lsqrw') zeta = stptst go to 725 715 continue if (level .ge. 1) write (nout,720) in 720 format (/' lsqr converged in ',i5,' iterations.') c 725 continue if (idgts .lt. 0) go to 730 call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wk,digit1, a digit2,idgts) 730 t2 = timer (dummy) timit = t2 - t1 iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 735 continue if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) nw = nwusd return c c error returns c 995 ier = -16 call ershow (ier,'lsqrw') return c 996 call ershow (ier,'lsqrw') go to 735 c 997 ier = -13 call ershow (ier,'lsqrw') go to 725 c 998 ier = -14 call ershow (ier,'lsqrw') go to 725 c 999 ier = -2 call ershow (ier,'lsqrw') go to 735 c end subroutine odirw (suba,subql,subqr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk,nw,iparm,rparm,ier) c c this routine implements orthodir with truncation and c restarting and with 2-sided preconditioning. the effective value c of the z matrix is (inv(ql)*a*inv(qr))**t. c dimension u(1), ubar(1), rhs(1), wk(1), coef(1), jcoef(2), a wfac(1), jwfac(1) logical iql, iqr external suba, subql, subqr dimension iparm(30), rparm(30) c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom4 / keygs, srelpr, keyzer common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c c the following indexing functions are used to access the old c direction vectors and dot products -- c indpt(i) = ipt + mod(i,nv)*n indqap(i) = iqapt + mod(i,nv)*n inddot(i) = idot + mod(i,nv) c c various preliminary calculations. c c nwusd = 0 ier = 0 t1 = timer (dummy) call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 997 write (nout,496) 496 format (' orthodir') iacel = 7 iql = iqlr .eq. 1 .or. iqlr .eq. 3 iqr = iqlr .eq. 2 .or. iqlr .eq. 3 c c initialize the stopping test ... c call inithv (0) zhave = .true. zthave = .true. nwpstp = nw call pstop (0,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,xxx,xxx, a wk,nwpstp,ier) nwusd = max0(nwusd,nwpstp) if (ier .lt. 0) go to 997 c c memory allocation, etc. c nv = max0(1,min0(ns1,ns2-1)) ipt = 1 iqapt = ipt + nv*n idot = iqapt + nv*n iz = idot + nv izt = iz + n if (.not. iqr) izt = iz isv = izt + n iv1 = isv + n iv2 = iv1 + n c if (iql) nwusd = max0(nwusd,iv2-1+n) if (.not. iql) nwusd = max0(nwusd,iv1-1+n) c c check the memory usage -- c if (nwusd .gt. nw) go to 999 c in = 0 is = 0 c c perform first-iterate calculations c if (iql) go to 122 call suba (coef,jcoef,wfac,jwfac,n,u,wk(iz)) call vexopy (n,wk(iz),rhs,wk(iz),2) go to 121 122 call suba (coef,jcoef,wfac,jwfac,n,u,wk(iv1)) call vexopy (n,wk(iv1),rhs,wk(iv1),2) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iz)) 121 if (iqr) call subqr (coef,jcoef,wfac,jwfac,n,wk(iz),wk(izt)) c if (.not. iqr) zdot = vdot (n,wk(iz),wk(iz)) c c ========================= begin iteration loop ==================== c c c determine whether or not to stop ... c 10 call inithv (1) nwpstp = nw - (iv1-1) call pstop (1,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,wk(iz),wk(izt), a wk(iv1),nwpstp,ier) nwusd = max0(nwusd,nwpstp+iv1-1) if (level .ge. 2) call iterm (n,u) if (halt .or. in .ge. itmax .or. ier .lt. 0) go to 900 c c proceed to calculate the direction vectors. c c first, case of no old p vectors. c np = min0(mod(in,ns2),ns1) if (np .ne. 0) go to 100 c if (is .eq. 0) call vcopy (n,wk(izt),wk(indpt(in))) if (is .ne. 0) call vcopy (n,wk(isv),wk(indpt(in))) if (iql) go to 123 call suba (coef,jcoef,wfac,jwfac,n,wk(indpt(in)),wk(indqap(in))) go to 120 123 call suba (coef,jcoef,wfac,jwfac,n,wk(indpt(in)),wk(iv1)) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(indqap(in))) go to 120 c c case of at least one old p vector. c this case is handled in a tricky way, to optimize the workspace. c 100 if (iql) go to 124 call suba (coef,jcoef,wfac,jwfac,n,wk(isv),wk(iv1)) go to 125 124 call suba (coef,jcoef,wfac,jwfac,n,wk(isv),wk(iv2)) call subql (coef,jcoef,wfac,jwfac,n,wk(iv2),wk(iv1)) c 125 top = vdot (n,wk(indqap(in-np)),wk(iv1)) bet = - top / wk(inddot(in-np)) call vtriad (n,wk(indpt(in)),wk(isv),bet,wk(indpt(in-np)),1) call vtriad (n,wk(indqap(in)),wk(iv1),bet,wk(indqap(in-np)),1) ibegin = in - np + 1 iend = in - 1 if (ibegin .gt. iend) go to 613 do 612 i = ibegin,iend top = vdot (n,wk(indqap(i)),wk(iv1)) bet = - top / wk(inddot(i)) call vtriad (n,wk(indpt(in)),wk(indpt(in)),bet,wk(indpt(i)),1) 612 call vtriad (n,wk(indqap(in)),wk(indqap(in)),bet,wk(indqap(i)),1) 613 continue c c periodically scale the direction vector, to prevent overflow ... c 120 continue dot = vdot (n,wk(indqap(in)),wk(indqap(in))) if (dot.lt.srelpr**2 .or. dot.gt.(1e0/srelpr**2)) then call vtriad (n,wk(indpt(in)), xxx,1e0/dot,wk(indpt(in)), 2) call vtriad (n,wk(indqap(in)),xxx,1e0/dot,wk(indqap(in)),2) dot = 1e0 end if c c at this point, we are finished forming the latest direction vector. c we proceed to calculate lambda and update the solution and c the residuals. c 129 continue c if (abs(dot) .lt. srelpr) go to 998 wk(inddot(in)) = dot top = vdot (n,wk(indqap(in)),wk(iz)) vlamda = top / dot c the following commented-out line is unstable. but it can be fixed. c if (.not. iqr) zdot = zdot - 2*vlamda*top + vlamda**2*dot c c u -- c call vtriad (n,u,u,vlamda,wk(indpt(in)),1) c c z -- c call vtriad (n,wk(iz),wk(iz),-vlamda,wk(indqap(in)),1) c c zt -- c call subqr (coef,jcoef,wfac,jwfac,n,wk(indqap(in)),wk(isv)) if (iqr) call vtriad (n,wk(izt),wk(izt),-vlamda,wk(isv),1) c c proceed to next iteration c in = in + 1 is = is + 1 if (is .eq. ns2) is = 0 go to 10 c c-------------------------------finish up---------------------------- c 900 if (.not. halt) go to 996 if (level .ge. 1) write (nout,720) in 720 format (/' orthodir converged in ',i5,' iterations.') c 725 if (idgts .ge. 0) a call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wk, a digit1,digit2,idgts) c pack revised parms into iparm, rparm ... t2 = timer (dummy) timit = t2 - t1 iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 735 if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) nw = nwusd return c c c error returns c c no convergence ... 996 ier = 1 call ershow (ier,'odirw') zeta = stptst go to 725 c c generic error handler ... 997 call ershow (ier,'odirw') go to 735 c c breakdown ... 998 ier = -15 call ershow (ier,'odirw') go to 725 c c insufficient real wksp ... 999 ier = -2 call ershow (ier,'odirw') go to 735 end subroutine ominw (suba,subql,subqr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk,nw,iparm,rparm,ier) c c this routine implements the truncated/restarted orthomin algorithm. c eigenvalue estimation is implemented. c note that this also implements the gcr algorithm. c c dimension u(1), ubar(1), rhs(1), wk(1), coef(1), jcoef(2), a wfac(1), jwfac(1) external suba, subql, subqr external nullpl, nullpr dimension iparm(30), rparm(30) c ier = 0 call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) return c c pass on to workhorse routine ... c call omingw (suba,subql,subqr,nullpl,nullpr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk,nw,iparm,rparm,ier) return end subroutine omingw (suba,subql,subqr,precl,precr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk,nw,iparm,rparm,ier) c c this is a generalized version of the omingw routine which allows a c more general computational form for the preconditioning. c dimension u(1), ubar(1), rhs(1), wk(1), coef(1), jcoef(2), a wfac(1), jwfac(1) logical ipl, ipr external suba, subql, subqr, precl, precr dimension iparm(30), rparm(30) logical ztget, havest, hadest, evest c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom4 / keygs, srelpr, keyzer common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c c the following indexing functions are used to access the old c direction vectors and dot products -- c indpt(i) = ipt + mod(i,nv)*n indqap(i) = iqapt + mod(i,nv)*n inddot(i) = idot + mod(i,nv+1) indhes(i,j) = ihess + (i-1) + (j-1)*nhess inapar(i) = iapar + mod(i,nv) indlam(i) = ilam + mod(i,nv+1) c c various preliminary calculations. c t1 = timer (dummy) c ipl = iplr .eq. 1 .or. iplr .eq. 3 ipr = iplr .eq. 2 .or. iplr .eq. 3 c iacel = 8 nwusd = 0 if (level .ge. 1) write (nout,497) 497 format (' omin') c c initialize the stopping test ... c call inithv (0) zhave = .true. zthave = .true. nwpstp = nw call pstopg (0,suba,subql,subqr,precl,precr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,xxx,xxx, a wk,nwpstp,ier) nwusd = max0(nwusd,nwpstp) if (ier .lt. 0) go to 997 ztget = ztcalp zthave = ztget c c memory allocation, etc. c numbig = 1000 methev = 1 if (iabs(ns3) .ge. numbig) then if (ns3 .gt. 0) ns3 = ns3 - numbig if (ns3 .lt. 0) ns3 = ns3 + numbig methev = 2 end if c evest = ns3.ne.0 .and. (maxadd.or.minadd) nhess = 2 + min0(itmax,ns2) nv = max0(1,min0(ns1,ns2-1)) ipt = 1 iqapt = ipt + nv*n idot = iqapt + nv*n iapar = idot + (nv+1) ihess = iapar + nv ilam = ihess + nhess*(nv+2) if (.not. evest) ilam = ihess iz = ilam + (nv+1) izt = iz + n if (.not. ipr) izt = iz iv1 = izt + n iv2 = iv1 + n ir = iz if (ipl) ir = iv1 c nwtmp = iv1 - 1 + n if (ipl) nwtmp = iv2 - 1 + n nwusd = max0(nwusd,nwtmp) c c check the memory usage -- c if (nwusd .gt. nw) go to 999 c in = 0 is = 0 c c perform first-iterate calculations c call suba (coef,jcoef,wfac,jwfac,n,u,wk(ir)) call vexopy (n,wk(ir),rhs,wk(ir),2) if (ipl) call precl (coef,jcoef,wfac,jwfac,n,subql,suba,subqr, a wk(ir),wk(iz)) hadest = .false. c c ------------------------- begin iteration loop ---------------------- c c determine whether or not to stop ... c 10 if (.not. ztget) go to 710 if (ipr) call precr (coef,jcoef,wfac,jwfac,n,subql,suba,subqr, a wk(iz),wk(izt)) c 710 call inithv (1) nwpstp = nw - (iv1-1) call pstopg (1,suba,subql,subqr,precl,precr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,wk(iz),wk(izt), a wk(iv1),nwpstp,ier) nwusd = max0(nwusd,nwpstp+iv1-1) if (level .ge. 2) call iterm (n,u) if (halt .or. in .ge. itmax .or. ier .lt. 0) go to 900 c if (zthave) go to 711 if (ipr) call precr (coef,jcoef,wfac,jwfac,n,subql,suba,subqr, a wk(iz),wk(izt)) c c------------------proceed to calculate the direction vectors---------------- c c first, case of no old p vectors. c 711 np = min0(mod(in,ns2),ns1) if (np .ne. 0) go to 100 c call vcopy (n,wk(izt),wk(indpt(in))) if (.not. ipl) then call suba (coef,jcoef,wfac,jwfac,n,wk(indpt(in)),wk(indqap(in))) else call suba (coef,jcoef,wfac,jwfac,n,wk(indpt(in)),wk(iv1)) call precl (coef,jcoef,wfac,jwfac,n,subql,suba,subqr, a wk(iv1),wk(indqap(in))) end if go to 120 c c case of at least one old p vector. c this case is handled in a tricky way, to optimize the workspace. c 100 if (.not. ipl) then call suba (coef,jcoef,wfac,jwfac,n,wk(izt),wk(iv1)) else call suba (coef,jcoef,wfac,jwfac,n,wk(izt),wk(iv2)) call precl (coef,jcoef,wfac,jwfac,n,subql,suba,subqr, a wk(iv2),wk(iv1)) end if c top = vdot (n,wk(indqap(in-np)),wk(iv1)) wk(inapar(in-np)) = top bet = - top / wk(inddot(in-np)) call vtriad (n,wk(indpt(in)),wk(izt),bet,wk(indpt(in-np)),1) call vtriad (n,wk(indqap(in)),wk(iv1),bet,wk(indqap(in-np)),1) c do 612 i = in-np+1, in-1 top = vdot (n,wk(indqap(i)),wk(iv1)) wk(inapar(i)) = top bet = - top / wk(inddot(i)) call vtriad (n,wk(indpt(in)), wk(indpt(in)), bet,wk(indpt(i)), 1) 612 call vtriad (n,wk(indqap(in)),wk(indqap(in)),bet,wk(indqap(i)),1) c c at this point, we are finished forming the latest direction vector. c we proceed to calculate lambda and update the solution and c the residuals. c 120 continue apap = vdot (n,wk(indqap(in)),wk(indqap(in))) c if (abs(apap) .lt. srelpr**2) go to 998 if (abs(apap) .eq. 0e0) go to 998 wk(inddot(in)) = apap top = vdot (n,wk(indqap(in)),wk(iz)) vlamda = top / apap c if (.not. ipr) zzdot = zzdot - 2*vlamda*top + vlamda**2*apap c c u -- call vtriad (n,u,u,vlamda,wk(indpt(in)),1) c c z -- call vtriad (n,wk(iz),wk(iz),-vlamda,wk(indqap(in)),1) c c----------------------------hess matrix update--------------------------- c c there are two schemes here, based on two different ways of projecting c the iteration matrix. c c update hessenberg matrix: scheme 1 c if (.not. evest) go to 955 wk(indlam(in)) = vlamda if (is .eq. 0) call vfill (nhess*(nv+2),wk(ihess),0e0) if (methev .ne. 1) go to 746 c do 954 i=in-np,in if (i .eq. in) apar = apap if (i .ne. in) apar = wk(inapar(i)) wk(indhes(i+1+(is-in),in-i+2)) = wk(indhes(i+1+(is-in),in-i+2)) a + apar/wk(indlam(in)) / sqrt(wk(inddot(in))*wk(inddot(i))) if (is .ne. 0) a wk(indhes(i+1+(is-in),in-i+1)) = wk(indhes(i+1+(is-in),in-i+1)) a - apar/wk(indlam(in-1)) / sqrt(wk(inddot(in-1))*wk(inddot(i))) 954 continue iesize = is go to 747 c c update hessenberg matrix: scheme 2 c 746 iesize = is + 1 wk(indhes(is+2,1)) = -1e0 / vlamda wk(indhes(is+1,2)) = 1e0 / vlamda if (np .eq. 0) go to 749 do 748 i=in-np,in-1 id = in - i + 1 wk(indhes(is+3-id,id )) = wk(indhes(is+3-id,id )) a - wk(inapar(i))/wk(inddot(i))/wk(indlam(i)) 748 wk(indhes(is+2-id,id+1)) = wk(indhes(is+2-id,id+1)) a + wk(inapar(i))/wk(inddot(i))/wk(indlam(i)) 749 continue c c estimate eigenvalues ... c 747 nwhe = nw - (iv1-1) call hesest (wk(ihess),nhess,nv+2,iesize,ns3,havest, a emaxnw,eminnw,wk(iv1),nwhe,ier) nwusd = max0 (nwusd,iv1-1+nwhe) if (ier .ne. 0) go to 995 if (.not. havest) go to 955 if (hadest) go to 956 if (maxadd) emax = emaxnw if (minadd) emin = eminnw hadest = .true. go to 955 956 if (maxadd) emax = amax1 (emax,emaxnw) if (minadd) emin = amin1 (emin,eminnw) c c---------------------------proceed to next iteration------------------------ c 955 in = in + 1 is = is + 1 if (is .eq. ns2) is = 0 go to 10 c c-------------------------------finish up------------------------------------ c 900 if (.not. halt) go to 996 if (level .ge. 1) write (nout,720) in 720 format (/' orthomin converged in ',i5,' iterations.') c 725 if (idgts .ge. 0) a call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wk,digit1, a digit2,idgts) c pack revised parms into iparm, rparm ... t2 = timer (dummy) timit = t2 - t1 iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 735 if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) nw = nwusd return c c c--------------------------------error returns----------------------------- c c unimplemented option ... 995 ier = -16 call ershow (ier,'omingw') go to 725 c c no convergence ... 996 ier = 1 call ershow (ier,'omingw') zeta = stptst go to 725 c c generic error handler ... 997 call ershow (ier,'omingw') go to 735 c c breakdown ... 998 ier = -15 call ershow (ier,'omingw') go to 725 c c insufficient real wksp ... 999 ier = -2 call ershow (ier,'omingw') go to 735 end subroutine oresw (suba,subql,subqr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk,nw,iparm,rparm,ier) c c this routine implements orthores with truncation and c restarting and with 2-sided preconditioning. the value of z is c the identity. the code is optimal in speed and workspace c requirements, for general a, ql and qr. c dimension u(1), ubar(1), rhs(1), wk(1), coef(1), jcoef(2), a wfac(1), jwfac(1) external suba, subql, subqr dimension iparm(30), rparm(30) logical iql, iqr c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom4 / keygs, srelpr, keyzer common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c c the following indexing functions are used to access the old c direction vectors and dot products -- c indu(i) = iu + mod(i,nv)*n indz(i) = iz + mod(i,nv)*n inddot(i) = idot + mod(i,nv) c c various preliminary calculations. c nwusd = 0 ier = 0 iacel = 9 t1 = timer (dummy) call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 997 if (level .ge. 2) write (nout,496) 496 format (' orthores') iql = iqlr .eq. 1 .or. iqlr .eq. 3 iqr = iqlr .eq. 2 .or. iqlr .eq. 3 c c initialize the stopping test ... c call inithv (0) zhave = .true. zthave = .true. nwpstp = nw call pstop (0,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,xxx,xxx, a wk,nwpstp,ier) nwusd = max0(nwusd,nwpstp) if (ier .lt. 0) go to 730 c c memory allocation, etc. c c nomenclature -- r -- residual of the original system. c z -- inv(ql)*r c zt -- inv(qr)*z c nv = max0(1,min0(ns1+1,ns2)) iu = 1 iz = iu + nv*n idot = iz + nv*n iv1 = idot + nv iv2 = iv1 + n nwusd = max0(nwusd,iv2-1+n) c c check the memory usage -- c if (nwusd .gt. nw) go to 999 c in = 0 c c perform first-iterate calculations. c note -- we will use the vector 'u' to store ztilde. the u vectors c will be stored in the table. wk(iv1) will hold r. c call vcopy (n,u,wk(indu(0))) call suba (coef,jcoef,wfac,jwfac,n,u,wk(iv1)) call vexopy (n,wk(iv1),rhs,wk(iv1),2) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(indz(0))) call subqr (coef,jcoef,wfac,jwfac,n,wk(indz(0)),u) wk(inddot(0)) = vdot (n,wk(indz(0)),wk(indz(0))) c c-------------------------begin iteration loop----------------------- c c determine whether or not to stop -- c 10 call inithv (1) nwpstp = nw - (iv2-1) call pstop (1,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,wk(indu(in)),ubar,rhs,xxx,wk(indz(in)),u, a wk(iv2),nwpstp,ier) nwusd = max0(nwusd,nwpstp+iv2-1) if (level .ge. 2) call iterm (n,wk(indu(in))) if (halt .or. in .ge. itmax .or. ier .lt. 0) go to 900 c c proceed to calculate the iterates. c np = min0(mod(in,ns2)+1,ns1+1) call suba (coef,jcoef,wfac,jwfac,n,u,wk(iv1)) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iv2)) top = vdot (n,wk(indz(in+1-np)),wk(iv2)) sig = top / wk(inddot(in+1-np)) call vtriad (n,wk(indz(in+1)),wk(iv2),-sig,wk(indz(in+1-np)),1) call vtriad (n,wk(indu(in+1)),u,sig,wk(indu(in+1-np)),1) sigsum = sig ibegin = in - np + 2 iend = in if (ibegin .gt. iend) go to 613 do 612 i = ibegin,iend top = vdot (n,wk(indz(i)),wk(iv2)) den = wk(inddot(i)) if (abs(den) .lt. srelpr) go to 998 sig = top / den call vtriad (n,wk(indz(in+1)),wk(indz(in+1)),-sig,wk(indz(i)),1) call vtriad (n,wk(indu(in+1)),wk(indu(in+1)),sig,wk(indu(i)),1) 612 sigsum = sigsum + sig 613 continue if (abs(sigsum) .lt. srelpr) go to 998 vlamda = 1.0/sigsum call vtriad (n,wk(indz(in+1)),xxx,-vlamda,wk(indz(in+1)),2) call vtriad (n,wk(indu(in+1)),xxx,vlamda,wk(indu(in+1)),2) wk(inddot(in+1)) = vdot (n,wk(indz(in+1)),wk(indz(in+1))) c call subqr (coef,jcoef,wfac,jwfac,n,wk(indz(in+1)),u) c c proceed to next iteration c in = in + 1 go to 10 c c-----------------------------finish up---------------------------- c 900 call vcopy (n,wk(indu(in)),u) if (halt) go to 715 ier = 1 call ershow (ier,'oresw') zeta = stptst go to 725 715 continue if (level .ge. 1) write (nout,720) in 720 format (/' orthores converged in ',i5,' iterations.') c 725 continue if (idgts .lt. 0) go to 730 call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wk,digit1, a digit2,idgts) 730 t2 = timer (dummy) timit = t2 - t1 iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 735 continue if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) nw = nwusd return c c error returns c 997 call ershow (ier,'oresw') go to 735 c 998 ier = -15 call ershow (ier,'oresw') go to 725 c 999 ier = -2 call ershow (ier,'oresw') go to 735 c end subroutine iomw (suba,subql,subqr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk,nw,iparm,rparm,ier) c c code to run the (truncated) iom algorithm. the reference is c youcef saad, "krylov subspace methods ...", mathematics of c computation, vol. 37, july 1981, pp. 105f. c c in the symmetric case this algorithm reduces to the symmlq c algorithm of paige and saunders, except paige and saunders have c implemented a trick to avoid breakdown before convergence. this c trick is not implemented here. c dimension u(1), ubar(1), rhs(1), wk(1), coef(1), jcoef(2), a wfac(1), jwfac(1) integer idotw, vect1, vect2, dots1, dots2, os logical uneed external suba, subql, subqr dimension iparm(30), rparm(30) dimension gdum(1), wkxxx(1) logical iql, iqr logical exact, gamize c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom4 / keygs, srelpr, keyzer common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c c next, the indexing functions. c indv1(i) = vect1 + mod(i,nv)*n indbe2(i) = ibeta2 + mod(i,os) indc(i) = icos + mod(i,os) inds(i) = isin + mod(i,os) indu(i) = iu + mod(i,os+1) indw(i) = iw + n*mod(i,os) c c preliminary calculations. c nwusd = 0 ier = 0 iacel = 10 t1 = timer (dummy) call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 996 iql = iqlr .eq. 1 .or. iqlr .eq. 3 iqr = iqlr .eq. 2 .or. iqlr .eq. 3 gamize = .true. if (iqr) go to 995 if (level .ge. 2) write (nout,496) 496 format (' iom') c the following flag tells us whether the truncating actually c throws out important information. it should actually be set to c true if the matrix is symmetric. exact = .false. c c initialize the stopping test ... c call inithv (0) zdhav = .true. nwpstp = nw call pstop (0,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,xxx,xxx, a wk,nwpstp,ier) nwusd = max0(nwusd,nwpstp) if (ier .lt. 0) go to 730 c c ... associated integer variables. c os = iabs(ns1) iv = 1 nv = os idotw = 1 iw = 1 vect1 = iw + iv*n*os vect2 = vect1 dots1 = vect2 + iv*n*nv dots2 = dots1 ibeta1 = dots2 + idotw*os ibeta2 = ibeta1 icos = ibeta2 + os isin = icos + os iu = isin + os iv1 = iu + os+1 iv2 = iv1 + n nwusd = max0(nwusd,iv2-1+n) c c check the memory usage -- c if (nwusd .gt. nw) go to 999 c in = 0 is = 0 uneed = rcalp .or. zcalp .or. ztcalp .or. udhav a .or. ntest .eq. 6 .or. level .ge. 3 c c--------------------------begin iteration loop--------------------- c c perform first-iterate calculations ... c 10 if (is .ne. 0) go to 100 call suba (coef,jcoef,wfac,jwfac,n,u,wk(iv1)) call vexopy (n,wk(iv1),rhs,wk(iv1),2) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iv2)) call pvec (n,nv,iv,1,os,idotw,is,1,1,wk(vect1),wk(dots1),0, a wk(ibeta1),gdum,gamize,wk(iv2),wkxxx,ier) gamma1 = gdum(1) if (ier .lt. 0) go to 997 gamma2 = gamma1 vnorm1 = 1.0/gamma1 vnorm2 = 1.0/gamma2 zdot = vnorm1**2 ucnp1= 0.0 c 100 call inithv (1) zdhav = .true. nwpstp = nw - (iv1-1) call pstop (1,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,xxx,xxx, a wk(iv1),nwpstp,ier) nwusd = max0(nwusd,nwpstp+iv1-1) if (level .ge. 2) call iterm (n,u) if (halt .or. in .ge. itmax .or. ier .lt. 0) go to 900 c c c ... compute q(n+1), etc -- the direction vectors c call suba (coef,jcoef,wfac,jwfac,n,wk(indv1(is)),wk(iv1)) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iv2)) call pvec (n,nv,iv,1,os,idotw,is+1,1,1,wk(vect1),wk(dots1),0, a wk(ibeta1),gdum,gamize,wk(iv2),wkxxx,ier) gamma1 = gdum(1) if (ier .lt. 0) go to 997 gamma2 = gamma1 c c ... now record norms. c vn1old = vnorm1 vnorm1 = 1.0/gamma1 vn2old = vnorm2 vnorm2 = 1.0/gamma2 c c ... now update the factorization c ucnbar = ucnp1 ibgn = max0(0,is+1-os) do 1 i = ibgn,is 1 wk(indu(i+1)) = -wk(indbe2(i)) if (ibgn .gt. 0) wk(indu(ibgn))= 0.0 call qrupd (is+1,os+1,os,wk(icos),wk(isin),ucnbar,ucn,wk(iu), a vn2old,ier) if (ier .lt. 0) go to 998 ucnp1 = wk(indu(is+1)) c c ... update the old w vector. c if (is .ne. 0) a call vtriad (n,wk(indw(is-1)),xxx,ucnbar/ucn,wk(indw(is-1)),2) c c ... now generate the new w vector. c if (abs(ucnp1) .lt. srelpr) go to 998 call vcopy (n,wk(indv1(is)),wk(iv1)) ibgn = max0(1,is-os+1) iend = is if (iend .lt. ibgn) go to 200 do 201 i = ibgn,iend 201 call vtriad (n,wk(iv1),wk(iv1),-wk(indu(i)),wk(indw(i-1)),1) 200 continue call vtriad (n,wk(indw(is)),xxx,1.0/ucnp1,wk(iv1),2) if (is .ne. 0) go to 205 c c ... update iterate u(0). c zold= 0.0 zbar = vn1old if (uneed) call vtriad (n,u,u,zbar,wk(indw(0)),1) go to 210 c c ... update subsequent iterates u(n). c 205 zold = wk(indc(is))*zbar zbold = zbar zbar =-wk(inds(is))*zbar factor = zold if (uneed) factor = factor - zbold*ucn/ucnbar call vtriad (n,u,u,factor,wk(indw(is-1)),1) if (uneed) call vtriad (n,u,u,zbar,wk(indw(is)),1) c to avoid breakdown for the symmetric indefinite case, we'd only add c in w(is-1) here, i believe. 210 continue zdot = (zbar/ucnp1*vnorm1)**2 c c proceed to next iteration c in = in + 1 is = is + 1 go to 10 c c-----------------------------finish up------------------------------ c 900 if (.not. uneed) call vtriad (n,u,u,zbar,wk(indw(is-1)),1) if (halt) go to 715 ier = 1 call ershow (ier,'iomw') zeta = stptst go to 725 715 continue if (level .ge. 1) write (nout,720) in 720 format (/' iom converged in ',i5,' iterations.') c 725 continue if (idgts .lt. 0) go to 730 call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wk,digit1, a digit2,idgts) 730 t2 = timer (dummy) timit = t2 - t1 iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 735 continue if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) nw = nwusd return c c error returns c 995 ier = -16 call ershow (ier,'iomw') return c 996 call ershow (ier,'iomw') go to 735 c 997 ier = -13 call ershow (ier,'iomw') go to 725 c 998 ier = -14 call ershow (ier,'iomw') go to 725 c 999 ier = -2 call ershow (ier,'iomw') go to 735 c end subroutine gmresw (suba,subql,subqr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk,nw,iparm,rparm,ier) c c code to run the truncated/restarted gmres algorithm. a detailed c description of this useful algorithm may be found in the paper, c "gmres: a generalized minimal residual algorithm for solving c nonsymmetric linear systems", youcef saad and martin h. schultz, c siam j. sci. stat. comput., v. 7, no. 3, july 1986. c c further scoop on how to set up qr factorizations can be obtained in c "practical use of some krylov subspace methods for solving c indefinite and unsymmetric linear systems", youcef saad, siam j. sci. c stat. comput., v. 5, no. 1, march 1984. c c the advantage of this algorithm over its competitors orthomin and gcr c is that work and storage are saved by avoiding the computation of c certain vectors. c c this routine now handles right and 2-sided preconditioning. the main c thing to note about this is that a new table of basis vecttors is now c necessary, to use to update the solution. c c this routine also avoids explicit scaling of the p and w vectors. c c for the pure restarted case, we actually compute the final arnoldi c vector, rather than just estimating its norm. this is a diversion c from the saad/schultz paper. this was done because in some cases it c was found that the norm estimation was subject to significant c numerical error. c c modified feb. 1990 to make the restarted method more efficient. c specifically, new formulas were installed for the scalar part of c the computation to give an optimal asymptotic dependence on ns2. c c---------------------------------------------------------------------- c dimension coef(*), jcoef(*), wfac(*), jwfac(*) dimension u(*), ubar(*), rhs(*), wk(*) logical uneed, zneed external suba, subql, subqr dimension iparm(30), rparm(30) logical iql, iqr logical trunc, exact, rstrt, rstrtd, zhvold logical havest, hadest, evadpt c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom4 / keygs, srelpr, keyzer common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c c-------------------------- indexing functions -------------------------- c c the following function accesses the arnoldi vectors. indp(i) = ip + mod(i,nv)*n c c the following accesses q-r times the arnoldi vectors indpt(i) = ipt + mod(i,nvt)*n c c fudge factor for the arnoldi vectors. p(actual) = p(stored)*pfudge. c (we do the same trick with a*p.) indpf(i) = ipf + mod(i,nv) c c the following accesses the w-vectors. indw(i) = iw + n*mod(i,nv) c c fudge factors for the w vectors ... c (similarly, the vector "xi" is fudged.) indwf(i) = iwf + mod(i,nv) c c the following accesses the hessenberg matrix -- stored by diagonals ... indhes(i,j) = ihess + (i-1) + (j-i+1)*nhess c c the following are the cosines and sines of the rotations. indc(i) = icos + mod(i,nrot) inds(i) = isin + mod(i,nrot) c c the following accesses the u matrix -- stored by columns ... indu(i,j) = iu + j-i+1 + mod(j-1,nuc)*nbwuh c c the following accesses the z-vector ... indzc(i) = izc + mod(i-1,nzc) c c----------------------- preliminary calculations ---------------------- c nwusd = 0 ier = 0 iacel = 11 t1 = timer (dummy) call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 996 iql = iqlr.eq.1 .or. iqlr.eq.3 iqr = iqlr.eq.2 .or. iqlr.eq.3 iadpt = ns3 evadpt = (maxadd.or.minadd) .and. iadpt.ne.0 trunc = ns1 .lt. (ns2-1) exact = .not. trunc if (ns1 .lt. 2) go to 995 if (level .ge. 2) write (nout,496) 496 format (' gmres') c c--------------------------initialize the stopping test------------------ c call inithv (0) zdhav = .not. (trunc .and. .not.exact) nwpstp = nw call pstop (0,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,xxx,xxx, a wk,nwpstp,ier) nwusd = max0(nwusd,nwpstp) if (ier .lt. 0) go to 730 c c uneed tells us whether u must be computed explicitly per iteration. c similarly for zneed. uneed = rcalp .or. udhav a .or. ntest .eq. 6 .or. level .ge. 3 zneed = zcalp hadest = .false. c c------------------------associated integer variables--------------------- c c---effective ns2--- ns2e = min0(ns2,itmax) c---length of diags of hess matrix--- nhess = ns2e + 2 c---bandwidth of the hess matrix--- nbwh = min0(ns1+1,ns2e+1) c---bandwidth of u-or-h--- nbwuh = min0(ns1+2, ns2e+1) c---number columns stored of the u matrix--- if ( trunc) nuc = 1 if (.not.trunc) nuc = ns2e c---size of arnoldi-vector tables--- nv = min0(ns1,ns2e+1) nvt = nv if (iqr .and. .not.trunc) nvt = nv - 1 if (iqr .and. trunc) nvt = 1 c---number of givens rotations to store--- nrot = min0(ns1,ns2e) c---number of elts of z-vector to store--- if ( trunc) nzc = 2 if (.not.trunc) nzc = ns2e + 1 c c------------------------------memory layout--------------------------- c ihess = 1 ipt = ihess + nhess*nbwh if (.not.evadpt) ipt = ihess ip = ipt + n*nvt if (.not.iqr) ip = ipt izc = ip + n*nv icos = izc + nzc isin = icos + nrot iy = isin + nrot iu = iy + ns2e if (trunc .or. .not.uneed) iu = iy ipz = iu + nbwuh*nuc ipf = ipz + ns2e+1 if (trunc) ipf = ipz iz = ipf + nv iw = iz + n iwf = iw + n*nv ixi = iwf + nv iv1 = ixi + n if (.not. trunc) iv1 = iw iv2 = iv1 + n nwusd = max0(nwusd,iv2+n-1) c c --- check the memory usage --- c if (nwusd .gt. nw) go to 999 c in = 0 is = 0 rstrtd = .true. c c-------------------------------------------------------------------- c---------------------------begin iteration loop--------------------- c-------------------------------------------------------------------- c c handle first iteration after restart ... c 10 call inithv (1) zdhav = .not.(trunc.and..not.exact) .and. in.ne.0 if (.not. rstrtd) go to 100 c ---get resid--- if (.not. zhave) then if (iql) then call suba (coef,jcoef,wfac,jwfac,n,u,wk(iv1)) call vexopy (n,wk(iv1),rhs,wk(iv1),2) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iz)) else call suba (coef,jcoef,wfac,jwfac,n,u,wk(iz)) call vexopy (n,wk(iz),rhs,wk(iz),2) end if zhave = .true. end if c ---get resid norm--- if (.not. zdhav) then zdot = vdot (n,wk(iz),wk(iz)) zdhav = .true. end if if (zdot .lt. 0e0) go to 994 vnorm = sqrt(zdot) if (vnorm .lt. srelpr**2) go to 997 call vcopy (n,wk(iz),wk(indp(is))) wk(indpf(is)) = 1e0/vnorm wk(indzc(is+1)) = vnorm c c --- perform stopping test --- c 100 nwpstp = nw - (iv1-1) call pstop (1,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,wk(iz),xxx, a wk(iv1),nwpstp,ier) nwusd = max0(nwusd,nwpstp+iv1-1) if (level .ge. 2) call iterm (n,u) if (halt .or. in .ge. itmax .or. ier .lt. 0) go to 900 c c rstrt tells us whether this is the last step before restarting ... rstrt = (is+1 .eq. ns2) if (evadpt .and. is.eq.0) call vfill (nhess*nbwh,wk(ihess),0e0) c c-----------------------compute the new arnoldi vector---------------- c c pn(is+1)*p(is+1) = a*p(is) + sum (i=0 to is) (beta(is+1,i)*p(i)), c c---get a times old vec--- if (iqr) call subqr (coef,jcoef,wfac,jwfac,n,wk(indp (is)), a wk(indpt(is))) if (iql) then call suba (coef,jcoef,wfac,jwfac,n,wk(indpt(is)),wk(iv1)) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iv2)) else call suba (coef,jcoef,wfac,jwfac,n,wk(indpt(is)),wk(iv2)) end if apf = wk(indpf(is)) c---compute arnoldi vector--- ibeg = max0(is+1-ns1,0) iend = is if (ibeg .gt. 0) wk(indu(ibeg,is+1)) = 0e0 pfnew = apf do 199 i = ibeg,iend h = vdot (n,wk(indp(i)),wk(iv2)) * wk(indpf(i))*apf wk(indu(i+1,is+1)) = h if (evadpt) wk(indhes(i+1,is+1)) = h if (i .eq. ibeg) call vtriad (n,wk(indp(is+1)),wk(iv2), a -h*wk(indpf(i))/pfnew,wk(indp(i)),1) if (i .ne. ibeg) call vtriad (n,wk(indp(is+1)),wk(indp(is+1)), a -h*wk(indpf(i))/pfnew,wk(indp(i)),1) 199 continue wk(indpf(is+1)) = pfnew c---get norm--- dot = vdot (n,wk(indp(is+1)),wk(indp(is+1))) * pfnew**2 vnorm = sqrt(dot) if (vnorm .lt. srelpr**2) go to 192 wk(indu(is+2,is+1)) = vnorm if (evadpt) wk(indhes(is+2,is+1)) = vnorm c---scale--- wk(indpf(is+1)) = wk(indpf(is+1))/vnorm if (abs(wk(indpf(is+1))).lt.srelpr .or. a abs(wk(indpf(is+1))).gt.1e0/srelpr) then call vtriad (n,wk(indp(is+1)),xxx,wk(indpf(is+1)), a wk(indp(is+1)),2) wk(indpf(is+1)) = 1e0 end if c c-----------------------update the qr factorization------------------- c 192 continue c---apply old rotations--- ibgn = max(0,is-ns1) iuold = indu(ibgn+1,is+1) do 7977 i = ibgn, is-1 iunew = indu(i+2,is+1) ut = wk(iuold) h = wk(iunew) ctmp = wk(indc(i+1)) stmp = wk(inds(i+1)) wk(iuold) = ctmp*ut + stmp*h wk(iunew) = -stmp*ut + ctmp*h 7977 iuold = iunew iunew = indu(is+2,is+1) c---calc new rotation--- v1 = wk(iuold) v2 = wk(iunew) denom = sqrt (v1**2 + v2**2) if (denom .lt. srelpr) go to 998 wk(indc(is+1)) = v1/denom wk(inds(is+1)) = v2/denom c---apply new rotation--- wk(iuold) = denom wk(iunew) = 0e0 c c--------------------------compute w, if needed------------------------ c uc = wk(indu(is+1,is+1)) if (abs(uc) .lt. srelpr**2) go to 998 if (.not.trunc) go to 572 c c ... case of explicit w calc ... c if (is .eq. 0) then call vcopy (n,wk(indpt(is)),wk(indw(1))) wk(indwf(is+1)) = wk(indpf(is))/uc c call vtriad (n,wk(indw(is+1)),xxx,1.0/uc,wk(indpt(is)),2) go to 572 end if wfnew = wk(indpf(is)) ibeg = max0(1,is+1-ns1) iend = is do 574 i = ibeg, iend if (i .eq. ibeg) call vtriad (n,wk(indw(is+1)),wk(indpt(is)), a -wk(indu(i,is+1))*wk(indwf(i))/wfnew,wk(indw(i)),1) if (i .ne. ibeg) call vtriad (n,wk(indw(is+1)),wk(indw(is+1)), a -wk(indu(i,is+1))*wk(indwf(i))/wfnew,wk(indw(i)),1) 574 continue wk(indwf(is+1)) = wfnew/uc if (abs(wk(indwf(is+1))).lt.srelpr .or. a abs(wk(indwf(is+1))).gt.1e0/srelpr) then call vtriad (n,wk(indw(is+1)),xxx,wk(indwf(is+1)), a wk(indw(is+1)),2) wk(indwf(is+1)) = 1e0 end if 572 continue c c---get new zc entries--- wk(indzc(is+2)) = -wk(inds(is+1))*wk(indzc(is+1)) wk(indzc(is+1)) = wk(indc(is+1))*wk(indzc(is+1)) c c------------------- u-vector computation section --------------------- c if (trunc) then c c---truncated case--- call vtriad (n,u,u,wk(indzc(is+1))*wk(indwf(is+1)), . wk(indw(is+1)),1) else c c---non-truncated case--- if (.not.(uneed .or. rstrt)) go to 410 iynew = iv1 nwusd = max0(nwusd,iynew+ns2e-1) if (nwusd .gt. nw) go to 999 c ---do back solve on u-matrix--- nm = is + 1 do 623 i = nm, 1, -1 sum = wk(indzc(i)) do 624 j = i+1, nm 624 sum = sum - wk(iynew-1+j)*wk(indu(i,j)) 623 wk(iynew-1+i) = sum/wk(indu(i,i)) c ---form iterate--- do 625 i = 0, nm-1 val = wk(iynew+i) if (uneed .and. i.ne.nm-1) val = val - wk(iy+i) 625 call vtriad (n,u,u,val*wk(indpf(i)),wk(indpt(i)),1) if (uneed) call vcopy (nm,wk(iynew),wk(iy)) endif 410 continue c c--------------------- residual computation section ------------------- c zhvold = zhave zhave = .false. if (trunc) go to 671 c c---non-truncated case--- c c do it if resid needed by pstop or if restarting. if (zneed .or. rstrt) then ipznew = iv1 nwusd = max0(nwusd,ipznew+ns2e) if (nwusd .gt. nw) go to 999 call vcopy (is+1,wk(izc),wk(ipznew)) wk(ipznew+is+1) = 0e0 c ---apply rotations--- do 644 i = is+1, 1, -1 v1 = wk(indc(i))*wk(ipznew+i-1) - wk(inds(i))*wk(ipznew+i) v2 = wk(inds(i))*wk(ipznew+i-1) + wk(indc(i))*wk(ipznew+i) wk(ipznew+i-1) = v1 644 wk(ipznew+i) = v2 c ---form resid--- do 645 i = 0, is+1 val = wk(ipznew+i) if (zhvold .and. i.ne.is+1) val = val - wk(ipz) 645 call vtriad (n,wk(iz),wk(iz),-val*wk(indpf(i)), a wk(indp(i)),1) call vcopy (is+2,wk(ipznew),wk(ipz)) zhave = .true. end if go to 425 c c---truncated case--- c c do it if pstop needs it or if we may restart later. 671 if ( zneed .or. (itmax.gt.ns2) ) then c ---update xi--- if (is .eq. 0) then call vcopy (n,wk(indp(is)),wk(ixi)) xif = wk(indpf(is)) else xif = xif*(-wk(inds(is))) call vtriad (n,wk(ixi),wk(ixi), a wk(indc(is))*wk(indpf(is))/xif,wk(indp(is)),1) end if if (abs(xif).lt.srelpr .or. a abs(xif).gt.1e0/srelpr) then call vtriad (n,wk(ixi),xxx,xif,wk(ixi),2) xif = 1e0 end if c ---form resid--- call vtriad (n,wk(iz),wk(iz), . -wk(indzc(is+1))*wk(indc(is+1))*xif,wk(ixi),1) call vtriad (n,wk(iz),wk(iz), . -wk(indzc(is+1))*wk(inds(is+1))*wk(indpf(is+1)), a wk(indp(is+1)),1) zhave = .true. endif 425 continue c c---get resid norm--- c if (exact) then zdot = wk(indzc(is+2))**2 end if c c--------------------------------ev est------------------------------- c if (evadpt) then nwhe = nw - (iv1-1) call hesest (wk(ihess),nhess,nv+2,is+1,iadpt,havest, a emaxnw,eminnw,wk(iv1),nwhe,ier) nwusd = max0(nwusd,iv1-1+nwhe) if (ier .ne. 0) go to 996 if (.not. havest) go to 874 if (hadest) go to 876 if (maxadd) emax = emaxnw if (minadd) emin = eminnw hadest = .true. go to 874 876 if (maxadd) emax = amax1 (emax,emaxnw) if (minadd) emin = amin1 (emin,eminnw) end if c c-------------------------finish up the iteration---------------------- c 874 in = in + 1 is = is + 1 if (rstrt) is = 0 rstrtd = rstrt go to 10 c c---------------------------------------------------------------------- c------------------------------wrap it up------------------------------ c---------------------------------------------------------------------- c c---form u, if not up-to-date--- c 900 if (uneed .or. rstrtd .or. trunc) go to 901 iynew = iv1 nwusd = max0(nwusd,iynew+ns2e-1) if (nwusd .gt. nw) go to 999 c ---do back solve on u-matrix--- nm = is do 663 i = nm, 1, -1 sum = wk(indzc(i)) do 664 j = i+1, nm 664 sum = sum - wk(iynew-1+j)*wk(indu(i,j)) 663 wk(iynew-1+i) = sum/wk(indu(i,i)) c ---form iterate--- do 665 i = 0, nm-1 val = wk(iynew+i) 665 call vtriad (n,u,u,val*wk(indpf(i)),wk(indpt(i)),1) c c---------------------------------------------------------------------- c-----------------------------head out of here------------------------- c---------------------------------------------------------------------- c 901 continue if (halt) go to 715 ier = 1 call ershow (ier,'gmresw') zeta = stptst go to 725 715 continue if (level .ge. 1) write (nout,720) in 720 format (/' gmres converged in ',i5,' iterations.') c 725 continue if (idgts .lt. 0) go to 730 call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wk,digit1, a digit2,idgts) 730 t2 = timer (dummy) timit = t2 - t1 iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 735 continue if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) nw = nwusd return c c----------------------------error returns----------------------------- c 994 ier = -15 call ershow (ier,'gmresw') return c 995 ier = -16 call ershow (ier,'gmresw') return c 996 call ershow (ier,'gmresw') go to 735 c 997 ier = -13 call ershow (ier,'gmresw') go to 725 c 998 ier = -14 call ershow (ier,'gmresw') go to 725 c 999 ier = -2 call ershow (ier,'gmresw') go to 735 c end subroutine uslqw (suba,subat,subql,subqlt,subqr,subqrt, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk,nw,iparm,rparm,ier) c c code to run the usymlq algorithm. see: m. a. saunders, h. d. simon c and e. l. yip, "two conjugate-gradient-type methods for sparse c unsymmetric linear equations, report eta-tr-18, boeing computer c services, seattle, washington, 1984, to appear in siam journal on c numerical analysis. c c note -- this routine is still not quite optimal. c dimension u(1), ubar(1), rhs(1), wk(1), coef(1), jcoef(2), a wfac(1), jwfac(1) integer vect1, vect2, os logical uneed external suba, subat, subql, subqlt, subqr, subqrt dimension iparm(30), rparm(30) logical iql, iqr c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom4 / keygs, srelpr, keyzer common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c c next, the indexing functions. c indv1(i) = vect1 + mod(i,nv)*n indv2(i) = vect2 + mod(i,nv)*n indbe1(i) = ibeta1 + mod(i,os) indbe2(i) = ibeta2 + mod(i,os) indc(i) = icos + mod(i,os) inds(i) = isin + mod(i,os) indu(i) = iu + mod(i,os+1) indw(i) = iw + n*mod(i,os) c c preliminary calculations. c nwusd = 0 ier = 0 iacel = 12 t1 = timer (dummy) call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 996 iql = iqlr .eq. 1 .or. iqlr .eq. 3 iqr = iqlr .eq. 2 .or. iqlr .eq. 3 if (iqr) go to 995 if (level .ge. 2) write (nout,496) 496 format (' usymlq') c c initialize the stopping test ... c call inithv (0) zdhav = .true. nwpstp = nw call pstop (0,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,xxx,xxx, a wk,nwpstp,ier) nwusd = max0(nwusd,nwpstp) if (ier .lt. 0) go to 730 c c ... associated integer variables. c os = 2 iv = 1 nv = os iw = 1 vect1 = iw + iv*n*os vect2 = vect1 + iv*n*nv ibeta1 = vect2 + iv*n*nv ibeta2 = ibeta1 + os icos = ibeta2 + os isin = icos + os iu = isin + os iv1 = iu + os + 1 iv2 = iv1 + n nwusd = max0(nwusd,iv2-1+n) c c check the memory usage -- c if (nwusd .gt. nw) go to 999 c in = 0 is = 0 uneed = rcalp .or. zcalp .or. ztcalp .or. udhav a .or. ntest .eq. 6 .or. level .ge. 3 c c------------------------begin iteration loop----------------------- c c perform first-iterate calculations ... c 10 if (in .ne. 0) go to 100 call suba (coef,jcoef,wfac,jwfac,n,u,wk(iv1)) call vexopy (n,wk(iv1),rhs,wk(iv1),2) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iv2)) vnorm1 = sqrt(vdot(n,wk(iv2),wk(iv2))) vnorm2 = vnorm1 if (abs(vnorm1) .lt. srelpr) go to 997 gamma1 = 1.0/vnorm1 gamma2 = 1.0/vnorm2 call vtriad (n,wk(indv1(0)),xxx,gamma1,wk(iv2),2) call vcopy (n,wk(indv1(0)),wk(indv2(0))) zdot = vnorm1**2 ucnp1= 0.0 c c determine whether or not to stop -- c 100 call inithv (1) zdhav = .true. nwpstp = nw - (iv1-1) call pstop (1,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,xxx,xxx, a wk(iv1),nwpstp,ier) nwusd = max0(nwusd,nwpstp+iv1-1) if (level .ge. 2) call iterm (n,u) if (halt .or. in .ge. itmax .or. ier .lt. 0) go to 900 c c ... compute q(n+1), etc -- the direction vectors c call suba (coef,jcoef,wfac,jwfac,n,wk(indv1(in)),wk(iv1)) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iv2)) an = vdot (n,wk(indv2(in)),wk(iv2)) if (in .ne. 0) go to 110 call vtriad (n,wk(indv2(in+1)),wk(iv2),-an,wk(indv2(in)), 1) wk(indbe2(in)) = -an go to 111 110 call vtriad (n,wk(indv2(in+1)),xxx,-vnorm1,wk(indv2(in-1)),2) call vtriad (n,wk(indv2(in+1)),wk(indv2(in+1)),1.0,wk(iv2),1) call vtriad (n,wk(indv2(in+1)),wk(indv2(in+1)),-an, a wk(indv2(in)),1) wk(indbe2(in)) = -an wk(indbe2(in-1)) = -vnorm1 111 vn2old = vnorm2 vnorm2 = sqrt(vdot (n,wk(indv2(in+1)),wk(indv2(in+1)))) if (abs(vnorm2) .lt. srelpr) go to 997 gamma2 = 1.0/vnorm2 call vtriad (n,wk(indv2(in+1)),xxx,gamma2,wk(indv2(in+1)),2) c call subqlt (coef,jcoef,wfac,jwfac,n,wk(indv2(in)),wk(iv1)) call subat (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iv2)) if (in .ne. 0) go to 810 call vtriad (n,wk(indv1(in+1)),wk(iv2),-an,wk(indv1(in)),1) wk(indbe1(in)) = -an go to 811 810 call vtriad (n,wk(indv1(in+1)),xxx,-vn2old,wk(indv1(in-1)),2) call vtriad (n,wk(indv1(in+1)),wk(indv1(in+1)),1.0,wk(iv2),1) call vtriad (n,wk(indv1(in+1)),wk(indv1(in+1)),-an,wk(indv1(in)), a 1) wk(indbe1(in)) = -an wk(indbe1(in-1)) = -vn2old 811 vn1old= vnorm1 vnorm1 = sqrt(vdot (n,wk(indv1(in+1)),wk(indv1(in+1)))) if (abs(vnorm1) .lt. srelpr) go to 997 gamma1 = 1.0/vnorm1 call vtriad (n,wk(indv1(in+1)),xxx,gamma1,wk(indv1(in+1)),2) c c ... now update the factorization ucnbar = ucnp1 ibgn = max0(0,in+1-os) do 1 i = ibgn,in 1 wk(indu(i+1)) = -wk(indbe2(i)) if (ibgn .gt. 0) wk(indu(ibgn))= 0.0 call qrupd (in+1,os+1,os,wk(icos),wk(isin),ucnbar,ucn,wk(iu), a vn2old,ier) if (ier .ne. 0) go to 998 ucnp1 = wk(indu(in+1)) c c ... update the old w vector. c if (in .ne. 0) a call vtriad (n,wk(indw(in-1)),xxx,ucnbar/ucn,wk(indw(in-1)),2) c c ... now generate the new w vector. c if (abs(ucnp1) .lt. srelpr) go to 998 call vcopy (n,wk(indv1(in)),wk(iv1)) ibgn = max0(1,in-os+1) iend = in if (iend .lt. ibgn) go to 200 do 201 i = ibgn,iend 201 call vtriad (n,wk(iv1),wk(iv1),-wk(indu(i)),wk(indw(i-1)),1) 200 continue call vtriad (n,wk(indw(in)),xxx,1.0/ucnp1,wk(iv1),2) if (in .ne. 0) go to 205 c c ... update iterate u(0). zold= 0.0 zbar = vn1old if (uneed) call vtriad (n,u,u,zbar,wk(indw(0)), 1) go to 210 c c ... update subsequent iterates u(n). c 205 zold = wk(indc(in))*zbar zbold = zbar zbar =-wk(inds(in))*zbar factor = zold if (uneed) factor = factor - zbold*ucn/ucnbar call vtriad (n,u,u,factor,wk(indw(in-1)), 1) if (uneed) call vtriad (n,u,u,zbar,wk(indw(in)), 1) 210 continue zdot = (zbar/ucnp1*vnorm1)**2 c c proceed to next iteration c in = in + 1 is = is + 1 go to 10 c c------------------------------finish up------------------------------- c 900 if (.not. uneed) call vtriad (n,u,u,zbar,wk(indw(in-1)),1) if (halt) go to 715 ier = 1 call ershow (ier,'uslqw') zeta = stptst go to 725 715 continue if (level .ge. 1) write (nout,720) in 720 format (/' usymlq converged in ',i5,' iterations.') c 725 continue if (idgts .lt. 0) go to 730 call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wk,digit1, a digit2,idgts) 730 t2 = timer (dummy) timit = t2 - t1 iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 735 continue if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) nw = nwusd return c c error returns c c unimplemented option 995 ier = -16 call ershow (ier,'uslqw') return 996 call ershow (ier,'uslqw') go to 735 c 997 ier = -13 call ershow (ier,'uslqw') go to 725 c 998 ier = -14 call ershow (ier,'uslqw') go to 725 c 999 ier = -2 call ershow (ier,'uslqw') go to 735 c end subroutine usqrw (suba,subat,subql,subqlt,subqr,subqrt, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk,nw,iparm,rparm,ier) c c code to run the usymqr algorithm. same reference as usymlq c algorithm. c dimension u(1), ubar(1), rhs(1), wk(1), coef(1), jcoef(2), a wfac(1), jwfac(1) integer vect1, vect2, os external suba, subat, subql, subqlt, subqr, subqrt dimension iparm(30), rparm(30) logical iql, iqr c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom4 / keygs, srelpr, keyzer common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c c next, the indexing functions. c indv 1(i)= vect1 + mod(i,nv)*n indv2(i) = vect2 + mod(i,nv)*n indbe 1(i)= ibeta1 + mod(i,os) indbe2(i) = ibeta2 + mod(i,os) indc(i) = icos + mod(i,os+ 1) inds(i) = isin + mod(i,os+ 1) indu(i) = iu + mod(i,os+2) indw(i) = iw + n*mod(i,os) c c preliminary calculations. c nwusd = 0 ier = 0 iacel = 13 t1 = timer (dummy) call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 996 iql = iqlr .eq. 1 .or. iqlr .eq. 3 iqr = iqlr .eq. 2 .or. iqlr .eq. 3 if (iqr) go to 995 if (level .ge. 2) write (nout,496) 496 format (' usymqr') c c initialize the stopping test ... c call inithv (0) zdhav = .true. nwpstp = nw call pstop (0,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,xxx,xxx, a wk,nwpstp,ier) nwusd = max0(nwusd,nwpstp) if (ier .lt. 0) go to 730 c c ... associated integer variables. c os = 2 iv = 1 nv = os iw = 1 vect1 = iw + iv*n*os vect2 = vect1 + iv*n*nv ibeta1 = vect2 + iv*n*nv ibeta2 = ibeta1 + os icos = ibeta2 + os isin = icos + os+ 1 iu = isin + os+ 1 iv1 = iu + os+2 iv2 = iv1 + n nwusd = max0(nwusd,iv2- 1+n) c c check the memory usage -- c if (nwusd .gt. nw) go to 999 c c c now, perform first-iterate calculations in = 0 is = 0 call suba (coef,jcoef,wfac,jwfac,n,u,wk(iv1)) call vexopy (n,wk(iv1),rhs,wk(iv1),2) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iv2)) vnorm1 = sqrt(vdot (n,wk(iv2),wk(iv2))) vnorm2 = vnorm1 if (abs(vnorm1) .lt. srelpr) go to 997 gamma1 = 1.0/vnorm1 gamma2 = 1.0/vnorm2 call vtriad (n,wk(indv1(0)),xxx,gamma1,wk(iv2),2) call vcopy (n,wk(indv1(0)),wk(indv2(0))) zdot = vnorm1**2 znext = vnorm1 c c------------------------begin iteration loop------------------------ c c determine whether or not to stop -- c 10 call inithv (1) zdhav = .true. nwpstp = nw - (iv1-1) call pstop (1,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,xxx,xxx, a wk(iv1),nwpstp,ier) nwusd = max0(nwusd,nwpstp+iv1-1) if (level .ge. 2) call iterm (n,u) if (halt .or. in .ge. itmax .or. ier .lt. 0) go to 900 c c c ... compute q(n+1), etc -- the direction vectors call suba (coef,jcoef,wfac,jwfac,n,wk(indv1(in)),wk(iv1)) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iv2)) an = vdot (n,wk(indv2(in)),wk(iv2)) if (in .ne. 0) go to 110 call vtriad (n,wk(indv2(in+1)),wk(iv2),-an,wk(indv2(in)),1) wk(indbe2(in)) = -an go to 111 110 call vtriad (n,wk(indv2(in+1)),xxx,-vnorm1,wk(indv2(in-1)),2) call vtriad (n,wk(indv2(in+1)),wk(indv2(in+1)),1.0,wk(iv2),1) call vtriad (n,wk(indv2(in+1)),wk(indv2(in+1)),-an,wk(indv2(in)), a 1) wk(indbe2(in)) = -an wk(indbe2(in-1)) = -vnorm1 111 vn2old = vnorm2 vnorm2 = sqrt(vdot (n,wk(indv2(in+1)),wk(indv2(in+1)))) if (abs(vnorm2) .lt. srelpr) go to 997 gamma2 = 1.0/vnorm2 call vtriad (n,wk(indv2(in+1)),xxx,gamma2,wk(indv2(in+1)),2) c call subqlt (coef,jcoef,wfac,jwfac,n,wk(indv2(in)),wk(iv1)) call subat (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iv2)) if (in .ne. 0) go to 810 call vtriad (n,wk(indv1(in+1)),wk(iv2),-an,wk(indv1(in)),1) wk(indbe1(in)) = -an go to 811 810 call vtriad (n,wk(indv1(in+1)),xxx,-vn2old,wk(indv1(in-1)),2) call vtriad (n,wk(indv1(in+1)),wk(indv1(in+1)),1.0,wk(iv2),1) call vtriad (n,wk(indv1(in+1)),wk(indv1(in+1)),-an,wk(indv1(in)), a 1) wk(indbe1(in)) = -an wk(indbe1(in-1)) = -vn2old 811 vnorm1 = sqrt(vdot (n,wk(indv1(in+1)),wk(indv1(in+1)))) if (abs(vnorm1) .lt. srelpr) go to 997 gamma1 = 1.0/vnorm1 call vtriad (n,wk(indv1(in+1)),xxx,gamma1,wk(indv1(in+1)),2) c c ... now update the factorization ibgn = max0(0,in+1-os) do 1 i = ibgn,in 1 wk(indu(i+1)) = -wk(indbe2(i)) if (ibgn .gt. 0) wk(indu(ibgn))= 0.0 wk(indu(in+2)) = vnorm2 call qrupd (in+2,os+2,os+1,wk(icos),wk(isin),wk(indu(in+1)),x, a wk(iu),vnorm2,ier) if (ier .lt. 0) go to 998 c c ... now generate the new w vector. uc = wk(indu(in+1)) if (abs(uc) .lt. srelpr) go to 998 call vcopy (n,wk(indv1(in)),wk(iv1)) ibgn = max0(1,in-os+1) iend = in if (iend .lt. ibgn) go to 200 do 201 i = ibgn,iend 201 call vtriad (n,wk(iv1),wk(iv1),-wk(indu(i)),wk(indw(i-1)),1) 200 continue call vtriad (n,wk(indw(in)),xxx,1.0/uc,wk(iv1),2) c c ... update iterates u(n). z = wk(indc(in+1))*znext znext = -wk(inds(in+1))*znext call vtriad (n,u,u,z,wk(indw(in)),1) zdot = znext**2 c c proceed to next iteration c in = in + 1 is = is + 1 go to 10 c c-----------------------------finish up---------------------------- c 900 if (halt) go to 715 ier = 1 call ershow (ier,'usqrw') zeta = stptst go to 725 715 continue if (level .ge. 1) write (nout,720) in 720 format (/' usymqr converged in ',i5,' iterations.') c 725 continue if (idgts .lt. 0) go to 730 call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wk,digit1, a digit2,idgts) 730 t2 = timer (dummy) timit = t2 - t1 iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 735 continue if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) nw = nwusd return c c error returns c 995 ier = -16 call ershow (ier,'usqrw') return c 996 call ershow (ier,'usqrw') go to 735 c 997 ier = -13 call ershow (ier,'usqrw') go to 725 c 998 ier = -14 call ershow (ier,'usqrw') go to 725 c 999 ier = -2 call ershow (ier,'usqrw') go to 735 c end subroutine ldirw (suba,subat,subql,subqlt,subqr,subqrt, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk,nw,iparm,rparm,ier) c c code to run the lanczos/orthodir algorithm. see jea and young, in c linear algebra and its applications, vol 52/3, 1983, pp399f. c dimension u(1), ubar(1), rhs(1), wk(1), coef(1), jcoef(2), a wfac(1), jwfac(1) external suba, subat, subql, subqlt, subqr, subqrt dimension iparm(30), rparm(30) logical iql, iqr c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom4 / keygs, srelpr, keyzer common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c c indexing functions. c indq(i) = iq + n*mod(i,2) indqt(i) = iqt + n*mod(i,2) c c preliminary calculations. c nwusd = 0 ier = 0 iacel = 14 t1 = timer (dummy) call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 997 iql = iqlr .eq. 1 .or. iqlr .eq. 3 iqr = iqlr .eq. 2 .or. iqlr .eq. 3 if (iqr) go to 995 if (level .ge. 2) write (nout,496) 496 format (' landir') c c initialize the stopping test ... c call inithv (0) zhave = .true. nwpstp = nw call pstop (0,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,xxx,xxx, a wk,nwpstp,ier) nwusd = max0(nwusd,nwpstp) if (ier .lt. 0) go to 730 c iq = 1 iqt = iq + 2*n ir = iqt + 2*n iv1 = ir + n iv2 = iv1 + n iv3 = iv2 + n nwusd = max0(nwusd,iv3-1+n) c c check the memory usage. c if (nwusd .gt. nw) go to 999 c in = 0 is = 0 call suba (coef,jcoef,wfac,jwfac,n,u,wk(iv1)) call vexopy (n,wk(iv1),rhs,wk(iv1),2) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(ir)) c c begin iteration loop c c determine whether or not to stop. c 10 call inithv (1) nwpstp = nw - (iv1-1) call pstop (1,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,wk(ir),xxx, a wk(iv1),nwpstp,ier) nwusd = max0(nwusd,nwpstp+iv1-1) if (level .ge. 2) call iterm (n,u) if (halt .or. in .ge. itmax .or. ier .lt. 0) go to 900 c if (in .ne. 0) go to 110 c c perform first-iterate calculations c call vcopy (n,wk(ir),wk(indq(in))) call vcopy (n,wk(indq(in)),wk(indqt(in))) qaq= 0.0 go to 115 c c proceed to calculate the direction vectors, for in .gt. 0. c 110 call subqlt (coef,jcoef,wfac,jwfac,n,wk(indqt(in-1)),wk(iv1)) call subat (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iv3)) aqaq = vdot(n,wk(iv2),wk(iv3)) an = aqaq / qaq if (in .ne. 1) go to 150 call vtriad (n,wk(indq(in)),wk(iv2),-an,wk(indq(in-1)),1) call vtriad (n,wk(indqt(in)),wk(iv3),-an,wk(indqt(in-1)),1) go to 115 150 bn = qaq / qaqold call vtriad (n,wk(indq(in)),wk(iv2),-bn,wk(indq(in-2)),1) call vtriad (n,wk(indq(in)),wk(indq(in)),-an,wk(indq(in-1)),1) call vtriad (n,wk(indqt(in)),wk(iv3),-bn,wk(indqt(in-2)),1) call vtriad (n,wk(indqt(in)),wk(indqt(in)),-an,wk(indqt(in-1)),1) c c proceed to form the iterate. c 115 call suba (coef,jcoef,wfac,jwfac,n,wk(indq(in)),wk(iv1)) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iv2)) qaqold = qaq qaq = vdot(n,wk(indqt(in)),wk(iv2)) if (abs(qaq) .lt. srelpr) go to 998 qr = vdot(n,wk(indqt(in)),wk(ir)) vlamda = qr / qaq call vtriad (n,u,u,vlamda,wk(indq(in)),1) call vtriad (n,wk(ir),wk(ir),-vlamda,wk(iv2),1) c c proceed to next iteration c in = in + 1 is = is + 1 go to 10 c c-----------------------------finish up----------------------------- c 900 if (halt) go to 715 ier = 1 call ershow (ier,'ldirw') zeta = stptst go to 725 715 continue if (level .ge. 1) write (nout,720) in 720 format (/' lanczos/orthodir converged in ',i5,' iterations.') c 725 continue if (idgts .lt. 0) go to 730 call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wk,digit1, a digit2,idgts) 730 t2 = timer (dummy) timit = t2 - t1 iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 735 continue if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) nw = nwusd return c c error returns c 995 ier = -16 call ershow (ier,'ldirw') return c 997 call ershow (ier,'ldirw') go to 735 c 998 ier = -15 call ershow (ier,'ldirw') go to 725 c 999 ier = -2 call ershow (ier,'ldirw') go to 735 c end subroutine lminw (suba,subat,subql,subqlt,subqr,subqrt, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk,nw,iparm,rparm,ier) c c code to run the lanczos/orthomin algorithm. c here, zhat and phat will refer to the "dummy" system of the c lanczos method. c dimension u(1), ubar(1), rhs(1), wk(1), coef(1), jcoef(2), a wfac(1), jwfac(1) integer vect1, vect2, os external suba, subat, subql, subqlt, subqr, subqrt dimension iparm(30), rparm(30) logical iql, iqr c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom4 / keygs, srelpr, keyzer common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c c nwusd = 0 ier = 0 iacel = 15 t1 = timer (dummy) call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 997 if (level .ge. 2) write (nout,496) 496 format (' lanmin') c c initialize the stopping test ... c iql = iqlr .eq. 1 .or. iqlr .eq. 3 iqr = iqlr .eq. 2 .or. iqlr .eq. 3 call inithv (0) zhave = .true. zthave = .true. nwpstp = nw call pstop (0,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,xxx,xxx, a wk,nwpstp,ier) nwusd = max0(nwusd,nwpstp) if (ier .lt. 0) go to 730 c c allocate memory -- overlap wherever possible ... ip = 1 ipt = ip + n if (.not. iqr) ipt = ip iphat = ipt + n iz = iphat + n izt = iz + n if (.not. iqr) izt = iz izhat = izt + n iv1 = izhat + n iv2 = iv1 + n if (iqlr .eq. 0) nwusd = max0(nwusd,iv1-1+n) if (iqlr .ne. 0) nwusd = max0(nwusd,iv2-1+n) c c check the memory usage. c if (nwusd .gt. nw) go to 999 c in = 0 is = 0 if (.not. iql) go to 121 call suba (coef,jcoef,wfac,jwfac,n,u,wk(iv1)) call vexopy (n,wk(iv1),rhs,wk(iv1),2) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iz)) go to 122 121 call suba (coef,jcoef,wfac,jwfac,n,u,wk(iz)) call vexopy (n,wk(iz),rhs,wk(iz),2) 122 if (iqr) call subqr (coef,jcoef,wfac,jwfac,n,wk(iz),wk(izt)) c c============================begin iteration loop====================== c c determine whether or not to stop. c 10 call inithv (1) nwpstp = nw - (iv1-1) call pstop (1,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,wk(iz),wk(izt), a wk(iv1),nwpstp,ier) nwusd = max0(nwusd,nwpstp+iv1-1) if (level .ge. 2) call iterm (n,u) if (halt .or. in .ge. itmax .or. ier .lt. 0) go to 900 c if (in .ne. 0) go to 110 c c perform first-iterate calculations c call vcopy (n,wk(iz),wk(izhat)) rd = vdot (n,wk(iz),wk(izhat)) call vcopy (n,wk(iz),wk(ip)) call vcopy (n,wk(izhat),wk(iphat)) if (iqr) call vcopy (n,wk(izt),wk(ipt)) go to 111 c c perform subsequent-iterate calculations c 110 rdold = rd c if (abs(rdold) .lt. srelpr) go to 996 if (abs(rdold) .eq. 0e0) go to 996 c c form the old zhat ... go to (131,132,133,134), iqlr + 1 131 call subat (coef,jcoef,wfac,jwfac,n,wk(iphat),wk(iv1)) go to 135 132 call subqlt (coef,jcoef,wfac,jwfac,n,wk(iphat),wk(iv2)) call subat (coef,jcoef,wfac,jwfac,n,wk(iv2),wk(iv1)) go to 135 133 call subat (coef,jcoef,wfac,jwfac,n,wk(iphat),wk(iv2)) call subqrt (coef,jcoef,wfac,jwfac,n,wk(iv2),wk(iv1)) go to 135 134 call subqlt (coef,jcoef,wfac,jwfac,n,wk(iphat),wk(iv1)) call subat (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iv2)) call subqrt (coef,jcoef,wfac,jwfac,n,wk(iv2),wk(iv1)) 135 call vtriad (n,wk(izhat),wk(izhat),-vlamda,wk(iv1),1) c c form the direction vectors ... rd = vdot (n,wk(iz),wk(izhat)) an = rd/rdold call vtriad (n,wk(ip),wk(iz),an,wk(ip),1) call vtriad (n,wk(iphat),wk(izhat),an,wk(iphat),1) if (iqr) call vtriad (n,wk(ipt),wk(izt),an,wk(ipt),1) c c============================form the iterate======================== c 111 if (iql) go to 141 call suba (coef,jcoef,wfac,jwfac,n,wk(ipt),wk(iv1)) go to 142 141 call suba (coef,jcoef,wfac,jwfac,n,wk(ipt),wk(iv2)) call subql (coef,jcoef,wfac,jwfac,n,wk(iv2),wk(iv1)) 142 pap = vdot (n,wk(iphat),wk(iv1)) c if (abs(pap) .lt. srelpr**2) go to 998 if (abs(pap) .eq. 0e0) go to 998 vlamda = rd/pap c c call vtriad (n,u,u,vlamda,wk(ipt),1) call vtriad (n,wk(iz),wk(iz),-vlamda,wk(iv1),1) if (.not. iqr) go to 151 call subqr (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iv2)) call vtriad (n,wk(izt),wk(izt),-vlamda,wk(iv2),1) c c proceed to next iteration c 151 in = in + 1 is = is + 1 go to 10 c c------------------------------finish up----------------------------- c 900 if (halt) go to 715 ier = 1 call ershow (ier,'lminw') zeta = stptst go to 725 715 continue if (level .ge. 1) write (nout,720) in 720 format (/' lanczos/orthomin converged in ',i5,' iterations.') c 725 continue if (idgts .lt. 0) go to 730 call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wk,digit1, a digit2,idgts) 730 t2 = timer (dummy) timit = t2 - t1 iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 735 continue if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) nw = nwusd return c c error returns c 996 ier = -13 call ershow (ier,'lminw') go to 725 c 997 call ershow (ier,'lminw') go to 735 c 998 ier = -15 call ershow (ier,'lminw') go to 725 c 999 ier = -2 call ershow (ier,'lminw') go to 735 c end subroutine lresw (suba,subat,subql,subqlt,subqr,subqrt, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk,nw,iparm,rparm,ier) c c code to run the lanczos/orthores algorithm. c dimension u(1), ubar(1), rhs(1), wk(1), coef(1), jcoef(2), a wfac(1), jwfac(1) external suba, subat, subql, subqlt, subqr, subqrt dimension iparm(30), rparm(30) logical iql, iqr c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom4 / keygs, srelpr, keyzer common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c c indexing functions. c indu(i) = iu + n*mod(i,nv) indr(i) = ir + n*mod(i,nv) indrt(i) = irt + n*mod(i,nv) c c preliminary calculations. c nwusd = 0 ier = 0 iacel = 16 t1 = timer (dummy) call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 997 iql = iqlr .eq. 1 .or. iqlr .eq. 3 iqr = iqlr .eq. 2 .or. iqlr .eq. 3 if (iqr) go to 995 if (level .ge. 2) write (nout,496) 496 format (' lanres') c c initialize the stopping test ... c call inithv (0) zhave = .true. nwpstp = nw call pstop (0,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,xxx,xxx, a wk,nwpstp,ier) nwusd = max0(nwusd,nwpstp) if (ier .lt. 0) go to 730 c nv = 2 iu = 1 ir = iu + nv*n irt = ir + nv*n iv1 = irt + nv*n nwusd = max0(nwusd,iv1-1+n) c c check the memory usage. c if (nwusd .gt. nw) go to 999 c c note -- we will use the vector 'u' for scratch storage, to save space. c call vcopy (n,u,wk(indu(0))) in = 0 is = 0 call suba (coef,jcoef,wfac,jwfac,n,wk(indu(in)),wk(iv1)) call vexopy (n,wk(iv1),rhs,wk(iv1),2) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(indr(in))) call vcopy (n,wk(indr(in)),wk(indrt(in))) c c--------------------------begin iteration loop----------------------- c c determine whether or not to stop. c 10 call inithv (1) nwpstp = nw - (iv1-1) call pstop (1,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,wk(indu(in)),ubar,rhs,xxx,wk(indr(in)),xxx, a wk(iv1),nwpstp,ier) nwusd = max0(nwusd,nwpstp+iv1-1) if (level .ge. 2) call iterm (n,wk(indu(in))) if (halt .or. in .ge. itmax .or. ier .lt. 0) go to 900 c c proceed to calculate the parameters. c first, gamma. c rd = vdot (n,wk(indr(in)),wk(indrt(in))) call suba (coef,jcoef,wfac,jwfac,n,wk(indr(in)),wk(iv1)) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),u) rar = vdot (n,u,wk(indrt(in))) if (abs(rar) .lt. srelpr) go to 998 gam = rd / rar c c now, rho. c if (in .ne. 0) go to 118 rho = 1.0 go to 119 118 if (abs(gamold) .lt. srelpr) go to 998 if (abs(rdold) .lt. srelpr) go to 998 if (abs(rho) .lt. srelpr) go to 998 rhoinv = 1.0 - (gam/gamold)*(rd/rdold)/rho if (abs(rhoinv) .lt. srelpr) go to 998 rho = 1.0 / rhoinv c c now work on updating u, r, rt. c first, the first iteration. c 119 if (in .ne. 0) go to 150 call vtriad (n,wk(indu(in+1)),wk(indu(in)),gam,wk(indr(in)),1) call vtriad (n,wk(indu(in+1)),xxx,rho,wk(indu(in+1)),2) call vtriad (n,wk(indr(in+1)),wk(indr(in)),-gam,u,1) call vtriad (n,wk(indr(in+1)),xxx,rho,wk(indr(in+1)),2) call subqlt (coef,jcoef,wfac,jwfac,n,wk(indrt(in)),wk(iv1)) call subat (coef,jcoef,wfac,jwfac,n,wk(iv1),u) call vtriad (n,wk(indrt(in+1)),wk(indrt(in)),-gam,u,1) call vtriad (n,wk(indrt(in+1)),xxx,rho,wk(indrt(in+1)),2) go to 151 c c now work on subsequent iterations. c 150 call vtriad (n,wk(indu(in+1)),xxx,1.0-rho,wk(indu(in-1)),2) call vtriad (n,wk(indu(in+1)),wk(indu(in+1)),rho,wk(indu(in)),1) call vtriad (n,wk(indu(in+1)),wk(indu(in+1)),rho*gam, a wk(indr(in)),1) call vtriad (n,wk(indr(in+1)),xxx,1.0-rho,wk(indr(in-1)),2) call vtriad (n,wk(indr(in+1)),wk(indr(in+1)),rho,wk(indr(in)),1) call vtriad (n,wk(indr(in+1)),wk(indr(in+1)),-rho*gam,u,1) call subqlt (coef,jcoef,wfac,jwfac,n,wk(indrt(in)),wk(iv1)) call subat (coef,jcoef,wfac,jwfac,n,wk(iv1),u) call vtriad (n,wk(indrt(in+1)),xxx,1.0-rho,wk(indrt(in-1)), a 2) call vtriad (n,wk(indrt(in+1)),wk(indrt(in+1)),rho,wk(indrt(in)), a 1) call vtriad (n,wk(indrt(in+1)),wk(indrt(in+1)),-rho*gam,u,1) c c proceed to next iteration c 151 gamold = gam rdold = rd in = in + 1 is = is + 1 go to 10 c c-------------------------------finish up---------------------------- c 900 call vcopy (n,wk(indu(in)),u) if (halt) go to 715 ier = 1 call ershow (ier,'lresw') zeta = stptst go to 725 715 continue if (level .ge. 1) write (nout,720) in 720 format (/' lanczos/orthores converged in ',i5,' iterations.') c 725 continue if (idgts .lt. 0) go to 730 call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wk,digit1, a digit2,idgts) 730 t2 = timer (dummy) timit = t2 - t1 iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 735 continue if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) nw = nwusd return c c error returns c 995 ier = -16 call ershow (ier,'lresw') return c 997 call ershow (ier,'lresw') go to 735 c 998 ier = -15 call ershow (ier,'lresw') go to 725 c 999 ier = -2 call ershow (ier,'lresw') go to 735 c end subroutine bcgsw (suba,subql,subqr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,wk,nw,iparm,rparm,ier) c c code to run the biconjugate-gradient-squared algorithm. c the algorithm is taken from "preconditioned biconjugate gradient c methods for numerical reservoir simulation", by p. joly and r. c eymard, to appear in journal of computational physics. the original c reference is p. sonneveld, "cgs, a fast lanczos-type solver for c unsymmetric linear systems," report 84-16, delft university of c technology, dept. of mathematics and informatics. c dimension u(1), ubar(1), rhs(1), wk(1), coef(1), jcoef(2), a wfac(1), jwfac(1) external suba, subql, subqr dimension iparm(30), rparm(30) logical iql, iqr c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom4 / keygs, srelpr, keyzer common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c nwusd = 0 ier = 0 iacel = 15 t1 = timer (dummy) call echall (n,iparm,rparm,1,2,ier) if (ier .lt. 0) go to 997 if (level .ge. 2) write (nout,496) 496 format (' bcgs') iql = iqlr .eq. 1 .or. iqlr .eq. 3 iqr = iqlr .eq. 2 .or. iqlr .eq. 3 if (iqr) go to 995 c c initialize the stopping test ... c call inithv (0) zhave = .true. nwpstp = nw call pstop (0,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,xxx,xxx, a wk,nwpstp,ier) nwusd = max0(nwusd,nwpstp) if (ier .lt. 0) go to 730 c c allocate memory -- overlap wherever possible ... ir0 = 1 ip = ir0 + n ipt = ip + n if (.not. iqr) ipt = ip iq = ipt + n iz = iq + n izt = iz + n if (.not. iqr) izt = iz iv1 = izt + n iv2 = iv1 + n iv3 = iv2 + n nwusd = max0(nwusd,iv3-1+n) ipaaq = iv1 ippaaq = iv2 c c check the memory usage. c if (nwusd .gt. nw) go to 999 c in = 0 is = 0 if (.not. iql) go to 121 call suba (coef,jcoef,wfac,jwfac,n,u,wk(iv1)) call vexopy (n,wk(iv1),rhs,wk(iv1),2) call subql (coef,jcoef,wfac,jwfac,n,wk(iv1),wk(iz)) go to 122 121 call suba (coef,jcoef,wfac,jwfac,n,u,wk(iz)) call vexopy (n,wk(iz),rhs,wk(iz),2) 122 if (iqr) call subqr (coef,jcoef,wfac,jwfac,n,wk(iz),wk(izt)) c c=======================begin iteration loop======================= c c determine whether or not to stop. c 10 call inithv (1) nwpstp = nw - (iv2-1) call pstop (1,suba,subql,subqr,coef,jcoef, a wfac,jwfac,n,u,ubar,rhs,xxx,wk(iz),wk(izt), a wk(iv2),nwpstp,ier) nwusd = max0(nwusd,nwpstp+iv2-1) if (level .ge. 2) call iterm (n,u) if (halt .or. in .ge. itmax .or. ier .lt. 0) go to 900 c if (in .ne. 0) go to 110 c c perform first-iterate calculations c call vcopy (n,wk(iz),wk(ir0)) call vcopy (n,wk(iz),wk(ip)) call vcopy (n,wk(iz),wk(iq)) r0r = vdot (n,wk(iz),wk(ir0)) go to 111 c c perform subsequent-iterate calculations c 110 r0rold = r0r r0r = vdot (n,wk(ir0),wk(iz)) if (abs(r0rold) .lt. srelpr**2) go to 996 beta = r0r/r0rold c c form direction vectors ... c call vtriad (n,wk(ip),wk(iz),beta,wk(ipaaq),1) call vtriad (n,wk(iv2),wk(ipaaq),beta,wk(iq),1) call vtriad (n,wk(iq),wk(ip),beta,wk(iv2),1) c c==========================form the iterate========================== c c at this point we have the vectors p and q and the new dot(r,r0) ... c now form aq ... c 111 iaq = iv1 if (.not.iql) then call suba (coef,jcoef,wfac,jwfac,n,wk(iq),wk(iaq)) else call suba (coef,jcoef,wfac,jwfac,n,wk(iq),wk(iv2)) call subql (coef,jcoef,wfac,jwfac,n,wk(iv2),wk(iaq)) end if c dot(r0,aq) ... r0aq = vdot (n,wk(ir0),wk(iaq)) if (abs(r0aq) .lt. srelpr**2) go to 998 alpha = r0r / r0aq c p-alpha*aq, p+p-alpha*aq ... call vtriad (n,wk(ipaaq), wk(ip),-alpha,wk(iaq),1) call vexopy (n,wk(ippaaq),wk(ip),wk(ipaaq),1) c c---get u--- call vtriad (n,u,u,alpha,wk(ippaaq),1) c c---get resid--- if (.not.iql) then call suba (coef,jcoef,wfac,jwfac,n,wk(ippaaq),wk(iv3)) call vtriad (n,wk(iz),wk(iz),-alpha,wk(iv3),1) else call suba (coef,jcoef,wfac,jwfac,n,wk(ippaaq),wk(iv3)) call subql (coef,jcoef,wfac,jwfac,n,wk(iv3),wk(iv2)) call vtriad (n,wk(iz),wk(iz),-alpha,wk(iv2),1) end if c c proceed to next iteration c in = in + 1 is = is + 1 go to 10 c c----------------------------------finish up---------------------------------- c 900 if (halt) go to 715 ier = 1 call ershow (ier,'bcgsw') zeta = stptst go to 725 715 continue if (level .ge. 1) write (nout,720) in 720 format (/' bcgs converged in ',i5,' iterations.') c 725 continue if (idgts .lt. 0) go to 730 call perror (suba,coef,jcoef,wfac,jwfac,n,u,rhs,wk,digit1, a digit2,idgts) 730 t2 = timer (dummy) timit = t2 - t1 iparm(2) = in rparm(1) = zeta rparm(2) = emax rparm(3) = emin rparm(6) = timit rparm(7) = digit1 rparm(8) = digit2 735 continue if (level .ge. 3) call echall (n,iparm,rparm,2,2,ier) nw = nwusd return c c error returns c 995 ier = -16 call ershow (ier,'bcgsw') go to 725 c 996 ier = -13 call ershow (ier,'bcgsw') go to 725 c 997 call ershow (ier,'bcgsw') go to 735 c 998 ier = -15 call ershow (ier,'bcgsw') go to 725 c 999 ier = -2 call ershow (ier,'bcgsw') go to 735 c end subroutine nullpl (coef,jcoef,wk,iwk,n,subql,suba,subqr,u,v) c c routine to just apply the left preconditioner ... c dimension u(1), v(1), coef(1), jcoef(2), wk(1), iwk(1) external subql, suba, subqr c call subql (coef,jcoef,wk,iwk,n,u,v) return end subroutine nullpr (coef,jcoef,wk,iwk,n,subql,suba,subqr,u,v) c c routine to just apply the right preconditioner ... c dimension u(1), v(1), coef(1), jcoef(2), wk(1), iwk(1) external subql, suba, subqr c call subqr (coef,jcoef,wk,iwk,n,u,v) return end subroutine cgcrpr (coef,jcoef,wk,iwk,n,subql,suba,subqr,u,v) c c right preconditioner routine to use with cgcr method. c dimension u(1), v(1), coef(1), jcoef(2), wk(1), iwk(1) c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d c common / cwkcon / lenr, irpnt, irmax, leni, iipnt, iimax common / ccgcr / nblk, nband, ictac, ieta, ivcgcr c external subql, suba, subqr c c could bypass next line if subqr is just a copy ... call subqr (coef,jcoef,wk,iwk,n,u,v) call suba (coef,jcoef,wk,iwk,n,v,wk(ivcgcr)) call tmult (n,nblk,nband,wk(ictac),wk(ieta),wk(ivcgcr), a wk(ivcgcr)) call vexopy (n,v,v,wk(ivcgcr),2) c return end subroutine getblk (coef,jcoef,n,nblk,nband,ctac,nw,ier) c c this utility routine for the cgcr algorithm computes the matrix c (c**t)*a*c and factors it. here, each column of c is zero c everywhere except it is all 1's on one of its blocks. c dimension ctac(nblk,1), coef(1), jcoef(2) logical symm c common / itcom4 / keygs, srelpr, keyzer common / itcom6 / method, iscale, iperm, nstore, a ifact, kblsz ,lvfill, ltrunc, ndeg, a ipropa, isymm, ifctv c c ... data common blocks c common / dscons / ndim, mdim, maxnz c c nband = 0 c c*************************** find the bandwidth ********************** c c idmin = 0 idmax = 0 do 10 i=1,maxnz idiag = jcoef(i) idmin = min0 (idmin,idiag) 10 idmax = max0 (idmax,idiag) if (nstore .eq. 2) idmin = - idmax ihalf = max0(-idmin,idmax) nbsiz = n / nblk nhband = (ihalf+nbsiz-1)/nbsiz nband = 1 + 2*nhband c c******************************************************************* c c now form the matrix. basically what we need to do here is to c add up all the elements in each block of the a-matrix. c if (nblk*nband .gt. nw) go to 999 nw = nblk*nband c call vfill (nblk*nband,ctac,0e0) c c loop over the diagonals ... do 1 i=1,maxnz idiag = jcoef(i) ibeg = max0 (1,1-idiag) iend = min0 (n,n-idiag) ibbeg = 1 + (ibeg-1)/nbsiz ibend = 1 + (iend-1)/nbsiz ibase = (i-1)*ndim c symm = nstore.eq.2 .and. idiag.ne.0 idm1 = idiag - 1 iomid = -idm1 nmid = n - idiag nhbp1 = nhband + 1 c loop over the rows of ctac ... do 2 j=ibbeg,ibend ibeg = max0(1+(j-1)*nbsiz,iomid) iend = min0(j*nbsiz,nmid) c ic1 = (ibeg+idiag-1)/nbsiz + 1 c ic2 = (iend+idiag-1)/nbsiz + 1 c id1 = ic1 - j + nhband + 1 c id2 = ic2 - j + nhband + 1 itemp1 = (ibeg+idm1)/nbsiz itemp2 = (iend+idm1)/nbsiz id1 = itemp1 + 2 - j + nhband id2 = itemp2 + 2 - j + nhband j1s = j + id1 - nhbp1 j2s = j + id2 - nhbp1 id1s = nband - id1 + 1 id2s = nband - id2 + 1 if (id1 .ne. id2) go to 3 c ctac(j,id1) = ctac(j,id1) c a + vadd(iend-ibeg+1,coef(ibase+ibeg)) do 41 ii=ibeg,iend if (symm) ctac(j1s,id1s) = ctac(j1s,id1s) + coef(ibase+ii) 41 ctac(j,id1) = ctac(j,id1) + coef(ibase+ii) go to 2 c3 imid = 1 + (ic2-1)*nbsiz - idiag 3 imid = iomid + itemp2*nbsiz c ctac(j,id1) = ctac(j,id1) c a + vadd(imid-ibeg ,coef(ibase+ibeg)) do 42 ii=ibeg,imid-1 if (symm) ctac(j1s,id1s) = ctac(j1s,id1s) + coef(ibase+ii) 42 ctac(j,id1) = ctac(j,id1) + coef(ibase+ii) c ctac(j,id2) = ctac(j,id2) c a + vadd(iend-imid+1,coef(ibase+imid)) do 43 ii=imid,iend if (symm) ctac(j2s,id2s) = ctac(j2s,id2s) + coef(ibase+ii) 43 ctac(j,id2) = ctac(j,id2) + coef(ibase+ii) c 2 continue c 1 continue c c**************************** do lu factorization ******************** c do 31 i=1,nblk-1 denom = ctac(i,nhbp1) if (abs(denom) .lt. srelpr) go to 998 xpivot = 1e0 / denom nsubmt = min0(nhband,nblk-i) do 30 j=1,nsubmt ipj = i + j ind2 = nhbp1 - j do 30 k=1,nsubmt c30 ctac(i+j,nhband-j+1+k) = ctac(i+j,nhband-j+1+k) c a - xpivot*ctac(i+j,nhband-j+1)*ctac(i,nhband+1+k) ind = nhbp1 - j + k 30 ctac(ipj,ind) = ctac(ipj,ind) a - xpivot*ctac(ipj,ind2)*ctac(i,nhbp1+k) do 32 j=1,nsubmt ipj = i + j ind1 = nhbp1 - j ind2 = nhbp1 + j c ctac(i+j,nhband+1-j) = ctac(i+j,nhband+1-j)*xpivot c32 ctac(i ,nhband+1+j) = ctac(i ,nhband+1+j)*xpivot ctac(ipj,ind1) = ctac(ipj,ind1)*xpivot 32 ctac(i ,ind2) = ctac(i ,ind2)*xpivot 31 continue return c c c error returns c c breakdown ... 998 ier = -6 call ershow (ier,'getblk') return c c insuff. memory ... 999 ier = -2 call ershow (ier,'getblk') nw = nblk*nband return end subroutine tmult (n,nblk,nband,ctac,eta,u,v) c c this utility routine for the cgcr algorithm computes the product of c the t-matrix with a vector. here, t = c*((c**t)*a*c)**(-1) * c**t, c a projection. c dimension ctac(nblk,nband), eta(1), u(1), v(1) c nbsiz = n / nblk nhband = (nband-1)/2 nhbp1 = nhband + 1 c c form the eta vector - aggregation step. c do 1 i=0,nblk-1 c1 eta(i) = vadd (nbsiz,u(1+i*nbsiz)) ip1 = i + 1 eta(ip1) = 0e0 do 1 j=1,nbsiz 1 eta(ip1) = eta(ip1) + u(i*nbsiz+j) c c perform the forward solve. c if (nhband .eq. 0) go to 40 do 2 irow=2,nblk ibeg = max0 (1,irow-nhband) iend = irow - 1 ind = nhbp1 - irow do 3 icol = ibeg,iend 3 eta(irow) = eta(irow) - eta(icol)*ctac(irow,ind+icol) 2 continue c c perform the diagonal solve. c 40 do 4 i=1,nblk 4 eta(i) = eta(i) / ctac(i,nhbp1) c c perform the back solve. c if (nhband .eq. 0) go to 41 do 5 i=1,nblk-1 irow = nblk - i ibeg = irow + 1 iend = min0 (irow+nhband,nblk) ind = nhbp1 - irow do 6 icol = ibeg,iend 6 eta(irow) = eta(irow) - eta(icol)*ctac(irow,ind+icol) 5 continue c c form the vector t*u - disaggregation step. c 41 do 7 i=0,nblk-1 val = eta(i+1) c7 call vfill (nbsiz,v(1+i*nbsiz),eta(i+1)) do 7 j=1,nbsiz 7 v(i*nbsiz+j) = val c return end function vadd (n,v) c c adds up all elements of a vector c dimension v(1) c sum = 0e0 do 1 i=1,n 1 sum = sum + v(i) vadd = sum return end subroutine hesest (hess,nhess,nd,esize,imode,havest, a emax,emin,wk,nw,ier) c c routine to calculate the moduli of the extremal eigenvalues of a c banded hessenberg matrix. c c hess - the hessenberg matrix, stored by diagonals c nhess, nd - dimensions of array hess c esize - indicator of how many rows/cols of hess have been c filled out so far c imode - style of eigenvalue estimation: c abs(imode) - use this size of principal submatrix to do estimate c sign(imode) - use either leading or trailing principal submatrix c c dimension hess(nhess,nd), wk(1) logical havest integer esize c havest = .false. if (imode .gt. 0 .and. esize .gt. imode) return c c memory allocation c ndim = min0(esize,iabs(imode)) if (ndim .le. 0) return imat = 1 ireal = imat + ndim*ndim iimag = ireal + ndim c nwusd = iimag - 1 + ndim if (nwusd .gt. nw) go to 999 nw = nwusd c c make the hess matrix into a full matrix c if (imode .lt. 0) go to 1 ibeg = 1 iend = esize go to 2 1 ibeg = max0 (1,esize-iabs(imode)+1) iend = esize 2 call vfill (ndim*ndim,wk(imat),0e0) do 3 i=ibeg,iend jbeg = max0 (ibeg,i-1) jend = min0 (ibeg-1+ndim,i+nd-2) do 3 j=jbeg,jend 3 wk(imat+(i-ibeg)+(j-ibeg)*ndim) = hess(i,j-i+2) c c call to eispack to calculate eigenvalues c ierr = 0 call hqr (ndim,ndim,1,ndim,wk(imat),wk(ireal),wk(iimag),ierr) if (ierr .ne. 0) go to 998 c c find eigenvalues with largest and smallest modulus c emax = wk(ireal)**2 + wk(iimag)**2 emin = emax if (ndim .eq. 1) go to 5 do 6 i=2,ndim vmod = wk(ireal-1+i)**2 + wk(iimag-1+i)**2 emax = amax1 (emax,vmod) 6 emin = amin1 (emin,vmod) c 5 emax = sqrt (emax) emin = sqrt (emin) havest = .true. return c c c error returns ... c c error in call to eispack 998 ier = -18 call ershow (ier,'hesest') return c c insuff. real workspace 999 ier = -2 nw = nwusd call ershow (ier,'hesest') return end subroutine pvec (n,nv,iv,s,s1,idotw,it,il,ir,vect, a dots,ndc,betas,gamma,gamize,svec,wk,ier) c c this routine performs generalized gram-schmidt on a collection c of vectors. c it is used to update the table of direction vectors for c generalized conjugate gradient methods per-iteration. c note that this routine was intended to be rather general, c including block conjugate gradient methods. c c params -- c n - size of the vectors c nv - the size of the p-vector table. c ie., the table contains p(it-1), p(it-2),...,p(it-nv). c iv - number of p-vector-like objects we are dragging along. c eg., if iv=3, then we may be computing p, ap and q(inv)ap. c s - the block size for block conjugate gradient methods. c s1 - indicates how many of the old p-vectors are to be used to c orthogonalize the new p-vector. c idotw - indicates the bandwidth of the matrix used to calculate c the betas. c generally equals s1, but if the h-matrix is symmetric c then = 1. c it - iteration number. this routine calculates p(it). c il,ir - integers between 1 and iv. indicate whether p, ap or c q(inv)ap c is to be used to calculate the inner product for c orthogonality. c vect - the p-vector table. c dots - workspace for the dot products. c ndc - the number of dot products that have already been c computed by formit. c betas - workspace for the betas. c gamma - an s by s matrix containing the coefficients from applying c gram schmidt to p(it). c gamize - flag to indicate whether gram schmidt is to be applied c after p(it) is calculated. c svec - input packet of vectors to the p-vector calculation c process. c wk - workspace. must be of size s. c ier - error code c c array structure and indexing functions -- c c vect(n,s,nv,iv) jv c svec(n,s,iv) isv c dots(s,s,idotw,s1) id c betas(s,s,s1) ib c gamma(s,s) - c integer s1, s, idotw dimension vect(1), svec(1) dimension dots(1), betas(1) dimension gamma(s,s) dimension wk(1) logical gamize common / itcom4 / keygs, srelpr, keyzer c c define the necessary indexing functions. c jv(i,j,k,l) = 1 + (i-1) + n*((j-1) + s*(mod(k,nv) + nv*(l-1))) isv(i,j,k) = 1 + (i-1) + n*((j-1) + s*(k-1)) id(i,j,k,l) = 1 + (i-1) + s*((j-1) + s*((k-l) + idotw*mod(k,s1))) ib(i,j,k) = 1 + (i-1) + s*((j-1) + s*mod(k,s1)) c ier = 0 c c ... handle first iteration. c if (it .eq. 0 .or. s1 .le. 0) go to 1000 c c ... now handle general iteration. c c ... first, calculate dot products (p(it-1),p(i)). c ibgn = max0 (it-idotw,0) iend = it - 1 - ndc if (ibgn .gt. iend) go to 10 do 2 i = ibgn,iend do 2 j = 1,s do 2 k = 1,s 2 dots(id(j,k,it-1,i)) = vdot (n,vect(jv(1,j,it-1,il)), a vect(jv(1,k,i,ir))) c c ... next, form all the new betas. c 10 ibgn = max0 (it-s1,0) iend = it - 1 do 3 i = ibgn,iend do 34 l = 1,s do 35 k = 1,s wk(k) = -vdot (n,vect(jv(1,k,i,il)),svec(isv(1,l,ir))) jbgn = max0 (i-idotw+1,it-s1,0) jend = i - 1 if (jend .lt. jbgn) go to 35 do 4 j = jbgn,jend do 4 m = 1,s 4 wk(k) = wk(k) - dots(id(k,m,i,j))*betas(ib(m,l,j)) 35 continue call vcopy (s*s,dots(id(1,1,i,i)),gamma) call gauss (s,s,gamma,wk(1),betas(ib(1,l,i)),ier) if (ier .ne. 0) go to 999 34 continue 3 continue c c ... now, get new p vectors. c do 37 m = 1,iv do 37 i = ibgn,iend do 37 l = 1,s do 6 k = 1,s 6 call vtriad (n,svec(isv(1,l,m)),svec(isv(1,l,m)), a betas(ib(k,l,i)),vect(jv(1,k,i,m)),1) 37 continue c c ... copy new vectors into the table. c 1000 do 168 m = 1,iv 168 call vcopy (n*s,svec(isv(1,1,m)),vect(jv(1,1,it,m))) c c ... now calculate gamma and orthogonalize the new block of p-vectors c call vfill (s*s,gamma,0.0) do 881 i = 1,s 881 gamma(i,i) = 1.0 if (.not. gamize) return do 879 i = 1,s if (i .eq. 1) go to 882 do 883 j = 1,i-1 883 wk(j) = vdot (n,vect(jv(1,1,j,it)),vect(jv(1,1,i,it))) do 884 j = 1,i-1 do 885 m = 1,iv 885 call vtriad (n,vect(jv(1,i,it,m)),vect(jv(1,i,it,m)), a -wk(j),vect(jv(1,j,it,m)),1) do 886 k = j,i-1 886 gamma(j,i) = gamma(j,i) - gamma(j,k)*wk(k) 884 continue 882 vnorm = sqrt(vdot(n,vect(jv(1,i,it,1)),vect(jv(1,i,it,1)))) if (abs(vnorm) .lt. srelpr**2) go to 999 do 888 m = 1,iv 888 call vtriad (n,vect(jv(1,i,it,m)),xxx,1.0/vnorm, a vect(jv(1,i,it,m)),2) do 887 j = 1,i 887 gamma(j,i) = gamma(j,i)/vnorm 879 continue return c c ... error return. c 999 ier = -100 return end subroutine gauss (ndim,n,a,rhs,u,ier) c c gaussian elimination routine. c dimension a(ndim,ndim), rhs(ndim), u(ndim) common / itcom4 / keygs, srelpr, keyzer ier = 0 if (n .eq. 1) go to 190 do 1 i = 1,n-1 if (abs(a(i,i)) .lt. srelpr**2) go to 999 do 10 j = i+1,n fact = a(j,i)/a(i,i) a(j,i) = 0.e0 do 2 k = i+1,n 2 a(j,k) = a(j,k) - fact*a(i,k) rhs(j) = rhs(j) - fact*rhs(i) 10 continue 1 continue c 190 do 3 i = 1,n k = n - i + 1 if (abs(a(k,k)) .lt. srelpr**2) go to 999 u(k) = rhs(k) if (i .eq. 1) go to 44 do 4 j = k+1,n u(k) = u(k) - u(j)*a(k,j) 4 continue 44 u(k) = u(k)/a(k,k) 3 continue return 999 ier = -100 return end subroutine qrupd (ndim,nnz,nind,c,s,ucnbar,ucn,u,b,ier) c c this routine updates the qr factorization of the banded upper c hessenberg matrix used by various conjugate gradient variants. c c parameters -- c ndim - the current size of the hessenberg matrix c nnz - the actual number of nonzeros in the band of the c hessenberg matrix. obviously, must be .le. than nind. c nind - the bandwidth of the hessenberg matrix, as stored c c,s - arrays which hold the cosines and sines of all the c rotations that have been performed so far c u - the new rightmost column of the hessenberg matrix, c which is to be rotated c b - the element of the hessenberg matrix to be zapped c by the new rotation c ucnbar - the element of the hessenberg matrix that b is to be c ucn - rotated into the new value of ucnbar, after the rotation c dimension c(1), s(1), u(1) c c note -- due to the fortran implementation on the cyber 205, it is c necessary to make ucnbar an array rather than a scalar. c dimension ucnbar(1) c c ... define the usual indexing functions. c indv(i) = 1 + mod(i,nind) indu(i) = 1 + mod(i,nind+1) c c ... indu is used to index u. c if (ndim .le. 1) return c c ... apply all the old rotations to the column. c jbgn = max0(1,ndim-nnz+1) jend = ndim - 2 if (jend .lt. jbgn) go to 3 do 2 j = jbgn,jend u1 = c(indv(j))*u(indu(j)) + s(indv(j))*u(indu(j+1)) u2 =-s(indv(j))*u(indu(j)) + c(indv(j))*u(indu(j+1)) u(indu(j)) = u1 u(indu(j+1)) = u2 2 continue 3 continue c c ... now proceed to form the new 2-by-2 rotation matrix. c ucnb = ucnbar(1) denom = sqrt(ucnb*ucnb+b*b) if (abs(ucnb) .ge. 1.0e-40) denom = sign(denom,ucnb) if (abs(denom) .lt. 1.0e-40) go to 999 c(indv(ndim-1)) = ucnb/denom s(indv(ndim-1)) = b/denom c c ... now apply the new rotation. c u1 = c(indv(ndim-1))*u(indu(ndim-1))+s(indv(ndim-1))*u(indu(ndim)) u2 =-s(indv(ndim-1))*u(indu(ndim-1))+c(indv(ndim-1))*u(indu(ndim)) u(indu(ndim-1)) = u1 u(indu(ndim)) = u2 ucn = c(indv(ndim-1))*ucnb + s(indv(ndim-1))*b return 999 ier = -14 return end subroutine pstop (ncall,suba,subql,subqr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,r, a z,zt,wk,nw,ier) c dimension zt(1), z(1), r(1), u(1), ubar(1), rhs(1), wk(1) dimension coef(1), jcoef(2), wfac(1), jwfac(1) external suba, subql, subqr, nullpl, nullpr c call pstopg (ncall,suba,subql,subqr,nullpl,nullpr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,r,z,zt,wk,nw,ier) return end subroutine pstopg (ncall,suba,subql,subqr,precl,precr, a coef,jcoef,wfac,jwfac,n,u,ubar,rhs,r, a z,zt,wk,nw,ier) c c ... pstop computes one of stopping tests to determine if the c iterative method has converged to a solution within the c error tolerance, zeta. the stopping tests are -- c c (1) (emax/emin) * sqrt ( (r ,zt)/(rhs,inv(q)*rhs) ) c (2) ( 1.0/emin) * sqrt ( (zt,zt)/(u,u) ) c (3) (emax/emin) * sqrt ( (zt,zt)/(inv(q)*rhs,inv(q)*rhs) ) c (4) sqrt ( (zt,zt)/(inv(q)*rhs,inv(q)*rhs) ) c (5) sqrt ( (r ,r )/(rhs,rhs) ) c (6) sqrt ( (u-ubar,u-ubar)/(ubar,ubar) ) c (7) (emax/emin) * sqrt ( (r,z)/(rhs,inv(ql)*rhs) ) c (8) ( 1.0/emin) * sqrt ( (z,z)/(u,u) ) c (9) (emax/emin) * sqrt ( (z,z)/(inv(ql)*rhs,inv(ql)*rhs) ) c (10) sqrt ( (z,z)/(inv(ql)*rhs,inv(ql)*rhs) ) c c ... here, emax and emin are estimates of the 2-norm of the iteration c matrix and its inverse. c c key parameters -- c c ncall: = 0 for first call to pstop by accelerator c < 0 for recalc of bnorms, in the case that a new prec has c been calc'ed c > 0 for a routine call to check the stopping test c c iplr : = 0 the left and right preconditioning matrices are the c identity c = 1 the right prec is the identity c = 2 the left prec is the identity c = 3 neither the left nor the right prec matrix is the c identity c c r: the residual of the original system, if rhave = .true. c z : ql**(-1) r, if zhave = .true. c zt : qr**(-1) z, if zthave = .true. c c c this routine is admittedly quite overdesigned. the idea was to have c a general routine which would calculate the needed inner products c with the absolute least amount of work, by determining which inner c products already exist. c dimension zt(1), z(1), r(1), u(1), ubar(1), rhs(1), wk(1) dimension coef(1), jcoef(2), wfac(1), jwfac(1) external suba, subql, subqr, precl, precr c c *** begin -- package common c common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d logical halt, maxadp, minadp, maxadd, minadd common / itcom2 / halt, maxadp, minadp, maxadd, minadd common / itcom3 / alpha, beta, zeta, emax, emin, pap, b alphao, gamma, sigma, rr, rho, dkq, dkm1, b ff, rqmin, rqmax, stptst, udnm, ubarnm, b bnorm, bnorm1 common / itcom4 / keygs, srelpr, keyzer common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c c *** end -- package common c logical init, nufact logical ipl, ipr logical risz, riszt, ziszt logical rhav, zhav, zthav, rcalc, zcalc, ztcalc logical udhv, rdhv, rzhv, rzthv, zdhv, zzthv, ztdhv logical udcal, rdcal, rzcal, rztcal, zdcal, zztcal, ztdcal c dimension idlarr(10), idrarr(10), needbn(10) data idlarr /1,3,3,3,1,0,1,2,2,2/ data idrarr /3,3,3,3,1,0,2,2,2,2/ data needbn /1,0,1,1,1,0,1,0,1,1/ c c nwusd = 0 halt = .false. tiny = 500.0*srelpr c c get flags to tell us if there is any prec on the left or right ... ipl = iplr .eq. 1 .or. iplr .eq. 3 ipr = iplr .eq. 2 .or. iplr .eq. 3 c find equivalences between r, z, zt ... risz = .not. ipl ziszt = .not. ipr riszt = risz .and. ziszt c decode ntest ... idl = idlarr(ntest) idr = idrarr(ntest) idot = 1 + (idl-1) + (idr-1)*3 c init = ncall .eq. 0 nufact = ncall .lt. 0 if (.not. (init .or. nufact)) go to 900 c c====================================================================== c========================== initialization section ==================== iv1 = 1 iv2 = iv1 + n c c ... compute bnorms, as necessary. c if (needbn(ntest).eq.0) go to 750 idle = idl if (idle.eq.3 .and. ziszt) idle = 2 if (idle.eq.2 .and. risz) idle = 1 idre = idr if (idre.eq.3 .and. ziszt) idre = 2 if (idre.eq.2 .and. risz) idre = 1 idp = 0 idlp = 0 idrp = 0 nwusd = 0 if (nwusd .gt. nw) go to 999 c calc ql(inv)*rhs, if necess ... if (max0(idle,idre).gt.1 .and. ipl) then nwusd = nwusd + n if (nwusd .gt. nw) go to 999 idp = idp + 1 call precl (coef,jcoef,wfac,jwfac,n,subql,suba,subqr,rhs, a wk(1+n*(idp-1))) if (idle .gt. 1) idlp = idlp + 1 if (idre .gt. 1) idrp = idrp + 1 end if c calc qr(inv)*ql(inv)*rhs, if necess ... if (max0(idle,idre).gt.2) then nwusd = nwusd + n if (nwusd .gt. nw) go to 999 idp = idp + 1 if (idp .eq. 1) . call precr (coef,jcoef,wfac,jwfac,n,subql,suba,subqr, . rhs,wk(1+n*(idp-1))) if (idp .eq. 2) . call precr (coef,jcoef,wfac,jwfac,n,subql,suba,subqr, . wk(1+n*(idp-2)),wk(1+n*(idp-1))) if (idle .gt. 2) idlp = idlp + 1 if (idre .gt. 2) idrp = idrp + 1 end if c get needed dot ... if (init .or. (idlp.ne.0 .or. idrp.ne.0)) then bnorm1 = seldot(n,1+idlp,rhs,wk(1),wk(1+n),wk(1+2*n), . 1+idrp,rhs,wk(1),wk(1+n),wk(1+2*n)) if (bnorm1 .lt. 0e0) go to 998 bnorm1 = amax1 (srelpr,sqrt(bnorm1)) end if if (idlp.eq.0 .or. idrp.eq.0) bnorm = bnorm1 c c ... get ubar norm, as necessary ... c 750 if (nufact) go to 900 ubarnm = srelpr if (ntest .eq. 6) ubarnm = sqrt(vdot (n,ubar,ubar)) c c ... end of initialization phase ... c=============================================================================== c=============================================================================== c c ... now begin the actual stopping test section ... c c notes on the strategy of this routine: c basically, what we're after in order to perform the stopping c tests is certain dot products. the needed dot products may already c be available from the accelerator (in variables rrot, etc., as c indicated in flags rdhav, etc.) otherwise, it will be necessary to c compute these from the appropriate vectors. these vectors in turn c may already exist (in variables r, z, zt, as indicated b c rhave, zhave, zthave), or it may be necessary to compute them. c if they are computed by pstop, then the workspace is used to store c them. furthermore, there are dependencies between the vectors: zt c requires z, z requires r. add to this the further complication that c it may be possible to c optimize: if there is no left preconditioner, c then r equals z, and so forth. c this routine attempts to get the necessary data to do the c stopping test in the most optimal way. c a few notes on the semantics of variables. the flag rhave tells c whether the variable named r actually contains the residual; the flag c rhav tells whether the residual exists somewhere - whether in r, z, c zt or workspace. if c nonzero, the variable ir tells where in the c workspace the residual is (if it is in the workspace). now, the c variable rdhav tells whether rdot actually contains the dot product c of r with itself. unlike r and rhave, rdot and rdhav will actually c be updated by pstop if they are calculated herein, or if rdot can be c found from some other dot. c the variable rcalp indicates whether r was somewhere in workspace c after pstop did its work. the accelerator would like to know this, c since it may want to circumvent letting pstop do a vector calculation c if it can do it more efficiently. c for the initialization call (ncall=0), there is a dry run of c the stopping test. that is, the flags rhave, rcalp, rrhave, etc. c are set to what they would be set in an actual call, but no actual c vector calculations are done. this is necessary so that the c accelerator can plan ahead and take action to circumvent pstop doing c lengthly calculations - e.g., calculating the residual using an a c mult when the accelerator could do it simply by doing a saxpy. c c 900 continue c c make temporaries for dot haves (modify the actual dot haves only c if ncall>0) udhv = udhav rdhv = rdhav rzhv = rzhav rzthv = rzthav zdhv = zdhav zzthv = zzthav ztdhv = ztdhav c c evaluate vector haves ... rhav = rhave .or. (zhave.and.risz) .or. (zthave.and.riszt) zhav = zhave .or. (rhave.and.risz) .or. (zthave.and.ziszt) zthav = zthave .or. (rhave.and.riszt) .or. (zhave .and.ziszt) c c take note that there are no vectors in the workspace ... ir = 0 iz = 0 izt = 0 c iwfree = 1 c c ********** calculate r ********** c c find dot needs ... 102 assign 105 to lbldn go to 1100 c calculate whatever dots we can ... 105 assign 110 to lbldc go to 1300 c find vector needs ... 110 assign 115 to lblvn go to 1200 c get r ... 115 if (.not. rcalc) go to 120 ir = iwfree iwfree = iwfree + n nwusd = iwfree-1 if (init .or. nufact) go to 116 if (nwusd .gt. nw) go to 999 call suba (coef,jcoef,wfac,jwfac,n,u,wk(ir)) call vexopy (n,wk(ir),rhs,wk(ir),2) 116 rhav = .true. c revise vector haves ... if (.not. risz) go to 111 iz = ir zhav = .true. 111 if (.not. riszt) go to 120 izt = ir zthav = .true. c c ********** calculate z ********** c c calculate dots ... 120 assign 125 to lbldc go to 1300 c revise vector needs ... 125 assign 126 to lblvn go to 1200 c get z ... 126 if (.not. zcalc) go to 130 iz = iwfree iwfree = iwfree + n nwusd = iwfree-1 if (init .or. nufact) go to 127 if (nwusd .gt. nw) go to 999 if (rhave) call precl (coef,jcoef,wfac,jwfac,n,subql,suba,subqr, a r,wk(iz)) if (ir .ne. 0) call precl (coef,jcoef,wfac,jwfac,n,subql,suba, a subqr,wk(ir),wk(iz)) 127 zhav = .true. c revise vector haves ... if (.not. risz) go to 121 ir = iz rhav = .true. 121 if (.not. ziszt) go to 130 izt = iz zthav = .true. c c ********** calculate zt ********** c c calculate dots ... 130 assign 135 to lbldc go to 1300 c revise vector needs .. 135 assign 136 to lblvn go to 1200 c get zt ... 136 if (.not. ztcalc) go to 150 izt = iwfree iwfree = iwfree + n nwusd = iwfree-1 if (init .or. nufact) go to 137 if (nwusd .gt. nw) go to 999 if (zhave) call precr (coef,jcoef,wfac,jwfac,n,subql,suba,subqr, a z,wk(izt)) if ((.not. zhave) .and. (rhave .and. risz)) a call precr (coef,jcoef,wfac,jwfac,n,subql,suba,subqr, a r,wk(izt)) if (iz .ne. 0) a call precr (coef,jcoef,wfac,jwfac,n,subql,suba,subqr, a wk(iz),wk(izt)) 137 zthav = .true. c revise vector haves ... if (.not. riszt) go to 131 ir = izt rhav = .true. 131 if (.not. ziszt) go to 150 iz = izt zhav = .true. c c***** take care of details before going on to perform the stopping c test c c calculate whatever dots we can ... 150 assign 151 to lbldc go to 1300 c save vector calculation needs ... 151 rcalp = ir .ne. 0 zcalp = iz .ne. 0 ztcalp = izt .ne. 0 c head home, if ncall.le.0 ... if (init .or. nufact) go to 950 c c save dot have temporaries, if ncall>0 ... udhav = udhv rdhav = rdhv rzhav = rzhv rzthav = rzthv zdhav = zdhv zzthav = zzthv ztdhav = ztdhv c c get (u-ubar,u-ubar) if (ntest .ne. 6) go to 45 uedot= 0.0 do 40 i = 1,n 40 uedot = uedot + (u(i) - ubar(i))**2 c c=============================================================================== c====================== stopping test computation section ====================== c c at this point, all the needed dot products have been computed, and c we are to actually perform the stopping test. c 45 go to (51,52,53,54,55,56,57,58,59,60), ntest c c ... test 1 c 51 if (rztdot .lt. -srelpr) go to 998 top = emax * sqrt (abs(rztdot)) bottom = emin * bnorm1 go to 80 c c ... test 2 c 52 top = sqrt (abs(ztdot)) bottom = emin * udnm go to 80 c c ... test 3 c 53 top = emax * sqrt (abs(ztdot)) bottom = emin * bnorm1 go to 80 c c ... test 4 c 54 top = sqrt (abs(ztdot)) bottom = bnorm1 go to 80 c c ... test 5 c 55 top = sqrt (abs(rdot)) bottom = bnorm1 go to 80 c c ... test 6 c 56 top = sqrt (abs(uedot)) bottom = ubarnm go to 80 c c ... test 7 c 57 if (rzdot .lt. -srelpr) go to 998 top = emax * sqrt (abs(rzdot)) bottom = emin * bnorm1 go to 80 c c ... test 8 c 58 top = sqrt (abs(zdot)) bottom = emin * udnm go to 80 c c ... test 9 c 59 top = emax * sqrt (abs(zdot)) bottom = emin * bnorm1 go to 80 c c ... test 10 c 60 top = sqrt (abs(zdot)) bottom = bnorm1 go to 80 c 80 if (bottom .lt. tiny) bottom = tiny stptst = top/bottom call ckconv (ier) if (ier .lt. 0) go to 950 halt = .false. if (top .lt. bottom*zeta) halt = .true. c c done with the stopping test, head home. go to 950 c c=============================================================================== c*********************** section to calculate dot-needs ************************ c c here, we consider which dot products the stopping test needs, and c see whether the needed dot products are currently nonexistent and c thus must be calculated. c 1100 continue c c spread any dot information to other dots, as possible ... if (risz) then if (rdhv) then rzdot = rdot rzhv = .true. zdot = rdot zdhv = .true. end if if (rzhv) then rdot = rzdot rdhv = .true. zdot = rzdot zdhv = .true. end if if (zdhv) then rzdot = zdot rzhv = .true. rdot = zdot rdhv = .true. end if end if c if (ziszt) then if (zdhv) then zztdot = zdot zzthv = .true. ztdot = zdot ztdhv = .true. end if if (zzthv) then zdot = zztdot zdhv = .true. ztdot = zztdot ztdhv = .true. end if if (ztdhv) then zdot = ztdot zdhv = .true. zztdot = ztdot zzthv = .true. end if end if c if (riszt) then if (rdhv) then rztdot = rdot rzthv = .true. ztdot = rdot ztdhv = .true. end if if (rzthv) then rdot = rztdot rdhv = .true. ztdot = rztdot ztdhv = .true. end if if (ztdhv) then rztdot = ztdot rzthv = .true. rdot = ztdot rdhv = .true. end if end if c c figure out which dots actually need to be calculated ... 1103 udcal = (needbn(ntest).eq.0 .and. ntest.ne.6) .and. .not.udhv rdcal = idot.eq.1 .and. .not.rdhv rzcal = (idot.eq.2 .or. idot.eq.4) .and. .not.rzhv rztcal = (idot.eq.3 .or. idot.eq.7) .and. .not.rzthv zdcal = idot.eq.5 .and. .not.zdhv zztcal = (idot.eq.6 .or. idot.eq.8) .and. .not.zzthv ztdcal = idot.eq.9 .and. .not.ztdhv go to lbldn c c=============================================================================== c********************* section to calculate vector-needs *********************** c c here, we see which vectors have to be calculated in order to c satisfy the dot calculation needs. c 1200 continue ztcalc = (rztcal.or.zztcal.or.ztdcal) a .and. .not.zthav zcalc = (rzcal .or.zdcal .or.zztcal .or. ztcalc) a .and. .not.zhav rcalc = (rdcal .or.rzcal .or.rztcal .or. zcalc) a .and. .not.rhav go to lblvn c c=============================================================================== c********************* dot product calculation section ************************* c c here, we calculate whatever dot products can be calculated from the c currently existing vectors. c c first locate where the needed vectors are ... 1300 if (rhave) locr = 1 if (zhave .and. risz) locr = 2 if (zthave .and. riszt) locr = 3 if (ir .ne. 0) locr = 4 c if (rhave .and. risz) locz = 1 if (zhave) locz = 2 if (zthave .and. ziszt) locz = 3 if (iz .ne. 0) locz = 4 c if (rhave .and. riszt) loczt = 1 if (zhave .and. ziszt) loczt = 2 if (zthave) loczt = 3 if (izt .ne. 0) loczt = 4 c c now calculate whatever dot products we can ... c c** get udnm ... if (.not. udcal) go to 1350 if ((in .gt. 5) .and. (mod(in,5) .ne. 0)) go to 1350 uold = udnm if (init .or. nufact) go to 1349 udnm = sqrt ( abs ( vdot (n,u,u) ) ) c if ((in .gt. 5) .and. (abs (udnm-uold) .lt. udnm*zeta)) c a is3 = 1 if (udnm .lt. srelpr) udnm = 1.0 1349 udhv = .true. assign 1350 to lbldn go to 1100 c c** get rdot ... 1350 if (.not .(rdcal .and. rhav)) go to 1360 if (init .or. nufact) go to 1359 rdot = seldot (n,locr,r,z,zt,wk(ir),locr,r,z,zt,wk(ir)) 1359 rdhv = .true. assign 1360 to lbldn go to 1100 c c** get rzdot ... 1360 if (.not. (rzcal .and. rhav .and. zhav)) go to 1370 if (init .or. nufact) go to 1369 rzdot = seldot (n,locr,r,z,zt,wk(ir),locz,r,z,zt,wk(iz)) 1369 rzhv = .true. assign 1370 to lbldn go to 1100 c c** get rztdot ... 1370 if (.not. (rztcal .and. rhav .and. zthav)) go to 1380 if (init .or. nufact) go to 1379 rztdot = seldot (n,locr,r,z,zt,wk(ir),loczt,r,z,zt,wk(izt)) 1379 rzthv = .true. assign 1380 to lbldn go to 1100 c c** get zdot ... 1380 if (.not. (zdcal .and. zhav)) go to 1390 if (init .or. nufact) go to 1389 zdot = seldot (n,locz,r,z,zt,wk(iz),locz,r,z,zt,wk(iz)) 1389 zdhv = .true. assign 1390 to lbldn go to 1100 c c** get zztdot ... 1390 if (.not. (zztcal .and. zhav .and. zthav)) go to 1400 if (init .or. nufact) go to 1399 zztdot = seldot (n,locz,r,z,zt,wk(iz),loczt,r,z,zt,wk(izt)) 1399 zzthv = .true. assign 1400 to lbldn go to 1100 c c** get ztdot ... 1400 if (.not. (ztdcal .and. zthav)) go to 1410 if (init .or. nufact) go to 1409 ztdot = seldot (n,loczt,r,z,zt,wk(izt),loczt,r,z,zt,wk(izt)) 1409 ztdhv = .true. assign 1410 to lbldn go to 1100 c 1410 continue go to lbldc c c=============================================================================== 950 nw = nwusd return c================================= error returns =============================== c c splitting matrix is not positive definite c 998 ier = -7 call ershow (ier,'pstop') go to 950 c c insuff. real wksp c 999 ier = -2 call ershow (ier,'pstop') go to 950 end function seldot (n,iu,u1,u2,u3,u4,iv,v1,v2,v3,v4) c c this routine computes a dot product from a selected pair of vectors c dimension u1(1), u2(1), u3(1), v1(1), v2(1), v3(1) dimension u4(1), v4(1) c ind = 1 + (iv-1) + 4*(iu-1) if (ind .eq. 1) seldot = vdot (n,u1,v1) if (ind .eq. 2) seldot = vdot (n,u1,v2) if (ind .eq. 3) seldot = vdot (n,u1,v3) if (ind .eq. 4) seldot = vdot (n,u1,v4) if (ind .eq. 5) seldot = vdot (n,u2,v1) if (ind .eq. 6) seldot = vdot (n,u2,v2) if (ind .eq. 7) seldot = vdot (n,u2,v3) if (ind .eq. 8) seldot = vdot (n,u2,v4) if (ind .eq. 9) seldot = vdot (n,u3,v1) if (ind .eq. 10) seldot = vdot (n,u3,v2) if (ind .eq. 11) seldot = vdot (n,u3,v3) if (ind .eq. 12) seldot = vdot (n,u3,v4) if (ind .eq. 13) seldot = vdot (n,u4,v1) if (ind .eq. 14) seldot = vdot (n,u4,v2) if (ind .eq. 15) seldot = vdot (n,u4,v3) if (ind .eq. 16) seldot = vdot (n,u4,v4) return end subroutine ckconv (ier) c c routine to determine whether iterative method has stagnated, c or other unfortunate situation. c parameter (nst=20) parameter (eps=1.e-7) common / itcom1 / in, itmax, level, nout, ns1, ns2, ns3, a iplr, iqlr, ntest, is, iacel, idgts, nbl1d, nbl2d common / itcom3 / alpha, beta, zeta, emax, emin, pap, a alphao, gamma, sigma, rr, rho, dkq, dkm1, a ff, rqmin, rqmax, stptst, udnm, ubarnm, a bnorm, bnorm1 c dimension stold(nst) save stold, ist ind(i) = 1 + mod(i,nst) c if (in .le. 0) ist = 0 c ist = ist + 1 stold(ind(ist)) = stptst if (ist .lt. nst) go to 900 c do 2 i = 1, nst-1 do 2 i = nst-1, 1, -1 c val = abs(stold(ind(ist-i))-stptst)/stptst val = abs(stold(ind(ist-i))-stptst) if (val .gt. eps*stptst) go to 900 2 continue ier = -19 call ershow (ier,'ckconv') return c 900 return end subroutine inithv (icall) c c routine to initialize dot and vector haves to false. c common / itcom9 / rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav, a rdot, rzdot, rztdot, zdot, zztdot, ztdot logical rhave, zhave, zthave, rcalp, zcalp, ztcalp, a udhav, rdhav, rzhav, rzthav, zdhav, zzthav, ztdhav c udhav = .false. rdhav = .false. rzhav = .false. rzthav = .false. zdhav = .false. zzthav = .false. ztdhav = .false. if (icall .eq. 1) return rhave = .false. zhave = .false. zthave = .false. c return end subroutine hqr(nm,n,low,igh,h,wr,wi,ierr) c integer i,j,k,l,m,n,en,ll,mm,na,nm,igh,itn,its,low,mp2,enm2,ierr real h(nm,n),wr(n),wi(n) real p,q,r,s,t,w,x,y,zz,norm,tst1,tst2 logical notlas c c this routine is a translation of the algol procedure hqr, c num. math. 14, 219-231(1970) by martin, peters, and wilkinson. c handbook for auto. comp., vol.ii-linear algebra, 359-371(1971). c c this routine finds the eigenvalues of a real c upper hessenberg matrix by the qr method. c c on input c c nm must be set to the row dimension of two-dimensional c array parameters as declared in the calling program c dimension statement. c c n is the order of the matrix. c c low and igh are integers determined by the balancing c routine balanc. if balanc has not been used, c set low=1, igh=n. c c h contains the upper hessenberg matrix. information about c the transformations used in the reduction to hessenberg c form by elmhes or orthes, if performed, is stored c in the remaining triangle under the hessenberg matrix. c c on output c c h has been destroyed. therefore, it must be saved c before calling hqr if subsequent calculation and c back transformation of eigenvectors is to be performed. c c wr and wi contain the real and imaginary parts, c respectively, of the eigenvalues. the eigenvalues c are unordered except that complex conjugate pairs c of values appear consecutively with the eigenvalue c having the positive imaginary part first. if an c error exit is made, the eigenvalues should be correct c for indices ierr+1,...,n. c c ierr is set to c zero for normal return, c j if the limit of 30*n iterations is exhausted c while the j-th eigenvalue is being sought. c c questions and comments should be directed to burton s. garbow, c mathematics and computer science div, argonne national laboratory c c this version dated august 1983. c c ------------------------------------------------------------------ c ierr = 0 norm = 0.0 k = 1 c .......... store roots isolated by balanc c and compute matrix norm .......... do 50 i = 1, n c do 40 j = k, n 40 norm = norm + abs(h(i,j)) c k = i if (i .ge. low .and. i .le. igh) go to 50 wr(i) = h(i,i) wi(i) = 0.0 50 continue c en = igh t = 0.0 itn = 30*n c .......... search for next eigenvalues .......... 60 if (en .lt. low) go to 1001 its = 0 na = en - 1 enm2 = na - 1 c .......... look for single small sub-diagonal element c for l=en step -1 until low do -- .......... 70 do 80 ll = low, en l = en + low - ll if (l .eq. low) go to 100 s = abs(h(l-1,l-1)) + abs(h(l,l)) if (s .eq. 0.0) s = norm tst1 = s tst2 = tst1 + abs(h(l,l-1)) if (tst2 .eq. tst1) go to 100 80 continue c .......... form shift .......... 100 x = h(en,en) if (l .eq. en) go to 270 y = h(na,na) w = h(en,na) * h(na,en) if (l .eq. na) go to 280 if (itn .eq. 0) go to 1000 if (its .ne. 10 .and. its .ne. 20) go to 130 c .......... form exceptional shift .......... t = t + x c do 120 i = low, en 120 h(i,i) = h(i,i) - x c s = abs(h(en,na)) + abs(h(na,enm2)) x = 0.75 * s y = x w = -0.4375 * s * s 130 its = its + 1 itn = itn - 1 c .......... look for two consecutive small c sub-diagonal elements. c for m=en-2 step -1 until l do -- .......... do 140 mm = l, enm2 m = enm2 + l - mm zz = h(m,m) r = x - zz s = y - zz p = (r * s - w) / h(m+1,m) + h(m,m+1) q = h(m+1,m+1) - zz - r - s r = h(m+2,m+1) s = abs(p) + abs(q) + abs(r) p = p / s q = q / s r = r / s if (m .eq. l) go to 150 tst1 = abs(p)*(abs(h(m-1,m-1)) + abs(zz) + abs(h(m+1,m+1))) tst2 = tst1 + abs(h(m,m-1))*(abs(q) + abs(r)) if (tst2 .eq. tst1) go to 150 140 continue c 150 mp2 = m + 2 c do 160 i = mp2, en h(i,i-2) = 0.0 if (i .eq. mp2) go to 160 h(i,i-3) = 0.0 160 continue c .......... double qr step involving rows l to en and c columns m to en .......... do 260 k = m, na notlas = k .ne. na if (k .eq. m) go to 170 p = h(k,k-1) q = h(k+1,k-1) r = 0.0 if (notlas) r = h(k+2,k-1) x = abs(p) + abs(q) + abs(r) if (x .eq. 0.0) go to 260 p = p / x q = q / x r = r / x 170 s = sign(sqrt(p*p+q*q+r*r),p) if (k .eq. m) go to 180 h(k,k-1) = -s * x go to 190 180 if (l .ne. m) h(k,k-1) = -h(k,k-1) 190 p = p + s x = p / s y = q / s zz = r / s q = q / p r = r / p if (notlas) go to 225 c .......... row modification .......... do 200 j = k, n p = h(k,j) + q * h(k+1,j) h(k,j) = h(k,j) - p * x h(k+1,j) = h(k+1,j) - p * y 200 continue c j = min0(en,k+3) c .......... column modification .......... do 210 i = 1, j p = x * h(i,k) + y * h(i,k+1) h(i,k) = h(i,k) - p h(i,k+1) = h(i,k+1) - p * q 210 continue go to 255 225 continue c .......... row modification .......... do 230 j = k, n p = h(k,j) + q * h(k+1,j) + r * h(k+2,j) h(k,j) = h(k,j) - p * x h(k+1,j) = h(k+1,j) - p * y h(k+2,j) = h(k+2,j) - p * zz 230 continue c j = min0(en,k+3) c .......... column modification .......... do 240 i = 1, j p = x * h(i,k) + y * h(i,k+1) + zz * h(i,k+2) h(i,k) = h(i,k) - p h(i,k+1) = h(i,k+1) - p * q h(i,k+2) = h(i,k+2) - p * r 240 continue 255 continue c 260 continue c go to 70 c .......... one root found .......... 270 wr(en) = x + t wi(en) = 0.0 en = na go to 60 c .......... two roots found .......... 280 p = (y - x) / 2.0 q = p * p + w zz = sqrt(abs(q)) x = x + t if (q .lt. 0.0) go to 320 c .......... real pair .......... zz = p + sign(zz,p) wr(na) = x + zz wr(en) = wr(na) if (zz .ne. 0.0) wr(en) = x - w / zz wi(na) = 0.0 wi(en) = 0.0 go to 330 c .......... complex pair .......... 320 wr(na) = x + p wr(en) = x + p wi(na) = zz wi(en) = -zz 330 en = enm2 go to 60 c .......... set error -- all eigenvalues have not c converged after 30*n iterations .......... 1000 ierr = en 1001 return end