C================================================================C
C                                                                C
C     This file is part of the                                   C
C     Distributed Iterative Systems Solvers library              C
C     (c) 1994 Victor Eijkhout, eijkhout@cs.utk.edu              C
C                                                                C
C     Current version: 0.9                                       C
C     This file last generated 94/11/04                          C
C                                                                C
C================================================================C
C================================================================
C     Iterative methods
C================================================================
C----------------------------------------------------------------
      subroutine cg_inner
     >     (matrix,
     >     mat_ptr,mat_idx,mat_con,comm_context,
     >     preconditioner,prec_inf, itv,rhs, vec_inf,
     >     success,precision, max_iter,stop_quant,stop_type,
     >     grad_hist,lda_hist,nhist,results,
     >     test_alloc,need_mem, work
     >     )
      
C     Arguments
C----
      integer lda_hist,nhist
      integer mat_ptr(*),mat_idx(*),mat_con(*),comm_context(*),
     >     prec_inf(*), vec_inf(*),
     >     success, max_iter,stop_quant,stop_type,
     >     test_alloc,need_mem,need_mem_saved
      save need_mem_saved
      double precision matrix(*),preconditioner(*), itv(*),rhs(*),
     >     grad_hist(lda_hist,nhist), precision,results(*)
      double precision work(*)
      
      double precision
     >     a_norm(1),x_norm,b_norm
      common /itrmet_r/
     >     a_norm,x_norm,b_norm

C     Inspect dynamic memory for integrity
C     0=no trace, 1=once/iteration, 2=very often
C----
      integer mem_trace_val
      common /mem_trace/mem_trace_val

C     Functions
C----
      integer needed_memory
      logical
     >     trace_progress,trace_setup,prec_null
      double precision clock00,t_dum(2),ttime

C     Local
C---- 
      logical
     >     vt,no_prec,plot_true_res,
     >     is_row,is_col,is_sym,is_dia,is_grd,
     >     tstp,tetp,tptp,tptip, te,ts,ti,tp,tm
      integer maxtrc
      parameter (maxtrc=20)
      integer 
     >     its,grd,hrd,dir,adr,agr,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots,
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     rar_dotloc,n_rar_dots,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots,
     >     iter, idum1, vs,lvs
      double precision ddum

C     Parameters for stopping test
C----
      integer it_update_loc
      double precision 
     >     it_update_mul, stop_tester,stop_testee,normg1,
     >     xtx,xmx,gtg,hth, rar,pap,
     >     rr_rr_quot,alpha

C     Vector trace
C----
      vt(idum1) = ti.and.idum1.le.maxtrc

C     Various conditions
C----
      call cg_trace_setup(
     >     tstp,tetp,tptp,tptip, te,ts,ti,tp,tm
     >     )

C     Is there a preconditioner step?
C----
      no_prec = prec_null()

C     How long is a vector?
C----
      vs = vec_inf(3)
      lvs = vec_inf(2)

C     Do we plot the true residual for comparison?
C----
      plot_true_res = nhist.gt.1 .and. lda_hist.gt.0
      if (plot_true_res) then
         nhist = 2
      else
         nhist = 2-1
      endif

      if (max_iter.eq.-1) then
         call pt0('>>>> Aborting test run <<<<$')
         return
      endif

C     Figure out how much work / buffer space is needed,
C     and allocate temporaries in this.
C     (2-step process: first time only measurement,
C     after return and external malloc alloc for real)
C----
      call set_mem_probe(test_alloc)
      call reset_allocation(work,1, need_mem)
      if (trace_progress()) then
         if (test_alloc.eq.0) then
            call pd1i('Allocating cg workspace$',need_mem)
         else
            call pd0('Measuring cg workspace$')
         endif
      endif

      if (test_alloc.eq.0) call nulv(work,need_mem_saved)

      call alloc_cg_work(work,
     >     its,grd,hrd,dir,adr,agr,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots,
     >     no_prec,
     >     vec_inf,mat_con,vs,max_iter
     >     )
      if (test_alloc.eq.1) then
         need_mem = needed_memory()
         need_mem_saved = need_mem
         return
      else
         if (mem_trace_val.ge.2) call test_memory_array(work,
     >       'Memtest alloc$',1,.true.)
      endif

C     Initialization of constants
C----
      iter = 0
      it_update_mul = 1.d0
      it_update_loc = 1
      pap = 0.d0
      alpha = 1.d0
      rr_rr_quot = 0.d0

C     Initialization part
C==== 
      if (tstp) call pt0('Initial constants$')
      call bvzero(work(dir),vec_inf)
      call bvcopy_ob(work(its),vec_inf,itv)
      if (vt(1)) call dump_vector(work(its),vec_inf,'itr$',1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest init vectors$',1,.true.)

      call cg_format_setup(
     >     is_row,is_col,is_sym,is_dia,is_grd,vec_inf)

C     Calculate the initial residual
C----
      call calc_true_res(work(grd),work(tmp), work(its),rhs,
     >     vec_inf, work(dots_stack),maxdot,comm_context,
     >     matrix,mat_ptr,mat_idx,mat_con,
     >     is_row,is_col,is_sym,is_dia,is_grd,.false.,
     >     iter,lvs,ddum, ts,tp)
      if (tstp) call pt1d('True residual gtg$',ddum)
      if (vt(iter)) call dump_vector(work(grd),vec_inf,
     >     'grd$',iter+1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after initial grad$',iter,.true.)

C     Compute quantities for stopping test: both numerical
C     stopping criterium, and number/type of dot products to be
C     done each iteration
C----
      call init_stop_tests(stop_tester,
     >     precision,stop_quant,stop_type,
     >     n_xtx_dots,n_xmx_dots,n_gg_dots,n_gh_dots,n_hh_dots,
     >     matrix,mat_ptr,mat_idx,comm_context,
     >     rhs,work(tmp), vec_inf,lvs,
     >     work(dots_stack),maxdot,
     >     grad_hist(1,2),
     >     te, plot_true_res,no_prec,
     >     tptip.or.(tp.and.trace_setup()))
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest stoptest setup$',iter,.true.)

C     Purely informative calculation of zero-th residual norm
C----
      if (ts) then
         call cddot(work(grd),work(grd),
     >        vec_inf,work(dots_stack),maxdot,1,
     >        comm_context,'rtr init$')
         call ddotv(gtg,work(dots_stack),1,comm_context)
         call addflp(2*lvs)
         if (tp) call pd1i1d('iter$',iter,'gtg:$',gtg)
      endif

C     Start bookkeeping
C----
      call zero_flops
      ttime = clock00(t_dum)

      if (tm) call test_memory_array(work,
     >    'Memtest at start iter$',iter,tstp)

C     Iteration Loop for CG
C     
C     When we enter here, we have just computed g_{i+1} for i=0,...
C     g_1 computed initially, otherwise at the end of the loop
C==== 
 10   continue

C====
C     preconditioner solve of gradient i+1;
C     unpreconditioned, then alias with gradient
C----
      if (no_prec) then
         hrd = grd
         goto 11
      endif

      if (tstp) call pd0('Solve preconditioner$')
      call solve_preconditioner(work(hrd),
     >     work(grd),vec_inf,
     >     preconditioner,prec_inf, matrix,mat_ptr,mat_idx,
     >     mat_con,comm_context,
     >     work(tmp1),work(tmp2), iter+1, 0,tptip,vt(iter))
      if (vt(iter)) call dump_vector(work(hrd),
     >     vec_inf,'hrd$',iter+1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after prec solve$',iter,.true.)
 11   continue
C====
C     Matrix times gradient
C----
      if (tptip) call pd1i('Matrix times hrd$',iter+1)
      call cg_mvp1(
     >     work(agr),work(hrd),vec_inf,
     >     is_row,is_col,is_sym,is_dia,is_grd,.false.,
     >     matrix,mat_ptr,mat_idx,mat_con,comm_context)
      if (vt(iter)) call dump_vector(work(agr),
     >     vec_inf,'agr$',iter+1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >     'Memtest after Ar$',iter,.true.)

C================================
C     Inner products g^tg-like:
C================================

C     Set the locations in the dots_stack where various simultaneous
C     inner products are to be stored
C----
      call set_cg_dotlocs(
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     rar_dotloc,n_rar_dots,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots
     >     )

C     Perform the inner products
C----
      call gtype_dots6(
     >     n_xmx_dots, work(it_update_loc),xu_dotloc, 'x-x$',xmx,
     >     n_xtx_dots, work(its),xx_dotloc, 'xx$',xtx,
     >     n_gg_dots, work(grd),gg_dotloc, 'gtg$',gtg,
     >     n_gh_dots, work(grd),gh_dotloc,
     >        'gth$',work(gh_dots+iter),
     >     n_hh_dots, work(hrd),hh_dotloc, 'hth$',hth,
     >     1,work(agr),rar_dotloc, 'rar$',rar,
     >     vec_inf,work(dots_stack),maxdot,comm_context,
     >     vs,lvs, tstp)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after g inprods$',iter,.true.)
      x_norm = sqrt(xtx)
      alpha = work(gh_dots+iter)
      if (iter.eq.0) normg1 = sqrt(gtg)
      if (iter.gt.0) then
         rr_rr_quot = work(gh_dots+iter) / work(gh_dots+iter-1)
         if (tstp) call pd1d('Beta:$',rr_rr_quot)
      endif
      work(norms+iter) = sqrt(gtg)

C     Update stopping test
C----
      call gg_stop_tests(stop_testee,stop_tester,
     >     precision,stop_quant,stop_type,
     >     iter+1, xmx,it_update_mul,
     >     gtg,work(gh_dots+iter),hth, tetp,tstp)

C     Maybe save some results for plotting, maybe write to screen or file
C----
      if (tetp) then
         if (nhist.ge.1 .and. lda_hist.gt.iter) then
            grad_hist(iter+1,1) = sqrt(gtg)
            call pt1i1d('Iter:$',iter, '|g|:$',sqrt(gtg))
         endif
      endif

C================
C     Update search directions
C================

      if (iter.eq.0) then
         if (tptip) call pd0('Copy hrd -> dir$')
         call vvcopy(work(dir),work(hrd), vs)
         if (tptip) call pd0('Copy agr -> adr$')
         call vvcopy(work(adr),work(agr), vs)
      else
         call axby(work(dir),
     >        1.d0,work(hrd),rr_rr_quot,work(dir),vs)
         if (mem_trace_val.ge.2) call test_memory_array(work,
     >        'Memtest after dir update$',iter,.true.)
         call axby(work(adr),
     >        1.d0,work(agr),rr_rr_quot,work(adr),vs)
            if (mem_trace_val.ge.2) call test_memory_array(work,
     >           'Memtest after Adr update$',iter,.true.)
         if (vt(iter)) call dump_vector(work(adr),
     >        vec_inf,'adr$',iter+1)
      endif
      if (vt(iter)) then
         call dump_vector(work(dir),vec_inf,'dir$',iter+1)
      endif
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after dir update$',iter,.true.)

C================
C     End of an iteration
C================

      if (iter.eq.maxtrc) then
         ts = .false.
         call otrtxg                
      endif
      tstp = ts.and.tp

C     Return various codes if the iteration stops here.
C----
      if (tstp)
     >     call pd2d('Stop test and match$',stop_testee,stop_tester)
      if ( stop_testee.lt.stop_tester ) then
         success = 0
         goto 21
      else if (iter.ge.max_iter) then
         success = 1
         goto 21
      endif
      goto 22
 21   continue
      results(1) = success
      results(2) = iter
      results(4) = stop_testee
      results(5) = normg1
      call true_x_norm(ddum,
     >     work(tmp),work(its),rhs, vec_inf,
     >     matrix,mat_ptr,mat_idx,mat_con,
     >     is_row,is_col,is_sym,is_dia,is_grd,
     >     work(dots_stack),maxdot,comm_context,
     >     lvs,tptip)
      results(6) = ddum
      goto 20
 22   continue

C====
      iter = iter+1
      plot_true_res = plot_true_res.and.lda_hist.gt.iter
      if (tm) call test_memory_array(work,
     >     'Memory tested at iteration$',iter,tstp)
      if (tetp) call pt1i('Iteration:$',iter)
      
C================
C     Compute pAp inner products.
C================

      if (tstp) call pd1d2d('gag$',rar,'pap old/new$',pap,
     >     rar - rr_rr_quot**2 * pap)
      pap = rar - rr_rr_quot**2 * pap
      alpha = alpha / pap
      if (tstp) call pd1d('alpha:$',alpha)

      if (mem_trace_val.ge.2) call test_memory_array(work,
     >     'Memtest after p inprods$',iter,.true.)

C================
C     Update gradients left and right
C     this yields g_{i+1}
C================

C     Coupled two-term recurrences
C     gradient update using matrix times search direction
C----
      call axby(work(its),1.d0,work(its),
     >     -alpha,work(dir),vs)
      it_update_loc = dir
      it_update_mul = alpha
      call axby(work(grd),1.d0,work(grd),
     >     -alpha,work(adr),vs)

      if (vt(iter)) then
         call dump_vector(work(grd),vec_inf,'grd$',iter+1)
         call dump_vector(work(its),vec_inf,'itr$',iter+1)
      endif
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after grad update$',iter,.true.)

C     Calculate the exact residual (norm) for trace

C----
      if (plot_true_res .or. ts) then
         call calc_true_res(work(tmp1),work(tmp2), work(its),rhs,
     >        vec_inf, work(dots_stack),maxdot,comm_context,
     >        matrix,mat_ptr,mat_idx,mat_con,
     >        is_row,is_col,is_sym,is_dia,is_grd,.false.,
     >        iter,lvs,ddum, ts,tp)
         grad_hist(iter+1,2) = sqrt(ddum)
         if (tstp) call pt1d('True residual gtg$',ddum)
      endif

C     Iteration Loop End for CG
C----
      goto 10
 20   continue

      if (tstp) call pd0('Iteration loop ended$')
      results(7) = clock00(t_dum) - ttime
      call tally_flops(results(3),comm_context)

C     Copy solution back
C----
      call bvcopy_bo(itv,vec_inf,work(its))

C     Distribute the terminating information
C----
      call dpbcst(results,7,comm_context,'Iter results$')

      return
      end
C----------------------------------------------------------------
      subroutine set_cg_dotlocs(
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     rar_dotloc,n_rar_dots,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots
     >     )

C     Arguments
C----
      integer
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     rar_dotloc,n_rar_dots,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots

      gg_dotloc  = 1
      gh_dotloc  = gg_dotloc  + n_gg_dots
      hh_dotloc  = gh_dotloc  + n_gh_dots
      xx_dotloc  = hh_dotloc  + n_hh_dots
      xu_dotloc  = xx_dotloc  + n_xtx_dots
      rar_dotloc = xu_dotloc + 1

      return
      end
C----------------------------------------------------------------
C     Initialize pointers for internal storage
C----------------------------------------------------------------
      subroutine alloc_cg_work(work,
     >     its,grd,hrd,dir,adr,agr,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots,
     >     no_prec,
     >     vec_inf,mat_con,vs,max_iter
     >     )
      
C     Arguments
C---- 
      double precision work(*)
      integer
     >     its,grd,hrd,dir,adr,agr,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots,
     >     vec_inf(*),mat_con(*),vs, max_iter
      logical no_prec

C     Functions
C----
      integer alcate

C     Local
C----

C     Space for inner products; storage and local computation.
C----
      maxdot = max_iter+10
      dots_stack = alcate(maxdot,'Local inner products$',work)

C     Allocate cg vectors
C---- 
      norms = alcate(max_iter+1,'Gradient norms$',work)
      gh_dots = alcate(max_iter+1,'GH inprods$',work)

      its = alcate(vs,'iterates$',work)
      grd = alcate(vs,'right gradient$',work)
      if (no_prec) then
         hrd = grd
      else
         hrd = alcate(vs,'right Cinv gradient$',work)
      endif
      dir = alcate(vs,'right search direction$',work)
      adr = alcate(vs,'right A search$',work)
      agr = alcate(vs,'A times grad$',work)
      tmp = alcate(vs,'cg temporary$',work)
      tmp1 = alcate(vs,'cg temporary1$',work)
      tmp2 = alcate(vs,'cg temporary2$',work)
      
C     For Bicg & squared methods, allocate left variants too
C---- 
      

      return
      end
C----------------------------------------------------------------
      subroutine minres_inner
     >     (matrix,
     >     mat_ptr,mat_idx,mat_con,comm_context,
     >     preconditioner,prec_inf, itv,rhs, vec_inf,
     >     success,precision, max_iter,stop_quant,stop_type,
     >     grad_hist,lda_hist,nhist,results,
     >     test_alloc,need_mem, work
     >     )
      
C     Arguments
C----
      integer lda_hist,nhist
      integer mat_ptr(*),mat_idx(*),mat_con(*),comm_context(*),
     >     prec_inf(*), vec_inf(*),
     >     success, max_iter,stop_quant,stop_type,
     >     test_alloc,need_mem,need_mem_saved
      save need_mem_saved
      double precision matrix(*),preconditioner(*), itv(*),rhs(*),
     >     grad_hist(lda_hist,nhist), precision,results(*)
      double precision work(*)
      
      double precision
     >     a_norm(1),x_norm,b_norm
      common /itrmet_r/
     >     a_norm,x_norm,b_norm

C     Inspect dynamic memory for integrity
C     0=no trace, 1=once/iteration, 2=very often
C----
      integer mem_trace_val
      common /mem_trace/mem_trace_val

C     Functions
C----
      integer needed_memory
      logical
     >     trace_progress,trace_setup,prec_null
      double precision clock00,t_dum(2),ttime

C     Local
C---- 
      logical
     >     vt,no_prec,plot_true_res,
     >     is_row,is_col,is_sym,is_dia,is_grd,
     >     tstp,tetp,tptp,tptip, te,ts,ti,tp,tm
      integer maxtrc
      parameter (maxtrc=20)
      integer 
     >     its,grd,hrd,dir,adr,agr,
     >     ohrd,minres_grd,mrs_wd,lsqnorms,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots,
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     rar_dotloc,n_rar_dots,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots,
     >     hssbrg,hssbrg_q, hess_Qrow,hess_Rcol,
     >     hesfac_ucol,
     >     hess_ncol,hess_nupdi,
     >     iter, idum1, vs,lvs
      double precision ddum

C     Parameters for stopping test
C----
      integer it_update_loc
      double precision 
     >     err_guess,
     >     it_update_mul, stop_tester,stop_testee,normg1,
     >     xtx,xmx,gtg,hth, rar,pap,
     >     rr_rr_quot,alpha

C     Vector trace
C----
      vt(idum1) = ti.and.idum1.le.maxtrc

C     Various conditions
C----
      call cg_trace_setup(
     >     tstp,tetp,tptp,tptip, te,ts,ti,tp,tm
     >     )

C     Is there a preconditioner step?
C----
      no_prec = prec_null()

C     How long is a vector?
C----
      vs = vec_inf(3)
      lvs = vec_inf(2)

C     Do we plot the true residual for comparison?
C----
      plot_true_res = nhist.gt.2 .and. lda_hist.gt.0
      if (plot_true_res) then
         nhist = 3
      else
         nhist = 3-1
      endif

      if (max_iter.eq.-1) then
         call pt0('>>>> Aborting test run <<<<$')
         return
      endif

C     Figure out how much work / buffer space is needed,
C     and allocate temporaries in this.
C     (2-step process: first time only measurement,
C     after return and external malloc alloc for real)
C----
      call set_mem_probe(test_alloc)
      call reset_allocation(work,1, need_mem)
      if (trace_progress()) then
         if (test_alloc.eq.0) then
            call pd1i('Allocating cg workspace$',need_mem)
         else
            call pd0('Measuring cg workspace$')
         endif
      endif

      if (test_alloc.eq.0) call nulv(work,need_mem_saved)

      call alloc_minres_work(work,
     >     its,grd,hrd,dir,adr,agr,
     >     ohrd,minres_grd,mrs_wd,lsqnorms,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots,
     >     hssbrg,hssbrg_q, hess_Qrow,hess_Rcol,
     >     hesfac_ucol,
     >     hess_ncol,hess_nupdi,
     >     no_prec,
     >     vec_inf,mat_con,vs,max_iter
     >     )
      if (test_alloc.eq.1) then
         need_mem = needed_memory()
         need_mem_saved = need_mem
         return
      else
         if (mem_trace_val.ge.2) call test_memory_array(work,
     >       'Memtest alloc$',1,.true.)
      endif

C     Initialization of constants
C----
      iter = 0
      it_update_mul = 1.d0
      it_update_loc = 1
      pap = 0.d0
      alpha = 1.d0
      rr_rr_quot = 0.d0

C     Initialization part
C==== 
      if (tstp) call pt0('Initial constants$')
      call bvzero(work(dir),vec_inf)
      call bvcopy_ob(work(its),vec_inf,itv)
      if (vt(1)) call dump_vector(work(its),vec_inf,'itr$',1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest init vectors$',1,.true.)

      call cg_format_setup(
     >     is_row,is_col,is_sym,is_dia,is_grd,vec_inf)

C     Calculate the initial residual
C----
      call calc_true_res(work(grd),work(tmp), work(its),rhs,
     >     vec_inf, work(dots_stack),maxdot,comm_context,
     >     matrix,mat_ptr,mat_idx,mat_con,
     >     is_row,is_col,is_sym,is_dia,is_grd,.false.,
     >     iter,lvs,ddum, ts,tp)
      if (tstp) call pt1d('True residual gtg$',ddum)
      if (vt(iter)) call dump_vector(work(grd),vec_inf,
     >     'grd$',iter+1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after initial grad$',iter,.true.)

C     Compute quantities for stopping test: both numerical
C     stopping criterium, and number/type of dot products to be
C     done each iteration
C----
      call init_stop_tests(stop_tester,
     >     precision,stop_quant,stop_type,
     >     n_xtx_dots,n_xmx_dots,n_gg_dots,n_gh_dots,n_hh_dots,
     >     matrix,mat_ptr,mat_idx,comm_context,
     >     rhs,work(tmp), vec_inf,lvs,
     >     work(dots_stack),maxdot,
     >     grad_hist(1,3),
     >     .true., plot_true_res,no_prec,
     >     tptip.or.(tp.and.trace_setup()))
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest stoptest setup$',iter,.true.)

C     Purely informative calculation of zero-th residual norm
C----
      if (ts) then
         call cddot(work(grd),work(grd),
     >        vec_inf,work(dots_stack),maxdot,1,
     >        comm_context,'rtr init$')
         call ddotv(gtg,work(dots_stack),1,comm_context)
         call addflp(2*lvs)
         if (tp) call pd1i1d('iter$',iter,'gtg:$',gtg)
      endif

C     Start bookkeeping
C----
      call zero_flops
      ttime = clock00(t_dum)

      if (tm) call test_memory_array(work,
     >    'Memtest at start iter$',iter,tstp)

C     Iteration Loop for MINRES
C     
C     When we enter here, we have just computed g_{i+1} for i=0,...
C     g_1 computed initially, otherwise at the end of the loop
C==== 
 10   continue

C====
C     preconditioner solve of gradient i+1;
C     unpreconditioned, then alias with gradient
C----
      if (no_prec) then
         hrd = grd
         goto 11
      endif

      call vvcopy(work(ohrd),work(hrd),vs)
      if (tstp) call pd0('Solve preconditioner$')
      call solve_preconditioner(work(hrd),
     >     work(grd),vec_inf,
     >     preconditioner,prec_inf, matrix,mat_ptr,mat_idx,
     >     mat_con,comm_context,
     >     work(tmp1),work(tmp2), iter+1, 0,tptip,vt(iter))
      if (vt(iter)) call dump_vector(work(hrd),
     >     vec_inf,'hrd$',iter+1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after prec solve$',iter,.true.)
 11   continue
C====
C     Matrix times gradient
C----
      if (tptip) call pd1i('Matrix times hrd$',iter+1)
      call cg_mvp1(
     >     work(agr),work(hrd),vec_inf,
     >     is_row,is_col,is_sym,is_dia,is_grd,.false.,
     >     matrix,mat_ptr,mat_idx,mat_con,comm_context)
      if (vt(iter)) call dump_vector(work(agr),
     >     vec_inf,'agr$',iter+1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >     'Memtest after Ar$',iter,.true.)

C================================
C     Inner products g^tg-like:
C================================

C     Set the locations in the dots_stack where various simultaneous
C     inner products are to be stored
C----
      call set_minres_dotlocs(
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     rar_dotloc,n_rar_dots,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots
     >     )

C     Perform the inner products
C----
      call gtype_dots6(
     >     n_xmx_dots, work(it_update_loc),xu_dotloc, 'x-x$',xmx,
     >     n_xtx_dots, work(its),xx_dotloc, 'xx$',xtx,
     >     n_gg_dots, work(grd),gg_dotloc, 'gtg$',gtg,
     >     n_gh_dots, work(grd),gh_dotloc,
     >        'gth$',work(gh_dots+iter),
     >     n_hh_dots, work(hrd),hh_dotloc, 'hth$',hth,
     >     1,work(agr),rar_dotloc, 'rar$',rar,
     >     vec_inf,work(dots_stack),maxdot,comm_context,
     >     vs,lvs, tstp)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after g inprods$',iter,.true.)
      x_norm = sqrt(xtx)
      alpha = work(gh_dots+iter)
      if (iter.eq.0) normg1 = sqrt(gtg)
         if (iter.eq.0) work(lsqnorms) = normg1
      if (iter.gt.0) then
         rr_rr_quot = work(gh_dots+iter) / work(gh_dots+iter-1)
         if (tstp) call pd1d('Beta:$',rr_rr_quot)
         work(hesfac_ucol) = rr_rr_quot
      endif
      work(norms+iter) = sqrt(gtg)

C     Update stopping test
C----
      call gg_stop_tests(stop_testee,stop_tester,
     >     precision,stop_quant,stop_type,
     >     iter+1, xmx,it_update_mul,
     >     gtg,work(gh_dots+iter),hth, tetp,tstp)

C     Maybe save some results for plotting, maybe write to screen or file
C----
      if (tetp) then
         if (nhist.ge.2 .and. lda_hist.gt.iter) then
            grad_hist(iter+1,2) = sqrt(gtg)
         endif
      endif

C====
C     Update the Hessenberg QR factorization, and update the iterate
C----
      if (iter.gt.0) then
         call hessqr(work(hssbrg_q),work(hssbrg),
     >        hess_ncol,hess_nupdi, work(norms),normg1,
     >        work(hess_Qrow),work(hess_Rcol),err_guess,
     >        iter,iter, tstp)
         work(lsqnorms+iter) = sqrt(err_guess)
         stop_testee = min(stop_testee,sqrt(err_guess))
         if (nhist.ge.1 .and. lda_hist.gt.iter)
     >        grad_hist(iter,1) = sqrt(err_guess)
         if (tetp)
     >        call pt1i1d('Iter:$',iter,'|g| (est)$',err_guess)
         if (mem_trace_val.ge.2) call test_memory_array(work,
     >        'Memtest after hess qr$',iter,.true.)
         call requpdate(work(minres_grd),'Minres grd updt$',
     >        mrs_wd,vs,lvs,iter,vec_inf,
     >        1.d0,work(ohrd),work(hess_Rcol),iter,
     >        tptip,it_update_loc)
         it_update_loc = minres_grd+it_update_loc-1
         if (vt(iter)) call dump_vector(work(it_update_loc),
     >        vec_inf,'itupd$',iter)
         it_update_mul = -work(hess_Qrow+iter-1)
         call bvaxby(work(its),vec_inf, 1.d0,work(its),
     >        it_update_mul,work(it_update_loc))
         if (vt(iter)) call dump_vector(
     >        work(its),vec_inf,'itr$',iter+1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >     'Memtest after minres update$',iter,.true.)
      endif

C================
C     Update the Hessenberg matrix
C     with the coefficients just calculated
C================

      if (iter.gt.0) call split_recur_hescol_u(work(hssbrg),
     >     work(hesfac_ucol), hess_ncol,hess_nupdi,iter+1,tstp)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after hess update$',iter,.true.)

C================
C     Update search directions
C================

      if (iter.eq.0) then
         if (tptip) call pd0('Copy hrd -> dir$')
         call vvcopy(work(dir),work(hrd), vs)
         if (tptip) call pd0('Copy agr -> adr$')
         call vvcopy(work(adr),work(agr), vs)
      else
         call axby(work(dir),
     >        1.d0,work(hrd),rr_rr_quot,work(dir),vs)
         if (mem_trace_val.ge.2) call test_memory_array(work,
     >        'Memtest after dir update$',iter,.true.)
         call axby(work(adr),
     >        1.d0,work(agr),rr_rr_quot,work(adr),vs)
            if (mem_trace_val.ge.2) call test_memory_array(work,
     >           'Memtest after Adr update$',iter,.true.)
         if (vt(iter)) call dump_vector(work(adr),
     >        vec_inf,'adr$',iter+1)
      endif
      if (vt(iter)) then
         call dump_vector(work(dir),vec_inf,'dir$',iter+1)
      endif
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after dir update$',iter,.true.)

C================
C     End of an iteration
C================

      if (iter.eq.maxtrc) then
         ts = .false.
         call otrtxg                
      endif
      tstp = ts.and.tp

C     Return various codes if the iteration stops here.
C----
      if (tstp)
     >     call pd2d('Stop test and match$',stop_testee,stop_tester)
      if ( stop_testee.lt.stop_tester ) then
         success = 0
         goto 21
      else if (iter.ge.max_iter) then
         success = 1
         goto 21
      endif
      goto 22
 21   continue
      results(1) = success
      results(2) = iter
      results(4) = stop_testee
      results(5) = normg1
      call true_x_norm(ddum,
     >     work(tmp),work(its),rhs, vec_inf,
     >     matrix,mat_ptr,mat_idx,mat_con,
     >     is_row,is_col,is_sym,is_dia,is_grd,
     >     work(dots_stack),maxdot,comm_context,
     >     lvs,tptip)
      results(6) = ddum
      goto 20
 22   continue

C====
      iter = iter+1
      plot_true_res = plot_true_res.and.lda_hist.gt.iter
      if (tm) call test_memory_array(work,
     >     'Memory tested at iteration$',iter,tstp)
      if (tetp) call pt1i('Iteration:$',iter)
      
C================
C     Compute pAp inner products.
C================

      if (tstp) call pd1d2d('gag$',rar,'pap old/new$',pap,
     >     rar - rr_rr_quot**2 * pap)
      pap = rar - rr_rr_quot**2 * pap
      alpha = alpha / pap
      if (tstp) call pd1d('alpha:$',alpha)

C     Update the Hessenberg matrix
C     with the alpha just calculated
C----
      call split_recur_hescol_d(work(hssbrg),alpha,
     >     hess_ncol,hess_nupdi,iter,tstp)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >     'Memtest after p inprods$',iter,.true.)

C================
C     Update gradients left and right
C     this yields g_{i+1}
C================

C     Save the old gradient for the minres update
C----
      if (no_prec) call vvcopy(work(ohrd),work(grd),vs)

C     Coupled two-term recurrences
C     gradient update using matrix times search direction
C----
      call axby(work(grd),1.d0,work(grd),
     >     -alpha,work(adr),vs)

      if (vt(iter)) then
         call dump_vector(work(grd),vec_inf,'grd$',iter+1)
      endif
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after grad update$',iter,.true.)

C     Calculate the exact residual (norm) for trace
C----
      if (plot_true_res .or. ts) then
         call calc_true_res(work(tmp1),work(tmp2), work(its),rhs,
     >        vec_inf, work(dots_stack),maxdot,comm_context,
     >        matrix,mat_ptr,mat_idx,mat_con,
     >        is_row,is_col,is_sym,is_dia,is_grd,.false.,
     >        iter,lvs,ddum, ts,tp)
         grad_hist(iter+1,3) = sqrt(ddum)
         if (tstp) call pt1d('True residual gtg$',ddum)
      endif

C     Iteration Loop End for MINRES
C----
      goto 10
 20   continue

      if (tstp) call pd0('Iteration loop ended$')
      results(7) = clock00(t_dum) - ttime
      call tally_flops(results(3),comm_context)

C     Copy solution back
C----
      call bvcopy_bo(itv,vec_inf,work(its))

C     Distribute the terminating information
C----
      call dpbcst(results,7,comm_context,'Iter results$')

      return
      end
C----------------------------------------------------------------
      subroutine set_minres_dotlocs(
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     rar_dotloc,n_rar_dots,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots
     >     )

C     Arguments
C----
      integer
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     rar_dotloc,n_rar_dots,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots

      gg_dotloc  = 1
      gh_dotloc  = gg_dotloc  + n_gg_dots
      hh_dotloc  = gh_dotloc  + n_gh_dots
      xx_dotloc  = hh_dotloc  + n_hh_dots
      xu_dotloc  = xx_dotloc  + n_xtx_dots
      rar_dotloc = xu_dotloc + 1

      return
      end
C----------------------------------------------------------------
C     Initialize pointers for internal storage
C----------------------------------------------------------------
      subroutine alloc_minres_work(work,
     >     its,grd,hrd,dir,adr,agr,
     >     ohrd,minres_grd,mrs_wd,lsqnorms,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots,
     >     hssbrg,hssbrg_q, hess_Qrow,hess_Rcol,
     >     hesfac_ucol,
     >     hess_ncol,hess_nupdi,
     >     no_prec,
     >     vec_inf,mat_con,vs,max_iter
     >     )
      
C     Arguments
C---- 
      double precision work(*)
      integer
     >     its,grd,hrd,dir,adr,agr,
     >     ohrd,minres_grd,mrs_wd,lsqnorms,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots,
     >     hssbrg,hssbrg_q, hess_Qrow,hess_Rcol,
     >     hesfac_ucol,
     >     hess_ncol,hess_nupdi,
     >     vec_inf(*),mat_con(*),vs, max_iter
      logical no_prec

C     Functions
C----
      integer alcate

C     Local
C----

C     How many upper diagonals does the Hessenberg matrix have?
C----
      hess_nupdi = 1

C     Space for inner products; storage and local computation.
C----
      maxdot = max_iter+10
      dots_stack = alcate(maxdot,'Local inner products$',work)

C     Allocate cg vectors
C---- 
      norms = alcate(max_iter+1,'Gradient norms$',work)
      gh_dots = alcate(max_iter+1,'GH inprods$',work)
      lsqnorms = alcate(max_iter+1,'LSQ Gradient norms$',work)

      its = alcate(vs,'iterates$',work)
      grd = alcate(vs,'right gradient$',work)
      if (no_prec) then
         hrd = grd
      else
         hrd = alcate(vs,'right Cinv gradient$',work)
      endif
      dir = alcate(vs,'right search direction$',work)
      adr = alcate(vs,'right A search$',work)
      agr = alcate(vs,'A times grad$',work)
      hesfac_ucol = alcate(hess_nupdi,'col of U from hesfac$',work)
      tmp = alcate(vs,'cg temporary$',work)
      tmp1 = alcate(vs,'cg temporary1$',work)
      tmp2 = alcate(vs,'cg temporary2$',work)
      
C     For Bicg & squared methods, allocate left variants too
C---- 
      
C     For minimal residual methods allocate Q and R;
C     for Bicg three vectors are enough,
C     for unsymmetric cg this can be much more.
C---- 
      hess_ncol = max_iter+2
      hssbrg    = alcate(hess_ncol*hess_ncol,
     >     'band hessenberg matrix$',work)
      hssbrg_q = alcate(hess_ncol*hess_ncol,
     >     'hessenberg matrix Q$',work)
      hess_Qrow = alcate(hess_ncol+1,
     >     'first row of hessenberg matrix$',work)
      hess_Rcol = alcate(hess_ncol+1,
     >     'current column of hessenberg R$',work)
      mrs_wd = hess_nupdi+2
      minres_grd = alcate(mrs_wd*vs,'Min res combinations$',work)
      ohrd = alcate(vs,'One old gradient$',work)


      return
      end
C----------------------------------------------------------------
      subroutine cgne_inner
     >     (matrix,
     >     mat_ptr,mat_idx,mat_con,comm_context,
     >     preconditioner,prec_inf, itv,rhs, vec_inf,
     >     success,precision, max_iter,stop_quant,stop_type,
     >     grad_hist,lda_hist,nhist,results,
     >     test_alloc,need_mem, work
     >     )
      
C     Arguments
C----
      integer lda_hist,nhist
      integer mat_ptr(*),mat_idx(*),mat_con(*),comm_context(*),
     >     prec_inf(*), vec_inf(*),
     >     success, max_iter,stop_quant,stop_type,
     >     test_alloc,need_mem,need_mem_saved
      save need_mem_saved
      double precision matrix(*),preconditioner(*), itv(*),rhs(*),
     >     grad_hist(lda_hist,nhist), precision,results(*)
      double precision work(*)
      
      double precision
     >     a_norm(1),x_norm,b_norm
      common /itrmet_r/
     >     a_norm,x_norm,b_norm

C     Inspect dynamic memory for integrity
C     0=no trace, 1=once/iteration, 2=very often
C----
      integer mem_trace_val
      common /mem_trace/mem_trace_val

C     Functions
C----
      integer needed_memory
      logical
     >     trace_progress,trace_setup,prec_null
      double precision clock00,t_dum(2),ttime

C     Local
C---- 
      logical
     >     vt,no_prec,plot_true_res,
     >     is_row,is_col,is_sym,is_dia,is_grd,
     >     tstp,tetp,tptp,tptip, te,ts,ti,tp,tm
      integer maxtrc
      parameter (maxtrc=20)
      integer 
     >     its,grd,hrd,dir,adr,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots,
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots,
     >     iter, idum1, vs,lvs
      double precision ddum

C     Parameters for stopping test
C----
      integer it_update_loc
      double precision 
     >     it_update_mul, stop_tester,stop_testee,normg1,
     >     xtx,xmx,gtg,hth, pap,
     >     rr_rr_quot,alpha

C     Vector trace
C----
      vt(idum1) = ti.and.idum1.le.maxtrc

C     Various conditions
C----
      call cg_trace_setup(
     >     tstp,tetp,tptp,tptip, te,ts,ti,tp,tm
     >     )

C     Is there a preconditioner step?
C----
      no_prec = prec_null()

C     How long is a vector?
C----
      vs = vec_inf(3)
      lvs = vec_inf(2)

C     Do we plot the true residual for comparison?
C----
      plot_true_res = nhist.gt.1 .and. lda_hist.gt.0
      if (plot_true_res) then
         nhist = 2
      else
         nhist = 2-1
      endif

      if (max_iter.eq.-1) then
         call pt0('>>>> Aborting test run <<<<$')
         return
      endif

C     Figure out how much work / buffer space is needed,
C     and allocate temporaries in this.
C     (2-step process: first time only measurement,
C     after return and external malloc alloc for real)
C----
      call set_mem_probe(test_alloc)
      call reset_allocation(work,1, need_mem)
      if (trace_progress()) then
         if (test_alloc.eq.0) then
            call pd1i('Allocating cg workspace$',need_mem)
         else
            call pd0('Measuring cg workspace$')
         endif
      endif

      if (test_alloc.eq.0) call nulv(work,need_mem_saved)

      call alloc_cgne_work(work,
     >     its,grd,hrd,dir,adr,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots,
     >     no_prec,
     >     vec_inf,mat_con,vs,max_iter
     >     )
      if (test_alloc.eq.1) then
         need_mem = needed_memory()
         need_mem_saved = need_mem
         return
      else
         if (mem_trace_val.ge.2) call test_memory_array(work,
     >       'Memtest alloc$',1,.true.)
      endif

C     Initialization of constants
C----
      iter = 0
      it_update_mul = 1.d0
      it_update_loc = 1
      pap = 0.d0
      alpha = 1.d0
      rr_rr_quot = 0.d0

C     Initialization part
C==== 
      if (tstp) call pt0('Initial constants$')
      call bvzero(work(dir),vec_inf)
      call bvcopy_ob(work(its),vec_inf,itv)
      if (vt(1)) call dump_vector(work(its),vec_inf,'itr$',1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest init vectors$',1,.true.)

      call cg_format_setup(
     >     is_row,is_col,is_sym,is_dia,is_grd,vec_inf)

C     Calculate the initial residual
C----
      call calc_true_res(work(grd),work(tmp), work(its),rhs,
     >     vec_inf, work(dots_stack),maxdot,comm_context,
     >     matrix,mat_ptr,mat_idx,mat_con,
     >     is_row,is_col,is_sym,is_dia,is_grd,.true.,
     >     iter,lvs,ddum, ts,tp)
      if (tstp) call pt1d('True residual gtg$',ddum)
      if (vt(iter)) call dump_vector(work(grd),vec_inf,
     >     'grd$',iter+1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after initial grad$',iter,.true.)

C     Compute quantities for stopping test: both numerical
C     stopping criterium, and number/type of dot products to be
C     done each iteration
C----
      call init_stop_tests(stop_tester,
     >     precision,stop_quant,stop_type,
     >     n_xtx_dots,n_xmx_dots,n_gg_dots,n_gh_dots,n_hh_dots,
     >     matrix,mat_ptr,mat_idx,comm_context,
     >     rhs,work(tmp), vec_inf,lvs,
     >     work(dots_stack),maxdot,
     >     grad_hist(1,2),
     >     te, plot_true_res,no_prec,
     >     tptip.or.(tp.and.trace_setup()))
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest stoptest setup$',iter,.true.)

C     Purely informative calculation of zero-th residual norm
C----
      if (ts) then
         call cddot(work(grd),work(grd),
     >        vec_inf,work(dots_stack),maxdot,1,
     >        comm_context,'rtr init$')
         call ddotv(gtg,work(dots_stack),1,comm_context)
         call addflp(2*lvs)
         if (tp) call pd1i1d('iter$',iter,'gtg:$',gtg)
      endif

C     Start bookkeeping
C----
      call zero_flops
      ttime = clock00(t_dum)

      if (tm) call test_memory_array(work,
     >    'Memtest at start iter$',iter,tstp)

C     Iteration Loop for CGNE
C     
C     When we enter here, we have just computed g_{i+1} for i=0,...
C     g_1 computed initially, otherwise at the end of the loop
C==== 
 10   continue

C====
C     preconditioner solve of gradient i+1;
C     unpreconditioned, then alias with gradient
C----
      if (no_prec) then
         hrd = grd
         goto 11
      endif

      if (tstp) call pd0('Solve preconditioner$')
      call solve_preconditioner(work(hrd),
     >     work(grd),vec_inf,
     >     preconditioner,prec_inf, matrix,mat_ptr,mat_idx,
     >     mat_con,comm_context,
     >     work(tmp1),work(tmp2), iter+1, 0,tptip,vt(iter))
      if (vt(iter)) call dump_vector(work(hrd),
     >     vec_inf,'hrd$',iter+1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after prec solve$',iter,.true.)
 11   continue
C================================
C     Inner products g^tg-like:
C================================

C     Set the locations in the dots_stack where various simultaneous
C     inner products are to be stored
C----
      call set_cgne_dotlocs(
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots
     >     )

C     Perform the inner products
C----
      call gtype_dots5(
     >     n_xmx_dots, work(it_update_loc),xu_dotloc, 'x-x$',xmx,
     >     n_xtx_dots, work(its),xx_dotloc, 'xx$',xtx,
     >     n_gg_dots, work(grd),gg_dotloc, 'gtg$',gtg,
     >     n_gh_dots, work(grd),gh_dotloc,
     >        'gth$',work(gh_dots+iter),
     >     n_hh_dots, work(hrd),hh_dotloc, 'hth$',hth,
     >     vec_inf,work(dots_stack),maxdot,comm_context,
     >     vs,lvs, tstp)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after g inprods$',iter,.true.)
      x_norm = sqrt(xtx)
      alpha = work(gh_dots+iter)
      if (iter.eq.0) normg1 = sqrt(gtg)
      if (iter.gt.0) then
         rr_rr_quot = work(gh_dots+iter) / work(gh_dots+iter-1)
         if (tstp) call pd1d('Beta:$',rr_rr_quot)
      endif
      work(norms+iter) = sqrt(gtg)

C     Update stopping test
C----
      call gg_stop_tests(stop_testee,stop_tester,
     >     precision,stop_quant,stop_type,
     >     iter+1, xmx,it_update_mul,
     >     gtg,work(gh_dots+iter),hth, tetp,tstp)

C     Maybe save some results for plotting, maybe write to screen or file
C----
      if (tetp) then
         if (nhist.ge.1 .and. lda_hist.gt.iter) then
            grad_hist(iter+1,1) = sqrt(gtg)
            call pt1i1d('Iter:$',iter, '|g|:$',sqrt(gtg))
         endif
      endif

C================
C     Update search directions
C================

      if (iter.eq.0) then
         if (tptip) call pd0('Copy hrd -> dir$')
         call vvcopy(work(dir),work(hrd), vs)
      else
         call axby(work(dir),
     >        1.d0,work(hrd),rr_rr_quot,work(dir),vs)
         if (mem_trace_val.ge.2) call test_memory_array(work,
     >        'Memtest after dir update$',iter,.true.)
      endif
      if (vt(iter)) then
         call dump_vector(work(dir),vec_inf,'dir$',iter+1)
      endif
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after dir update$',iter,.true.)

C================
C     End of an iteration
C================

      if (iter.eq.maxtrc) then
         ts = .false.
         call otrtxg                
      endif
      tstp = ts.and.tp

C     Return various codes if the iteration stops here.
C----
      if (tstp)
     >     call pd2d('Stop test and match$',stop_testee,stop_tester)
      if ( stop_testee.lt.stop_tester ) then
         success = 0
         goto 21
      else if (iter.ge.max_iter) then
         success = 1
         goto 21
      endif
      goto 22
 21   continue
      results(1) = success
      results(2) = iter
      results(4) = stop_testee
      results(5) = normg1
      call true_x_norm(ddum,
     >     work(tmp),work(its),rhs, vec_inf,
     >     matrix,mat_ptr,mat_idx,mat_con,
     >     is_row,is_col,is_sym,is_dia,is_grd,
     >     work(dots_stack),maxdot,comm_context,
     >     lvs,tptip)
      results(6) = ddum
      goto 20
 22   continue

C====
      iter = iter+1
      plot_true_res = plot_true_res.and.lda_hist.gt.iter
      if (tm) call test_memory_array(work,
     >     'Memory tested at iteration$',iter,tstp)
      if (tetp) call pt1i('Iteration:$',iter)
      
C================
C     Matrix times search direction
C----
      call bvcler(work(dir),vec_inf)
      if (tptip) call pd1i('Matrix times search$',iter)
      call cg_mvp1(
     >     work(tmp),work(dir),vec_inf,
     >     is_row,is_col,is_sym,is_dia,is_grd,.false.,
     >     matrix,mat_ptr,mat_idx,mat_con,comm_context)
      if (vt(iter)) call dump_vector(work(adr),
     >     vec_inf,'adr$',iter)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >     'Memtest after Ap$',iter,.true.)
      if (tptip) call pd1i('CGNE mat x search$',iter)
      call cg_mvp1(
     >     work(adr),work(tmp),vec_inf,
     >     is_row,is_col,is_sym,is_dia,is_grd,.true.,
     >     matrix,mat_ptr,mat_idx,mat_con,comm_context)
      if (vt(iter)) call dump_vector(work(adr),
     >     vec_inf,'atdr$',iter)

C================
C     Compute pAp inner products.
C================

      call cddot(work(dir),work(adr),vec_inf,
     >     work(dots_stack),maxdot,1,comm_context,'pAp dot$')
      call ddotv(pap,work(dots_stack),1,comm_context)
      call addflp(2*lvs)
      if (tstp) call pd1d('pap inprod$',pap)
      alpha = alpha / pap
      if (tstp) call pd1d('alpha:$',alpha)

      if (mem_trace_val.ge.2) call test_memory_array(work,
     >     'Memtest after p inprods$',iter,.true.)

C================
C     Update gradients left and right
C     this yields g_{i+1}
C================

C     Coupled two-term recurrences
C     gradient update using matrix times search direction
C----
      call axby(work(its),1.d0,work(its),
     >     -alpha,work(dir),vs)
      it_update_loc = dir
      it_update_mul = alpha
      call axby(work(grd),1.d0,work(grd),
     >     -alpha,work(adr),vs)

      if (vt(iter)) then
         call dump_vector(work(grd),vec_inf,'grd$',iter+1)
         call dump_vector(work(its),vec_inf,'itr$',iter+1)
      endif
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after grad update$',iter,.true.)

C     Calculate the exact residual (norm) for trace

C----
      if (plot_true_res .or. ts) then
         call calc_true_res(work(tmp1),work(tmp2), work(its),rhs,
     >        vec_inf, work(dots_stack),maxdot,comm_context,
     >        matrix,mat_ptr,mat_idx,mat_con,
     >        is_row,is_col,is_sym,is_dia,is_grd,.true.,
     >        iter,lvs,ddum, ts,tp)
         grad_hist(iter+1,2) = sqrt(ddum)
         if (tstp) call pt1d('True residual gtg$',ddum)
      endif

C     Iteration Loop End for CGNE
C----
      goto 10
 20   continue

      if (tstp) call pd0('Iteration loop ended$')
      results(7) = clock00(t_dum) - ttime
      call tally_flops(results(3),comm_context)

C     Copy solution back
C----
      call bvcopy_bo(itv,vec_inf,work(its))

C     Distribute the terminating information
C----
      call dpbcst(results,7,comm_context,'Iter results$')

      return
      end
C----------------------------------------------------------------
      subroutine set_cgne_dotlocs(
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots
     >     )

C     Arguments
C----
      integer
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots

      gg_dotloc  = 1
      gh_dotloc  = gg_dotloc  + n_gg_dots
      hh_dotloc  = gh_dotloc  + n_gh_dots
      xx_dotloc  = hh_dotloc  + n_hh_dots
      xu_dotloc  = xx_dotloc  + n_xtx_dots

      return
      end
C----------------------------------------------------------------
C     Initialize pointers for internal storage
C----------------------------------------------------------------
      subroutine alloc_cgne_work(work,
     >     its,grd,hrd,dir,adr,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots,
     >     no_prec,
     >     vec_inf,mat_con,vs,max_iter
     >     )
      
C     Arguments
C---- 
      double precision work(*)
      integer
     >     its,grd,hrd,dir,adr,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots,
     >     vec_inf(*),mat_con(*),vs, max_iter
      logical no_prec

C     Functions
C----
      integer alcate

C     Local
C----

C     Space for inner products; storage and local computation.
C----
      maxdot = max_iter+10
      dots_stack = alcate(maxdot,'Local inner products$',work)

C     Allocate cg vectors
C---- 
      norms = alcate(max_iter+1,'Gradient norms$',work)
      gh_dots = alcate(max_iter+1,'GH inprods$',work)

      its = alcate(vs,'iterates$',work)
      grd = alcate(vs,'right gradient$',work)
      if (no_prec) then
         hrd = grd
      else
         hrd = alcate(vs,'right Cinv gradient$',work)
      endif
      dir = alcate(vs,'right search direction$',work)
      adr = alcate(vs,'right A search$',work)
      tmp = alcate(vs,'cg temporary$',work)
      tmp1 = alcate(vs,'cg temporary1$',work)
      tmp2 = alcate(vs,'cg temporary2$',work)
      
C     For Bicg & squared methods, allocate left variants too
C---- 
      

      return
      end
C----------------------------------------------------------------
      subroutine bicg_inner
     >     (matrix,
     >     mat_ptr,mat_idx,mat_con,comm_context,
     >     preconditioner,prec_inf, itv,rhs, vec_inf,
     >     success,precision, max_iter,stop_quant,stop_type,
     >     grad_hist,lda_hist,nhist,results,
     >     test_alloc,need_mem, work
     >     )
      
C     Arguments
C----
      integer lda_hist,nhist
      integer mat_ptr(*),mat_idx(*),mat_con(*),comm_context(*),
     >     prec_inf(*), vec_inf(*),
     >     success, max_iter,stop_quant,stop_type,
     >     test_alloc,need_mem,need_mem_saved
      save need_mem_saved
      double precision matrix(*),preconditioner(*), itv(*),rhs(*),
     >     grad_hist(lda_hist,nhist), precision,results(*)
      double precision work(*)
      
      double precision
     >     a_norm(1),x_norm,b_norm
      common /itrmet_r/
     >     a_norm,x_norm,b_norm

C     Inspect dynamic memory for integrity
C     0=no trace, 1=once/iteration, 2=very often
C----
      integer mem_trace_val
      common /mem_trace/mem_trace_val

C     Functions
C----
      integer needed_memory
      logical
     >     trace_progress,trace_setup,prec_null
      double precision clock00,t_dum(2),ttime

C     Local
C---- 
      logical
     >     vt,no_prec,plot_true_res,
     >     is_row,is_col,is_sym,is_dia,is_grd,
     >     tstp,tetp,tptp,tptip, te,ts,ti,tp,tm
      integer maxtrc
      parameter (maxtrc=20)
      integer 
     >     its,grd,hrd,dir,adr,
     >     dirl,grdl,hrdl,adrl,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots,
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots,
     >     iter, idum1, vs,lvs
      double precision ddum

C     Parameters for stopping test
C----
      integer it_update_loc
      double precision 
     >     it_update_mul, stop_tester,stop_testee,normg1,
     >     xtx,xmx,gtg,hth, pap,
     >     rr_rr_quot,alpha

C     Vector trace
C----
      vt(idum1) = ti.and.idum1.le.maxtrc

C     Various conditions
C----
      call cg_trace_setup(
     >     tstp,tetp,tptp,tptip, te,ts,ti,tp,tm
     >     )

C     Is there a preconditioner step?
C----
      no_prec = prec_null()

C     How long is a vector?
C----
      vs = vec_inf(3)
      lvs = vec_inf(2)

C     Do we plot the true residual for comparison?
C----
      plot_true_res = nhist.gt.1 .and. lda_hist.gt.0
      if (plot_true_res) then
         nhist = 2
      else
         nhist = 2-1
      endif

      if (max_iter.eq.-1) then
         call pt0('>>>> Aborting test run <<<<$')
         return
      endif

C     Figure out how much work / buffer space is needed,
C     and allocate temporaries in this.
C     (2-step process: first time only measurement,
C     after return and external malloc alloc for real)
C----
      call set_mem_probe(test_alloc)
      call reset_allocation(work,1, need_mem)
      if (trace_progress()) then
         if (test_alloc.eq.0) then
            call pd1i('Allocating cg workspace$',need_mem)
         else
            call pd0('Measuring cg workspace$')
         endif
      endif

      if (test_alloc.eq.0) call nulv(work,need_mem_saved)

      call alloc_bicg_work(work,
     >     its,grd,hrd,dir,adr,
     >     dirl,grdl,hrdl,adrl,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots,
     >     no_prec,
     >     vec_inf,mat_con,vs,max_iter
     >     )
      if (test_alloc.eq.1) then
         need_mem = needed_memory()
         need_mem_saved = need_mem
         return
      else
         if (mem_trace_val.ge.2) call test_memory_array(work,
     >       'Memtest alloc$',1,.true.)
      endif

C     Initialization of constants
C----
      iter = 0
      it_update_mul = 1.d0
      it_update_loc = 1
      pap = 0.d0
      alpha = 1.d0
      rr_rr_quot = 0.d0

C     Initialization part
C==== 
      if (tstp) call pt0('Initial constants$')
      call bvzero(work(dir),vec_inf)
      call bvcopy_ob(work(its),vec_inf,itv)
      if (vt(1)) call dump_vector(work(its),vec_inf,'itr$',1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest init vectors$',1,.true.)

      call cg_format_setup(
     >     is_row,is_col,is_sym,is_dia,is_grd,vec_inf)

C     Calculate the initial residual
C----
      call calc_true_res(work(grd),work(tmp), work(its),rhs,
     >     vec_inf, work(dots_stack),maxdot,comm_context,
     >     matrix,mat_ptr,mat_idx,mat_con,
     >     is_row,is_col,is_sym,is_dia,is_grd,.false.,
     >     iter,lvs,ddum, ts,tp)
      if (tstp) call pt1d('True residual gtg$',ddum)
      if (vt(iter)) call dump_vector(work(grd),vec_inf,
     >     'grd$',iter+1)
      call vvcopy(work(grdl),work(grd),vs)
      if (vt(iter)) call dump_vector
     >     (work(grdl),vec_inf, 'grdl$',iter+1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after initial grad$',iter,.true.)

C     Compute quantities for stopping test: both numerical
C     stopping criterium, and number/type of dot products to be
C     done each iteration
C----
      call init_stop_tests(stop_tester,
     >     precision,stop_quant,stop_type,
     >     n_xtx_dots,n_xmx_dots,n_gg_dots,n_gh_dots,n_hh_dots,
     >     matrix,mat_ptr,mat_idx,comm_context,
     >     rhs,work(tmp), vec_inf,lvs,
     >     work(dots_stack),maxdot,
     >     grad_hist(1,2),
     >     te, plot_true_res,no_prec,
     >     tptip.or.(tp.and.trace_setup()))
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest stoptest setup$',iter,.true.)

C     Purely informative calculation of zero-th residual norm
C----
      if (ts) then
         call cddot(work(grd),work(grd),
     >        vec_inf,work(dots_stack),maxdot,1,
     >        comm_context,'rtr init$')
         call ddotv(gtg,work(dots_stack),1,comm_context)
         call addflp(2*lvs)
         if (tp) call pd1i1d('iter$',iter,'gtg:$',gtg)
      endif

C     Start bookkeeping
C----
      call zero_flops
      ttime = clock00(t_dum)

      if (tm) call test_memory_array(work,
     >    'Memtest at start iter$',iter,tstp)

C     Iteration Loop for BICG
C     
C     When we enter here, we have just computed g_{i+1} for i=0,...
C     g_1 computed initially, otherwise at the end of the loop
C==== 
 10   continue

C====
C     preconditioner solve of gradient i+1;
C     unpreconditioned, then alias with gradient
C----
      if (no_prec) then
         hrd = grd
         hrdl = grdl
         goto 11
      endif

      if (tstp) call pd0('Solve preconditioner$')
      call solve_preconditioner(work(hrd),
     >     work(grd),vec_inf,
     >     preconditioner,prec_inf, matrix,mat_ptr,mat_idx,
     >     mat_con,comm_context,
     >     work(tmp1),work(tmp2), iter+1, 0,tptip,vt(iter))
      if (vt(iter)) call dump_vector(work(hrd),
     >     vec_inf,'hrd$',iter+1)
      call solve_preconditioner(work(hrdl),
     >     work(grdl),vec_inf,
     >     preconditioner,prec_inf, matrix,mat_ptr,mat_idx,
     >     mat_con,comm_context,
     >     work(tmp1),work(tmp2), iter+1, 1,tptip,vt(iter))
      if (vt(iter)) call dump_vector(work(hrdl),
     >     vec_inf,'hrdl$',iter+1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after prec solve$',iter,.true.)
 11   continue
C================================
C     Inner products g^tg-like:
C================================

C     Set the locations in the dots_stack where various simultaneous
C     inner products are to be stored
C----
      call set_bicg_dotlocs(
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots
     >     )

C     Perform the inner products
C----
      call gtype_dots5(
     >     n_xmx_dots, work(it_update_loc),xu_dotloc, 'x-x$',xmx,
     >     n_xtx_dots, work(its),xx_dotloc, 'xx$',xtx,
     >     n_gg_dots, work(grd),gg_dotloc, 'gtg$',gtg,
     >     n_gh_dots, work(grdl),gh_dotloc,
     >        'gth$',work(gh_dots+iter),
     >     n_hh_dots, work(hrd),hh_dotloc, 'hth$',hth,
     >     vec_inf,work(dots_stack),maxdot,comm_context,
     >     vs,lvs, tstp)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after g inprods$',iter,.true.)
      x_norm = sqrt(xtx)
      alpha = work(gh_dots+iter)
      if (iter.eq.0) normg1 = sqrt(gtg)
      if (iter.gt.0) then
         rr_rr_quot = work(gh_dots+iter) / work(gh_dots+iter-1)
         if (tstp) call pd1d('Beta:$',rr_rr_quot)
      endif
      work(norms+iter) = sqrt(gtg)

C     Update stopping test
C----
      call gg_stop_tests(stop_testee,stop_tester,
     >     precision,stop_quant,stop_type,
     >     iter+1, xmx,it_update_mul,
     >     gtg,work(gh_dots+iter),hth, tetp,tstp)

C     Maybe save some results for plotting, maybe write to screen or file
C----
      if (tetp) then
         if (nhist.ge.1 .and. lda_hist.gt.iter) then
            grad_hist(iter+1,1) = sqrt(gtg)
            call pt1i1d('Iter:$',iter, '|g|:$',sqrt(gtg))
         endif
      endif

C================
C     Update search directions
C================

      if (iter.eq.0) then
         if (tptip) call pd0('Copy hrd -> dir$')
         call vvcopy(work(dir),work(hrd), vs)
         call vvcopy(work(dirl), work(hrdl), vs)
      else
         call axby(work(dir),
     >        1.d0,work(hrd),rr_rr_quot,work(dir),vs)
         call axby(work(dirl),
     >        1.d0,work(hrdl),rr_rr_quot,work(dirl),vs)
         if (mem_trace_val.ge.2) call test_memory_array(work,
     >        'Memtest after dir update$',iter,.true.)
      endif
      if (vt(iter)) then
         call dump_vector(work(dir),vec_inf,'dir$',iter+1)
         call dump_vector(work(dirl),vec_inf,'dirl$',iter+1)
      endif
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after dir update$',iter,.true.)

C================
C     End of an iteration
C================

      if (iter.eq.maxtrc) then
         ts = .false.
         call otrtxg                
      endif
      tstp = ts.and.tp

C     Return various codes if the iteration stops here.
C----
      if (tstp)
     >     call pd2d('Stop test and match$',stop_testee,stop_tester)
      if ( stop_testee.lt.stop_tester ) then
         success = 0
         goto 21
      else if (iter.ge.max_iter) then
         success = 1
         goto 21
      endif
      goto 22
 21   continue
      results(1) = success
      results(2) = iter
      results(4) = stop_testee
      results(5) = normg1
      call true_x_norm(ddum,
     >     work(tmp),work(its),rhs, vec_inf,
     >     matrix,mat_ptr,mat_idx,mat_con,
     >     is_row,is_col,is_sym,is_dia,is_grd,
     >     work(dots_stack),maxdot,comm_context,
     >     lvs,tptip)
      results(6) = ddum
      goto 20
 22   continue

C====
      iter = iter+1
      plot_true_res = plot_true_res.and.lda_hist.gt.iter
      if (tm) call test_memory_array(work,
     >     'Memory tested at iteration$',iter,tstp)
      if (tetp) call pt1i('Iteration:$',iter)
      
C================
C     Matrix times search direction
C----
      call bvcler(work(dir),vec_inf)
      if (tptip) call pd1i('Matrix times search$',iter)
      call cg_mvp2(
     >     work(adr),work(dir),vec_inf,
     >     work(adrl),work(dirl),.true.,work(tmp),
     >     is_row,is_col,is_sym,is_dia,is_grd,
     >     matrix,mat_ptr,mat_idx,mat_con,comm_context)
      if (vt(iter)) call dump_vector(work(adr),
     >     vec_inf,'adr$',iter)
      if (vt(iter)) call dump_vector(work(adrl),
     >     vec_inf,'adrl$',iter)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >     'Memtest after Ap$',iter,.true.)

C================
C     Compute pAp inner products.
C================

      call cddot(work(dirl),work(adr),vec_inf,
     >     work(dots_stack),maxdot,1,comm_context,'pAp dot$')
      call ddotv(pap,work(dots_stack),1,comm_context)
      call addflp(2*lvs)
      if (tstp) call pd1d('pap inprod$',pap)
      alpha = alpha / pap
      if (tstp) call pd1d('alpha:$',alpha)

      if (mem_trace_val.ge.2) call test_memory_array(work,
     >     'Memtest after p inprods$',iter,.true.)

C================
C     Update gradients left and right
C     this yields g_{i+1}
C================

C     Coupled two-term recurrences
C     gradient update using matrix times search direction
C----
      call axby(work(its),1.d0,work(its),
     >     -alpha,work(dir),vs)
      it_update_loc = dir
      it_update_mul = alpha
      call axby(work(grd),1.d0,work(grd),
     >     -alpha,work(adr),vs)
      call axby(work(grdl),1.d0,work(grdl),
     >     -alpha,work(adrl),vs)

      if (vt(iter)) then
         call dump_vector(work(grd),vec_inf,'grd$',iter+1)
         call dump_vector(work(grdl),vec_inf,'grdl$',iter+1)
         call dump_vector(work(its),vec_inf,'itr$',iter+1)
      endif
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after grad update$',iter,.true.)

C     Calculate the exact residual (norm) for trace

C----
      if (plot_true_res .or. ts) then
         call calc_true_res(work(tmp1),work(tmp2), work(its),rhs,
     >        vec_inf, work(dots_stack),maxdot,comm_context,
     >        matrix,mat_ptr,mat_idx,mat_con,
     >        is_row,is_col,is_sym,is_dia,is_grd,.false.,
     >        iter,lvs,ddum, ts,tp)
         grad_hist(iter+1,2) = sqrt(ddum)
         if (tstp) call pt1d('True residual gtg$',ddum)
      endif

C     Iteration Loop End for BICG
C----
      goto 10
 20   continue

      if (tstp) call pd0('Iteration loop ended$')
      results(7) = clock00(t_dum) - ttime
      call tally_flops(results(3),comm_context)

C     Copy solution back
C----
      call bvcopy_bo(itv,vec_inf,work(its))

C     Distribute the terminating information
C----
      call dpbcst(results,7,comm_context,'Iter results$')

      return
      end
C----------------------------------------------------------------
      subroutine set_bicg_dotlocs(
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots
     >     )

C     Arguments
C----
      integer
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots

      gg_dotloc  = 1
      gh_dotloc  = gg_dotloc  + n_gg_dots
      hh_dotloc  = gh_dotloc  + n_gh_dots
      xx_dotloc  = hh_dotloc  + n_hh_dots
      xu_dotloc  = xx_dotloc  + n_xtx_dots

      return
      end
C----------------------------------------------------------------
C     Initialize pointers for internal storage
C----------------------------------------------------------------
      subroutine alloc_bicg_work(work,
     >     its,grd,hrd,dir,adr,
     >     dirl,grdl,hrdl,adrl,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots,
     >     no_prec,
     >     vec_inf,mat_con,vs,max_iter
     >     )
      
C     Arguments
C---- 
      double precision work(*)
      integer
     >     its,grd,hrd,dir,adr,
     >     dirl,grdl,hrdl,adrl,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots,
     >     vec_inf(*),mat_con(*),vs, max_iter
      logical no_prec

C     Functions
C----
      integer alcate

C     Local
C----

C     Space for inner products; storage and local computation.
C----
      maxdot = max_iter+10
      dots_stack = alcate(maxdot,'Local inner products$',work)

C     Allocate cg vectors
C---- 
      norms = alcate(max_iter+1,'Gradient norms$',work)
      gh_dots = alcate(max_iter+1,'GH inprods$',work)

      its = alcate(vs,'iterates$',work)
      grd = alcate(vs,'right gradient$',work)
      if (no_prec) then
         hrd = grd
      else
         hrd = alcate(vs,'right Cinv gradient$',work)
      endif
      dir = alcate(vs,'right search direction$',work)
      adr = alcate(vs,'right A search$',work)
      tmp = alcate(vs,'cg temporary$',work)
      tmp1 = alcate(vs,'cg temporary1$',work)
      tmp2 = alcate(vs,'cg temporary2$',work)
      
C     For Bicg & squared methods, allocate left variants too
C---- 
      grdl = alcate(vs,'left gradient$',work)
      if (no_prec) then
         hrdl = grdl
      else
         hrdl = alcate(vs,'left Cinv gradient$',work)
      endif
      dirl = alcate(vs,'left search$',work)
      adrl = alcate(vs,'left A search$',work)
      

      return
      end
C----------------------------------------------------------------
      subroutine qmr_inner
     >     (matrix,
     >     mat_ptr,mat_idx,mat_con,comm_context,
     >     preconditioner,prec_inf, itv,rhs, vec_inf,
     >     success,precision, max_iter,stop_quant,stop_type,
     >     grad_hist,lda_hist,nhist,results,
     >     test_alloc,need_mem, work
     >     )
      
C     Arguments
C----
      integer lda_hist,nhist
      integer mat_ptr(*),mat_idx(*),mat_con(*),comm_context(*),
     >     prec_inf(*), vec_inf(*),
     >     success, max_iter,stop_quant,stop_type,
     >     test_alloc,need_mem,need_mem_saved
      save need_mem_saved
      double precision matrix(*),preconditioner(*), itv(*),rhs(*),
     >     grad_hist(lda_hist,nhist), precision,results(*)
      double precision work(*)
      
      double precision
     >     a_norm(1),x_norm,b_norm
      common /itrmet_r/
     >     a_norm,x_norm,b_norm

C     Inspect dynamic memory for integrity
C     0=no trace, 1=once/iteration, 2=very often
C----
      integer mem_trace_val
      common /mem_trace/mem_trace_val

C     Functions
C----
      integer needed_memory
      logical
     >     trace_progress,trace_setup,prec_null
      double precision clock00,t_dum(2),ttime

C     Local
C---- 
      logical
     >     vt,no_prec,plot_true_res,
     >     is_row,is_col,is_sym,is_dia,is_grd,
     >     tstp,tetp,tptp,tptip, te,ts,ti,tp,tm
      integer maxtrc
      parameter (maxtrc=20)
      integer 
     >     its,grd,hrd,dir,adr,
     >     dirl,grdl,hrdl,adrl,
     >     ohrd,minres_grd,mrs_wd,lsqnorms,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots,
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots,
     >     hssbrg,hssbrg_q, hess_Qrow,hess_Rcol,
     >     hesfac_ucol,
     >     hess_ncol,hess_nupdi,
     >     iter, idum1, vs,lvs
      double precision ddum

C     Parameters for stopping test
C----
      integer it_update_loc
      double precision 
     >     err_guess,
     >     it_update_mul, stop_tester,stop_testee,normg1,
     >     xtx,xmx,gtg,hth, pap,
     >     rr_rr_quot,alpha

C     Vector trace
C----
      vt(idum1) = ti.and.idum1.le.maxtrc

C     Various conditions
C----
      call cg_trace_setup(
     >     tstp,tetp,tptp,tptip, te,ts,ti,tp,tm
     >     )

C     Is there a preconditioner step?
C----
      no_prec = prec_null()

C     How long is a vector?
C----
      vs = vec_inf(3)
      lvs = vec_inf(2)

C     Do we plot the true residual for comparison?
C----
      plot_true_res = nhist.gt.2 .and. lda_hist.gt.0
      if (plot_true_res) then
         nhist = 3
      else
         nhist = 3-1
      endif

      if (max_iter.eq.-1) then
         call pt0('>>>> Aborting test run <<<<$')
         return
      endif

C     Figure out how much work / buffer space is needed,
C     and allocate temporaries in this.
C     (2-step process: first time only measurement,
C     after return and external malloc alloc for real)
C----
      call set_mem_probe(test_alloc)
      call reset_allocation(work,1, need_mem)
      if (trace_progress()) then
         if (test_alloc.eq.0) then
            call pd1i('Allocating cg workspace$',need_mem)
         else
            call pd0('Measuring cg workspace$')
         endif
      endif

      if (test_alloc.eq.0) call nulv(work,need_mem_saved)

      call alloc_qmr_work(work,
     >     its,grd,hrd,dir,adr,
     >     dirl,grdl,hrdl,adrl,
     >     ohrd,minres_grd,mrs_wd,lsqnorms,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots,
     >     hssbrg,hssbrg_q, hess_Qrow,hess_Rcol,
     >     hesfac_ucol,
     >     hess_ncol,hess_nupdi,
     >     no_prec,
     >     vec_inf,mat_con,vs,max_iter
     >     )
      if (test_alloc.eq.1) then
         need_mem = needed_memory()
         need_mem_saved = need_mem
         return
      else
         if (mem_trace_val.ge.2) call test_memory_array(work,
     >       'Memtest alloc$',1,.true.)
      endif

C     Initialization of constants
C----
      iter = 0
      it_update_mul = 1.d0
      it_update_loc = 1
      pap = 0.d0
      alpha = 1.d0
      rr_rr_quot = 0.d0

C     Initialization part
C==== 
      if (tstp) call pt0('Initial constants$')
      call bvzero(work(dir),vec_inf)
      call bvcopy_ob(work(its),vec_inf,itv)
      if (vt(1)) call dump_vector(work(its),vec_inf,'itr$',1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest init vectors$',1,.true.)

      call cg_format_setup(
     >     is_row,is_col,is_sym,is_dia,is_grd,vec_inf)

C     Calculate the initial residual
C----
      call calc_true_res(work(grd),work(tmp), work(its),rhs,
     >     vec_inf, work(dots_stack),maxdot,comm_context,
     >     matrix,mat_ptr,mat_idx,mat_con,
     >     is_row,is_col,is_sym,is_dia,is_grd,.false.,
     >     iter,lvs,ddum, ts,tp)
      if (tstp) call pt1d('True residual gtg$',ddum)
      if (vt(iter)) call dump_vector(work(grd),vec_inf,
     >     'grd$',iter+1)
      call vvcopy(work(grdl),work(grd),vs)
      if (vt(iter)) call dump_vector
     >     (work(grdl),vec_inf, 'grdl$',iter+1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after initial grad$',iter,.true.)

C     Compute quantities for stopping test: both numerical
C     stopping criterium, and number/type of dot products to be
C     done each iteration
C----
      call init_stop_tests(stop_tester,
     >     precision,stop_quant,stop_type,
     >     n_xtx_dots,n_xmx_dots,n_gg_dots,n_gh_dots,n_hh_dots,
     >     matrix,mat_ptr,mat_idx,comm_context,
     >     rhs,work(tmp), vec_inf,lvs,
     >     work(dots_stack),maxdot,
     >     grad_hist(1,3),
     >     .true., plot_true_res,no_prec,
     >     tptip.or.(tp.and.trace_setup()))
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest stoptest setup$',iter,.true.)

C     Purely informative calculation of zero-th residual norm
C----
      if (ts) then
         call cddot(work(grd),work(grd),
     >        vec_inf,work(dots_stack),maxdot,1,
     >        comm_context,'rtr init$')
         call ddotv(gtg,work(dots_stack),1,comm_context)
         call addflp(2*lvs)
         if (tp) call pd1i1d('iter$',iter,'gtg:$',gtg)
      endif

C     Start bookkeeping
C----
      call zero_flops
      ttime = clock00(t_dum)

      if (tm) call test_memory_array(work,
     >    'Memtest at start iter$',iter,tstp)

C     Iteration Loop for QMR
C     
C     When we enter here, we have just computed g_{i+1} for i=0,...
C     g_1 computed initially, otherwise at the end of the loop
C==== 
 10   continue

C====
C     preconditioner solve of gradient i+1;
C     unpreconditioned, then alias with gradient
C----
      if (no_prec) then
         hrd = grd
         hrdl = grdl
         goto 11
      endif

      call vvcopy(work(ohrd),work(hrd),vs)
      if (tstp) call pd0('Solve preconditioner$')
      call solve_preconditioner(work(hrd),
     >     work(grd),vec_inf,
     >     preconditioner,prec_inf, matrix,mat_ptr,mat_idx,
     >     mat_con,comm_context,
     >     work(tmp1),work(tmp2), iter+1, 0,tptip,vt(iter))
      if (vt(iter)) call dump_vector(work(hrd),
     >     vec_inf,'hrd$',iter+1)
      call solve_preconditioner(work(hrdl),
     >     work(grdl),vec_inf,
     >     preconditioner,prec_inf, matrix,mat_ptr,mat_idx,
     >     mat_con,comm_context,
     >     work(tmp1),work(tmp2), iter+1, 1,tptip,vt(iter))
      if (vt(iter)) call dump_vector(work(hrdl),
     >     vec_inf,'hrdl$',iter+1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after prec solve$',iter,.true.)
 11   continue
C================================
C     Inner products g^tg-like:
C================================

C     Set the locations in the dots_stack where various simultaneous
C     inner products are to be stored
C----
      call set_qmr_dotlocs(
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots
     >     )

C     Perform the inner products
C----
      call gtype_dots5(
     >     n_xmx_dots, work(it_update_loc),xu_dotloc, 'x-x$',xmx,
     >     n_xtx_dots, work(its),xx_dotloc, 'xx$',xtx,
     >     n_gg_dots, work(grd),gg_dotloc, 'gtg$',gtg,
     >     n_gh_dots, work(grdl),gh_dotloc,
     >        'gth$',work(gh_dots+iter),
     >     n_hh_dots, work(hrd),hh_dotloc, 'hth$',hth,
     >     vec_inf,work(dots_stack),maxdot,comm_context,
     >     vs,lvs, tstp)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after g inprods$',iter,.true.)
      x_norm = sqrt(xtx)
      alpha = work(gh_dots+iter)
      if (iter.eq.0) normg1 = sqrt(gtg)
         if (iter.eq.0) work(lsqnorms) = normg1
      if (iter.gt.0) then
         rr_rr_quot = work(gh_dots+iter) / work(gh_dots+iter-1)
         if (tstp) call pd1d('Beta:$',rr_rr_quot)
         work(hesfac_ucol) = rr_rr_quot
      endif
      work(norms+iter) = sqrt(gtg)

C     Update stopping test
C----
      call gg_stop_tests(stop_testee,stop_tester,
     >     precision,stop_quant,stop_type,
     >     iter+1, xmx,it_update_mul,
     >     gtg,work(gh_dots+iter),hth, tetp,tstp)

C     Maybe save some results for plotting, maybe write to screen or file
C----
      if (tetp) then
         if (nhist.ge.2 .and. lda_hist.gt.iter) then
            grad_hist(iter+1,2) = sqrt(gtg)
         endif
      endif

C====
C     Update the Hessenberg QR factorization, and update the iterate
C----
      if (iter.gt.0) then
         call hessqr(work(hssbrg_q),work(hssbrg),
     >        hess_ncol,hess_nupdi, work(norms),normg1,
     >        work(hess_Qrow),work(hess_Rcol),err_guess,
     >        iter,iter, tstp)
         work(lsqnorms+iter) = sqrt(err_guess)
         stop_testee = min(stop_testee,sqrt(err_guess))
         if (nhist.ge.1 .and. lda_hist.gt.iter)
     >        grad_hist(iter,1) = sqrt(err_guess)
         if (tetp)
     >        call pt1i1d('Iter:$',iter,'|g| (est)$',err_guess)
         if (mem_trace_val.ge.2) call test_memory_array(work,
     >        'Memtest after hess qr$',iter,.true.)
         call requpdate(work(minres_grd),'Minres grd updt$',
     >        mrs_wd,vs,lvs,iter,vec_inf,
     >        1.d0,work(ohrd),work(hess_Rcol),iter,
     >        tptip,it_update_loc)
         it_update_loc = minres_grd+it_update_loc-1
         if (vt(iter)) call dump_vector(work(it_update_loc),
     >        vec_inf,'itupd$',iter)
         it_update_mul = -work(hess_Qrow+iter-1)
         call bvaxby(work(its),vec_inf, 1.d0,work(its),
     >        it_update_mul,work(it_update_loc))
         if (vt(iter)) call dump_vector(
     >        work(its),vec_inf,'itr$',iter+1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >     'Memtest after minres update$',iter,.true.)
      endif

C================
C     Update the Hessenberg matrix
C     with the coefficients just calculated
C================

      if (iter.gt.0) call split_recur_hescol_u(work(hssbrg),
     >     work(hesfac_ucol), hess_ncol,hess_nupdi,iter+1,tstp)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after hess update$',iter,.true.)

C================
C     Update search directions
C================

      if (iter.eq.0) then
         if (tptip) call pd0('Copy hrd -> dir$')
         call vvcopy(work(dir),work(hrd), vs)
         call vvcopy(work(dirl), work(hrdl), vs)
      else
         call axby(work(dir),
     >        1.d0,work(hrd),rr_rr_quot,work(dir),vs)
         call axby(work(dirl),
     >        1.d0,work(hrdl),rr_rr_quot,work(dirl),vs)
         if (mem_trace_val.ge.2) call test_memory_array(work,
     >        'Memtest after dir update$',iter,.true.)
      endif
      if (vt(iter)) then
         call dump_vector(work(dir),vec_inf,'dir$',iter+1)
         call dump_vector(work(dirl),vec_inf,'dirl$',iter+1)
      endif
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after dir update$',iter,.true.)

C================
C     End of an iteration
C================

      if (iter.eq.maxtrc) then
         ts = .false.
         call otrtxg                
      endif
      tstp = ts.and.tp

C     Return various codes if the iteration stops here.
C----
      if (tstp)
     >     call pd2d('Stop test and match$',stop_testee,stop_tester)
      if ( stop_testee.lt.stop_tester ) then
         success = 0
         goto 21
      else if (iter.ge.max_iter) then
         success = 1
         goto 21
      endif
      goto 22
 21   continue
      results(1) = success
      results(2) = iter
      results(4) = stop_testee
      results(5) = normg1
      call true_x_norm(ddum,
     >     work(tmp),work(its),rhs, vec_inf,
     >     matrix,mat_ptr,mat_idx,mat_con,
     >     is_row,is_col,is_sym,is_dia,is_grd,
     >     work(dots_stack),maxdot,comm_context,
     >     lvs,tptip)
      results(6) = ddum
      goto 20
 22   continue

C====
      iter = iter+1
      plot_true_res = plot_true_res.and.lda_hist.gt.iter
      if (tm) call test_memory_array(work,
     >     'Memory tested at iteration$',iter,tstp)
      if (tetp) call pt1i('Iteration:$',iter)
      
C================
C     Matrix times search direction
C----
      call bvcler(work(dir),vec_inf)
      if (tptip) call pd1i('Matrix times search$',iter)
      call cg_mvp2(
     >     work(adr),work(dir),vec_inf,
     >     work(adrl),work(dirl),.true.,work(tmp),
     >     is_row,is_col,is_sym,is_dia,is_grd,
     >     matrix,mat_ptr,mat_idx,mat_con,comm_context)
      if (vt(iter)) call dump_vector(work(adr),
     >     vec_inf,'adr$',iter)
      if (vt(iter)) call dump_vector(work(adrl),
     >     vec_inf,'adrl$',iter)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >     'Memtest after Ap$',iter,.true.)

C================
C     Compute pAp inner products.
C================

      call cddot(work(dirl),work(adr),vec_inf,
     >     work(dots_stack),maxdot,1,comm_context,'pAp dot$')
      call ddotv(pap,work(dots_stack),1,comm_context)
      call addflp(2*lvs)
      if (tstp) call pd1d('pap inprod$',pap)
      alpha = alpha / pap
      if (tstp) call pd1d('alpha:$',alpha)

C     Update the Hessenberg matrix
C     with the alpha just calculated
C----
      call split_recur_hescol_d(work(hssbrg),alpha,
     >     hess_ncol,hess_nupdi,iter,tstp)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >     'Memtest after p inprods$',iter,.true.)

C================
C     Update gradients left and right
C     this yields g_{i+1}
C================

C     Save the old gradient for the minres update
C----
      if (no_prec) call vvcopy(work(ohrd),work(grd),vs)

C     Coupled two-term recurrences
C     gradient update using matrix times search direction
C----
      call axby(work(grd),1.d0,work(grd),
     >     -alpha,work(adr),vs)
      call axby(work(grdl),1.d0,work(grdl),
     >     -alpha,work(adrl),vs)

      if (vt(iter)) then
         call dump_vector(work(grd),vec_inf,'grd$',iter+1)
         call dump_vector(work(grdl),vec_inf,'grdl$',iter+1)
      endif
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after grad update$',iter,.true.)

C     Calculate the exact residual (norm) for trace
C----
      if (plot_true_res .or. ts) then
         call calc_true_res(work(tmp1),work(tmp2), work(its),rhs,
     >        vec_inf, work(dots_stack),maxdot,comm_context,
     >        matrix,mat_ptr,mat_idx,mat_con,
     >        is_row,is_col,is_sym,is_dia,is_grd,.false.,
     >        iter,lvs,ddum, ts,tp)
         grad_hist(iter+1,3) = sqrt(ddum)
         if (tstp) call pt1d('True residual gtg$',ddum)
      endif

C     Iteration Loop End for QMR
C----
      goto 10
 20   continue

      if (tstp) call pd0('Iteration loop ended$')
      results(7) = clock00(t_dum) - ttime
      call tally_flops(results(3),comm_context)

C     Copy solution back
C----
      call bvcopy_bo(itv,vec_inf,work(its))

C     Distribute the terminating information
C----
      call dpbcst(results,7,comm_context,'Iter results$')

      return
      end
C----------------------------------------------------------------
      subroutine set_qmr_dotlocs(
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots
     >     )

C     Arguments
C----
      integer
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots

      gg_dotloc  = 1
      gh_dotloc  = gg_dotloc  + n_gg_dots
      hh_dotloc  = gh_dotloc  + n_gh_dots
      xx_dotloc  = hh_dotloc  + n_hh_dots
      xu_dotloc  = xx_dotloc  + n_xtx_dots

      return
      end
C----------------------------------------------------------------
C     Initialize pointers for internal storage
C----------------------------------------------------------------
      subroutine alloc_qmr_work(work,
     >     its,grd,hrd,dir,adr,
     >     dirl,grdl,hrdl,adrl,
     >     ohrd,minres_grd,mrs_wd,lsqnorms,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots,
     >     hssbrg,hssbrg_q, hess_Qrow,hess_Rcol,
     >     hesfac_ucol,
     >     hess_ncol,hess_nupdi,
     >     no_prec,
     >     vec_inf,mat_con,vs,max_iter
     >     )
      
C     Arguments
C---- 
      double precision work(*)
      integer
     >     its,grd,hrd,dir,adr,
     >     dirl,grdl,hrdl,adrl,
     >     ohrd,minres_grd,mrs_wd,lsqnorms,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots,
     >     hssbrg,hssbrg_q, hess_Qrow,hess_Rcol,
     >     hesfac_ucol,
     >     hess_ncol,hess_nupdi,
     >     vec_inf(*),mat_con(*),vs, max_iter
      logical no_prec

C     Functions
C----
      integer alcate

C     Local
C----

C     How many upper diagonals does the Hessenberg matrix have?
C----
      hess_nupdi = 1

C     Space for inner products; storage and local computation.
C----
      maxdot = max_iter+10
      dots_stack = alcate(maxdot,'Local inner products$',work)

C     Allocate cg vectors
C---- 
      norms = alcate(max_iter+1,'Gradient norms$',work)
      gh_dots = alcate(max_iter+1,'GH inprods$',work)
      lsqnorms = alcate(max_iter+1,'LSQ Gradient norms$',work)

      its = alcate(vs,'iterates$',work)
      grd = alcate(vs,'right gradient$',work)
      if (no_prec) then
         hrd = grd
      else
         hrd = alcate(vs,'right Cinv gradient$',work)
      endif
      dir = alcate(vs,'right search direction$',work)
      adr = alcate(vs,'right A search$',work)
      hesfac_ucol = alcate(hess_nupdi,'col of U from hesfac$',work)
      tmp = alcate(vs,'cg temporary$',work)
      tmp1 = alcate(vs,'cg temporary1$',work)
      tmp2 = alcate(vs,'cg temporary2$',work)
      
C     For Bicg & squared methods, allocate left variants too
C---- 
      grdl = alcate(vs,'left gradient$',work)
      if (no_prec) then
         hrdl = grdl
      else
         hrdl = alcate(vs,'left Cinv gradient$',work)
      endif
      dirl = alcate(vs,'left search$',work)
      adrl = alcate(vs,'left A search$',work)
      
C     For minimal residual methods allocate Q and R;
C     for Bicg three vectors are enough,
C     for unsymmetric cg this can be much more.
C---- 
      hess_ncol = max_iter+2
      hssbrg    = alcate(hess_ncol*hess_ncol,
     >     'band hessenberg matrix$',work)
      hssbrg_q = alcate(hess_ncol*hess_ncol,
     >     'hessenberg matrix Q$',work)
      hess_Qrow = alcate(hess_ncol+1,
     >     'first row of hessenberg matrix$',work)
      hess_Rcol = alcate(hess_ncol+1,
     >     'current column of hessenberg R$',work)
      mrs_wd = hess_nupdi+2
      minres_grd = alcate(mrs_wd*vs,'Min res combinations$',work)
      ohrd = alcate(vs,'One old gradient$',work)


      return
      end
C----------------------------------------------------------------
      subroutine nscg_inner
     >     (matrix,
     >     mat_ptr,mat_idx,mat_con,comm_context,
     >     preconditioner,prec_inf, itv,rhs, vec_inf,
     >     success,precision, max_iter,stop_quant,stop_type,
     >     grad_hist,lda_hist,nhist,results,restart,trunc,mod_gramsch,
     >     test_alloc,need_mem, work
     >     )
      
C     Arguments
C----
      integer lda_hist,nhist
      integer mat_ptr(*),mat_idx(*),mat_con(*),comm_context(*),
     >     prec_inf(*), vec_inf(*),restart,trunc,mod_gramsch,
     >     success, max_iter,stop_quant,stop_type,
     >     test_alloc,need_mem,need_mem_saved
      save need_mem_saved
      double precision matrix(*),preconditioner(*), itv(*),rhs(*),
     >     grad_hist(lda_hist,nhist), precision,results(*)
      double precision work(*)
      
      double precision
     >     a_norm(1),x_norm,b_norm
      common /itrmet_r/
     >     a_norm,x_norm,b_norm

C     Inspect dynamic memory for integrity
C     0=no trace, 1=once/iteration, 2=very often
C----
      integer mem_trace_val
      common /mem_trace/mem_trace_val

C     Functions
C----
      integer needed_memory
      logical
     >     trace_progress,trace_setup,prec_null
      double precision clock00,t_dum(2),ttime

C     Local
C---- 
      logical
     >     vt,no_prec,plot_true_res,do_restart,
     >     is_row,is_col,is_sym,is_dia,is_grd,
     >     tstp,tetp,tptp,tptip, te,ts,ti,tp,tm
      integer maxtrc
      parameter (maxtrc=20)
      integer 
     >     its,grd,hrd,grd_wd,agr,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots, rar_dots,rark_dots,
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     rar_dotloc,n_rar_dots,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots,
     >     hssbrg,hssbrg_q, hess_Qrow,hess_Rcol,
     >     hescol,hescol_old, hescol_wd,
     >     hess_ncol,hess_nupdi,
     >     iter,r_iter,last_start,
     >     idum1,grd_sh,grd_sh_old,cur_grd,
     >     vs,lvs
      double precision ddum

C     Parameters for stopping test
C----
      integer it_update_loc
      double precision 
     >     it_update_mul, stop_tester,stop_testee,normg1,normg0,
     >     xtx,xmx,gtg,hth,
     >     rr_rr_quot,alpha

C     Vector trace
C----
      vt(idum1) = ti.and.idum1.le.maxtrc

C     Various conditions
C----
      call cg_trace_setup(
     >     tstp,tetp,tptp,tptip, te,ts,ti,tp,tm
     >     )

C     Is there a preconditioner step?
C----
      no_prec = prec_null()

C     How long is a vector?
C----
      vs = vec_inf(3)
      lvs = vec_inf(2)

C     Do we plot the true residual for comparison?
C----
      plot_true_res = nhist.gt.1 .and. lda_hist.gt.0
      if (plot_true_res) then
         nhist = 2
      else
         nhist = 2-1
      endif

      if (max_iter.eq.-1) then
         call pt0('>>>> Aborting test run <<<<$')
         return
      endif

C     Figure out how much work / buffer space is needed,
C     and allocate temporaries in this.
C     (2-step process: first time only measurement,
C     after return and external malloc alloc for real)
C----
      call set_mem_probe(test_alloc)
      call reset_allocation(work,1, need_mem)
      if (trace_progress()) then
         if (test_alloc.eq.0) then
            call pd1i('Allocating cg workspace$',need_mem)
         else
            call pd0('Measuring cg workspace$')
         endif
      endif

      if (test_alloc.eq.0) call nulv(work,need_mem_saved)

      call alloc_nscg_work(work,
     >     its,grd,hrd,grd_wd,agr,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots, rar_dots,rark_dots,
     >     hssbrg,hssbrg_q, hess_Qrow,hess_Rcol,
     >     hescol,hescol_old, hescol_wd,
     >     hess_ncol,hess_nupdi,
     >     no_prec,restart,trunc,mod_gramsch,
     >     vec_inf,mat_con,vs,max_iter
     >     )
      if (test_alloc.eq.1) then
         need_mem = needed_memory()
         need_mem_saved = need_mem
         return
      else
         if (mem_trace_val.ge.2) call test_memory_array(work,
     >       'Memtest alloc$',1,.true.)
      endif

C     Initialization of constants
C----
      iter = 0
      r_iter = 0
      last_start = 0
      do_restart = .false.
      grd_sh = 0
      grd_sh_old = 0
      it_update_mul = 1.d0
      it_update_loc = 1
      alpha = 1.d0
      rr_rr_quot = 0.d0

C     Initialization part
C==== 
      if (tstp) call pt0('Initial constants$')
      call bvcopy_ob(work(its),vec_inf,itv)
      if (vt(1)) call dump_vector(work(its),vec_inf,'itr$',1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest init vectors$',1,.true.)

      call cg_format_setup(
     >     is_row,is_col,is_sym,is_dia,is_grd,vec_inf)

C     Calculate the initial residual
C----
      call calc_true_res(work(grd),work(tmp), work(its),rhs,
     >     vec_inf, work(dots_stack),maxdot,comm_context,
     >     matrix,mat_ptr,mat_idx,mat_con,
     >     is_row,is_col,is_sym,is_dia,is_grd,.false.,
     >     r_iter,lvs,ddum, ts,tp)
      if (tstp) call pt1d('True residual gtg$',ddum)
      if (vt(r_iter)) call dump_vector(work(grd),vec_inf,
     >     'grd$',r_iter+1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after initial grad$',r_iter,.true.)

C     Compute quantities for stopping test: both numerical
C     stopping criterium, and number/type of dot products to be
C     done each iteration
C----
      call init_stop_tests(stop_tester,
     >     precision,stop_quant,stop_type,
     >     n_xtx_dots,n_xmx_dots,n_gg_dots,n_gh_dots,n_hh_dots,
     >     matrix,mat_ptr,mat_idx,comm_context,
     >     rhs,work(tmp), vec_inf,lvs,
     >     work(dots_stack),maxdot,
     >     grad_hist(1,2),
     >     te, plot_true_res,no_prec,
     >     tptip.or.(tp.and.trace_setup()))
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest stoptest setup$',r_iter,.true.)

C     Purely informative calculation of zero-th residual norm
C----
      if (ts) then
         call cddot(work(grd),work(grd),
     >        vec_inf,work(dots_stack),maxdot,1,
     >        comm_context,'rtr init$')
         call ddotv(gtg,work(dots_stack),1,comm_context)
         call addflp(2*lvs)
         if (tp) call pd1i1d('iter$',r_iter,'gtg:$',gtg)
      endif

C     Start bookkeeping
C----
      call zero_flops
      ttime = clock00(t_dum)

      if (tm) call test_memory_array(work,
     >    'Memtest at start iter$',r_iter,tstp)

C     Iteration Loop for NSCG
C     
C     When we enter here, we have just computed g_{i+1} for i=0,...
C     g_1 computed initially, otherwise at the end of the loop
C==== 
 10   continue

C====
C     preconditioner solve of gradient i+1;
C     unpreconditioned, then alias with gradient
C----
      if (no_prec) then
         call bvcopy(work(hrd+grd_sh),vec_inf,work(grd+grd_sh))
         goto 11
      endif

      if (tstp) call pd0('Solve preconditioner$')
      call solve_preconditioner(work(hrd+grd_sh),
     >     work(grd+grd_sh),vec_inf,
     >     preconditioner,prec_inf, matrix,mat_ptr,mat_idx,
     >     mat_con,comm_context,
     >     work(tmp1),work(tmp2), r_iter+1, 0,tptip,vt(r_iter))
      if (vt(r_iter)) call dump_vector(work(hrd+grd_sh),
     >     vec_inf,'hrd$',r_iter+1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after prec solve$',r_iter,.true.)
 11   continue
C====
C     Matrix times gradient
C----
      if (tptip) call pd1i('Matrix times hrd$',r_iter+1)
      call cg_mvp1(
     >     work(agr+grd_sh),work(hrd+grd_sh),vec_inf,
     >     is_row,is_col,is_sym,is_dia,is_grd,.false.,
     >     matrix,mat_ptr,mat_idx,mat_con,comm_context)
      if (vt(r_iter)) call dump_vector(work(agr+grd_sh),
     >     vec_inf,'agr$',r_iter+1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >     'Memtest after Ar$',r_iter,.true.)

C================================
C     Inner products g^tg-like:
C================================

C     Set the locations in the dots_stack where various simultaneous
C     inner products are to be stored
C----
      call set_nscg_dotlocs(iter+1,trunc,
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     rar_dotloc,n_rar_dots,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots
     >     )

C     Perform the inner products
C----
      call gtype_dots5(
     >     n_xmx_dots, work(it_update_loc),xu_dotloc, 'x-x$',xmx,
     >     n_xtx_dots, work(its+grd_sh),xx_dotloc, 'xx$',xtx,
     >     n_gg_dots, work(grd+grd_sh),gg_dotloc, 'gtg$',gtg,
     >     n_gh_dots, work(grd+grd_sh),gh_dotloc,
     >        'gth$',work(gh_dots+r_iter),
     >     n_hh_dots, work(hrd+grd_sh),hh_dotloc, 'hth$',hth,
     >     vec_inf,work(dots_stack),maxdot,comm_context,
     >     vs,lvs, tstp)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after g inprods$',r_iter,.true.)

C     Orthogonalise the new vector
C----
      call nscg_vec_orthize(
     >     work(grd),work(hrd),work(agr+grd_sh), grd_wd,
     >     work(its),work(hrd+grd_sh),
     >     n_rar_dots,rar_dotloc,
     >     'rark$',work(rark_dots),work(hescol),
     >     work(tmp1),work(tmp2),
     >     work(gh_dots),mod_gramsch,
     >     vec_inf,work(dots_stack),maxdot,comm_context,
     >     vs,lvs, iter+1,last_start,cur_grd,vt(r_iter),tstp)
      x_norm = sqrt(xtx)
      alpha = work(gh_dots+r_iter)
      if (iter.eq.0) normg1 = sqrt(gtg)
      if (r_iter.eq.0) normg0 = sqrt(gtg)
      if (iter.gt.0) then
      endif
      work(norms+r_iter) = sqrt(gtg)

C     Update stopping test
C----
      call gg_stop_tests(stop_testee,stop_tester,
     >     precision,stop_quant,stop_type,
     >     r_iter+1, xmx,it_update_mul,
     >     gtg,work(gh_dots+r_iter),hth, tetp,tstp)

C     Maybe save some results for plotting, maybe write to screen or file
C----
      if (tetp) then
         if (nhist.ge.1 .and. lda_hist.gt.r_iter) then
            grad_hist(r_iter+1,1) = sqrt(gtg)
            call pt1i1d('Iter:$',r_iter, '|g|:$',sqrt(gtg))
         endif
      endif

C================
C     Update the Hessenberg matrix
C     with the coefficients just calculated
C================

      call vvcopy(work(hescol_old),work(hescol),3)
      call full_recur_hescol(work(hescol),
     >     work(rark_dots),n_rar_dots,work(gh_dots),
     >     work(hssbrg), r_iter+1,iter+1,
     >     hess_nupdi+2,hess_ncol, tstp)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after hess update$',r_iter,.true.)

C================
C     End of an iteration
C================

      if (r_iter.eq.maxtrc) then
         ts = .false.
         call otrtxg                
      endif
      tstp = ts.and.tp

C     Return various codes if the iteration stops here.
C----
      if (tstp)
     >     call pd2d('Stop test and match$',stop_testee,stop_tester)
      if ( stop_testee.lt.stop_tester ) then
         success = 0
         goto 21
      else if (r_iter.ge.max_iter) then
         success = 1
         goto 21
      endif
      goto 22
 21   continue
      results(1) = success
      results(2) = r_iter
      results(4) = stop_testee
      results(5) = normg0
      call true_x_norm(ddum,
     >     work(tmp),work(its+grd_sh),rhs, vec_inf,
     >     matrix,mat_ptr,mat_idx,mat_con,
     >     is_row,is_col,is_sym,is_dia,is_grd,
     >     work(dots_stack),maxdot,comm_context,
     >     lvs,tptip)
      results(6) = ddum
      goto 20
 22   continue

C====
      iter = iter+1
      r_iter = r_iter+1
      do_restart = restart.gt.0 .and. mod(iter,max(restart,1)).eq.0
      plot_true_res = plot_true_res.and.lda_hist.gt.r_iter
      if (tm) call test_memory_array(work,
     >     'Memory tested at iteration$',r_iter,tstp)
      if (tetp) call pt1i('Iteration:$',r_iter)
      
C================
C     Update gradients left and right
C     this yields g_{i+1}
C================

      if (vt(r_iter)) then
         call dump_vector(work(grd+grd_sh),vec_inf,'grd$',r_iter+1)
         call dump_vector(work(its+grd_sh),vec_inf,'itr$',r_iter+1)
      endif
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after grad update$',r_iter,.true.)

C     Calculate the exact residual (norm) for trace

C     or periodical restart
C----
      if (plot_true_res .or. ts .or. do_restart) then
         call calc_true_res(work(tmp1),work(tmp2), work(its+grd_sh),rhs,
     >        vec_inf, work(dots_stack),maxdot,comm_context,
     >        matrix,mat_ptr,mat_idx,mat_con,
     >        is_row,is_col,is_sym,is_dia,is_grd,.false.,
     >        r_iter,lvs,ddum, ts,tp)
         grad_hist(r_iter+1,2) = sqrt(ddum)
         if (tstp) call pt1d('True residual gtg$',ddum)
      endif

      if (do_restart) then
         if (tptp.or.tptip)
     >        call pt1i('Restart process at iter$',r_iter)
         if (grd_sh.ne.0) then
            call vvcopy(work(its),work(its+cur_grd-1),vs)
            call vvcopy(work(grd),work(grd+cur_grd-1),vs)
         endif
         last_start = last_start+iter
         iter = 0
      endif

      grd_sh_old = grd_sh
      grd_sh = mod(iter,grd_wd)*vs

C     Iteration Loop End for NSCG
C----
      goto 10
 20   continue

      if (tstp) call pd0('Iteration loop ended$')
      results(7) = clock00(t_dum) - ttime
      call tally_flops(results(3),comm_context)

C     Copy solution back
C----
      call bvcopy_bo(itv,vec_inf,work(its+grd_sh))

C     Distribute the terminating information
C----
      call dpbcst(results,7,comm_context,'Iter results$')

      return
      end
C----------------------------------------------------------------
      subroutine set_nscg_dotlocs(
     >     iter,trunc,
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     rar_dotloc,n_rar_dots,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots
     >     )

C     Arguments
C----
      integer
     >     iter,trunc,
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     rar_dotloc,n_rar_dots,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots

      gg_dotloc  = 1
      gh_dotloc  = gg_dotloc  + n_gg_dots
      hh_dotloc  = gh_dotloc  + n_gh_dots
      xx_dotloc  = hh_dotloc  + n_hh_dots
      xu_dotloc  = xx_dotloc  + n_xtx_dots
      rar_dotloc = xu_dotloc + 1
      n_rar_dots = max(1,min(iter,trunc))

      return
      end
C----------------------------------------------------------------
C     Initialize pointers for internal storage
C----------------------------------------------------------------
      subroutine alloc_nscg_work(work,
     >     its,grd,hrd,grd_wd,agr,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots, rar_dots,rark_dots,
     >     hssbrg,hssbrg_q, hess_Qrow,hess_Rcol,
     >     hescol,hescol_old, hescol_wd,
     >     hess_ncol,hess_nupdi,
     >     no_prec,restart,trunc,mod_gramsch,
     >     vec_inf,mat_con,vs,max_iter
     >     )
      
C     Arguments
C---- 
      double precision work(*)
      integer
     >     its,grd,hrd,grd_wd,agr,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots, rar_dots,rark_dots,
     >     hssbrg,hssbrg_q, hess_Qrow,hess_Rcol,
     >     hescol,hescol_old, hescol_wd,
     >     hess_ncol,hess_nupdi,
     >     vec_inf(*),mat_con(*),restart,trunc,mod_gramsch,vs, max_iter
      logical no_prec

C     Functions
C----
      integer alcate

C     Local
C----

C     How many upper diagonals does the Hessenberg matrix have?
C----
      hess_nupdi = max(1,trunc-2)

C     Space for inner products; storage and local computation.
C----
      maxdot = max_iter+10
      dots_stack = alcate(maxdot,'Local inner products$',work)

C     Allocate cg vectors
C---- 
      norms = alcate(max_iter+1,'Gradient norms$',work)
      gh_dots = alcate(max_iter+1,'GH inprods$',work)

      grd_wd = min(trunc+1,max_iter+1)
      its = alcate(grd_wd*vs,'iterates$',work)
      grd = alcate(grd_wd*vs,'right gradient$',work)
      agr = alcate(grd_wd*vs,'right A grad$',work)
      hrd = alcate(grd_wd*vs,'right Cinv gradient$',work)
      hescol = alcate(hess_nupdi+2,'col of U of hes$',work)
      tmp = alcate(vs,'cg temporary$',work)
      tmp1 = alcate(vs,'cg temporary1$',work)
      tmp2 = alcate(vs,'cg temporary2$',work)
      
C     For Bicg & squared methods, allocate left variants too
C---- 
      
C     Save old inner products
C---- 
      rar_dots = alcate(max_iter+1,'vn A vn dots$',work)
      rark_dots = alcate(max_iter+1,'vn A vk dots$',work)

C     For minimal residual methods allocate Q and R;
C     for Bicg three vectors are enough,
C     for unsymmetric cg this can be much more.
C---- 
      hess_ncol = max_iter+2
      hssbrg    = alcate(hess_ncol*hess_ncol,
     >     'band hessenberg matrix$',work)
      hssbrg_q = alcate(hess_ncol*hess_ncol,
     >     'hessenberg matrix Q$',work)
      hess_Qrow = alcate(hess_ncol+1,
     >     'first row of hessenberg matrix$',work)
      hess_Rcol = alcate(hess_ncol+1,
     >     'current column of hessenberg R$',work)


      return
      end
C----------------------------------------------------------------
C     Orthogonalize a vector against a series of other vectors.
C----------------------------------------------------------------
      subroutine nscg_vec_orthize(
     >     seq,oseq,newv,wid, aseq,anewv, n6,p6,
     >     s6,v6,vv6,vv6t, tmp, pap6,mgs,
     >     vec_inf,dots,maxdot,comm_context,
     >     vs,lvs, iter,last_start,tarloc,dump,trace
     >     )

C     Arguments
C----
      integer
     >     n6, wid,mgs, p6,
     >     vec_inf(*),vs,lvs, iter,last_start,tarloc,
     >     maxdot,comm_context(*)
      double precision
     >     seq(vs,*),oseq(vs,*),newv(*),aseq(vs,*),anewv(*),
     >     v6(*),vv6(*),vv6t(*),dots(*), pap6(*),tmp(*)
      character*(*) s6
      logical dump,trace

C     Local
C----
      integer cnt,loc,pass,dot_bot,idum
      character*85 nam
      double precision sum

      if (trace) call pd1i1i('Orth: @iter$',iter,'#vav dots=$',n6)
      call nulv(vv6,n6+1)
      dot_bot = 0*p6+1
      pass = 1
      tarloc = 1
 100  continue
      do 10 cnt=1,n6
         loc = 1+mod(iter-cnt,wid)
         if (pass.eq.1) then
            call cddot(oseq(1,loc),newv,vec_inf,
     >           dots,maxdot,dot_bot+cnt-1,comm_context,s6)
         else
            call cddot(oseq(1,loc),seq(1,tarloc),vec_inf,
     >           dots,maxdot,dot_bot+cnt-1,comm_context,s6)
         endif
 10   continue

      if (n6.gt.0) call ddotvs(v6,n6,dots,dot_bot,comm_context)
      if (n6.gt.0.and.trace) call pdar(s6,v6,n6)
      sum = 0.d0
      do 30 cnt=1,n6
         vv6t(cnt+1) = v6(cnt) / pap6(last_start+iter+1-cnt)
         vv6(cnt+1) = vv6(cnt+1) + vv6t(cnt+1)
         sum = sum+vv6t(cnt+1)
 30   continue
      vv6t(1) = -sum
      vv6(1) = vv6(1)+vv6t(1)
      if (trace.and.pass.eq.1) call pdar('MGS0 hescol$',vv6,n6+1)
      if (trace.and.pass.gt.1) call pdar('MGS0 hescol upd$',vv6t,n6+1)
      if (mgs.eq.0) then
         nam = 'grd$'
         call requpdate(seq, 'Update gradient (ful rec)$',
     >        wid,vs,lvs,iter+1,vec_inf,
     >        1.d0,newv,vv6,n6+1,trace,tarloc)
         call requpdate(aseq, 'Update iterate (ful rec)$',
     >        wid,vs,lvs,iter+1,vec_inf,
     >        1.d0,anewv,vv6,n6+1,trace,idum)
      else if (pass.eq.1) then
         call vvcopy(tmp,newv,vs)
         nam = 'grd$'
         call requpdate(seq,
     >        'Update gradient (ful rec)$',
     >        wid,vs,lvs,iter+1,vec_inf,
     >        1.d0,tmp,vv6,n6+1,trace,tarloc)
      else
         nam = 'grdp$'
         call requpdate(seq,
     >        'Update gradient (ful rec)$',
     >        wid,vs,lvs,iter+1,vec_inf,
     >        1.d0,tmp,vv6,n6+1,trace,tarloc)
      endif
      if (dump) call dump_vector(seq(1,1+(tarloc-1)/vs),
     >     vec_inf,nam,iter+1+last_start)
      if (mgs.ne.0 .and. pass.eq.1) tarloc = 1+(tarloc-1)/vs
      if (mgs.gt.0 .and. pass.eq.1) then
         pass = 2
         goto 100
      endif

      call addflp(pass*n6*lvs)

      return
      end
C----------------------------------------------------------------
      subroutine gmres_inner
     >     (matrix,
     >     mat_ptr,mat_idx,mat_con,comm_context,
     >     preconditioner,prec_inf, itv,rhs, vec_inf,
     >     success,precision, max_iter,stop_quant,stop_type,
     >     grad_hist,lda_hist,nhist,results,restart,trunc,mod_gramsch,
     >     test_alloc,need_mem, work
     >     )
      
C     Arguments
C----
      integer lda_hist,nhist
      integer mat_ptr(*),mat_idx(*),mat_con(*),comm_context(*),
     >     prec_inf(*), vec_inf(*),restart,trunc,mod_gramsch,
     >     success, max_iter,stop_quant,stop_type,
     >     test_alloc,need_mem,need_mem_saved
      save need_mem_saved
      double precision matrix(*),preconditioner(*), itv(*),rhs(*),
     >     grad_hist(lda_hist,nhist), precision,results(*)
      double precision work(*)
      
      double precision
     >     a_norm(1),x_norm,b_norm
      common /itrmet_r/
     >     a_norm,x_norm,b_norm

C     Inspect dynamic memory for integrity
C     0=no trace, 1=once/iteration, 2=very often
C----
      integer mem_trace_val
      common /mem_trace/mem_trace_val

C     Functions
C----
      integer needed_memory
      logical
     >     trace_progress,trace_setup,prec_null
      double precision clock00,t_dum(2),ttime

C     Local
C---- 
      logical
     >     vt,no_prec,plot_true_res,do_restart,
     >     is_row,is_col,is_sym,is_dia,is_grd,
     >     tstp,tetp,tptp,tptip, te,ts,ti,tp,tm
      integer maxtrc
      parameter (maxtrc=20)
      integer 
     >     its,grd,hrd,grd_wd,agr,
     >     ohrd,minres_grd,mrs_wd,lsqnorms,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots, rar_dots,rark_dots,
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     rar_dotloc,n_rar_dots,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots,
     >     hssbrg,hssbrg_q, hess_Qrow,hess_Rcol,
     >     hescol,hescol_old, hescol_wd,
     >     hess_ncol,hess_nupdi,
     >     iter,r_iter,last_start,
     >     idum1,grd_sh,grd_sh_old,cur_grd,
     >     vs,lvs
      double precision ddum

C     Parameters for stopping test
C----
      integer it_update_loc
      double precision 
     >     err_guess,
     >     it_update_mul, stop_tester,stop_testee,normg1,normg0,
     >     xtx,xmx,gtg,hth,
     >     rr_rr_quot,alpha

C     Vector trace
C----
      vt(idum1) = ti.and.idum1.le.maxtrc

C     Various conditions
C----
      call cg_trace_setup(
     >     tstp,tetp,tptp,tptip, te,ts,ti,tp,tm
     >     )

C     Is there a preconditioner step?
C----
      no_prec = prec_null()

C     How long is a vector?
C----
      vs = vec_inf(3)
      lvs = vec_inf(2)

C     Do we plot the true residual for comparison?
C----
      plot_true_res = nhist.gt.2 .and. lda_hist.gt.0
      if (plot_true_res) then
         nhist = 3
      else
         nhist = 3-1
      endif

      if (max_iter.eq.-1) then
         call pt0('>>>> Aborting test run <<<<$')
         return
      endif

C     Figure out how much work / buffer space is needed,
C     and allocate temporaries in this.
C     (2-step process: first time only measurement,
C     after return and external malloc alloc for real)
C----
      call set_mem_probe(test_alloc)
      call reset_allocation(work,1, need_mem)
      if (trace_progress()) then
         if (test_alloc.eq.0) then
            call pd1i('Allocating cg workspace$',need_mem)
         else
            call pd0('Measuring cg workspace$')
         endif
      endif

      if (test_alloc.eq.0) call nulv(work,need_mem_saved)

      call alloc_gmres_work(work,
     >     its,grd,hrd,grd_wd,agr,
     >     ohrd,minres_grd,mrs_wd,lsqnorms,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots, rar_dots,rark_dots,
     >     hssbrg,hssbrg_q, hess_Qrow,hess_Rcol,
     >     hescol,hescol_old, hescol_wd,
     >     hess_ncol,hess_nupdi,
     >     no_prec,restart,trunc,mod_gramsch,
     >     vec_inf,mat_con,vs,max_iter
     >     )
      if (test_alloc.eq.1) then
         need_mem = needed_memory()
         need_mem_saved = need_mem
         return
      else
         if (mem_trace_val.ge.2) call test_memory_array(work,
     >       'Memtest alloc$',1,.true.)
      endif

C     Initialization of constants
C----
      iter = 0
      r_iter = 0
      last_start = 0
      do_restart = .false.
      grd_sh = 0
      grd_sh_old = 0
      it_update_mul = 1.d0
      it_update_loc = 1
      alpha = 1.d0
      rr_rr_quot = 0.d0

C     Initialization part
C==== 
      if (tstp) call pt0('Initial constants$')
      call bvcopy_ob(work(its),vec_inf,itv)
      if (vt(1)) call dump_vector(work(its),vec_inf,'itr$',1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest init vectors$',1,.true.)

      call cg_format_setup(
     >     is_row,is_col,is_sym,is_dia,is_grd,vec_inf)

C     Calculate the initial residual
C----
      call calc_true_res(work(grd),work(tmp), work(its),rhs,
     >     vec_inf, work(dots_stack),maxdot,comm_context,
     >     matrix,mat_ptr,mat_idx,mat_con,
     >     is_row,is_col,is_sym,is_dia,is_grd,.false.,
     >     r_iter,lvs,ddum, ts,tp)
      if (tstp) call pt1d('True residual gtg$',ddum)
      if (vt(r_iter)) call dump_vector(work(grd),vec_inf,
     >     'grd$',r_iter+1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after initial grad$',r_iter,.true.)

C     Compute quantities for stopping test: both numerical
C     stopping criterium, and number/type of dot products to be
C     done each iteration
C----
      call init_stop_tests(stop_tester,
     >     precision,stop_quant,stop_type,
     >     n_xtx_dots,n_xmx_dots,n_gg_dots,n_gh_dots,n_hh_dots,
     >     matrix,mat_ptr,mat_idx,comm_context,
     >     rhs,work(tmp), vec_inf,lvs,
     >     work(dots_stack),maxdot,
     >     grad_hist(1,3),
     >     .true., plot_true_res,no_prec,
     >     tptip.or.(tp.and.trace_setup()))
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest stoptest setup$',r_iter,.true.)

C     Purely informative calculation of zero-th residual norm
C----
      if (ts) then
         call cddot(work(grd),work(grd),
     >        vec_inf,work(dots_stack),maxdot,1,
     >        comm_context,'rtr init$')
         call ddotv(gtg,work(dots_stack),1,comm_context)
         call addflp(2*lvs)
         if (tp) call pd1i1d('iter$',r_iter,'gtg:$',gtg)
      endif

C     Start bookkeeping
C----
      call zero_flops
      ttime = clock00(t_dum)

      if (tm) call test_memory_array(work,
     >    'Memtest at start iter$',r_iter,tstp)

C     Iteration Loop for GMRES
C     
C     When we enter here, we have just computed g_{i+1} for i=0,...
C     g_1 computed initially, otherwise at the end of the loop
C==== 
 10   continue

C====
C     preconditioner solve of gradient i+1;
C     unpreconditioned, then alias with gradient
C----
      if (no_prec) then
         call bvcopy(work(hrd+grd_sh),vec_inf,work(grd+grd_sh))
         goto 11
      endif

      if (tstp) call pd0('Solve preconditioner$')
      call solve_preconditioner(work(hrd+grd_sh),
     >     work(grd+grd_sh),vec_inf,
     >     preconditioner,prec_inf, matrix,mat_ptr,mat_idx,
     >     mat_con,comm_context,
     >     work(tmp1),work(tmp2), r_iter+1, 0,tptip,vt(r_iter))
      if (vt(r_iter)) call dump_vector(work(hrd+grd_sh),
     >     vec_inf,'hrd$',r_iter+1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after prec solve$',r_iter,.true.)
 11   continue
C====
C     Matrix times gradient
C----
      if (tptip) call pd1i('Matrix times hrd$',r_iter+1)
      call cg_mvp1(
     >     work(agr+grd_sh),work(hrd+grd_sh),vec_inf,
     >     is_row,is_col,is_sym,is_dia,is_grd,.false.,
     >     matrix,mat_ptr,mat_idx,mat_con,comm_context)
      if (vt(r_iter)) call dump_vector(work(agr+grd_sh),
     >     vec_inf,'agr$',r_iter+1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >     'Memtest after Ar$',r_iter,.true.)

C================================
C     Inner products g^tg-like:
C================================

C     Set the locations in the dots_stack where various simultaneous
C     inner products are to be stored
C----
      call set_gmres_dotlocs(iter+1,trunc,
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     rar_dotloc,n_rar_dots,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots
     >     )

C     Perform the inner products
C----
      call gtype_dots5(
     >     n_xmx_dots, work(it_update_loc),xu_dotloc, 'x-x$',xmx,
     >     n_xtx_dots, work(its),xx_dotloc, 'xx$',xtx,
     >     n_gg_dots, work(grd+grd_sh),gg_dotloc, 'gtg$',gtg,
     >     n_gh_dots, work(grd+grd_sh),gh_dotloc,
     >        'gth$',work(gh_dots+r_iter),
     >     n_hh_dots, work(hrd+grd_sh),hh_dotloc, 'hth$',hth,
     >     vec_inf,work(dots_stack),maxdot,comm_context,
     >     vs,lvs, tstp)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after g inprods$',r_iter,.true.)

C     Orthogonalise the new vector
C----
      call gmres_vec_orthize(
     >     work(grd),work(hrd),work(agr+grd_sh), grd_wd,
     >     n_rar_dots,rar_dotloc,
     >     'rark$',work(rark_dots),work(hescol),
     >     work(tmp1),work(tmp2),
     >     work(gh_dots),mod_gramsch,
     >     vec_inf,work(dots_stack),maxdot,comm_context,
     >     vs,lvs, iter+1,last_start,cur_grd,vt(r_iter),tstp)
      x_norm = sqrt(xtx)
      alpha = work(gh_dots+r_iter)
      if (iter.eq.0) normg1 = sqrt(gtg)
      if (r_iter.eq.0) normg0 = sqrt(gtg)
         if (r_iter.eq.0) work(lsqnorms) = normg1
      if (iter.gt.0) then
      endif
      work(norms+r_iter) = sqrt(gtg)

C     Update stopping test
C----
      call gg_stop_tests(stop_testee,stop_tester,
     >     precision,stop_quant,stop_type,
     >     r_iter+1, xmx,it_update_mul,
     >     gtg,work(gh_dots+r_iter),hth, tetp,tstp)

C     Maybe save some results for plotting, maybe write to screen or file
C----
      if (tetp) then
         if (nhist.ge.2 .and. lda_hist.gt.r_iter) then
            grad_hist(r_iter+1,2) = sqrt(gtg)
         endif
      endif

C====
C     Update the Hessenberg QR factorization, and update the iterate
C----
      if (iter.gt.0) then
         call hessqr(work(hssbrg_q),work(hssbrg),
     >        hess_ncol,hess_nupdi, work(norms),normg1,
     >        work(hess_Qrow),work(hess_Rcol),err_guess,
     >        iter,r_iter, tstp)
         work(lsqnorms+r_iter) = sqrt(err_guess)
         stop_testee = min(stop_testee,sqrt(err_guess))
         if (nhist.ge.1 .and. lda_hist.gt.r_iter)
     >        grad_hist(r_iter,1) = sqrt(err_guess)
         if (tetp)
     >        call pt1i1d('Iter:$',r_iter,'|g| (est)$',err_guess)
         if (mem_trace_val.ge.2) call test_memory_array(work,
     >        'Memtest after hess qr$',r_iter,.true.)
         call requpdate(work(minres_grd),'Minres grd updt$',
     >        mrs_wd,vs,lvs,iter,vec_inf,
     >        1.d0,work(hrd+grd_sh_old),work(hess_Rcol),iter,
     >        tptip,it_update_loc)
         it_update_loc = minres_grd+it_update_loc-1
         if (vt(iter)) call dump_vector(work(it_update_loc),
     >        vec_inf,'itupd$',r_iter)
         it_update_mul = -work(hess_Qrow+iter-1)
         call bvaxby(work(its),vec_inf, 1.d0,work(its),
     >        it_update_mul,work(it_update_loc))
         if (vt(iter)) call dump_vector(
     >        work(its),vec_inf,'itr$',r_iter+1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >     'Memtest after minres update$',r_iter,.true.)
      endif

C================
C     Update the Hessenberg matrix
C     with the coefficients just calculated
C================

      call vvcopy(work(hescol_old),work(hescol),3)
      call full_recur_hescol(work(hescol),
     >     work(rark_dots),n_rar_dots,work(gh_dots),
     >     work(hssbrg), r_iter+1,iter+1,
     >     hess_nupdi+2,hess_ncol, tstp)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after hess update$',r_iter,.true.)

C================
C     End of an iteration
C================

      if (r_iter.eq.maxtrc) then
         ts = .false.
         call otrtxg                
      endif
      tstp = ts.and.tp

C     Return various codes if the iteration stops here.
C----
      if (tstp)
     >     call pd2d('Stop test and match$',stop_testee,stop_tester)
      if ( stop_testee.lt.stop_tester ) then
         success = 0
         goto 21
      else if (r_iter.ge.max_iter) then
         success = 1
         goto 21
      endif
      goto 22
 21   continue
      results(1) = success
      results(2) = r_iter
      results(4) = stop_testee
      results(5) = normg0
      call true_x_norm(ddum,
     >     work(tmp),work(its),rhs, vec_inf,
     >     matrix,mat_ptr,mat_idx,mat_con,
     >     is_row,is_col,is_sym,is_dia,is_grd,
     >     work(dots_stack),maxdot,comm_context,
     >     lvs,tptip)
      results(6) = ddum
      goto 20
 22   continue

C====
      iter = iter+1
      r_iter = r_iter+1
      do_restart = restart.gt.0 .and. mod(iter,max(restart,1)).eq.0
      plot_true_res = plot_true_res.and.lda_hist.gt.r_iter
      if (tm) call test_memory_array(work,
     >     'Memory tested at iteration$',r_iter,tstp)
      if (tetp) call pt1i('Iteration:$',r_iter)
      
C================
C     Update gradients left and right
C     this yields g_{i+1}
C================

C     Save the old gradient for the minres update
C----

      if (vt(r_iter)) then
         call dump_vector(work(grd+grd_sh),vec_inf,'grd$',r_iter+1)
      endif
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after grad update$',r_iter,.true.)

C     Calculate the exact residual (norm) for trace
C     or periodical restart
C----
      if (plot_true_res .or. ts .or. do_restart) then
         call calc_true_res(work(tmp1),work(tmp2), work(its),rhs,
     >        vec_inf, work(dots_stack),maxdot,comm_context,
     >        matrix,mat_ptr,mat_idx,mat_con,
     >        is_row,is_col,is_sym,is_dia,is_grd,.false.,
     >        r_iter,lvs,ddum, ts,tp)
         grad_hist(r_iter+1,3) = sqrt(ddum)
         if (tstp) call pt1d('True residual gtg$',ddum)
      endif

      if (do_restart) then
         if (tptp.or.tptip)
     >        call pt1i('Restart process at iter$',r_iter)
         call vvcopy(work(grd),work(tmp1),vs)
         last_start = last_start+iter
         iter = 0
      endif

      grd_sh_old = grd_sh
      grd_sh = mod(iter,grd_wd)*vs

C     Iteration Loop End for GMRES
C----
      goto 10
 20   continue

      if (tstp) call pd0('Iteration loop ended$')
      results(7) = clock00(t_dum) - ttime
      call tally_flops(results(3),comm_context)

C     Copy solution back
C----
      call bvcopy_bo(itv,vec_inf,work(its))

C     Distribute the terminating information
C----
      call dpbcst(results,7,comm_context,'Iter results$')

      return
      end
C----------------------------------------------------------------
      subroutine set_gmres_dotlocs(
     >     iter,trunc,
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     rar_dotloc,n_rar_dots,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots
     >     )

C     Arguments
C----
      integer
     >     iter,trunc,
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     rar_dotloc,n_rar_dots,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots

      gg_dotloc  = 1
      gh_dotloc  = gg_dotloc  + n_gg_dots
      hh_dotloc  = gh_dotloc  + n_gh_dots
      xx_dotloc  = hh_dotloc  + n_hh_dots
      xu_dotloc  = xx_dotloc  + n_xtx_dots
      rar_dotloc = xu_dotloc + 1
      n_rar_dots = max(1,min(iter,trunc))

      return
      end
C----------------------------------------------------------------
C     Initialize pointers for internal storage
C----------------------------------------------------------------
      subroutine alloc_gmres_work(work,
     >     its,grd,hrd,grd_wd,agr,
     >     ohrd,minres_grd,mrs_wd,lsqnorms,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots, rar_dots,rark_dots,
     >     hssbrg,hssbrg_q, hess_Qrow,hess_Rcol,
     >     hescol,hescol_old, hescol_wd,
     >     hess_ncol,hess_nupdi,
     >     no_prec,restart,trunc,mod_gramsch,
     >     vec_inf,mat_con,vs,max_iter
     >     )
      
C     Arguments
C---- 
      double precision work(*)
      integer
     >     its,grd,hrd,grd_wd,agr,
     >     ohrd,minres_grd,mrs_wd,lsqnorms,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots, rar_dots,rark_dots,
     >     hssbrg,hssbrg_q, hess_Qrow,hess_Rcol,
     >     hescol,hescol_old, hescol_wd,
     >     hess_ncol,hess_nupdi,
     >     vec_inf(*),mat_con(*),restart,trunc,mod_gramsch,vs, max_iter
      logical no_prec

C     Functions
C----
      integer alcate

C     Local
C----

C     How many upper diagonals does the Hessenberg matrix have?
C----
      hess_nupdi = max(1,trunc-2)

C     Space for inner products; storage and local computation.
C----
      maxdot = max_iter+10
      dots_stack = alcate(maxdot,'Local inner products$',work)

C     Allocate cg vectors
C---- 
      norms = alcate(max_iter+1,'Gradient norms$',work)
      gh_dots = alcate(max_iter+1,'GH inprods$',work)
      lsqnorms = alcate(max_iter+1,'LSQ Gradient norms$',work)

      grd_wd = min(trunc+1,max_iter+1)
      its = alcate(vs,'iterates$',work)
      grd = alcate(grd_wd*vs,'right gradient$',work)
      agr = alcate(grd_wd*vs,'right A grad$',work)
      hrd = alcate(grd_wd*vs,'right Cinv gradient$',work)
      hescol = alcate(hess_nupdi+2,'col of U of hes$',work)
      tmp = alcate(vs,'cg temporary$',work)
      tmp1 = alcate(vs,'cg temporary1$',work)
      tmp2 = alcate(vs,'cg temporary2$',work)
      
C     For Bicg & squared methods, allocate left variants too
C---- 
      
C     Save old inner products
C---- 
      rar_dots = alcate(max_iter+1,'vn A vn dots$',work)
      rark_dots = alcate(max_iter+1,'vn A vk dots$',work)

C     For minimal residual methods allocate Q and R;
C     for Bicg three vectors are enough,
C     for unsymmetric cg this can be much more.
C---- 
      hess_ncol = max_iter+2
      hssbrg    = alcate(hess_ncol*hess_ncol,
     >     'band hessenberg matrix$',work)
      hssbrg_q = alcate(hess_ncol*hess_ncol,
     >     'hessenberg matrix Q$',work)
      hess_Qrow = alcate(hess_ncol+1,
     >     'first row of hessenberg matrix$',work)
      hess_Rcol = alcate(hess_ncol+1,
     >     'current column of hessenberg R$',work)
      mrs_wd = hess_nupdi+2
      minres_grd = alcate(mrs_wd*vs,'Min res combinations$',work)


      return
      end
C----------------------------------------------------------------
C     Orthogonalize a vector against a series of other vectors.
C----------------------------------------------------------------
      subroutine gmres_vec_orthize(
     >     seq,oseq,newv,wid, n6,p6,
     >     s6,v6,vv6,vv6t, tmp, pap6,mgs,
     >     vec_inf,dots,maxdot,comm_context,
     >     vs,lvs, iter,last_start,tarloc,dump,trace
     >     )

C     Arguments
C----
      integer
     >     n6, wid,mgs, p6,
     >     vec_inf(*),vs,lvs, iter,last_start,tarloc,
     >     maxdot,comm_context(*)
      double precision
     >     seq(vs,*),oseq(vs,*),newv(*),
     >     v6(*),vv6(*),vv6t(*),dots(*), pap6(*),tmp(*)
      character*(*) s6
      logical dump,trace

C     Local
C----
      integer cnt,loc,pass,dot_bot,idum
      character*85 nam
      double precision sum

      if (trace) call pd1i1i('Orth: @iter$',iter,'#vav dots=$',n6)
      call nulv(vv6,n6+1)
      dot_bot = 0*p6+1
      pass = 1
      tarloc = 1
 100  continue
      do 10 cnt=1,n6
         loc = 1+mod(iter-cnt,wid)
         if (pass.eq.1) then
            call cddot(oseq(1,loc),newv,vec_inf,
     >           dots,maxdot,dot_bot+cnt-1,comm_context,s6)
         else
            call cddot(oseq(1,loc),seq(1,tarloc),vec_inf,
     >           dots,maxdot,dot_bot+cnt-1,comm_context,s6)
         endif
 10   continue

      if (n6.gt.0) call ddotvs(v6,n6,dots,dot_bot,comm_context)
      if (n6.gt.0.and.trace) call pdar(s6,v6,n6)
      sum = 0.d0
      do 30 cnt=1,n6
         vv6t(cnt+1) = v6(cnt) / pap6(last_start+iter+1-cnt)
         vv6(cnt+1) = vv6(cnt+1) + vv6t(cnt+1)
         sum = sum+vv6t(cnt+1)
 30   continue
      vv6t(1) = -sum
      vv6(1) = vv6(1)+vv6t(1)
      if (trace.and.pass.eq.1) call pdar('MGS0 hescol$',vv6,n6+1)
      if (trace.and.pass.gt.1) call pdar('MGS0 hescol upd$',vv6t,n6+1)
      if (mgs.eq.0) then
         nam = 'grd$'
         call requpdate(seq, 'Update gradient (ful rec)$',
     >        wid,vs,lvs,iter+1,vec_inf,
     >        1.d0,newv,vv6,n6+1,trace,tarloc)
      else if (pass.eq.1) then
         call vvcopy(tmp,newv,vs)
         nam = 'grd$'
         call requpdate(seq,
     >        'Update gradient (ful rec)$',
     >        wid,vs,lvs,iter+1,vec_inf,
     >        1.d0,tmp,vv6,n6+1,trace,tarloc)
      else
         nam = 'grdp$'
         call requpdate(seq,
     >        'Update gradient (ful rec)$',
     >        wid,vs,lvs,iter+1,vec_inf,
     >        1.d0,tmp,vv6,n6+1,trace,tarloc)
      endif
      if (dump) call dump_vector(seq(1,1+(tarloc-1)/vs),
     >     vec_inf,nam,iter+1+last_start)
      if (mgs.ne.0 .and. pass.eq.1) tarloc = 1+(tarloc-1)/vs
      if (mgs.gt.0 .and. pass.eq.1) then
         pass = 2
         goto 100
      endif

      call addflp(pass*n6*lvs)

      return
      end
C----------------------------------------------------------------
      subroutine cgs_inner
     >     (matrix,
     >     mat_ptr,mat_idx,mat_con,comm_context,
     >     preconditioner,prec_inf, itv,rhs, vec_inf,
     >     success,precision, max_iter,stop_quant,stop_type,
     >     grad_hist,lda_hist,nhist,results,
     >     test_alloc,need_mem, work
     >     )
      
C     Arguments
C----
      integer lda_hist,nhist
      integer mat_ptr(*),mat_idx(*),mat_con(*),comm_context(*),
     >     prec_inf(*), vec_inf(*),
     >     success, max_iter,stop_quant,stop_type,
     >     test_alloc,need_mem,need_mem_saved
      save need_mem_saved
      double precision matrix(*),preconditioner(*), itv(*),rhs(*),
     >     grad_hist(lda_hist,nhist), precision,results(*)
      double precision work(*)
      
      double precision
     >     a_norm(1),x_norm,b_norm
      common /itrmet_r/
     >     a_norm,x_norm,b_norm

C     Inspect dynamic memory for integrity
C     0=no trace, 1=once/iteration, 2=very often
C----
      integer mem_trace_val
      common /mem_trace/mem_trace_val

C     Functions
C----
      integer needed_memory
      logical
     >     trace_progress,trace_setup,prec_null
      double precision clock00,t_dum(2),ttime

C     Local
C---- 
      logical
     >     vt,no_prec,plot_true_res,
     >     is_row,is_col,is_sym,is_dia,is_grd,
     >     tstp,tetp,tptp,tptip, te,ts,ti,tp,tm
      integer maxtrc
      parameter (maxtrc=20)
      integer 
     >     its,grd,hrd,dir,adr,
     >     dirl,grdl,hrdl,ohrd,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots,
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots,
     >     hess_ncol,hssbrg, hescol,hescol_old, hescol_wd,
     >     iter, idum1,vs,lvs
      double precision ddum

C     Parameters for stopping test
C----
      integer it_update_loc
      double precision 
     >     it_update_mul, stop_tester,stop_testee,normg1,
     >     xtx,xmx,gtg,hth, pap,
     >     rr_rr_quot,alpha

C     Parameters for cgs and bcgs
C----
      integer
     >     cgs_rp,cgs_tg,cgs_mp,cgs_mt,
     >     cgs_xg,cgs_axg,cgs_xh,bcgs_xg,
     >     cgs_ti,cgs_xi,bcgs_xi

C     Vector trace
C----
      vt(idum1) = ti.and.idum1.le.maxtrc

C     Various conditions
C----
      call cg_trace_setup(
     >     tstp,tetp,tptp,tptip, te,ts,ti,tp,tm
     >     )

C     Is there a preconditioner step?
C----
      no_prec = prec_null()

C     How long is a vector?
C----
      vs = vec_inf(3)
      lvs = vec_inf(2)

C     Do we plot the true residual for comparison?
C----
      plot_true_res = nhist.gt.1 .and. lda_hist.gt.0
      if (plot_true_res) then
         nhist = 2
      else
         nhist = 2-1
      endif

      if (max_iter.eq.-1) then
         call pt0('>>>> Aborting test run <<<<$')
         return
      endif

C     Figure out how much work / buffer space is needed,
C     and allocate temporaries in this.
C     (2-step process: first time only measurement,
C     after return and external malloc alloc for real)
C----
      call set_mem_probe(test_alloc)
      call reset_allocation(work,1, need_mem)
      if (trace_progress()) then
         if (test_alloc.eq.0) then
            call pd1i('Allocating cg workspace$',need_mem)
         else
            call pd0('Measuring cg workspace$')
         endif
      endif

      if (test_alloc.eq.0) call nulv(work,need_mem_saved)

      call alloc_cgs_work(work,
     >     its,grd,hrd,dir,adr,
     >     dirl,grdl,hrdl,ohrd,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots,
     >     hess_ncol,hssbrg, hescol,hescol_old, hescol_wd,
     >     cgs_xi,cgs_xg,cgs_axg,cgs_xh,bcgs_xg,bcgs_xi,
     >     cgs_rp, cgs_ti,cgs_tg,cgs_mp,cgs_mt,
     >     no_prec,
     >     vec_inf,mat_con,vs,max_iter
     >     )
      if (test_alloc.eq.1) then
         need_mem = needed_memory()
         need_mem_saved = need_mem
         return
      else
         if (mem_trace_val.ge.2) call test_memory_array(work,
     >       'Memtest alloc$',1,.true.)
      endif

C     Initialization of constants
C----
      iter = 0
      it_update_mul = 1.d0
      it_update_loc = 1
      pap = 0.d0
      alpha = 1.d0
      rr_rr_quot = 0.d0

C     Initialization part
C==== 
      if (tstp) call pt0('Initial constants$')
      call bvzero(work(dir),vec_inf)
      call bvcopy_ob(work(its),vec_inf,itv)
      if (vt(1)) call dump_vector(work(its),vec_inf,'itr$',1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest init vectors$',1,.true.)

      call nulv(work(hescol),3)
      call cg_format_setup(
     >     is_row,is_col,is_sym,is_dia,is_grd,vec_inf)

C     Calculate the initial residual
C----
      call calc_true_res(work(grd),work(tmp), work(its),rhs,
     >     vec_inf, work(dots_stack),maxdot,comm_context,
     >     matrix,mat_ptr,mat_idx,mat_con,
     >     is_row,is_col,is_sym,is_dia,is_grd,.false.,
     >     iter,lvs,ddum, ts,tp)
      if (tstp) call pt1d('True residual gtg$',ddum)
      if (vt(iter)) call dump_vector(work(grd),vec_inf,
     >     'grd$',iter+1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after initial grad$',iter,.true.)

C     Compute quantities for stopping test: both numerical
C     stopping criterium, and number/type of dot products to be
C     done each iteration
C----
      call init_stop_tests(stop_tester,
     >     precision,stop_quant,stop_type,
     >     n_xtx_dots,n_xmx_dots,n_gg_dots,n_gh_dots,n_hh_dots,
     >     matrix,mat_ptr,mat_idx,comm_context,
     >     rhs,work(tmp), vec_inf,lvs,
     >     work(dots_stack),maxdot,
     >     grad_hist(1,2),
     >     .true., plot_true_res,no_prec,
     >     tptip.or.(tp.and.trace_setup()))
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest stoptest setup$',iter,.true.)

C     Purely informative calculation of zero-th residual norm
C----
      if (ts) then
         call cddot(work(grd),work(grd),
     >        vec_inf,work(dots_stack),maxdot,1,
     >        comm_context,'rtr init$')
         call ddotv(gtg,work(dots_stack),1,comm_context)
         call addflp(2*lvs)
         if (tp) call pd1i1d('iter$',iter,'gtg:$',gtg)
      endif

C     Start bookkeeping
C----
      call zero_flops
      ttime = clock00(t_dum)

      if (tm) call test_memory_array(work,
     >    'Memtest at start iter$',iter,tstp)

C     Iteration Loop for CGS
C     
C     When we enter here, we have just computed g_{i+1} for i=0,...
C     g_1 computed initially, otherwise at the end of the loop
C==== 
 10   continue

C====
C     preconditioner solve of gradient i+1;
C     unpreconditioned, then alias with gradient
C----
C     For cgs / bicgs we save the first residual, in addition to
C     the first preconditioned residual
C----
      if (iter.eq.0) then
         call vvcopy(work(grdl),work(grd),vs)
         if (vt(iter)) call dump_vector
     >        (work(grdl),vec_inf,'grdl-sq$',iter+1)
      endif

      hrd = grd
      hrdl = grdl
C================================
C     Inner products g^tg-like:
C================================

C     Set the locations in the dots_stack where various simultaneous
C     inner products are to be stored
C----
      call set_cgs_dotlocs(
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots
     >     )

C     Perform the inner products
C----
      call gtype_dots5(
     >     n_xmx_dots, work(it_update_loc),xu_dotloc, 'x-x$',xmx,
     >     n_xtx_dots, work(its),xx_dotloc, 'xx$',xtx,
     >     n_gg_dots, work(grd),gg_dotloc, 'gtg$',gtg,
     >     n_gh_dots, work(grdl),gh_dotloc,
     >        'gth$',work(gh_dots+iter),
     >     n_hh_dots, work(hrd),hh_dotloc, 'hth$',hth,
     >     vec_inf,work(dots_stack),maxdot,comm_context,
     >     vs,lvs, tstp)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after g inprods$',iter,.true.)
      x_norm = sqrt(xtx)
      alpha = work(gh_dots+iter)
      if (iter.eq.0) normg1 = sqrt(gtg)
      if (iter.gt.0) then
         rr_rr_quot = work(gh_dots+iter) / work(gh_dots+iter-1)
         if (tstp) call pd1d('Beta:$',rr_rr_quot)
      endif
      work(norms+iter) = sqrt(gtg)

C     Update stopping test
C----
      call gg_stop_tests(stop_testee,stop_tester,
     >     precision,stop_quant,stop_type,
     >     iter+1, xmx,it_update_mul,
     >     gtg,work(gh_dots+iter),hth, tetp,tstp)

C     Maybe save some results for plotting, maybe write to screen or file
C----
      if (tetp) then
         if (nhist.ge.1 .and. lda_hist.gt.iter) then
            grad_hist(iter+1,1) = sqrt(gtg)
            call pt1i1d('Iter:$',iter, '|g|:$',sqrt(gtg))
         endif
      endif

C================
C     Update search directions
C================

C     Higher power methods search directions
C----
      if (iter.eq.0) then
         call vvcopy(work(cgs_rp),work(grd),vs)
         call vvcopy(work(dir),work(grd),vs)
      else
         call axby(work(cgs_rp),
     >        1.d0,work(grd),rr_rr_quot,work(cgs_tg),vs)
         call axby(work(tmp),
     >        1.d0,work(cgs_tg),rr_rr_quot,work(dir),vs)
         call axby(work(dir),
     >        1.d0,work(cgs_rp),rr_rr_quot,work(tmp),vs)
         call addflp(6*vs)
      endif
      if (no_prec) then
         cgs_mp = dir
      else
         call solve_preconditioner(work(cgs_mp),
     >        work(dir),vec_inf,preconditioner,prec_inf,
     >        matrix,mat_ptr,mat_idx,
     >        mat_con,comm_context,
     >        work(tmp1),work(tmp2),
     >        iter, 0,tptip,vt(iter))
      endif
      if (vt(iter)) then
         call dump_vector(work(cgs_mp),vec_inf,'dir$',iter+1)
      endif
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after dir update$',iter,.true.)

C================
C     End of an iteration
C================

      if (iter.eq.maxtrc) then
         ts = .false.
         call otrtxg                
      endif
      tstp = ts.and.tp

C     Return various codes if the iteration stops here.
C----
      if (tstp)
     >     call pd2d('Stop test and match$',stop_testee,stop_tester)
      if ( stop_testee.lt.stop_tester ) then
         success = 0
         goto 21
      else if (iter.ge.max_iter) then
         success = 1
         goto 21
      endif
      goto 22
 21   continue
      results(1) = success
      results(2) = iter
      results(4) = stop_testee
      results(5) = normg1
      call true_x_norm(ddum,
     >     work(tmp),work(its),rhs, vec_inf,
     >     matrix,mat_ptr,mat_idx,mat_con,
     >     is_row,is_col,is_sym,is_dia,is_grd,
     >     work(dots_stack),maxdot,comm_context,
     >     lvs,tptip)
      results(6) = ddum
      goto 20
 22   continue

C====
      iter = iter+1
      plot_true_res = plot_true_res.and.lda_hist.gt.iter
      if (tm) call test_memory_array(work,
     >     'Memory tested at iteration$',iter,tstp)
      if (tetp) call pt1i('Iteration:$',iter)
      
C================
C     Matrix times search direction
C----
      call bvcler(work(cgs_mp),vec_inf)
      if (tptip) call pd1i('Matrix times search$',iter)
      call cg_mvp1(
     >     work(adr),work(cgs_mp),vec_inf,
     >     is_row,is_col,is_sym,is_dia,is_grd,.false.,
     >     matrix,mat_ptr,mat_idx,mat_con,comm_context)
      if (vt(iter)) call dump_vector(work(adr),
     >     vec_inf,'adr$',iter)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >     'Memtest after Ap$',iter,.true.)

C================
C     Compute pAp inner products.
C================

      call cddot(work(hrdl),work(adr),vec_inf,
     >     work(dots_stack),maxdot,1,comm_context,'pAp dot$')
      call ddotv(pap,work(dots_stack),1,comm_context)
      call addflp(2*lvs)
      if (tstp) call pd1d('pap inprod$',pap)
      alpha = alpha / pap
      if (tstp) call pd1d('alpha:$',alpha)

      if (mem_trace_val.ge.2) call test_memory_array(work,
     >     'Memtest after p inprods$',iter,.true.)

C================
C     Update gradients left and right
C     this yields g_{i+1}
C================

      if (tptip) call pd0('Update gradient (split,pw)$')
      call axby(work(cgs_tg),1.d0,work(cgs_rp),
     >     -alpha,work(adr),vs)
      if (vt(iter)) call dump_vector(work(cgs_tg),
     >     vec_inf,'cgs-t$',iter+1)
      call axby(work(tmp),
     >     1.d0,work(cgs_rp),1.d0,work(cgs_tg),vs)
      if (no_prec) then
         cgs_mt = tmp
      else
         call solve_preconditioner(work(cgs_mt),
     >        work(tmp),vec_inf,
     >        preconditioner,prec_inf, matrix,mat_ptr,mat_idx,
     >        mat_con,comm_context,
     >        work(tmp1),work(tmp2), iter,
     >        0,tptip,vt(iter))
      endif
      if (tptip) call pd1i('BCGS A cgs_tg$',iter+1)
      call cg_mvp1(
     >     work(cgs_axg),work(cgs_mt),vec_inf,
     >     is_row,is_col,is_sym,is_dia,is_grd,.false.,
     >     matrix,mat_ptr,mat_idx,mat_con,comm_context)
      if (vt(iter)) call dump_vector(work(cgs_axg),
     >     vec_inf,'Acgs_tg$',iter+1)
      call addflp(7*vs)
      call axby(work(grd),
     >     1.d0,work(grd),-alpha,work(cgs_axg),vs)
      call axby(work(its),
     >     1.d0,work(its),-alpha,work(cgs_mt),vs)
      it_update_loc = cgs_mt
      it_update_mul = alpha

      if (vt(iter)) then
         call dump_vector(work(grd),vec_inf,'grd$',iter+1)
         call dump_vector(work(its),vec_inf,'itr$',iter+1)
      endif
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after grad update$',iter,.true.)

C     Calculate the exact residual (norm) for trace

C----
      if (plot_true_res .or. ts) then
         call calc_true_res(work(tmp1),work(tmp2), work(its),rhs,
     >        vec_inf, work(dots_stack),maxdot,comm_context,
     >        matrix,mat_ptr,mat_idx,mat_con,
     >        is_row,is_col,is_sym,is_dia,is_grd,.false.,
     >        iter,lvs,ddum, ts,tp)
         grad_hist(iter+1,2) = sqrt(ddum)
         if (tstp) call pt1d('True residual gtg$',ddum)
      endif

C     Iteration Loop End for CGS
C----
      goto 10
 20   continue

      if (tstp) call pd0('Iteration loop ended$')
      results(7) = clock00(t_dum) - ttime
      call tally_flops(results(3),comm_context)

C     Copy solution back
C----
      call bvcopy_bo(itv,vec_inf,work(its))

C     Distribute the terminating information
C----
      call dpbcst(results,7,comm_context,'Iter results$')

      return
      end
C----------------------------------------------------------------
      subroutine set_cgs_dotlocs(
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots
     >     )

C     Arguments
C----
      integer
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots

      gg_dotloc  = 1
      gh_dotloc  = gg_dotloc  + n_gg_dots
      hh_dotloc  = gh_dotloc  + n_gh_dots
      xx_dotloc  = hh_dotloc  + n_hh_dots
      xu_dotloc  = xx_dotloc  + n_xtx_dots

      return
      end
C----------------------------------------------------------------
C     Initialize pointers for internal storage
C----------------------------------------------------------------
      subroutine alloc_cgs_work(work,
     >     its,grd,hrd,dir,adr,
     >     dirl,grdl,hrdl,ohrd,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots,
     >     hess_ncol,hssbrg, hescol,hescol_old, hescol_wd,
     >     cgs_xi,cgs_xg,cgs_axg,cgs_xh,bcgs_xg,bcgs_xi,
     >     cgs_rp, cgs_ti,cgs_tg,cgs_mp,cgs_mt,
     >     no_prec,
     >     vec_inf,mat_con,vs,max_iter
     >     )
      
C     Arguments
C---- 
      double precision work(*)
      integer
     >     its,grd,hrd,dir,adr,
     >     dirl,grdl,hrdl,ohrd,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots,
     >     hess_ncol,hssbrg, hescol,hescol_old, hescol_wd,
     >     cgs_xg,cgs_axg,cgs_xh,bcgs_xg,
     >     cgs_rp,cgs_tg,cgs_mp,cgs_mt,
     >     cgs_xi,cgs_ti,bcgs_xi,
     >     vec_inf(*),mat_con(*),vs, max_iter
      logical no_prec

C     Functions
C----
      integer alcate

C     Local
C----
      integer pol_len,pol_b1,pol_b2

C     Space for inner products; storage and local computation.
C----
      maxdot = max_iter+10
      dots_stack = alcate(maxdot,'Local inner products$',work)

C     Allocate cg vectors
C---- 
      norms = alcate(max_iter+1,'Gradient norms$',work)
      gh_dots = alcate(max_iter+1,'GH inprods$',work)

      its = alcate(vs,'iterates$',work)
      grd = alcate(vs,'right gradient$',work)
      if (no_prec) then
         hrd = grd
      else
         hrd = alcate(vs,'right Cinv gradient$',work)
      endif
      dir = alcate(vs,'right search direction$',work)
      adr = alcate(vs,'right A search$',work)
      tmp = alcate(vs,'cg temporary$',work)
      tmp1 = alcate(vs,'cg temporary1$',work)
      tmp2 = alcate(vs,'cg temporary2$',work)
      
C     For Bicg & squared methods, allocate left variants too
C---- 
      grdl = alcate(vs,'left gradient$',work)
      if (no_prec) then
         hrdl = grdl
      else
         hrdl = alcate(vs,'left Cinv gradient$',work)
      endif
      
      ohrd = alcate(vs,'One old gradient$',work)
C     For polynomial analysis, allocate some rubbish;
C     this is used for the corrections in BiCGS
C----
      hess_ncol = max_iter+2
      pol_len = 2+max_iter
      pol_b1 = alcate(pol_len,'first polynomial$',work)
      pol_b2 = alcate(pol_len,'second polynomial$',work)
      call set_poly_info(pol_b1,pol_b2,pol_len)

      cgs_axg = alcate(vs,'CGS A * other gradient$',work)
      cgs_xg = 1
      cgs_xh = 1
      cgs_tg = alcate(vs,'(B)CGS weird term$',work)
      cgs_rp = alcate(vs,'CGS prec mix term$',work)
      if (no_prec) then
         cgs_mp  = 1
         cgs_mt  = 1
      else
         cgs_mp = alcate(vs,'CGS M x search$',work)
         cgs_mt = alcate(vs,'CGS M x weird term$',work)
      endif
      hescol = alcate(3,'hess column2$',work)
      hescol_old = alcate(3,'hess column2 old$',work)
      hssbrg = alcate(hess_ncol*hess_ncol,
     >     'CGS hessenberg$',work)

      return
      end
C----------------------------------------------------------------
      subroutine bcgs_inner
     >     (matrix,
     >     mat_ptr,mat_idx,mat_con,comm_context,
     >     preconditioner,prec_inf, itv,rhs, vec_inf,
     >     success,precision, max_iter,stop_quant,stop_type,
     >     grad_hist,lda_hist,nhist,results,
     >     test_alloc,need_mem, work
     >     )
      
C     Arguments
C----
      integer lda_hist,nhist
      integer mat_ptr(*),mat_idx(*),mat_con(*),comm_context(*),
     >     prec_inf(*), vec_inf(*),
     >     success, max_iter,stop_quant,stop_type,
     >     test_alloc,need_mem,need_mem_saved
      save need_mem_saved
      double precision matrix(*),preconditioner(*), itv(*),rhs(*),
     >     grad_hist(lda_hist,nhist), precision,results(*)
      double precision work(*)
      
      double precision
     >     a_norm(1),x_norm,b_norm
      common /itrmet_r/
     >     a_norm,x_norm,b_norm

C     Inspect dynamic memory for integrity
C     0=no trace, 1=once/iteration, 2=very often
C----
      integer mem_trace_val
      common /mem_trace/mem_trace_val

C     Functions
C----
      integer needed_memory
      logical
     >     trace_progress,trace_setup,prec_null
      double precision clock00,t_dum(2),ttime

C     Local
C---- 
      logical
     >     vt,no_prec,plot_true_res,
     >     is_row,is_col,is_sym,is_dia,is_grd,
     >     tstp,tetp,tptp,tptip, te,ts,ti,tp,tm
      integer maxtrc
      parameter (maxtrc=20)
      integer 
     >     its,grd,hrd,dir,adr,
     >     dirl,grdl,hrdl,ohrd,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots,
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots,
     >     hess_ncol,hssbrg, hescol,hescol_old, hescol_wd,
     >     hescol2,hescol2_old,hssbrg2,
     >     iter, idum1,vs,lvs
      double precision ddum

C     Parameters for stopping test
C----
      integer it_update_loc
      double precision 
     >     it_update_mul, stop_tester,stop_testee,normg1,
     >     xtx,xmx,gtg,hth, pap,
     >     rr_cor,alpha_left,
     >     rr_rr_quot,alpha

C     Parameters for cgs and bcgs
C----
      integer
     >     cgs_rp,cgs_tg,cgs_mp,cgs_mt,
     >     cgs_xg,cgs_axg,cgs_xh,bcgs_xg,
     >     cgs_ti,cgs_xi,bcgs_xi

C     Vector trace
C----
      vt(idum1) = ti.and.idum1.le.maxtrc

C     Various conditions
C----
      call cg_trace_setup(
     >     tstp,tetp,tptp,tptip, te,ts,ti,tp,tm
     >     )

C     Is there a preconditioner step?
C----
      no_prec = prec_null()

C     How long is a vector?
C----
      vs = vec_inf(3)
      lvs = vec_inf(2)

C     Do we plot the true residual for comparison?
C----
      plot_true_res = nhist.gt.1 .and. lda_hist.gt.0
      if (plot_true_res) then
         nhist = 2
      else
         nhist = 2-1
      endif

      if (max_iter.eq.-1) then
         call pt0('>>>> Aborting test run <<<<$')
         return
      endif

C     Figure out how much work / buffer space is needed,
C     and allocate temporaries in this.
C     (2-step process: first time only measurement,
C     after return and external malloc alloc for real)
C----
      call set_mem_probe(test_alloc)
      call reset_allocation(work,1, need_mem)
      if (trace_progress()) then
         if (test_alloc.eq.0) then
            call pd1i('Allocating cg workspace$',need_mem)
         else
            call pd0('Measuring cg workspace$')
         endif
      endif

      if (test_alloc.eq.0) call nulv(work,need_mem_saved)

      call alloc_bcgs_work(work,
     >     its,grd,hrd,dir,adr,
     >     dirl,grdl,hrdl,ohrd,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots,
     >     hess_ncol,hssbrg, hescol,hescol_old, hescol_wd,
     >     hescol2,hescol2_old,hssbrg2,
     >     cgs_xi,cgs_xg,cgs_axg,cgs_xh,bcgs_xg,bcgs_xi,
     >     cgs_rp, cgs_ti,cgs_tg,cgs_mp,cgs_mt,
     >     no_prec,
     >     vec_inf,mat_con,vs,max_iter
     >     )
      if (test_alloc.eq.1) then
         need_mem = needed_memory()
         need_mem_saved = need_mem
         return
      else
         if (mem_trace_val.ge.2) call test_memory_array(work,
     >       'Memtest alloc$',1,.true.)
      endif

C     Initialization of constants
C----
      iter = 0
      it_update_mul = 1.d0
      it_update_loc = 1
      pap = 0.d0
      alpha = 1.d0
      alpha_left = 1.d0
      rr_rr_quot = 0.d0

C     Initialization part
C==== 
      if (tstp) call pt0('Initial constants$')
      call bvzero(work(dir),vec_inf)
      call bvcopy_ob(work(its),vec_inf,itv)
      if (vt(1)) call dump_vector(work(its),vec_inf,'itr$',1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest init vectors$',1,.true.)

      call nulv(work(hescol),3)
      call nulv(work(hescol2),3)
      call cg_format_setup(
     >     is_row,is_col,is_sym,is_dia,is_grd,vec_inf)

C     Calculate the initial residual
C----
      call calc_true_res(work(grd),work(tmp), work(its),rhs,
     >     vec_inf, work(dots_stack),maxdot,comm_context,
     >     matrix,mat_ptr,mat_idx,mat_con,
     >     is_row,is_col,is_sym,is_dia,is_grd,.false.,
     >     iter,lvs,ddum, ts,tp)
      if (tstp) call pt1d('True residual gtg$',ddum)
      if (vt(iter)) call dump_vector(work(grd),vec_inf,
     >     'grd$',iter+1)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after initial grad$',iter,.true.)

C     Compute quantities for stopping test: both numerical
C     stopping criterium, and number/type of dot products to be
C     done each iteration
C----
      call init_stop_tests(stop_tester,
     >     precision,stop_quant,stop_type,
     >     n_xtx_dots,n_xmx_dots,n_gg_dots,n_gh_dots,n_hh_dots,
     >     matrix,mat_ptr,mat_idx,comm_context,
     >     rhs,work(tmp), vec_inf,lvs,
     >     work(dots_stack),maxdot,
     >     grad_hist(1,2),
     >     .true., plot_true_res,no_prec,
     >     tptip.or.(tp.and.trace_setup()))
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest stoptest setup$',iter,.true.)

C     Purely informative calculation of zero-th residual norm
C----
      if (ts) then
         call cddot(work(grd),work(grd),
     >        vec_inf,work(dots_stack),maxdot,1,
     >        comm_context,'rtr init$')
         call ddotv(gtg,work(dots_stack),1,comm_context)
         call addflp(2*lvs)
         if (tp) call pd1i1d('iter$',iter,'gtg:$',gtg)
      endif

C     Start bookkeeping
C----
      call zero_flops
      ttime = clock00(t_dum)

      if (tm) call test_memory_array(work,
     >    'Memtest at start iter$',iter,tstp)

C     Iteration Loop for BCGS
C     
C     When we enter here, we have just computed g_{i+1} for i=0,...
C     g_1 computed initially, otherwise at the end of the loop
C==== 
 10   continue

C====
C     preconditioner solve of gradient i+1;
C     unpreconditioned, then alias with gradient
C----
C     For cgs / bicgs we save the first residual, in addition to
C     the first preconditioned residual
C----
      if (iter.eq.0) then
         call vvcopy(work(grdl),work(grd),vs)
         if (vt(iter)) call dump_vector
     >        (work(grdl),vec_inf,'grdl-sq$',iter+1)
      endif

      hrd = grd
      hrdl = grdl
C     Corrections for inner products for the BiCGstab method.
C----
      if (iter.gt.0) rr_cor = alpha / alpha_left

C================================
C     Inner products g^tg-like:
C================================

C     Set the locations in the dots_stack where various simultaneous
C     inner products are to be stored
C----
      call set_bcgs_dotlocs(
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots
     >     )

C     Perform the inner products
C----
      call gtype_dots5(
     >     n_xmx_dots, work(it_update_loc),xu_dotloc, 'x-x$',xmx,
     >     n_xtx_dots, work(its),xx_dotloc, 'xx$',xtx,
     >     n_gg_dots, work(grd),gg_dotloc, 'gtg$',gtg,
     >     n_gh_dots, work(grdl),gh_dotloc,
     >        'gth$',work(gh_dots+iter),
     >     n_hh_dots, work(hrd),hh_dotloc, 'hth$',hth,
     >     vec_inf,work(dots_stack),maxdot,comm_context,
     >     vs,lvs, tstp)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after g inprods$',iter,.true.)
      x_norm = sqrt(xtx)
      alpha = work(gh_dots+iter)
      if (iter.eq.0) normg1 = sqrt(gtg)
      if (iter.gt.0) then
         rr_rr_quot = work(gh_dots+iter) / work(gh_dots+iter-1)
         if (tstp) call pd1d('Beta:$',rr_rr_quot)
         if (tstp) call pd2d('RR ratio orig/corr$',
     >        rr_rr_quot,rr_rr_quot*rr_cor)
         rr_rr_quot = rr_rr_quot * rr_cor
      endif
      work(norms+iter) = sqrt(gtg)

C     Update stopping test
C----
      call gg_stop_tests(stop_testee,stop_tester,
     >     precision,stop_quant,stop_type,
     >     iter+1, xmx,it_update_mul,
     >     gtg,work(gh_dots+iter),hth, tetp,tstp)

C     Maybe save some results for plotting, maybe write to screen or file
C----
      if (tetp) then
         if (nhist.ge.1 .and. lda_hist.gt.iter) then
            grad_hist(iter+1,1) = sqrt(gtg)
            call pt1i1d('Iter:$',iter, '|g|:$',sqrt(gtg))
         endif
      endif

C================
C     Update search directions
C================

C     Higher power methods search directions
C----
      if (iter.eq.0) then
         call vvcopy(work(dir),work(grd),vs)
      else
         call axby(work(tmp), 1.d0,work(dir),
     >        -alpha_left,work(adr),vs)
         call addflp(4*vs)
         call axby(work(dir),
     >        1.d0,work(grd),rr_rr_quot,work(tmp),vs)
      endif
      if (no_prec) then
         cgs_mp = dir
      else
         call solve_preconditioner(work(cgs_mp),
     >        work(dir),vec_inf,preconditioner,prec_inf,
     >        matrix,mat_ptr,mat_idx,
     >        mat_con,comm_context,
     >        work(tmp1),work(tmp2),
     >        iter, 0,tptip,vt(iter))
      endif
      if (vt(iter)) then
         call dump_vector(work(cgs_mp),vec_inf,'dir$',iter+1)
      endif
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after dir update$',iter,.true.)

C================
C     End of an iteration
C================

      if (iter.eq.maxtrc) then
         ts = .false.
         call otrtxg                
      endif
      tstp = ts.and.tp

C     Return various codes if the iteration stops here.
C----
      if (tstp)
     >     call pd2d('Stop test and match$',stop_testee,stop_tester)
      if ( stop_testee.lt.stop_tester ) then
         success = 0
         goto 21
      else if (iter.ge.max_iter) then
         success = 1
         goto 21
      endif
      goto 22
 21   continue
      results(1) = success
      results(2) = iter
      results(4) = stop_testee
      results(5) = normg1
      call true_x_norm(ddum,
     >     work(tmp),work(its),rhs, vec_inf,
     >     matrix,mat_ptr,mat_idx,mat_con,
     >     is_row,is_col,is_sym,is_dia,is_grd,
     >     work(dots_stack),maxdot,comm_context,
     >     lvs,tptip)
      results(6) = ddum
      goto 20
 22   continue

C====
      iter = iter+1
      plot_true_res = plot_true_res.and.lda_hist.gt.iter
      if (tm) call test_memory_array(work,
     >     'Memory tested at iteration$',iter,tstp)
      if (tetp) call pt1i('Iteration:$',iter)
      
C================
C     Matrix times search direction
C----
      call bvcler(work(cgs_mp),vec_inf)
      if (tptip) call pd1i('Matrix times search$',iter)
      call cg_mvp1(
     >     work(adr),work(cgs_mp),vec_inf,
     >     is_row,is_col,is_sym,is_dia,is_grd,.false.,
     >     matrix,mat_ptr,mat_idx,mat_con,comm_context)
      if (vt(iter)) call dump_vector(work(adr),
     >     vec_inf,'adr$',iter)
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >     'Memtest after Ap$',iter,.true.)

C================
C     Compute pAp inner products.
C================

      call cddot(work(hrdl),work(adr),vec_inf,
     >     work(dots_stack),maxdot,1,comm_context,'pAp dot$')
      call ddotv(pap,work(dots_stack),1,comm_context)
      call addflp(2*lvs)
      if (tstp) call pd1d('pap inprod$',pap)
      alpha = alpha / pap
      if (tstp) call pd1d('alpha:$',alpha)

      if (mem_trace_val.ge.2) call test_memory_array(work,
     >     'Memtest after p inprods$',iter,.true.)

C================
C     Update gradients left and right
C     this yields g_{i+1}
C================

      if (tptip) call pd0('Update gradient (split,pw)$')
      cgs_rp = grd
      call axby(work(cgs_tg),1.d0,work(cgs_rp),
     >     -alpha,work(adr),vs)
      call axby(work(its),
     >     1.d0,work(its),-alpha,work(cgs_mp),vs)
      if (vt(iter)) call dump_vector(work(cgs_tg),
     >     vec_inf,'cgs-t$',iter+1)
      if (vt(iter)) call dump_vector(work(its),
     >     vec_inf,'it-half$',iter+1)
      if (no_prec) then
         cgs_mt = cgs_tg
      else
         call solve_preconditioner(work(cgs_mt),
     >        work(cgs_tg),vec_inf,
     >        preconditioner,prec_inf, matrix,mat_ptr,mat_idx,
     >        mat_con,comm_context,
     >        work(tmp1),work(tmp2), iter,
     >        0,tptip,vt(iter))
      endif
      if (tptip) call pd1i('BCGS A cgs_tg$',iter+1)
      call cg_mvp1(
     >     work(cgs_axg),work(cgs_mt),vec_inf,
     >     is_row,is_col,is_sym,is_dia,is_grd,.false.,
     >     matrix,mat_ptr,mat_idx,mat_con,comm_context)
      if (vt(iter)) call dump_vector(work(cgs_axg),
     >     vec_inf,'Acgs_tg$',iter+1)
      alpha_left = alpha
      call bcgs_left_hescol(work(hescol2),alpha_left,
     >     work(cgs_axg),work(cgs_tg),
     >     vec_inf,lvs,work(dots_stack),maxdot,comm_context,tstp)
      call addflp(8*vs)
      call axby(work(grd),
     >     1.d0,work(cgs_tg),-alpha_left,work(cgs_axg),vs)
      call axby(work(its),
     >     1.d0,work(its),-alpha_left,work(cgs_mt),vs)
      it_update_loc = cgs_mt
      it_update_mul = alpha_left

      if (vt(iter)) then
         call dump_vector(work(grd),vec_inf,'grd$',iter+1)
         call dump_vector(work(its),vec_inf,'itr$',iter+1)
      endif
      if (mem_trace_val.ge.2) call test_memory_array(work,
     >    'Memtest after grad update$',iter,.true.)

C     Calculate the exact residual (norm) for trace

C----
      if (plot_true_res .or. ts) then
         call calc_true_res(work(tmp1),work(tmp2), work(its),rhs,
     >        vec_inf, work(dots_stack),maxdot,comm_context,
     >        matrix,mat_ptr,mat_idx,mat_con,
     >        is_row,is_col,is_sym,is_dia,is_grd,.false.,
     >        iter,lvs,ddum, ts,tp)
         grad_hist(iter+1,2) = sqrt(ddum)
         if (tstp) call pt1d('True residual gtg$',ddum)
      endif

C     Iteration Loop End for BCGS
C----
      goto 10
 20   continue

      if (tstp) call pd0('Iteration loop ended$')
      results(7) = clock00(t_dum) - ttime
      call tally_flops(results(3),comm_context)

C     Copy solution back
C----
      call bvcopy_bo(itv,vec_inf,work(its))

C     Distribute the terminating information
C----
      call dpbcst(results,7,comm_context,'Iter results$')

      return
      end
C----------------------------------------------------------------
      subroutine set_bcgs_dotlocs(
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots
     >     )

C     Arguments
C----
      integer
     >     gg_dotloc,gh_dotloc,hh_dotloc,xx_dotloc,xu_dotloc,
     >     n_gg_dots,n_gh_dots,n_hh_dots,
     >     n_xtx_dots,n_xmx_dots

      gg_dotloc  = 1
      gh_dotloc  = gg_dotloc  + n_gg_dots
      hh_dotloc  = gh_dotloc  + n_gh_dots
      xx_dotloc  = hh_dotloc  + n_hh_dots
      xu_dotloc  = xx_dotloc  + n_xtx_dots

      return
      end
C----------------------------------------------------------------
C     Initialize pointers for internal storage
C----------------------------------------------------------------
      subroutine alloc_bcgs_work(work,
     >     its,grd,hrd,dir,adr,
     >     dirl,grdl,hrdl,ohrd,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots,
     >     hess_ncol,hssbrg, hescol,hescol_old, hescol_wd,
     >     hescol2,hescol2_old,hssbrg2,
     >     cgs_xi,cgs_xg,cgs_axg,cgs_xh,bcgs_xg,bcgs_xi,
     >     cgs_rp, cgs_ti,cgs_tg,cgs_mp,cgs_mt,
     >     no_prec,
     >     vec_inf,mat_con,vs,max_iter
     >     )
      
C     Arguments
C---- 
      double precision work(*)
      integer
     >     its,grd,hrd,dir,adr,
     >     dirl,grdl,hrdl,ohrd,
     >     tmp1,tmp2,tmp,
     >     dots_stack,maxdot, norms,gh_dots,
     >     hess_ncol,hssbrg, hescol,hescol_old, hescol_wd,
     >     hescol2,hescol2_old,hssbrg2,
     >     cgs_xg,cgs_axg,cgs_xh,bcgs_xg,
     >     cgs_rp,cgs_tg,cgs_mp,cgs_mt,
     >     cgs_xi,cgs_ti,bcgs_xi,
     >     vec_inf(*),mat_con(*),vs, max_iter
      logical no_prec

C     Functions
C----
      integer alcate

C     Local
C----
      integer pol_len,pol_b1,pol_b2

C     Space for inner products; storage and local computation.
C----
      maxdot = max_iter+10
      dots_stack = alcate(maxdot,'Local inner products$',work)

C     Allocate cg vectors
C---- 
      norms = alcate(max_iter+1,'Gradient norms$',work)
      gh_dots = alcate(max_iter+1,'GH inprods$',work)

      its = alcate(vs,'iterates$',work)
      grd = alcate(vs,'right gradient$',work)
      if (no_prec) then
         hrd = grd
      else
         hrd = alcate(vs,'right Cinv gradient$',work)
      endif
      dir = alcate(vs,'right search direction$',work)
      adr = alcate(vs,'right A search$',work)
      tmp = alcate(vs,'cg temporary$',work)
      tmp1 = alcate(vs,'cg temporary1$',work)
      tmp2 = alcate(vs,'cg temporary2$',work)
      
C     For Bicg & squared methods, allocate left variants too
C---- 
      grdl = alcate(vs,'left gradient$',work)
      if (no_prec) then
         hrdl = grdl
      else
         hrdl = alcate(vs,'left Cinv gradient$',work)
      endif
      
      ohrd = alcate(vs,'One old gradient$',work)
C     For polynomial analysis, allocate some rubbish;
C     this is used for the corrections in BiCGS
C----
      hess_ncol = max_iter+2
      pol_len = 2+max_iter
      pol_b1 = alcate(pol_len,'first polynomial$',work)
      pol_b2 = alcate(pol_len,'second polynomial$',work)
      call set_poly_info(pol_b1,pol_b2,pol_len)

      cgs_axg = alcate(vs,'CGS A * other gradient$',work)
      cgs_xg = 1
      cgs_xh = 1
      cgs_tg = alcate(vs,'(B)CGS weird term$',work)
      cgs_rp = 1
      if (no_prec) then
         cgs_mp  = 1
         cgs_mt  = 1
      else
         cgs_mp = alcate(vs,'CGS M x search$',work)
         cgs_mt = alcate(vs,'CGS M x weird term$',work)
      endif
      hescol = alcate(3,'hess column2$',work)
      hescol_old = alcate(3,'hess column2 old$',work)
      hssbrg = alcate(hess_ncol*hess_ncol,
     >     'CGS hessenberg$',work)
      hssbrg2 = alcate(hess_ncol*hess_ncol,
     >     'CGS other hessenberg$',work)
      hescol2 = alcate(3,'hess column2$',work)
      hescol2_old = alcate(3,'hess column2 old$',work)

      return
      end
C----------------------------------------------------------------
      subroutine gtype_dots5(
     >     n1,x1,p1,s1,v1, n2,x2,p2,s2,v2,
     >     n3,x3,p3,s3,v3, n4,x4,p4,s4,v4, n5,x5,p5,s5,v5,
     >     vec_inf,dots,maxdot,comm_context,
     >     vs,lvs, trace)

C     Arguments
C----
      integer
     >     n1,n2,n3,n4,n5,
     >     p1,p2,p3,p4,p5,
     >     vec_inf(*),vs,lvs, maxdot,comm_context(*)
      double precision
     >     x1(*),x2(*),x3(*),x4(*),x5(*),
     >     v1,v2,v3,v4,v5,
     >     dots(*)
      character*(*) s1,s2,s3,s4,s5
      logical trace

      if (n1.gt.0) call cnorm(x1,vec_inf,dots,maxdot,p1,
     >     comm_context,s1)
      if (n2.gt.0) call cnorm(x2,vec_inf,dots,maxdot,p2,
     >     comm_context,s2)
      if (n3.gt.0) call cnorm(x3,vec_inf,dots,maxdot, p3,
     >     comm_context,s5)
      if (n4.gt.0) call cddot(x5,x4,vec_inf,dots,maxdot, p4,
     >     comm_context,s4)
      if (n5.gt.0) call cnorm(x5,vec_inf,dots,maxdot, p5,
     >     comm_context,s5)

      if (n1.gt.0) call ddotv(v1,dots,p1,comm_context)
      if (n1.gt.0.and.trace) call pd1d(s1,v1)
      if (n2.gt.0) call ddotv(v2,dots,p2,comm_context)
      if (n2.gt.0.and.trace) call pd1d(s2,v2)
      if (n4.gt.0) call ddotv(v4,dots,p4,comm_context)
      if (n4.gt.0.and.trace) call pd1d(s4,v4)
      v3 = v4
      if (n3.gt.0) call ddotv(v3,dots,p3,comm_context)
      if (n3.gt.0.and.trace) call pd1d(s3,v3)
      v5 = v4
      if (n5.gt.0) call ddotv(v5,dots,p5,comm_context)
      if (n5.gt.0.and.trace) call pd1d(s5,v5)

      call addflp((n1+n2+n3+n4+n5)*lvs)

      return
      end
C----------------------------------------------------------------
      subroutine gtype_dots6(
     >     n1,x1,p1,s1,v1, n2,x2,p2,s2,v2,
     >     n3,x3,p3,s3,v3, n4,x4,p4,s4,v4, n5,x5,p5,s5,v5,
     >     n6,x6,p6,s6,v6,
     >     vec_inf,dots,maxdot,comm_context,
     >     vs,lvs, trace)

C     Arguments
C----
      integer
     >     n1,n2,n3,n4,n5,n6,
     >     p1,p2,p3,p4,p5,p6,
     >     vec_inf(*),vs,lvs, maxdot,comm_context(*)
      double precision
     >     x1(*),x2(*),x3(*),x4(*),x5(*),x6(*),
     >     v1,v2,v3,v4,v5,v6,
     >     dots(*)
      character*(*) s1,s2,s3,s4,s5,s6
      logical trace

      if (n1.gt.0) call cnorm(x1,vec_inf,dots,maxdot,p1,
     >     comm_context,s1)
      if (n2.gt.0) call cnorm(x2,vec_inf,dots,maxdot,p2,
     >     comm_context,s2)
      if (n3.gt.0) call cnorm(x3,vec_inf,dots,maxdot, p3,
     >     comm_context,s5)
      if (n4.gt.0) call cddot(x5,x4,vec_inf,dots,maxdot, p4,
     >     comm_context,s4)
      if (n5.gt.0) call cnorm(x5,vec_inf,dots,maxdot, p5,
     >     comm_context,s5)
      if (n6.gt.0) call cddot(x5,x6,vec_inf,dots,maxdot,p6,
     >     comm_context,s6)

      if (n1.gt.0) call ddotv(v1,dots,p1,comm_context)
      if (n1.gt.0.and.trace) call pd1d(s1,v1)
      if (n2.gt.0) call ddotv(v2,dots,p2,comm_context)
      if (n2.gt.0.and.trace) call pd1d(s2,v2)
      if (n4.gt.0) call ddotv(v4,dots,p4,comm_context)
      if (n4.gt.0.and.trace) call pd1d(s4,v4)
      v3 = v4
      if (n3.gt.0) call ddotv(v3,dots,p3,comm_context)
      if (n3.gt.0.and.trace) call pd1d(s3,v3)
      v5 = v4
      if (n5.gt.0) call ddotv(v5,dots,p5,comm_context)
      if (n5.gt.0.and.trace) call pd1d(s5,v5)
      if (n6.gt.0) call ddotv(v6,dots,p6,comm_context)
      if (n6.gt.0.and.trace) call pd1d(s6,v6)

      call addflp((n1+n2+n3+n4+n5+n6)*lvs)

      return
      end
C----------------------------------------------------------------
      subroutine long_search_update(
     >     dir,tar,dir_wd, adr,hrd,agr,
     >     hesfac_ucol,lo,hi,vs)

C     Arguments
C----
      integer lo,hi,vs, tar,dir_wd
      double precision dir(vs,dir_wd),adr(vs,dir_wd), agr(*),hrd(*),
     >     hesfac_ucol(*)

C     Local
C----
      integer cnt,loc

      do 30 cnt=lo,hi
         loc = 1+mod(tar-cnt-1,dir_wd)
         if (cnt.eq.lo) then
            call axby(adr(1,tar),1.d0,agr,
     >           hesfac_ucol(cnt),adr(1,loc),vs)
            call axby(dir(1,tar),1.d0,hrd,
     >           hesfac_ucol(cnt),dir(1,loc),vs)
         else
            call axby(adr(1,tar),1.d0,adr(1,tar),
     >           hesfac_ucol(cnt),adr(1,loc),vs)
            call axby(dir(1,tar),1.d0,dir(1,tar),
     >           hesfac_ucol(cnt),dir(1,loc),vs)
            endif
 30      continue

      return
      end
C----------------------------------------------------------------
      subroutine cg_trace_setup(
     >     tstp,tetp,tptp,tptip, te,ts,ti,tp,tm
     >     )

C     Arguments
C----
      logical
     >     tstp,tetp,tptp,tptip, te,ts,ti,tp,tm


C     Dump quantities
C----
      integer dmp_trace_val
      common /dmp_trace/dmp_trace_val

C     Inspect dynamic memory for integrity
C     0=no trace, 1=once/iteration, 2=very often
C----
      integer mem_trace_val
      common /mem_trace/mem_trace_val

C     Function
C----
      logical tracer_proc,trace_progress

C     Dump trace of Error / all Scalars / Iterates ?
      te = dmp_trace_val.ge.1
      ts = dmp_trace_val.ge.3
      ti = dmp_trace_val.ge.4
C     Are we the Trace Processor?
      tp = tracer_proc()
C     If so, do we trace general/iteration progress?
      tstp = ts.and.tp
      tetp = te.and.tp
      tptp = trace_progress().and.tp
      tptip = dmp_trace_val.ge.2.and.tp
C     Check memory every once in a while?
      tm = mem_trace_val.ge.1.and..not.dmp_trace_val.eq.0

      return
      end
C----------------------------------------------------------------
      subroutine cg_format_setup(
     >     is_row,is_col,is_sym,is_dia,is_grd,vec_inf)

C     Arguments
C----
      logical is_row,is_col,is_sym,is_dia,is_grd
      integer vec_inf(*)

      is_row = 10*(vec_inf(1)/10).eq.30 .and. vec_inf(1)-30.eq.2
      is_col = 10*(vec_inf(1)/10).eq.30 .and. vec_inf(1)-30.eq.4
      is_sym = mod(vec_inf(1),2).eq.1
      is_grd = 10*(vec_inf(1)/10).eq.10
      is_dia = 10*(vec_inf(1)/10).eq.20

      return
      end
C----------------------------------------------------------------
C     A general, and complicated matrix-vector product routine
C     for cg-type methods
C
C     Guide to matrix parts:
C     0 (all): involve all of your local variables
C     2 (brd): involve only bordering variables
C     1 (own): involve only your owned variables
C----------------------------------------------------------------
      subroutine cg_mvp2(
     >     y1,x1, vec_inf, y2,x2, left_seq,tmp,
     >     is_row,is_col,is_sym,is_dia,is_grd,
     >     matrix,mat_ptr,mat_idx,mat_con, comm_context)

C     Arguments
C     tmp(*) is a whole vector
C---- 
      double precision x1(*),x2(*),y1(*),y2(*), matrix(*), tmp(*)
      integer vec_inf(*),mat_ptr(*),mat_idx(*),
     >     mat_con(*),comm_context(*)
      logical left_seq, is_row,is_col,is_sym,is_dia,is_grd
      logical do_trace
      parameter (do_trace=.false.)

C     Tranceive trace information
C----
      integer tx_trace_int
      common /tx_int_block/tx_trace_int
      character*85 tx_trace_string
      common /tx_string_block/tx_trace_string

C     Local
C----
      logical update1,update2, do_diag1,do_diag2

      do_diag1 = .true.
      do_diag2 = .not.is_sym
      update1  = .false.
      update2  = is_sym



C     Transpose mvp before the communication stage
C----
      if (left_seq.and. (is_row.or.is_dia.or.is_grd.or.is_sym)) then
         if (do_trace)
     >        call pd0('- left sequence, transpose mvp$')
         call mvp(y2,x2,vec_inf, matrix,mat_ptr,mat_idx,
     >        1,update1,do_diag1)
      endif
      if (is_col.or.is_sym) then
         if (do_trace)
     >        call pd0('- right seq, sym, transpose mvp$')
         call mvp(y1, x1,vec_inf, matrix,mat_ptr,mat_idx,
     >        1,update1,do_diag1)
      endif

C     Send and receive data
C----
      tx_trace_string = 'MVP$'
      if (left_seq) then
         if (is_sym) then
            if (do_trace) 
     >           call pd0('- left seq sym exc/upd$')
            call vector_send_bord(y1,vec_inf,8,mat_con,comm_context,
     >           0,01)
            call vector_send_bord(y2,vec_inf,8,mat_con,comm_context,
     >           0,03)

            call vector_recv_edge(y1,vec_inf,4,mat_con,comm_context,
     >           0,11)
            call vector_recv_edge(y2,vec_inf,4,mat_con,comm_context,
     >           0,12)

            call bvcler(x1,vec_inf)
            call bvcler(x2,vec_inf)
            call vector_send_edge(x1,vec_inf,4,mat_con,comm_context,
     >           0,01)
            call vector_send_edge(x2,vec_inf,4,mat_con,comm_context,
     >           0,03)

            call vector_recv_bord(x1,vec_inf,8,mat_con,comm_context,
     >           0,11)
            call vector_recv_bord(x2,vec_inf,8,mat_con,comm_context,
     >           0,12)
         else if (is_row.or.is_dia.or.is_grd) then
            if (do_trace) 
     >           call pd0('- left seq row/dia exc/upd$')
            if (do_trace) tx_trace_string = 'mvp2 leftseq x1 exch$'
            call vector_part_x(x1,vec_inf,7,mat_con,comm_context,
     >           0,1,01,1)
            if (do_trace) tx_trace_string = 'mvp2 leftseq y2 updt$'
            call vector_part_x(y2,vec_inf,7,mat_con,comm_context,
     >           0,2,03,1)
            if (do_trace) tx_trace_string = 'mvp2 leftseq x1 exch$'
            call vector_part_x(x1,vec_inf,5,mat_con,comm_context,
     >           0,2,11,1)
            if (do_trace) tx_trace_string = 'mvp2 leftseq y2 updt$'
            call vector_part_x(y2,vec_inf,5,mat_con,comm_context,
     >           0,1,12,2)
         else if (is_col) then
            if (do_trace) 
     >           call pd0('- left seq col upd/exc$')
            if (do_trace) tx_trace_string = 'mvp2 leftseq y1 updt$'
            call vector_part_x(y1,vec_inf,5,mat_con,comm_context,
     >           0,2,01,1)
            if (do_trace) tx_trace_string = 'mvp2 leftseq x2 exch$'
            call vector_part_x(x2,vec_inf,5,mat_con,comm_context,
     >           0,1,03,1)
            if (do_trace) tx_trace_string = 'mvp2 leftseq y1 updt$'
            call vector_part_x(y1,vec_inf,7,mat_con,comm_context,
     >           0,1,11,2)
            if (do_trace) tx_trace_string = 'mvp2 leftseq x2 exch$'
            call vector_part_x(x2,vec_inf,7,mat_con,comm_context,
     >           0,2,12,2)
         else
            call pe0('Strange case left_seq$')
            call stop_connections('cg_mvp2$')
         endif
      else
         if (is_sym) then
            if (do_trace) 
     >           call pd0('- right seq symm upd/exc$')
            call vector_part_x(y1,vec_inf,2,mat_con,comm_context,
     >           0,2,04,1)
            call vector_part_x(y1,vec_inf,1,mat_con,comm_context,
     >           0,1,11,2)
            call bvcler(x1,vec_inf)
            call vector_part_x(x1,vec_inf,1,mat_con,comm_context,
     >           0,1,04,1)
            call vector_part_x(x1,vec_inf,2,mat_con,comm_context,
     >           0,2,11,1)
         else if (is_dia.or.is_grd.or.is_row) then
            if (do_trace)
     >           call pd0('- right seq row/dia prelim exc$')
            call vector_part_x(x1,vec_inf,1,mat_con,comm_context,
     >           0,1,04,1)
            call vector_part_x(x1,vec_inf,2,mat_con,comm_context,
     >           0,2,11,1)
         else if (is_col) then
            if (do_trace) 
     >           call pd0('- right seq col updt$')
            call vector_part_x(y1,vec_inf,2,mat_con,comm_context,
     >           0,2,04,1)
            call vector_part_x(y1,vec_inf,1,mat_con,comm_context,
     >           0,1,11,2)
         else
            call pe0('Strange case no_left_seq$')
            call stop_connections('cg_mvp2$')
         endif
      endif


C     Regular mvp after the communication stage
C----
      if (left_seq.and.(is_col.or.is_sym)) then
         if (do_trace)
     >        call pd0('- left sequence col/sym fmt mvp$')
         call mvp(y2,x2,vec_inf, matrix,mat_ptr,mat_idx,
     >        0,update2,do_diag2)
      endif
      if (is_row.or.is_sym.or.is_dia.or.is_grd) then
         if (do_trace)
     >        call pd0('- right sequence, row/sym/dia mvp$')
         call mvp(y1, x1,vec_inf, matrix,mat_ptr,mat_idx,
     >        0,update2,do_diag2)
      endif



      return
      end
C----------------------------------------------------------------
      subroutine cg_mvp1(
     >     y,x, vec_inf,
     >     is_row,is_col,is_sym,is_dia,is_grd,trans,
     >     matrix,mat_ptr,mat_idx,mat_con,comm_context)

C     Arguments
C---- 
      double precision x(*),y(*), matrix(*)
      integer vec_inf(*),mat_ptr(*),mat_idx(*),
     >     mat_con(*),comm_context(*)
      logical trans,is_row,is_col,is_sym,is_dia,is_grd

C     Tranceive trace information
C----
      integer tx_trace_int
      common /tx_int_block/tx_trace_int
      character*85 tx_trace_string
      common /tx_string_block/tx_trace_string

C     Row-transpose and column storage:
C     computation for transpose product before the communication stage
C----
      if ((is_col.neqv.trans).or.is_sym) then
         call bvcler(x,vec_inf)
         call mvp(y,x,vec_inf,
     >     matrix,mat_ptr,mat_idx,1,.false.,.true.)
      endif

C     Exchange of boundary data;
C----
      tx_trace_string = 'Mvp$'
      tx_trace_int = 0
      if (is_sym .or. is_col.neqv.trans) then
         call vector_part_x(y,vec_inf,1,mat_con,comm_context,
     >        0,2,04,1)
         call vector_part_x(y,vec_inf,2,mat_con,comm_context,
     >        0,1,11,2)
      endif
      if (is_sym .or. .not. (is_col.neqv.trans)) then
         call vector_part_x(x,vec_inf,1,mat_con,comm_context,
     >        0,1,04,1)
         call vector_part_x(x,vec_inf,2,mat_con,comm_context,
     >        0,2,11,1)
      endif

C     More products
C----
      if ((is_col.eqv.trans).or.is_sym) call mvp(y,x,vec_inf,
     >     matrix,mat_ptr,mat_idx,0,
     >     is_sym,.not.is_sym)

      return
      end
C----------------------------------------------------------------
      subroutine calc_true_res(grad,tmp, itr,rhs,vec_inf,
     >     dots,maxdot,comm_context,
     >     matrix,mat_ptr,mat_idx,mat_con,
     >     is_row,is_col,is_sym,is_dia,is_grd,ata_prod,
     >     iter,lvs,norm_save,norm,trace)

C     Arguments
C----
      double precision grad(*),tmp(*),itr(*),rhs(*), matrix(*),
     >     dots(*), norm_save
      integer vec_inf(*),lvs,iter,
     >     mat_ptr(*),mat_idx(*),mat_con(*),maxdot,comm_context(*)
      logical is_row,is_col,is_sym,is_dia,is_grd,
     >     norm, ata_prod,trace

C     Local
C----
      double precision ddum

      if (norm.and.trace) call pd0('Calc true res for plot$')
      call cg_mvp1(
     >     grad,itr, vec_inf,
     >     is_row,is_col,is_sym,is_dia,is_grd,.false.,
     >     matrix,mat_ptr,mat_idx,mat_con,comm_context)
      if (.not. ata_prod) then
         call bvsub_ob(grad,vec_inf, grad,rhs)
      else
         call bvsub_ob(tmp,vec_inf, grad,rhs)
         call cg_mvp1(
     >        grad,tmp, vec_inf,
     >        is_row,is_col,is_sym,is_dia,is_grd,.true.,
     >        matrix,mat_ptr,mat_idx,mat_con,comm_context)
      endif
      if (norm) then
         call cddot(grad,grad, vec_inf,dots,maxdot, 1,
     >        comm_context,'true res rtr$')
         call ddotv(ddum,dots,1,comm_context)
         call addflp(2*lvs)
         norm_save = sqrt(ddum)
      endif

      return
      end
C----------------------------------------------------------------
      subroutine default_iteration_parameters(buffer,nitems,cbuffer,
     >     precis)

C     Argument
C----
      integer buffer(*),nitems(1)
      character*5 cbuffer
      double precision precis(1)

C     Check buffer length; make all parameters zero initially
C----
      call force_range(nitems(1),15,-1,
     >     'Default iter pars; buffer length$')
      nitems(1) = 15
      call inulv(buffer,nitems(1))

      cbuffer = 'cg'
C     method is cg
      buffer(1) = 2
C     trunc = 1
      buffer(5) = 1
C     max iterations = 100
      buffer(6) = 100
      buffer(12) = 0
C     stopping test: norm g relative to Ax-b
      buffer(13) = 1
      buffer(14) = 4
C     tol = 4
      precis(1) = 1.d-4

      return
      end
C----------------------------------------------------------------
      subroutine gg_stop_tests(stop_testee,stop_tester,
     >     precision,stop_quant,stop_type,
     >     iter, xmx,it_update_mul,gtg,gth,hth, trace1,trace2)

C     Arguments
C----
      double precision stop_testee,stop_tester, precision,
     >     xmx,it_update_mul,gtg,gth,hth
      integer iter, stop_quant,stop_type
      logical trace1,trace2

      double precision
     >     a_norm(1),x_norm,b_norm
      common /itrmet_r/
     >     a_norm,x_norm,b_norm

      if (stop_quant.eq.4.and.iter.gt.1) then
         stop_testee = xmx * it_update_mul**2
         if (trace2) call pd1d('X-X stop test$',stop_testee)
      else if (stop_quant.eq.1) then
         stop_testee = gtg
         if (trace2) call pd1d('GG stop test$',stop_testee)
      else if (stop_quant.eq.2) then
         stop_testee = gth
         if (trace2) call pd1d('GH stop test$',stop_testee)
      else if (stop_quant.eq.3) then
         stop_testee = hth
         if (trace2) call pd1d('HH stop test$',stop_testee)
      else if (stop_quant.ne.4) then
         call pe1i('Invalid stopping test$',stop_quant)
         call stop_connections('GG stop tests$')
      endif
      stop_testee = sqrt(stop_testee)
      if (iter.eq.1 .and. stop_type.eq.2) then
         if (stop_quant.eq.1) stop_tester = gtg
         if (stop_quant.eq.2) stop_tester = gth
         if (stop_quant.eq.3) stop_tester = hth
         stop_tester = precision*sqrt(stop_tester)
         if (trace1) call pt1d('Stop when reaching$',stop_tester)
      else if (iter.eq.2 .and.
     >     stop_type.eq.2.and.stop_quant.eq.4) then
         stop_tester = precision*sqrt(xmx)
         if (trace1) call pt1d('Stop when reaching$',stop_tester)
      else if (stop_type.eq.4) then
         stop_tester = precision*(a_norm(1)*x_norm+b_norm)
         if (trace2) call pd1d('Current axb level$',stop_tester)
      endif

      return
      end
C----------------------------------------------------------------
      subroutine init_stop_tests(stop_tester,
     >     precision,stop_quant,stop_type,
     >     n_xtx_dots,n_xmx_dots,n_gg_dots,n_gh_dots,n_hh_dots,
     >     matrix,mat_ptr,mat_idx,comm_context,
     >     rhs,tmp, vec_inf,lvs,dots,maxdot, b_norm_save,
     >     need_g_norm,plot_true_res,no_prec,trace)

C     Arguments
C----
      integer n_xtx_dots,n_xmx_dots,n_gg_dots,n_gh_dots,n_hh_dots,
     >     stop_quant,stop_type,
     >     vec_inf(*),lvs,mat_ptr(*),mat_idx(*),maxdot,
     >     comm_context(*)
      double precision stop_tester, matrix(*),precision,
     >     rhs(*),tmp(*),dots(*),b_norm_save
      logical need_g_norm, plot_true_res,no_prec, trace

      double precision
     >     a_norm(1),x_norm,b_norm
      common /itrmet_r/
     >     a_norm,x_norm,b_norm

      stop_tester = 1.d0
      if (stop_type.eq.1) stop_tester = precision
      if (stop_type.eq.3.or.stop_type.eq.4
     >     .or. plot_true_res) then
         call bvcopy_ob(tmp,vec_inf,rhs)
         call cnorm(tmp,vec_inf,dots,maxdot,1,
     >        comm_context,'rhs norm$')
         call ddotv(b_norm,dots,1,comm_context)
         call addflp(2*lvs)
         b_norm = sqrt(b_norm)
         if (plot_true_res) b_norm_save = b_norm
      endif
      if (stop_type.eq.3) stop_tester = precision*b_norm
      if (stop_type.ne.2.and.trace)
     >     call pt1d('Stop test (relative) set to$',stop_tester)
      n_xtx_dots = 0
      if (stop_type.eq.4) then
         call matrix_norm_estimate(a_norm,matrix,mat_ptr,mat_idx,
     >        comm_context,vec_inf)
         n_xtx_dots = 1
         if (trace) call pd1d('A norm est$',a_norm(1))
      endif
      n_xmx_dots = 0
      if (stop_quant.eq.4) n_xmx_dots = 1
      n_gg_dots = 0
      if ((stop_quant.eq.1.and..not.no_prec) .or.
     >     need_g_norm) n_gg_dots = 1
      n_gh_dots = 1
      n_hh_dots = 0
      if (stop_quant.eq.3.and..not.no_prec) n_hh_dots = 1

      return
      end
C----------------------------------------------------------------
      subroutine report_iter_parms(chan,
     >     precision,max_iter,stop_quant,stop_type)

C     Argument
C----
      integer chan,
     >     max_iter,stop_quant,stop_type
      double precision precision

      integer
     >     trunc,restart,mod_gramsch
      common /itrmet_vals/
     >     trunc,restart,mod_gramsch


      double precision
     >     a_norm(1),x_norm,b_norm
      common /itrmet_r/
     >     a_norm,x_norm,b_norm

      write(chan,*) '================ Iterative method'
      write(chan,*) 
     >     '-------------------------------------------------------'
      write(chan,*) '[1] Method'
      write(chan,*) '(1) stationary iteration (2) symmetric CG'
      write(chan,*) '(5) ns CG (6) BiCG'
      write(chan,*) '[2] Combined dot products (0/1)'
      write(chan,*) '[3] Minimal Residual? (0/1/2=modgs)'
      write(chan,*) '[5] Truncate? (n)'
      write(chan,*) '[6] Max iterations? (n; default=@(maxitr))'
      write(chan,*) '[7] Use A-inner product (0/1)'
      write(chan,*) '[8] Eliminate search directions (0/1)'
      write(chan,*) '[9] Regenerate residual (0/#it)'
      write(chan,*) '[10] Normal equation (0/1)'
      write(chan,*) '[11] Squared method bicgs'
      write(chan,*) '[12] Restart (0/#it)'
      write(chan,*) '[16] Tolerance (power of ten; default=4)'
      if (mod_gramsch.gt.0) write(chan,*) 'Modified GS',mod_gramsch
      if (stop_type.eq.1)
     >     write(chan,*) '---- Stop on absolute size'
      if (stop_type.eq.2)
     >     write(chan,*) '---- Stop on relative size'
      if (stop_type.eq.3)
     >     write(chan,*) '---- Stop on rhs size'
      if (stop_type.eq.4)
     >     write(chan,*) '---- Stop on Ax+b norm'
      if (stop_quant.eq.1)
     >     write(chan,*) '---- stop on |g|'
      if (stop_quant.eq.2)
     >     write(chan,*) '---- stop on |g^th|'
      if (stop_quant.eq.3)
     >     write(chan,*) '---- stop on |h|'
      if (stop_quant.eq.4)
     >     write(chan,*) '---- stop on |x-x|'
      write(chan,*) '---- maxit',max_iter
      write(chan,*) '---- Reduction:',precision

      return
      end
C----------------------------------------------------------------
      subroutine true_x_norm(xnorm,
     >     tmp,itr,rhs,vec_inf, matrix,mat_ptr,mat_idx,mat_con,
     >     is_row,is_col,is_sym,is_dia,is_grd,
     >     dots,maxdot,comm_context,
     >     lvs,trace)

C     Arguments
C----
      integer lvs,vec_inf(*),
     >     mat_ptr(*),mat_idx(*),mat_con(*),maxdot,comm_context(*)
      double precision xnorm,tmp(*),itr(*),rhs(*),
     >     matrix(*),dots(*)
      logical is_row,is_col,is_sym,is_dia,is_grd, trace

      if (trace) call pd1i('MVP true final residual$',0)
      call cg_mvp1(
     >     tmp,itr,vec_inf,
     >     is_row,is_col,is_sym,is_dia,is_grd,.false.,
     >     matrix,mat_ptr,mat_idx,mat_con,comm_context)
      call bvsub_ob(tmp,vec_inf,tmp,rhs)
      call cddot(tmp,tmp,vec_inf,dots,maxdot,1,
     >     comm_context,'final res$')
      call ddotv(xnorm,dots,1,comm_context)
      call addflp(2*lvs)
      xnorm= sqrt(xnorm)

      return
      end
C================================================================
C     Hessenberg matrix analysis
C================================================================
C----------------------------------------------------------------
C     Store the |r_{i+1}|/|r_i| coefficients in the
C     hessenberg matrix. The column will only be finished
C     after a call to hescol_d.
C----------------------------------------------------------------
      subroutine split_recur_hescol_u(hssbrg,
     >     betas,hess_wd,hess_bw,itno,trace)

C     Arguments
C----
      integer hess_bw,hess_wd,itno
      double precision hssbrg(hess_wd,hess_wd),betas(*)
      logical trace

C     Local
C----
      integer upper,it

      upper = min(hess_bw,itno-1)

      do 10 it=1,upper
         hssbrg(itno-it,itno) = hssbrg(itno-it+1,itno-it)*betas(it)
 10   continue
      hssbrg(itno,itno) = 0.d0
      do 20 it=upper-1,0,-1
         hssbrg(itno-it,itno) =
     >        hssbrg(itno-it,itno) - hssbrg(itno-(it+1),itno)
 20   continue

      if (trace.and.upper.ge.1) call pdar1i
     >     ('Hess col init (split rec)$', hssbrg(1,itno),itno,
     >     ' col:$',itno)

      return
      end
C----------------------------------------------------------------
C     Store the pivots of the factored Hessenberg matrix
C----------------------------------------------------------------
      subroutine split_recur_hescol_d(hssbrg,alpha,
     >     hess_wd,hess_bw,itno,trace)

C     Arguments
C----
      integer hess_bw,hess_wd,itno
      double precision hssbrg(hess_wd,hess_wd),alpha
      logical trace

      if (itno.eq.1) then
         hssbrg(itno,itno) = 1.d0/alpha
      else
         hssbrg(itno,itno) = hssbrg(itno,itno)+1.d0/alpha
      endif
      hssbrg(itno+1,itno) = -1.d0/alpha

      return
      end
C----------------------------------------------------------------
C     For three term recurrences constructing H is a lot easier
C----------------------------------------------------------------
      subroutine three_term_hescol(col, hssbrg,hess_bw,hess_wd,itno,
     >     rar_rr_quot,rr_rr_quot,trace)

C     Arguments
C----
      integer hess_bw,hess_wd,itno
      double precision col(*), hssbrg(-1:hess_bw-2,hess_wd),
     >     rar_rr_quot,rr_rr_quot
      logical trace

C     Local
C----
      double precision up,dia,lo

C     use formulas (37,38) and zero row sum fact
      if (itno.eq.1) then
         up = 0.d0
      else
         up = hssbrg(-1,itno-1) * rr_rr_quot
      endif
      dia = rar_rr_quot
      lo = -dia-up

      hssbrg(1,itno) = up
      hssbrg(0,itno) = dia
      hssbrg(-1,itno) = lo
      call vvcopy(col,hssbrg(-1,itno),3)

      if (trace) call pdar('Hess col (trid)$',hssbrg(-1,itno),
     >     min(3,itno+1))

      return
      end
C----------------------------------------------------------------
C     Three term (actually 2) recurrences for steepest descent
C----------------------------------------------------------------
      subroutine ttsdes(col, rar,rr)

C     Arguments
C----
      double precision col(*), rar,rr

C     Local
C----
      double precision up,dia,lo

      up = 0.d0
      dia = rar / rr
      lo = -dia

      col(1) = lo
      col(2) = dia
      col(3) = up

      return
      end
C----------------------------------------------------------------
      subroutine bcgs_left_hescol(col,alpha_left, agr,gr,
     >     vec_inf,lvs,dots,maxdot,comm_context,trace)

C     Arguments
C----
      double precision col(*),alpha_left, agr(*),gr(*),dots(*)
      integer vec_inf(*),lvs,maxdot,comm_context(*)
      logical trace

C     Local
C----
      double precision ddum1,ddum2

      call cddot(agr,gr, vec_inf,dots,maxdot,2,
     >     comm_context,'rAr bcgs$')
      call cddot(agr,agr, vec_inf,dots,maxdot,1,
     >     comm_context,'ArAr bcgs$')
      call addflp(4*lvs)
      call ddotv(ddum1,dots,1,comm_context)
      call ddotv(ddum2,dots,2,comm_context)
      alpha_left = ddum2/ddum1
      call ttsdes(col,ddum1,ddum2)
      if (trace) call pdar('BiCGS left hescols$',col,2)

      return
      end
C----------------------------------------------------------------
C     Update the QU factorization of the Hessenberg matrix.
C     Columns itno of H has just been updated with iterative
C     method coefficients.
C----------------------------------------------------------------
      subroutine hessqr(orthes,hesbrg, hess_wd,hess_bw,
     >     norms,normg1, hess_Qrow,hess_Rcol,erguess,
     >     itno,r_itno, trace)
      
C     Arguments
C---- 
      integer hess_bw,hess_wd, r_itno,itno
      double precision orthes(hess_wd,hess_wd),
     >     hesbrg(hess_wd,hess_wd),norms(*),
     >     hess_Qrow(*),hess_Rcol(0:1),
     >     normg1,erguess
      logical trace
      
C     Local
C---- 
      integer it, err_col,nold_col
      double precision cof

C     how many trailing directions?
C     (always 1, in principle equal to the iteration number-1,
C     but limited by truncation.)
C     total bandwidth of hessenberg matrix in current column
C----
      err_col = itno+1
      nold_col = itno-1

C     Preserve orthogonal last column
C----
      if (itno.gt.1) then
         call vvcopy(orthes(1,err_col),orthes(1,itno),itno+1)
      else
         orthes(1,2) = 1.d0
      endif

C     Initialize latest column of orthogonal hess matrix
C     The normalization makes this Hess matrix equal to that
C     from the bi-normalized Bicg algorithm.
C     (note that orthes columns can be longer than hesbrg columns)
C----
      if (trace) call pdar('H new column (pre nrm)$',
     >     hesbrg(1,itno),itno+1)
      if (trace) call pdar('Norms$',norms(r_itno-itno+1),itno+1)
      call vvprod(orthes(1,itno),
     >     hesbrg(1,itno),norms(r_itno-itno+1),itno+1)
      call ax(orthes(1,itno),
     >     1.d0/norms(r_itno),orthes(1,itno),itno+1)
      if (trace) call pdar('H new column (nrmd)$',
     >     orthes(1,itno),itno+1)

C     Compute inner products with previous columns
C     and subtract those columns
C----
      do 10 it=1,itno-1
         call inprod(hess_Rcol(itno-it),
     >        orthes(1,itno),orthes(1,it), it+1)
         call axby(orthes(1,itno), 1.d0,orthes(1,itno), 
     >        -hess_Rcol(itno-it),orthes(1,it), it+1)
 10   continue
      
C     Normalize
C----
      call inprod(cof,orthes(1,itno),orthes(1,itno),itno+1)
      cof = sqrt(cof)
      hess_Rcol(0) = cof
      call ax(orthes(1,itno), 1.d0/cof,orthes(1,itno), itno+1)
      hess_Qrow(itno) = orthes(1,itno)

C     Compute orthonormal column it+1
C----
      call inprod(cof,orthes(1,itno),orthes(1,err_col),itno+1)
      call axby(orthes(1,err_col),1.d0,orthes(1,err_col),
     >     -cof,orthes(1,itno),itno+1)
      call inprod(cof,orthes(1,err_col), orthes(1,err_col),itno+1)
      erguess = abs(orthes(1,err_col)*normg1/sqrt(cof))

      if (trace) then
         call pdar('Q column:$', orthes(1,itno),itno+1)
         call pdar('Q xtra col:$', orthes(1,err_col),itno+1)
         call pdar('R column (from H):$',hess_Rcol(0),itno)
         call pdar('Q first row:$',hess_Qrow,itno)
         call pd1d2d('Err est$',erguess,'(product of$',
     >        orthes(1,err_col),normg1/sqrt(cof))
      endif
      
      call ax(hess_Rcol(0),norms(r_itno)/normg1,hess_Rcol(0), itno)

      call addflp(2*itno**2)
      if (trace) call pdar('R column (lsq coefs):$',
     >     hess_Rcol(0),itno)

      return
      end
C----------------------------------------------------------------
C     From column i of the Hessenberg matrix,
C     compute column i+1 of the (polynomial) uptri matrix
C----------------------------------------------------------------
      subroutine poly_coef_from_hes_col(hes,iter,num,work)

C     Arguments
C----
      integer iter,num
      double precision work(*),hes(*)

C     Arrays for storing polynomial info
C----
      integer pol_base(2),pol_leng
      common /poly_block/pol_base,pol_leng

C     Local
C----
      integer pol

      pol = pol_base(num)-1

      if (iter.eq.1) then
         work(pol+2) = -work(pol+1)*hes(2)
      else
         work(pol+2) = (work(pol+2)-work(pol+1)*hes(2))
      endif

      return
      end
C----------------------------------------------------------------
      subroutine set_rr_rar_corrections(rr_cor,rar_cor,
     >     h2,h1, alpha,alpha_left, work)

C     Arguments
C----
      double precision rr_cor,rar_cor, h2(*),h1(*),
     >     alpha,alpha_left, work(*)
      
C     Arrays for storing polynomial info
C----
      integer pol_base(2),pol_leng
      common /poly_block/pol_base,pol_leng

C     Local
C----
      integer pol1,pol2
      double precision pol1_quot,pol2_quot
      data pol1_quot,pol2_quot/0.d0,0.d0/

      pol1 = pol_base(1)-1
      pol2 = pol_base(2)-1

      rr_cor = h2(1)/h1(1)
      pol1_quot = pol1_quot - h1(2)
      pol2_quot = pol2_quot - h2(2)
      rar_cor = pol1_quot - pol2_quot

      return
      end
C----------------------------------------------------------------
      subroutine init_poly_coef(work)

C     Arguments
C----
      double precision work(*)

C     Arrays for storing polynomial info
C----
      integer pol_base(2),pol_leng
      common /poly_block/pol_base,pol_leng

      call nulv(work(pol_base(1)),pol_leng)
      call nulv(work(pol_base(2)),pol_leng)
      work(pol_base(1)) = 1.d0
      work(pol_base(2)) = 1.d0

      return
      end
C----------------------------------------------------------------
      subroutine set_poly_info(pol_b1,pol_b2,pol_len)

C     Arguments
C----
      integer pol_len,pol_b1,pol_b2

C     Arrays for storing polynomial info
C----
      integer pol_base(2),pol_leng
      common /poly_block/pol_base,pol_leng

      pol_base(1) = pol_b1
      pol_base(2) = pol_b2
      pol_leng = pol_len

      return
      end
C----------------------------------------------------------------
      subroutine hess_eigenvalues(hssbrg,xhss,hess_wd,trunc,
     >     itvals, itno)

C     Arguments
C----
      integer hess_wd,trunc,itno
      double precision hssbrg(-1:trunc,hess_wd),xhss(hess_wd,-1:trunc),
     >     itvals(*)

C     Local
C----
      double precision ar1(1),ar2(1)
      integer ldz,info,count, loc

      do 10 count=1,itno
         xhss(count,0) = hssbrg(0,count)
         xhss(count,1) = hssbrg(1,count)
 10   continue

      ldz = 1
      call dstev('n',itno,xhss(1,0),xhss(1,1),
     >     ar1,ldz,ar2,info)
      call force_range(info,0,0,'Finding hess eigenvalues$')

      loc = (itno*(itno-1))/2
      do 20 count=1,itno
         itvals(loc+count) = xhss(count,0)
 20   continue

      return
      end
C================================================================
C     Routines for methods that keep old search directions
C================================================================
C----------------------------------------------------------------
C     Solve the coefficients u_{in+1} for generating the next
C     search direction it+1; this involves it old coefficients
C----------------------------------------------------------------
      subroutine hess_ucol_comp(ucol,ntail, pap,par,ndots,iter,
     >     trace)

C     Arguments
C----
      integer ndots,ntail,iter
      double precision ucol(ntail), pap(*),par(*)
      logical trace

C     Local
C----
      integer count,total

      total = min(ndots,ntail)
      do 10 count=1,total
         ucol(count) = -par(count) / pap(iter-count+1)
 10   continue

      if (trace) call pdar('Hess ucol from pAr$',ucol,total)

      return
      end
C----------------------------------------------------------------
      subroutine full_recur_hescol(hescol,vav_dots,ndots,gh_dots,
     >     hesbrg, r_iter,iter, hess_bw,hess_wd, trace)

C     Arguments
C----
      integer ndots, r_iter,iter, hess_bw,hess_wd
      double precision hescol(*),hesbrg(hess_wd,hess_wd),
     >     vav_dots(*),gh_dots(*)
      logical trace

C     Local
C----
      integer count

      call nulv(hesbrg(1,iter),hess_wd)
      do 10 count=1,ndots
         hesbrg(iter+1-count,iter) = hescol(count+1)
 10   continue
      hesbrg(iter+1,iter) = hescol(1)

      if (trace) call pdar('Hess col (full rec)$',hescol,iter+1)

      return
      end
