#include "blaswrap.h"
#include "f2c.h"

/* Subroutine */ int claqr2_(logical *wantt, logical *wantz, integer *n, 
	integer *ktop, integer *kbot, integer *nw, complex *h__, integer *ldh,
	 integer *iloz, integer *ihiz, complex *z__, integer *ldz, integer *
	ns, integer *nd, complex *sh, complex *v, integer *ldv, integer *nh, 
	complex *t, integer *ldt, integer *nv, complex *wv, integer *ldwv, 
	complex *work, integer *lwork)
{
/*  -- LAPACK auxiliary routine (version 3.1) --   
       Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..   
       November 2006   


       This subroutine is identical to CLAQR3 except that it avoids   
       recursion by calling CLAHQR instead of CLAQR4.   


       ******************************************************************   
       Aggressive early deflation:   

       This subroutine accepts as input an upper Hessenberg matrix   
       H and performs an unitary similarity transformation   
       designed to detect and deflate fully converged eigenvalues from   
       a trailing principal submatrix.  On output H has been over-   
       written by a new Hessenberg matrix that is a perturbation of   
       an unitary similarity transformation of H.  It is to be   
       hoped that the final version of H has many zero subdiagonal   
       entries.   

       ******************************************************************   
       WANTT   (input) LOGICAL   
            If .TRUE., then the Hessenberg matrix H is fully updated   
            so that the triangular Schur factor may be   
            computed (in cooperation with the calling subroutine).   
            If .FALSE., then only enough of H is updated to preserve   
            the eigenvalues.   

       WANTZ   (input) LOGICAL   
            If .TRUE., then the unitary matrix Z is updated so   
            so that the unitary Schur factor may be computed   
            (in cooperation with the calling subroutine).   
            If .FALSE., then Z is not referenced.   

       N       (input) INTEGER   
            The order of the matrix H and (if WANTZ is .TRUE.) the   
            order of the unitary matrix Z.   

       KTOP    (input) INTEGER   
            It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.   
            KBOT and KTOP together determine an isolated block   
            along the diagonal of the Hessenberg matrix.   

       KBOT    (input) INTEGER   
            It is assumed without a check that either   
            KBOT = N or H(KBOT+1,KBOT)=0.  KBOT and KTOP together   
            determine an isolated block along the diagonal of the   
            Hessenberg matrix.   

       NW      (input) INTEGER   
            Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).   

       H       (input/output) COMPLEX array, dimension (LDH,N)   
            On input the initial N-by-N section of H stores the   
            Hessenberg matrix undergoing aggressive early deflation.   
            On output H has been transformed by a unitary   
            similarity transformation, perturbed, and the returned   
            to Hessenberg form that (it is to be hoped) has some   
            zero subdiagonal entries.   

       LDH     (input) integer   
            Leading dimension of H just as declared in the calling   
            subroutine.  N .LE. LDH   

       ILOZ    (input) INTEGER   
       IHIZ    (input) INTEGER   
            Specify the rows of Z to which transformations must be   
            applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.   

       Z       (input/output) COMPLEX array, dimension (LDZ,IHI)   
            IF WANTZ is .TRUE., then on output, the unitary   
            similarity transformation mentioned above has been   
            accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.   
            If WANTZ is .FALSE., then Z is unreferenced.   

       LDZ     (input) integer   
            The leading dimension of Z just as declared in the   
            calling subroutine.  1 .LE. LDZ.   

       NS      (output) integer   
            The number of unconverged (ie approximate) eigenvalues   
            returned in SR and SI that may be used as shifts by the   
            calling subroutine.   

       ND      (output) integer   
            The number of converged eigenvalues uncovered by this   
            subroutine.   

       SH      (output) COMPLEX array, dimension KBOT   
            On output, approximate eigenvalues that may   
            be used for shifts are stored in SH(KBOT-ND-NS+1)   
            through SR(KBOT-ND).  Converged eigenvalues are   
            stored in SH(KBOT-ND+1) through SH(KBOT).   

       V       (workspace) COMPLEX array, dimension (LDV,NW)   
            An NW-by-NW work array.   

       LDV     (input) integer scalar   
            The leading dimension of V just as declared in the   
            calling subroutine.  NW .LE. LDV   

       NH      (input) integer scalar   
            The number of columns of T.  NH.GE.NW.   

       T       (workspace) COMPLEX array, dimension (LDT,NW)   

       LDT     (input) integer   
            The leading dimension of T just as declared in the   
            calling subroutine.  NW .LE. LDT   

       NV      (input) integer   
            The number of rows of work array WV available for   
            workspace.  NV.GE.NW.   

       WV      (workspace) COMPLEX array, dimension (LDWV,NW)   

       LDWV    (input) integer   
            The leading dimension of W just as declared in the   
            calling subroutine.  NW .LE. LDV   

       WORK    (workspace) COMPLEX array, dimension LWORK.   
            On exit, WORK(1) is set to an estimate of the optimal value   
            of LWORK for the given values of N, NW, KTOP and KBOT.   

       LWORK   (input) integer   
            The dimension of the work array WORK.  LWORK = 2*NW   
            suffices, but greater efficiency may result from larger   
            values of LWORK.   

            If LWORK = -1, then a workspace query is assumed; CLAQR2   
            only estimates the optimal workspace size for the given   
            values of N, NW, KTOP and KBOT.  The estimate is returned   
            in WORK(1).  No error message related to LWORK is issued   
            by XERBLA.  Neither H nor Z are accessed.   

       ================================================================   
       Based on contributions by   
          Karen Braman and Ralph Byers, Department of Mathematics,   
          University of Kansas, USA   

       ==================================================================   

       ==== Estimate optimal workspace. ====   

       Parameter adjustments */
    /* Table of constant values */
    static complex c_b1 = {0.f,0.f};
    static complex c_b2 = {1.f,0.f};
    static integer c__1 = 1;
    static integer c_n1 = -1;
    static logical c_true = TRUE_;
    
    /* System generated locals */
    integer h_dim1, h_offset, t_dim1, t_offset, v_dim1, v_offset, wv_dim1, 
	    wv_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4;
    real r__1, r__2, r__3, r__4, r__5, r__6;
    complex q__1, q__2;
    /* Builtin functions */
    double r_imag(complex *);
    void r_cnjg(complex *, complex *);
    /* Local variables */
    static integer i__, j;
    static complex s;
    static integer jw;
    static real foo;
    static integer kln;
    static complex tau;
    static integer knt;
    static real ulp;
    static integer lwk1, lwk2;
    static complex beta;
    static integer kcol, info, ifst, ilst, ltop, krow;
    extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex *
	    , integer *, complex *, complex *, integer *, complex *), 
	    cgemm_(char *, char *, integer *, integer *, integer *, complex *, 
	     complex *, integer *, complex *, integer *, complex *, complex *, 
	     integer *), ccopy_(integer *, complex *, integer 
	    *, complex *, integer *);
    static integer infqr, kwtop;
    extern /* Subroutine */ int slabad_(real *, real *), cgehrd_(integer *, 
	    integer *, integer *, complex *, integer *, complex *, complex *, 
	    integer *, integer *), clarfg_(integer *, complex *, complex *, 
	    integer *, complex *);
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int clahqr_(logical *, logical *, integer *, 
	    integer *, integer *, complex *, integer *, complex *, integer *, 
	    integer *, complex *, integer *, integer *), clacpy_(char *, 
	    integer *, integer *, complex *, integer *, complex *, integer *), claset_(char *, integer *, integer *, complex *, complex 
	    *, complex *, integer *);
    static real safmin, safmax;
    extern /* Subroutine */ int cunghr_(integer *, integer *, integer *, 
	    complex *, integer *, complex *, complex *, integer *, integer *),
	     ctrexc_(char *, integer *, complex *, integer *, complex *, 
	    integer *, integer *, integer *, integer *);
    static real smlnum;
    static integer lwkopt;


    h_dim1 = *ldh;
    h_offset = 1 + h_dim1;
    h__ -= h_offset;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1;
    z__ -= z_offset;
    --sh;
    v_dim1 = *ldv;
    v_offset = 1 + v_dim1;
    v -= v_offset;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1;
    t -= t_offset;
    wv_dim1 = *ldwv;
    wv_offset = 1 + wv_dim1;
    wv -= wv_offset;
    --work;

    /* Function Body   
   Computing MIN */
    i__1 = *nw, i__2 = *kbot - *ktop + 1;
    jw = min(i__1,i__2);
    if (jw <= 2) {
	lwkopt = 1;
    } else {

/*        ==== Workspace query call to CGEHRD ==== */

	i__1 = jw - 1;
	cgehrd_(&jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &work[1], &
		c_n1, &info);
	lwk1 = (integer) work[1].r;

/*        ==== Workspace query call to CUNGHR ==== */

	i__1 = jw - 1;
	cunghr_(&jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &work[1], &
		c_n1, &info);
	lwk2 = (integer) work[1].r;

/*        ==== Optimal workspace ==== */

	lwkopt = jw + max(lwk1,lwk2);
    }

/*     ==== Quick return in case of workspace query. ==== */

    if (*lwork == -1) {
	r__1 = (real) lwkopt;
	q__1.r = r__1, q__1.i = 0.f;
	work[1].r = q__1.r, work[1].i = q__1.i;
	return 0;
    }

/*     ==== Nothing to do ...   
       ... for an empty active block ... ==== */
    *ns = 0;
    *nd = 0;
    if (*ktop > *kbot) {
	return 0;
    }
/*     ... nor for an empty deflation window. ==== */
    if (*nw < 1) {
	return 0;
    }

/*     ==== Machine constants ==== */

    safmin = slamch_("SAFE MINIMUM");
    safmax = 1.f / safmin;
    slabad_(&safmin, &safmax);
    ulp = slamch_("PRECISION");
    smlnum = safmin * ((real) (*n) / ulp);

/*     ==== Setup deflation window ====   

   Computing MIN */
    i__1 = *nw, i__2 = *kbot - *ktop + 1;
    jw = min(i__1,i__2);
    kwtop = *kbot - jw + 1;
    if (kwtop == *ktop) {
	s.r = 0.f, s.i = 0.f;
    } else {
	i__1 = kwtop + (kwtop - 1) * h_dim1;
	s.r = h__[i__1].r, s.i = h__[i__1].i;
    }

    if (*kbot == kwtop) {

/*        ==== 1-by-1 deflation window: not much to do ==== */

	i__1 = kwtop;
	i__2 = kwtop + kwtop * h_dim1;
	sh[i__1].r = h__[i__2].r, sh[i__1].i = h__[i__2].i;
	*ns = 1;
	*nd = 0;
/* Computing MAX */
	i__1 = kwtop + kwtop * h_dim1;
	r__5 = smlnum, r__6 = ulp * ((r__1 = h__[i__1].r, dabs(r__1)) + (r__2 
		= r_imag(&h__[kwtop + kwtop * h_dim1]), dabs(r__2)));
	if ((r__3 = s.r, dabs(r__3)) + (r__4 = r_imag(&s), dabs(r__4)) <= 
		dmax(r__5,r__6)) {
	    *ns = 0;
	    *nd = 1;
	    if (kwtop > *ktop) {
		i__1 = kwtop + (kwtop - 1) * h_dim1;
		h__[i__1].r = 0.f, h__[i__1].i = 0.f;
	    }
	}
	return 0;
    }

/*     ==== Convert to spike-triangular form.  (In case of a   
       .    rare QR failure, this routine continues to do   
       .    aggressive early deflation using that part of   
       .    the deflation window that converged using INFQR   
       .    here and there to keep track.) ==== */

    clacpy_("U", &jw, &jw, &h__[kwtop + kwtop * h_dim1], ldh, &t[t_offset], 
	    ldt);
    i__1 = jw - 1;
    i__2 = *ldh + 1;
    i__3 = *ldt + 1;
    ccopy_(&i__1, &h__[kwtop + 1 + kwtop * h_dim1], &i__2, &t[t_dim1 + 2], &
	    i__3);

    claset_("A", &jw, &jw, &c_b1, &c_b2, &v[v_offset], ldv);
    clahqr_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sh[kwtop], 
	    &c__1, &jw, &v[v_offset], ldv, &infqr);

/*     ==== Deflation detection loop ==== */

    *ns = jw;
    ilst = infqr + 1;
    i__1 = jw;
    for (knt = infqr + 1; knt <= i__1; ++knt) {

/*        ==== Small spike tip deflation test ==== */

	i__2 = *ns + *ns * t_dim1;
	foo = (r__1 = t[i__2].r, dabs(r__1)) + (r__2 = r_imag(&t[*ns + *ns * 
		t_dim1]), dabs(r__2));
	if (foo == 0.f) {
	    foo = (r__1 = s.r, dabs(r__1)) + (r__2 = r_imag(&s), dabs(r__2));
	}
	i__2 = *ns * v_dim1 + 1;
/* Computing MAX */
	r__5 = smlnum, r__6 = ulp * foo;
	if (((r__1 = s.r, dabs(r__1)) + (r__2 = r_imag(&s), dabs(r__2))) * ((
		r__3 = v[i__2].r, dabs(r__3)) + (r__4 = r_imag(&v[*ns * 
		v_dim1 + 1]), dabs(r__4))) <= dmax(r__5,r__6)) {

/*           ==== One more converged eigenvalue ==== */

	    --(*ns);
	} else {

/*           ==== One undflatable eigenvalue.  Move it up out of the   
             .    way.   (CTREXC can not fail in this case.) ==== */

	    ifst = *ns;
	    ctrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, &
		    ilst, &info);
	    ++ilst;
	}
/* L10: */
    }

/*        ==== Return to Hessenberg form ==== */

    if (*ns == 0) {
	s.r = 0.f, s.i = 0.f;
    }

    if (*ns < jw) {

/*        ==== sorting the diagonal of T improves accuracy for   
          .    graded matrices.  ==== */

	i__1 = *ns;
	for (i__ = infqr + 1; i__ <= i__1; ++i__) {
	    ifst = i__;
	    i__2 = *ns;
	    for (j = i__ + 1; j <= i__2; ++j) {
		i__3 = j + j * t_dim1;
		i__4 = ifst + ifst * t_dim1;
		if ((r__1 = t[i__3].r, dabs(r__1)) + (r__2 = r_imag(&t[j + j *
			 t_dim1]), dabs(r__2)) > (r__3 = t[i__4].r, dabs(r__3) 			) + (r__4 = r_imag(&t[ifst + ifst * t_dim1]), dabs(
			r__4))) {
		    ifst = j;
		}
/* L20: */
	    }
	    ilst = i__;
	    if (ifst != ilst) {
		ctrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst,
			 &ilst, &info);
	    }
/* L30: */
	}
    }

/*     ==== Restore shift/eigenvalue array from T ==== */

    i__1 = jw;
    for (i__ = infqr + 1; i__ <= i__1; ++i__) {
	i__2 = kwtop + i__ - 1;
	i__3 = i__ + i__ * t_dim1;
	sh[i__2].r = t[i__3].r, sh[i__2].i = t[i__3].i;
/* L40: */
    }


    if (*ns < jw || s.r == 0.f && s.i == 0.f) {
	if (*ns > 1 && (s.r != 0.f || s.i != 0.f)) {

/*           ==== Reflect spike back into lower triangle ==== */

	    ccopy_(ns, &v[v_offset], ldv, &work[1], &c__1);
	    i__1 = *ns;
	    for (i__ = 1; i__ <= i__1; ++i__) {
		i__2 = i__;
		r_cnjg(&q__1, &work[i__]);
		work[i__2].r = q__1.r, work[i__2].i = q__1.i;
/* L50: */
	    }
	    beta.r = work[1].r, beta.i = work[1].i;
	    clarfg_(ns, &beta, &work[2], &c__1, &tau);
	    work[1].r = 1.f, work[1].i = 0.f;

	    i__1 = jw - 2;
	    i__2 = jw - 2;
	    claset_("L", &i__1, &i__2, &c_b1, &c_b1, &t[t_dim1 + 3], ldt);

	    r_cnjg(&q__1, &tau);
	    clarf_("L", ns, &jw, &work[1], &c__1, &q__1, &t[t_offset], ldt, &
		    work[jw + 1]);
	    clarf_("R", ns, ns, &work[1], &c__1, &tau, &t[t_offset], ldt, &
		    work[jw + 1]);
	    clarf_("R", &jw, ns, &work[1], &c__1, &tau, &v[v_offset], ldv, &
		    work[jw + 1]);

	    i__1 = *lwork - jw;
	    cgehrd_(&jw, &c__1, ns, &t[t_offset], ldt, &work[1], &work[jw + 1]
		    , &i__1, &info);
	}

/*        ==== Copy updated reduced window into place ==== */

	if (kwtop > 1) {
	    i__1 = kwtop + (kwtop - 1) * h_dim1;
	    r_cnjg(&q__2, &v[v_dim1 + 1]);
	    q__1.r = s.r * q__2.r - s.i * q__2.i, q__1.i = s.r * q__2.i + s.i 
		    * q__2.r;
	    h__[i__1].r = q__1.r, h__[i__1].i = q__1.i;
	}
	clacpy_("U", &jw, &jw, &t[t_offset], ldt, &h__[kwtop + kwtop * h_dim1]
		, ldh);
	i__1 = jw - 1;
	i__2 = *ldt + 1;
	i__3 = *ldh + 1;
	ccopy_(&i__1, &t[t_dim1 + 2], &i__2, &h__[kwtop + 1 + kwtop * h_dim1], 
		 &i__3);

/*        ==== Accumulate orthogonal matrix in order update   
          .    H and Z, if requested.  (A modified version   
          .    of  CUNGHR that accumulates block Householder   
          .    transformations into V directly might be   
          .    marginally more efficient than the following.) ==== */

	if (*ns > 1 && (s.r != 0.f || s.i != 0.f)) {
	    i__1 = *lwork - jw;
	    cunghr_(&jw, &c__1, ns, &t[t_offset], ldt, &work[1], &work[jw + 1]
		    , &i__1, &info);
	    cgemm_("N", "N", &jw, ns, ns, &c_b2, &v[v_offset], ldv, &t[
		    t_offset], ldt, &c_b1, &wv[wv_offset], ldwv);
	    clacpy_("A", &jw, ns, &wv[wv_offset], ldwv, &v[v_offset], ldv);
	}

/*        ==== Update vertical slab in H ==== */

	if (*wantt) {
	    ltop = 1;
	} else {
	    ltop = *ktop;
	}
	i__1 = kwtop - 1;
	i__2 = *nv;
	for (krow = ltop; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += 
		i__2) {
/* Computing MIN */
	    i__3 = *nv, i__4 = kwtop - krow;
	    kln = min(i__3,i__4);
	    cgemm_("N", "N", &kln, &jw, &jw, &c_b2, &h__[krow + kwtop * 
		    h_dim1], ldh, &v[v_offset], ldv, &c_b1, &wv[wv_offset], 
		    ldwv);
	    clacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &h__[krow + kwtop * 
		    h_dim1], ldh);
/* L60: */
	}

/*        ==== Update horizontal slab in H ==== */

	if (*wantt) {
	    i__2 = *n;
	    i__1 = *nh;
	    for (kcol = *kbot + 1; i__1 < 0 ? kcol >= i__2 : kcol <= i__2; 
		    kcol += i__1) {
/* Computing MIN */
		i__3 = *nh, i__4 = *n - kcol + 1;
		kln = min(i__3,i__4);
		cgemm_("C", "N", &jw, &kln, &jw, &c_b2, &v[v_offset], ldv, &
			h__[kwtop + kcol * h_dim1], ldh, &c_b1, &t[t_offset], 
			ldt);
		clacpy_("A", &jw, &kln, &t[t_offset], ldt, &h__[kwtop + kcol *
			 h_dim1], ldh);
/* L70: */
	    }
	}

/*        ==== Update vertical slab in Z ==== */

	if (*wantz) {
	    i__1 = *ihiz;
	    i__2 = *nv;
	    for (krow = *iloz; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow +=
		     i__2) {
/* Computing MIN */
		i__3 = *nv, i__4 = *ihiz - krow + 1;
		kln = min(i__3,i__4);
		cgemm_("N", "N", &kln, &jw, &jw, &c_b2, &z__[krow + kwtop * 
			z_dim1], ldz, &v[v_offset], ldv, &c_b1, &wv[wv_offset]
, ldwv);
		clacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &z__[krow + 
			kwtop * z_dim1], ldz);
/* L80: */
	    }
	}
    }

/*     ==== Return the number of deflations ... ==== */

    *nd = jw - *ns;

/*     ==== ... and the number of shifts. (Subtracting   
       .    INFQR from the spike length takes care   
       .    of the case of a rare QR failure while   
       .    calculating eigenvalues of the deflation   
       .    window.)  ==== */

    *ns -= infqr;

/*      ==== Return optimal workspace. ==== */

    r__1 = (real) lwkopt;
    q__1.r = r__1, q__1.i = 0.f;
    work[1].r = q__1.r, work[1].i = q__1.i;

/*     ==== End of CLAQR2 ==== */

    return 0;
} /* claqr2_ */