df/d4a/dbbcsd_8f_source.html

*> \brief \b DBBCSD

*

*  =========== DOCUMENTATION ===========

*

* Online html documentation available at

*            http://www.netlib.org/lapack/explore-html/

*

*> \htmlonly

*> Download DBBCSD + dependencies

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dbbcsd.f">

*> [TGZ]</a>

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dbbcsd.f">

*> [ZIP]</a>

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dbbcsd.f">

*> [TXT]</a>

*> \endhtmlonly

*

*  Definition:

*  ===========

*

*       SUBROUTINE DBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q,

*                          THETA, PHI, U1, LDU1, U2, LDU2, V1T, LDV1T,

*                          V2T, LDV2T, B11D, B11E, B12D, B12E, B21D, B21E,

*                          B22D, B22E, WORK, LWORK, INFO )

*

*       .. Scalar Arguments ..

*       CHARACTER          JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS

*       INTEGER            INFO, LDU1, LDU2, LDV1T, LDV2T, LWORK, M, P, Q

*       ..

*       .. Array Arguments ..

*       DOUBLE PRECISION   B11D( * ), B11E( * ), B12D( * ), B12E( * ),

*      $                   B21D( * ), B21E( * ), B22D( * ), B22E( * ),

*      $                   PHI( * ), THETA( * ), WORK( * )

*       DOUBLE PRECISION   U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ),

*      $                   V2T( LDV2T, * )

*       ..

*

*

*> \par Purpose:

*  =============

*>

*> \verbatim

*>

*> DBBCSD computes the CS decomposition of an orthogonal matrix in

*> bidiagonal-block form,

*>

*>

*>     [ B11 | B12 0  0 ]

*>     [  0  |  0 -I  0 ]

*> X = [----------------]

*>     [ B21 | B22 0  0 ]

*>     [  0  |  0  0  I ]

*>

*>                               [  C | -S  0  0 ]

*>                   [ U1 |    ] [  0 |  0 -I  0 ] [ V1 |    ]**T

*>                 = [---------] [---------------] [---------]   .

*>                   [    | U2 ] [  S |  C  0  0 ] [    | V2 ]

*>                               [  0 |  0  0  I ]

*>

*> X is M-by-M, its top-left block is P-by-Q, and Q must be no larger

*> than P, M-P, or M-Q. (If Q is not the smallest index, then X must be

*> transposed and/or permuted. This can be done in constant time using

*> the TRANS and SIGNS options. See DORCSD for details.)

*>

*> The bidiagonal matrices B11, B12, B21, and B22 are represented

*> implicitly by angles THETA(1:Q) and PHI(1:Q-1).

*>

*> The orthogonal matrices U1, U2, V1T, and V2T are input/output.

*> The input matrices are pre- or post-multiplied by the appropriate

*> singular vector matrices.

*> \endverbatim

*

*  Arguments:

*  ==========

*

*> \param[in] JOBU1

*> \verbatim

*>          JOBU1 is CHARACTER

*>          = 'Y':      U1 is updated;

*>          otherwise:  U1 is not updated.

*> \endverbatim

*>

*> \param[in] JOBU2

*> \verbatim

*>          JOBU2 is CHARACTER

*>          = 'Y':      U2 is updated;

*>          otherwise:  U2 is not updated.

*> \endverbatim

*>

*> \param[in] JOBV1T

*> \verbatim

*>          JOBV1T is CHARACTER

*>          = 'Y':      V1T is updated;

*>          otherwise:  V1T is not updated.

*> \endverbatim

*>

*> \param[in] JOBV2T

*> \verbatim

*>          JOBV2T is CHARACTER

*>          = 'Y':      V2T is updated;

*>          otherwise:  V2T is not updated.

*> \endverbatim

*>

*> \param[in] TRANS

*> \verbatim

*>          TRANS is CHARACTER

*>          = 'T':      X, U1, U2, V1T, and V2T are stored in row-major

*>                      order;

*>          otherwise:  X, U1, U2, V1T, and V2T are stored in column-

*>                      major order.

*> \endverbatim

*>

*> \param[in] M

*> \verbatim

*>          M is INTEGER

*>          The number of rows and columns in X, the orthogonal matrix in

*>          bidiagonal-block form.

*> \endverbatim

*>

*> \param[in] P

*> \verbatim

*>          P is INTEGER

*>          The number of rows in the top-left block of X. 0 <= P <= M.

*> \endverbatim

*>

*> \param[in] Q

*> \verbatim

*>          Q is INTEGER

*>          The number of columns in the top-left block of X.

*>          0 <= Q <= MIN(P,M-P,M-Q).

*> \endverbatim

*>

*> \param[in,out] THETA

*> \verbatim

*>          THETA is DOUBLE PRECISION array, dimension (Q)

*>          On entry, the angles THETA(1),...,THETA(Q) that, along with

*>          PHI(1), ...,PHI(Q-1), define the matrix in bidiagonal-block

*>          form. On exit, the angles whose cosines and sines define the

*>          diagonal blocks in the CS decomposition.

*> \endverbatim

*>

*> \param[in,out] PHI

*> \verbatim

*>          PHI is DOUBLE PRECISION array, dimension (Q-1)

*>          The angles PHI(1),...,PHI(Q-1) that, along with THETA(1),...,

*>          THETA(Q), define the matrix in bidiagonal-block form.

*> \endverbatim

*>

*> \param[in,out] U1

*> \verbatim

*>          U1 is DOUBLE PRECISION array, dimension (LDU1,P)

*>          On entry, an LDU1-by-P matrix. On exit, U1 is postmultiplied

*>          by the left singular vector matrix common to [ B11 ; 0 ] and

*>          [ B12 0 0 ; 0 -I 0 ].

*> \endverbatim

*>

*> \param[in] LDU1

*> \verbatim

*>          LDU1 is INTEGER

*>          The leading dimension of the array U1.

*> \endverbatim

*>

*> \param[in,out] U2

*> \verbatim

*>          U2 is DOUBLE PRECISION array, dimension (LDU2,M-P)

*>          On entry, an LDU2-by-(M-P) matrix. On exit, U2 is

*>          postmultiplied by the left singular vector matrix common to

*>          [ B21 ; 0 ] and [ B22 0 0 ; 0 0 I ].

*> \endverbatim

*>

*> \param[in] LDU2

*> \verbatim

*>          LDU2 is INTEGER

*>          The leading dimension of the array U2.

*> \endverbatim

*>

*> \param[in,out] V1T

*> \verbatim

*>          V1T is DOUBLE PRECISION array, dimension (LDV1T,Q)

*>          On entry, an LDV1T-by-Q matrix. On exit, V1T is premultiplied

*>          by the transpose of the right singular vector

*>          matrix common to [ B11 ; 0 ] and [ B21 ; 0 ].

*> \endverbatim

*>

*> \param[in] LDV1T

*> \verbatim

*>          LDV1T is INTEGER

*>          The leading dimension of the array V1T.

*> \endverbatim

*>

*> \param[in,out] V2T

*> \verbatim

*>          V2T is DOUBLE PRECISION array, dimension (LDV2T,M-Q)

*>          On entry, an LDV2T-by-(M-Q) matrix. On exit, V2T is

*>          premultiplied by the transpose of the right

*>          singular vector matrix common to [ B12 0 0 ; 0 -I 0 ] and

*>          [ B22 0 0 ; 0 0 I ].

*> \endverbatim

*>

*> \param[in] LDV2T

*> \verbatim

*>          LDV2T is INTEGER

*>          The leading dimension of the array V2T.

*> \endverbatim

*>

*> \param[out] B11D

*> \verbatim

*>          B11D is DOUBLE PRECISION array, dimension (Q)

*>          When DBBCSD converges, B11D contains the cosines of THETA(1),

*>          ..., THETA(Q). If DBBCSD fails to converge, then B11D

*>          contains the diagonal of the partially reduced top-left

*>          block.

*> \endverbatim

*>

*> \param[out] B11E

*> \verbatim

*>          B11E is DOUBLE PRECISION array, dimension (Q-1)

*>          When DBBCSD converges, B11E contains zeros. If DBBCSD fails

*>          to converge, then B11E contains the superdiagonal of the

*>          partially reduced top-left block.

*> \endverbatim

*>

*> \param[out] B12D

*> \verbatim

*>          B12D is DOUBLE PRECISION array, dimension (Q)

*>          When DBBCSD converges, B12D contains the negative sines of

*>          THETA(1), ..., THETA(Q). If DBBCSD fails to converge, then

*>          B12D contains the diagonal of the partially reduced top-right

*>          block.

*> \endverbatim

*>

*> \param[out] B12E

*> \verbatim

*>          B12E is DOUBLE PRECISION array, dimension (Q-1)

*>          When DBBCSD converges, B12E contains zeros. If DBBCSD fails

*>          to converge, then B12E contains the subdiagonal of the

*>          partially reduced top-right block.

*> \endverbatim

*>

*> \param[out] B21D

*> \verbatim

*>          B21D is DOUBLE PRECISION  array, dimension (Q)

*>          When DBBCSD converges, B21D contains the negative sines of

*>          THETA(1), ..., THETA(Q). If DBBCSD fails to converge, then

*>          B21D contains the diagonal of the partially reduced bottom-left

*>          block.

*> \endverbatim

*>

*> \param[out] B21E

*> \verbatim

*>          B21E is DOUBLE PRECISION  array, dimension (Q-1)

*>          When DBBCSD converges, B21E contains zeros. If DBBCSD fails

*>          to converge, then B21E contains the subdiagonal of the

*>          partially reduced bottom-left block.

*> \endverbatim

*>

*> \param[out] B22D

*> \verbatim

*>          B22D is DOUBLE PRECISION  array, dimension (Q)

*>          When DBBCSD converges, B22D contains the negative sines of

*>          THETA(1), ..., THETA(Q). If DBBCSD fails to converge, then

*>          B22D contains the diagonal of the partially reduced bottom-right

*>          block.

*> \endverbatim

*>

*> \param[out] B22E

*> \verbatim

*>          B22E is DOUBLE PRECISION  array, dimension (Q-1)

*>          When DBBCSD converges, B22E contains zeros. If DBBCSD fails

*>          to converge, then B22E contains the subdiagonal of the

*>          partially reduced bottom-right block.

*> \endverbatim

*>

*> \param[out] WORK

*> \verbatim

*>          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))

*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.

*> \endverbatim

*>

*> \param[in] LWORK

*> \verbatim

*>          LWORK is INTEGER

*>          The dimension of the array WORK. LWORK >= MAX(1,8*Q).

*>

*>          If LWORK = -1, then a workspace query is assumed; the

*>          routine only calculates the optimal size of the WORK array,

*>          returns this value as the first entry of the work array, and

*>          no error message related to LWORK is issued by XERBLA.

*> \endverbatim

*>

*> \param[out] INFO

*> \verbatim

*>          INFO is INTEGER

*>          = 0:  successful exit.

*>          < 0:  if INFO = -i, the i-th argument had an illegal value.

*>          > 0:  if DBBCSD did not converge, INFO specifies the number

*>                of nonzero entries in PHI, and B11D, B11E, etc.,

*>                contain the partially reduced matrix.

*> \endverbatim

*

*> \par Internal Parameters:

*  =========================

*>

*> \verbatim

*>  TOLMUL  DOUBLE PRECISION, default = MAX(10,MIN(100,EPS**(-1/8)))

*>          TOLMUL controls the convergence criterion of the QR loop.

*>          Angles THETA(i), PHI(i) are rounded to 0 or PI/2 when they

*>          are within TOLMUL*EPS of either bound.

*> \endverbatim

*

*> \par References:

*  ================

*>

*>  [1] Brian D. Sutton. Computing the complete CS decomposition. Numer.

*>      Algorithms, 50(1):33-65, 2009.

*

*  Authors:

*  ========

*

*> \author Univ. of Tennessee

*> \author Univ. of California Berkeley

*> \author Univ. of Colorado Denver

*> \author NAG Ltd.

*

*> \date November 2011

*

*> \ingroup doubleOTHERcomputational

*

*  =====================================================================

      SUBROUTINE dbbcsd( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q,

     $                   theta, phi, u1, ldu1, u2, ldu2, v1t, ldv1t,

     $                   v2t, ldv2t, b11d, b11e, b12d, b12e, b21d, b21e,

     $                   b22d, b22e, work, lwork, info )

*

*  -- LAPACK computational routine (version 3.4.0) --

*  -- LAPACK is a software package provided by Univ. of Tennessee,    --

*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--

*     November 2011

*

*     .. Scalar Arguments ..

      CHARACTER          jobu1, jobu2, jobv1t, jobv2t, trans

      INTEGER            info, ldu1, ldu2, ldv1t, ldv2t, lwork, m, p, q

*     ..

*     .. Array Arguments ..

      DOUBLE PRECISION   b11d( * ), b11e( * ), b12d( * ), b12e( * ),

     $                   b21d( * ), b21e( * ), b22d( * ), b22e( * ),

     $                   phi( * ), theta( * ), work( * )

      DOUBLE PRECISION   u1( ldu1, * ), u2( ldu2, * ), v1t( ldv1t, * ),

     $                   v2t( ldv2t, * )

*     ..

*

*  ===================================================================

*

*     .. Parameters ..

      INTEGER            maxitr

      parameter( maxitr = 6 )

      DOUBLE PRECISION   hundred, meighth, one, piover2, ten, zero

      parameter( hundred = 100.0d0, meighth = -0.125d0,

     $                     one = 1.0d0, piover2 = 1.57079632679489662d0,

     $                     ten = 10.0d0, zero = 0.0d0 )

      DOUBLE PRECISION   negonecomplex

      parameter( negonecomplex = -1.0d0 )

*     ..

*     .. Local Scalars ..

      LOGICAL            colmajor, lquery, restart11, restart12,

     $                   restart21, restart22, wantu1, wantu2, wantv1t,

     $                   wantv2t

      INTEGER            i, imin, imax, iter, iu1cs, iu1sn, iu2cs,

     $                   iu2sn, iv1tcs, iv1tsn, iv2tcs, iv2tsn, j,

     $                   lworkmin, lworkopt, maxit, mini

      DOUBLE PRECISION   b11bulge, b12bulge, b21bulge, b22bulge, dummy,

     $                   eps, mu, nu, r, sigma11, sigma21,

     $                   temp, thetamax, thetamin, thresh, tol, tolmul,

     $                   unfl, x1, x2, y1, y2

*

*     .. External Subroutines ..

      EXTERNAL           dlasr, dscal, dswap, dlartgp, dlartgs, dlas2,

     $                   xerbla

*     ..

*     .. External Functions ..

      DOUBLE PRECISION   dlamch

      LOGICAL            lsame

      EXTERNAL           lsame, dlamch

*     ..

*     .. Intrinsic Functions ..

      INTRINSIC          abs, atan2, cos, max, min, sin, sqrt

*     ..

*     .. Executable Statements ..

*

*     Test input arguments

*

      info = 0

      lquery = lwork .EQ. -1

      wantu1 = lsame( jobu1, 'Y' )

      wantu2 = lsame( jobu2, 'Y' )

      wantv1t = lsame( jobv1t, 'Y' )

      wantv2t = lsame( jobv2t, 'Y' )

      colmajor = .NOT. lsame( trans, 'T' )

*

      IF( m .LT. 0 ) THEN

         info = -6

      ELSE IF( p .LT. 0 .OR. p .GT. m ) THEN

         info = -7

      ELSE IF( q .LT. 0 .OR. q .GT. m ) THEN

         info = -8

      ELSE IF( q .GT. p .OR. q .GT. m-p .OR. q .GT. m-q ) THEN

         info = -8

      ELSE IF( wantu1 .AND. ldu1 .LT. p ) THEN

         info = -12

      ELSE IF( wantu2 .AND. ldu2 .LT. m-p ) THEN

         info = -14

      ELSE IF( wantv1t .AND. ldv1t .LT. q ) THEN

         info = -16

      ELSE IF( wantv2t .AND. ldv2t .LT. m-q ) THEN

         info = -18

      END IF

*

*     Quick return if Q = 0

*

      IF( info .EQ. 0 .AND. q .EQ. 0 ) THEN

         lworkmin = 1

         work(1) = lworkmin

         return

      END IF

*

*     Compute workspace

*

      IF( info .EQ. 0 ) THEN

         iu1cs = 1

         iu1sn = iu1cs + q

         iu2cs = iu1sn + q

         iu2sn = iu2cs + q

         iv1tcs = iu2sn + q

         iv1tsn = iv1tcs + q

         iv2tcs = iv1tsn + q

         iv2tsn = iv2tcs + q

         lworkopt = iv2tsn + q - 1

         lworkmin = lworkopt

         work(1) = lworkopt

         IF( lwork .LT. lworkmin .AND. .NOT. lquery ) THEN

            info = -28

         END IF

      END IF

*

      IF( info .NE. 0 ) THEN

         CALL xerbla( 'DBBCSD', -info )

         return

      ELSE IF( lquery ) THEN

         return

      END IF

*

*     Get machine constants

*

      eps = dlamch( 'Epsilon' )

      unfl = dlamch( 'Safe minimum' )

      tolmul = max( ten, min( hundred, eps**meighth ) )

      tol = tolmul*eps

      thresh = max( tol, maxitr*q*q*unfl )

*

*     Test for negligible sines or cosines

*

      DO i = 1, q

         IF( theta(i) .LT. thresh ) THEN

            theta(i) = zero

         ELSE IF( theta(i) .GT. piover2-thresh ) THEN

            theta(i) = piover2

         END IF

      END DO

      DO i = 1, q-1

         IF( phi(i) .LT. thresh ) THEN

            phi(i) = zero

         ELSE IF( phi(i) .GT. piover2-thresh ) THEN

            phi(i) = piover2

         END IF

      END DO

*

*     Initial deflation

*

      imax = q

      DO WHILE( ( imax .GT. 1 ) .AND. ( phi(imax-1) .EQ. zero ) )

         imax = imax - 1

      END DO

      imin = imax - 1

      IF  ( imin .GT. 1 ) THEN

         DO WHILE( phi(imin-1) .NE. zero )

            imin = imin - 1

            IF  ( imin .LE. 1 ) exit

         END DO

      END IF

*

*     Initialize iteration counter

*

      maxit = maxitr*q*q

      iter = 0

*

*     Begin main iteration loop

*

      DO WHILE( imax .GT. 1 )

*

*        Compute the matrix entries

*

         b11d(imin) = cos( theta(imin) )

         b21d(imin) = -sin( theta(imin) )

         DO i = imin, imax - 1

            b11e(i) = -sin( theta(i) ) * sin( phi(i) )

            b11d(i+1) = cos( theta(i+1) ) * cos( phi(i) )

            b12d(i) = sin( theta(i) ) * cos( phi(i) )

            b12e(i) = cos( theta(i+1) ) * sin( phi(i) )

            b21e(i) = -cos( theta(i) ) * sin( phi(i) )

            b21d(i+1) = -sin( theta(i+1) ) * cos( phi(i) )

            b22d(i) = cos( theta(i) ) * cos( phi(i) )

            b22e(i) = -sin( theta(i+1) ) * sin( phi(i) )

         END DO

         b12d(imax) = sin( theta(imax) )

         b22d(imax) = cos( theta(imax) )

*

*        Abort if not converging; otherwise, increment ITER

*

         IF( iter .GT. maxit ) THEN

            info = 0

            DO i = 1, q

               IF( phi(i) .NE. zero )

     $            info = info + 1

            END DO

            return

         END IF

*

         iter = iter + imax - imin

*

*        Compute shifts

*

         thetamax = theta(imin)

         thetamin = theta(imin)

         DO i = imin+1, imax

            IF( theta(i) > thetamax )

     $         thetamax = theta(i)

            IF( theta(i) < thetamin )

     $         thetamin = theta(i)

         END DO

*

         IF( thetamax .GT. piover2 - thresh ) THEN

*

*           Zero on diagonals of B11 and B22; induce deflation with a

*           zero shift

*

            mu = zero

            nu = one

*

         ELSE IF( thetamin .LT. thresh ) THEN

*

*           Zero on diagonals of B12 and B21; induce deflation with a

*           zero shift

*

            mu = one

            nu = zero

*

         ELSE

*

*           Compute shifts for B11 and B21 and use the lesser

*

            CALL dlas2( b11d(imax-1), b11e(imax-1), b11d(imax), sigma11,

     $                  dummy )

            CALL dlas2( b21d(imax-1), b21e(imax-1), b21d(imax), sigma21,

     $                  dummy )

*

            IF( sigma11 .LE. sigma21 ) THEN

               mu = sigma11

               nu = sqrt( one - mu**2 )

               IF( mu .LT. thresh ) THEN

                  mu = zero

                  nu = one

               END IF

            ELSE

               nu = sigma21

               mu = sqrt( 1.0 - nu**2 )

               IF( nu .LT. thresh ) THEN

                  mu = one

                  nu = zero

               END IF

            END IF

         END IF

*

*        Rotate to produce bulges in B11 and B21

*

         IF( mu .LE. nu ) THEN

            CALL dlartgs( b11d(imin), b11e(imin), mu,

     $                    work(iv1tcs+imin-1), work(iv1tsn+imin-1) )

         ELSE

            CALL dlartgs( b21d(imin), b21e(imin), nu,

     $                    work(iv1tcs+imin-1), work(iv1tsn+imin-1) )

         END IF

*

         temp = work(iv1tcs+imin-1)*b11d(imin) +

     $          work(iv1tsn+imin-1)*b11e(imin)

         b11e(imin) = work(iv1tcs+imin-1)*b11e(imin) -

     $                work(iv1tsn+imin-1)*b11d(imin)

         b11d(imin) = temp

         b11bulge = work(iv1tsn+imin-1)*b11d(imin+1)

         b11d(imin+1) = work(iv1tcs+imin-1)*b11d(imin+1)

         temp = work(iv1tcs+imin-1)*b21d(imin) +

     $          work(iv1tsn+imin-1)*b21e(imin)

         b21e(imin) = work(iv1tcs+imin-1)*b21e(imin) -

     $                work(iv1tsn+imin-1)*b21d(imin)

         b21d(imin) = temp

         b21bulge = work(iv1tsn+imin-1)*b21d(imin+1)

         b21d(imin+1) = work(iv1tcs+imin-1)*b21d(imin+1)

*

*        Compute THETA(IMIN)

*

         theta( imin ) = atan2( sqrt( b21d(imin)**2+b21bulge**2 ),

     $                   sqrt( b11d(imin)**2+b11bulge**2 ) )

*

*        Chase the bulges in B11(IMIN+1,IMIN) and B21(IMIN+1,IMIN)

*

         IF( b11d(imin)**2+b11bulge**2 .GT. thresh**2 ) THEN

            CALL dlartgp( b11bulge, b11d(imin), work(iu1sn+imin-1),

     $                    work(iu1cs+imin-1), r )

         ELSE IF( mu .LE. nu ) THEN

            CALL dlartgs( b11e( imin ), b11d( imin + 1 ), mu,

     $                    work(iu1cs+imin-1), work(iu1sn+imin-1) )

         ELSE

            CALL dlartgs( b12d( imin ), b12e( imin ), nu,

     $                    work(iu1cs+imin-1), work(iu1sn+imin-1) )

         END IF

         IF( b21d(imin)**2+b21bulge**2 .GT. thresh**2 ) THEN

            CALL dlartgp( b21bulge, b21d(imin), work(iu2sn+imin-1),

     $                    work(iu2cs+imin-1), r )

         ELSE IF( nu .LT. mu ) THEN

            CALL dlartgs( b21e( imin ), b21d( imin + 1 ), nu,

     $                    work(iu2cs+imin-1), work(iu2sn+imin-1) )

         ELSE

            CALL dlartgs( b22d(imin), b22e(imin), mu,

     $                    work(iu2cs+imin-1), work(iu2sn+imin-1) )

         END IF

         work(iu2cs+imin-1) = -work(iu2cs+imin-1)

         work(iu2sn+imin-1) = -work(iu2sn+imin-1)

*

         temp = work(iu1cs+imin-1)*b11e(imin) +

     $          work(iu1sn+imin-1)*b11d(imin+1)

         b11d(imin+1) = work(iu1cs+imin-1)*b11d(imin+1) -

     $                  work(iu1sn+imin-1)*b11e(imin)

         b11e(imin) = temp

         IF( imax .GT. imin+1 ) THEN

            b11bulge = work(iu1sn+imin-1)*b11e(imin+1)

            b11e(imin+1) = work(iu1cs+imin-1)*b11e(imin+1)

         END IF

         temp = work(iu1cs+imin-1)*b12d(imin) +

     $          work(iu1sn+imin-1)*b12e(imin)

         b12e(imin) = work(iu1cs+imin-1)*b12e(imin) -

     $                work(iu1sn+imin-1)*b12d(imin)

         b12d(imin) = temp

         b12bulge = work(iu1sn+imin-1)*b12d(imin+1)

         b12d(imin+1) = work(iu1cs+imin-1)*b12d(imin+1)

         temp = work(iu2cs+imin-1)*b21e(imin) +

     $          work(iu2sn+imin-1)*b21d(imin+1)

         b21d(imin+1) = work(iu2cs+imin-1)*b21d(imin+1) -

     $                  work(iu2sn+imin-1)*b21e(imin)

         b21e(imin) = temp

         IF( imax .GT. imin+1 ) THEN

            b21bulge = work(iu2sn+imin-1)*b21e(imin+1)

            b21e(imin+1) = work(iu2cs+imin-1)*b21e(imin+1)

         END IF

         temp = work(iu2cs+imin-1)*b22d(imin) +

     $          work(iu2sn+imin-1)*b22e(imin)

         b22e(imin) = work(iu2cs+imin-1)*b22e(imin) -

     $                work(iu2sn+imin-1)*b22d(imin)

         b22d(imin) = temp

         b22bulge = work(iu2sn+imin-1)*b22d(imin+1)

         b22d(imin+1) = work(iu2cs+imin-1)*b22d(imin+1)

*

*        Inner loop: chase bulges from B11(IMIN,IMIN+2),

*        B12(IMIN,IMIN+1), B21(IMIN,IMIN+2), and B22(IMIN,IMIN+1) to

*        bottom-right

*

         DO i = imin+1, imax-1

*

*           Compute PHI(I-1)

*

            x1 = sin(theta(i-1))*b11e(i-1) + cos(theta(i-1))*b21e(i-1)

            x2 = sin(theta(i-1))*b11bulge + cos(theta(i-1))*b21bulge

            y1 = sin(theta(i-1))*b12d(i-1) + cos(theta(i-1))*b22d(i-1)

            y2 = sin(theta(i-1))*b12bulge + cos(theta(i-1))*b22bulge

*

            phi(i-1) = atan2( sqrt(x1**2+x2**2), sqrt(y1**2+y2**2) )

*

*           Determine if there are bulges to chase or if a new direct

*           summand has been reached

*

            restart11 = b11e(i-1)**2 + b11bulge**2 .LE. thresh**2

            restart21 = b21e(i-1)**2 + b21bulge**2 .LE. thresh**2

            restart12 = b12d(i-1)**2 + b12bulge**2 .LE. thresh**2

            restart22 = b22d(i-1)**2 + b22bulge**2 .LE. thresh**2

*

*           If possible, chase bulges from B11(I-1,I+1), B12(I-1,I),

*           B21(I-1,I+1), and B22(I-1,I). If necessary, restart bulge-

*           chasing by applying the original shift again.

*

            IF( .NOT. restart11 .AND. .NOT. restart21 ) THEN

               CALL dlartgp( x2, x1, work(iv1tsn+i-1), work(iv1tcs+i-1),

     $                       r )

            ELSE IF( .NOT. restart11 .AND. restart21 ) THEN

               CALL dlartgp( b11bulge, b11e(i-1), work(iv1tsn+i-1),

     $                       work(iv1tcs+i-1), r )

            ELSE IF( restart11 .AND. .NOT. restart21 ) THEN

               CALL dlartgp( b21bulge, b21e(i-1), work(iv1tsn+i-1),

     $                       work(iv1tcs+i-1), r )

            ELSE IF( mu .LE. nu ) THEN

               CALL dlartgs( b11d(i), b11e(i), mu, work(iv1tcs+i-1),

     $                       work(iv1tsn+i-1) )

            ELSE

               CALL dlartgs( b21d(i), b21e(i), nu, work(iv1tcs+i-1),

     $                       work(iv1tsn+i-1) )

            END IF

            work(iv1tcs+i-1) = -work(iv1tcs+i-1)

            work(iv1tsn+i-1) = -work(iv1tsn+i-1)

            IF( .NOT. restart12 .AND. .NOT. restart22 ) THEN

               CALL dlartgp( y2, y1, work(iv2tsn+i-1-1),

     $                       work(iv2tcs+i-1-1), r )

            ELSE IF( .NOT. restart12 .AND. restart22 ) THEN

               CALL dlartgp( b12bulge, b12d(i-1), work(iv2tsn+i-1-1),

     $                       work(iv2tcs+i-1-1), r )

            ELSE IF( restart12 .AND. .NOT. restart22 ) THEN

               CALL dlartgp( b22bulge, b22d(i-1), work(iv2tsn+i-1-1),

     $                       work(iv2tcs+i-1-1), r )

            ELSE IF( nu .LT. mu ) THEN

               CALL dlartgs( b12e(i-1), b12d(i), nu, work(iv2tcs+i-1-1),

     $                       work(iv2tsn+i-1-1) )

            ELSE

               CALL dlartgs( b22e(i-1), b22d(i), mu, work(iv2tcs+i-1-1),

     $                       work(iv2tsn+i-1-1) )

            END IF

*

            temp = work(iv1tcs+i-1)*b11d(i) + work(iv1tsn+i-1)*b11e(i)

            b11e(i) = work(iv1tcs+i-1)*b11e(i) -

     $                work(iv1tsn+i-1)*b11d(i)

            b11d(i) = temp

            b11bulge = work(iv1tsn+i-1)*b11d(i+1)

            b11d(i+1) = work(iv1tcs+i-1)*b11d(i+1)

            temp = work(iv1tcs+i-1)*b21d(i) + work(iv1tsn+i-1)*b21e(i)

            b21e(i) = work(iv1tcs+i-1)*b21e(i) -

     $                work(iv1tsn+i-1)*b21d(i)

            b21d(i) = temp

            b21bulge = work(iv1tsn+i-1)*b21d(i+1)

            b21d(i+1) = work(iv1tcs+i-1)*b21d(i+1)

            temp = work(iv2tcs+i-1-1)*b12e(i-1) +

     $             work(iv2tsn+i-1-1)*b12d(i)

            b12d(i) = work(iv2tcs+i-1-1)*b12d(i) -

     $                work(iv2tsn+i-1-1)*b12e(i-1)

            b12e(i-1) = temp

            b12bulge = work(iv2tsn+i-1-1)*b12e(i)

            b12e(i) = work(iv2tcs+i-1-1)*b12e(i)

            temp = work(iv2tcs+i-1-1)*b22e(i-1) +

     $             work(iv2tsn+i-1-1)*b22d(i)

            b22d(i) = work(iv2tcs+i-1-1)*b22d(i) -

     $                work(iv2tsn+i-1-1)*b22e(i-1)

            b22e(i-1) = temp

            b22bulge = work(iv2tsn+i-1-1)*b22e(i)

            b22e(i) = work(iv2tcs+i-1-1)*b22e(i)

*

*           Compute THETA(I)

*

            x1 = cos(phi(i-1))*b11d(i) + sin(phi(i-1))*b12e(i-1)

            x2 = cos(phi(i-1))*b11bulge + sin(phi(i-1))*b12bulge

            y1 = cos(phi(i-1))*b21d(i) + sin(phi(i-1))*b22e(i-1)

            y2 = cos(phi(i-1))*b21bulge + sin(phi(i-1))*b22bulge

*

            theta(i) = atan2( sqrt(y1**2+y2**2), sqrt(x1**2+x2**2) )

*

*           Determine if there are bulges to chase or if a new direct

*           summand has been reached

*

            restart11 =   b11d(i)**2 + b11bulge**2 .LE. thresh**2

            restart12 = b12e(i-1)**2 + b12bulge**2 .LE. thresh**2

            restart21 =   b21d(i)**2 + b21bulge**2 .LE. thresh**2

            restart22 = b22e(i-1)**2 + b22bulge**2 .LE. thresh**2

*

*           If possible, chase bulges from B11(I+1,I), B12(I+1,I-1),

*           B21(I+1,I), and B22(I+1,I-1). If necessary, restart bulge-

*           chasing by applying the original shift again.

*

            IF( .NOT. restart11 .AND. .NOT. restart12 ) THEN

               CALL dlartgp( x2, x1, work(iu1sn+i-1), work(iu1cs+i-1),

     $                       r )

            ELSE IF( .NOT. restart11 .AND. restart12 ) THEN

               CALL dlartgp( b11bulge, b11d(i), work(iu1sn+i-1),

     $                       work(iu1cs+i-1), r )

            ELSE IF( restart11 .AND. .NOT. restart12 ) THEN

               CALL dlartgp( b12bulge, b12e(i-1), work(iu1sn+i-1),

     $                       work(iu1cs+i-1), r )

            ELSE IF( mu .LE. nu ) THEN

               CALL dlartgs( b11e(i), b11d(i+1), mu, work(iu1cs+i-1),

     $                       work(iu1sn+i-1) )

            ELSE

               CALL dlartgs( b12d(i), b12e(i), nu, work(iu1cs+i-1),

     $                       work(iu1sn+i-1) )

            END IF

            IF( .NOT. restart21 .AND. .NOT. restart22 ) THEN

               CALL dlartgp( y2, y1, work(iu2sn+i-1), work(iu2cs+i-1),

     $                       r )

            ELSE IF( .NOT. restart21 .AND. restart22 ) THEN

               CALL dlartgp( b21bulge, b21d(i), work(iu2sn+i-1),

     $                       work(iu2cs+i-1), r )

            ELSE IF( restart21 .AND. .NOT. restart22 ) THEN

               CALL dlartgp( b22bulge, b22e(i-1), work(iu2sn+i-1),

     $                       work(iu2cs+i-1), r )

            ELSE IF( nu .LT. mu ) THEN

               CALL dlartgs( b21e(i), b21e(i+1), nu, work(iu2cs+i-1),

     $                       work(iu2sn+i-1) )

            ELSE

               CALL dlartgs( b22d(i), b22e(i), mu, work(iu2cs+i-1),

     $                       work(iu2sn+i-1) )

            END IF

            work(iu2cs+i-1) = -work(iu2cs+i-1)

            work(iu2sn+i-1) = -work(iu2sn+i-1)

*

            temp = work(iu1cs+i-1)*b11e(i) + work(iu1sn+i-1)*b11d(i+1)

            b11d(i+1) = work(iu1cs+i-1)*b11d(i+1) -

     $                  work(iu1sn+i-1)*b11e(i)

            b11e(i) = temp

            IF( i .LT. imax - 1 ) THEN

               b11bulge = work(iu1sn+i-1)*b11e(i+1)

               b11e(i+1) = work(iu1cs+i-1)*b11e(i+1)

            END IF

            temp = work(iu2cs+i-1)*b21e(i) + work(iu2sn+i-1)*b21d(i+1)

            b21d(i+1) = work(iu2cs+i-1)*b21d(i+1) -

     $                  work(iu2sn+i-1)*b21e(i)

            b21e(i) = temp

            IF( i .LT. imax - 1 ) THEN

               b21bulge = work(iu2sn+i-1)*b21e(i+1)

               b21e(i+1) = work(iu2cs+i-1)*b21e(i+1)

            END IF

            temp = work(iu1cs+i-1)*b12d(i) + work(iu1sn+i-1)*b12e(i)

            b12e(i) = work(iu1cs+i-1)*b12e(i) - work(iu1sn+i-1)*b12d(i)

            b12d(i) = temp

            b12bulge = work(iu1sn+i-1)*b12d(i+1)

            b12d(i+1) = work(iu1cs+i-1)*b12d(i+1)

            temp = work(iu2cs+i-1)*b22d(i) + work(iu2sn+i-1)*b22e(i)

            b22e(i) = work(iu2cs+i-1)*b22e(i) - work(iu2sn+i-1)*b22d(i)

            b22d(i) = temp

            b22bulge = work(iu2sn+i-1)*b22d(i+1)

            b22d(i+1) = work(iu2cs+i-1)*b22d(i+1)

*

         END DO

*

*        Compute PHI(IMAX-1)

*

         x1 = sin(theta(imax-1))*b11e(imax-1) +

     $        cos(theta(imax-1))*b21e(imax-1)

         y1 = sin(theta(imax-1))*b12d(imax-1) +

     $        cos(theta(imax-1))*b22d(imax-1)

         y2 = sin(theta(imax-1))*b12bulge + cos(theta(imax-1))*b22bulge

*

         phi(imax-1) = atan2( abs(x1), sqrt(y1**2+y2**2) )

*

*        Chase bulges from B12(IMAX-1,IMAX) and B22(IMAX-1,IMAX)

*

         restart12 = b12d(imax-1)**2 + b12bulge**2 .LE. thresh**2

         restart22 = b22d(imax-1)**2 + b22bulge**2 .LE. thresh**2

*

         IF( .NOT. restart12 .AND. .NOT. restart22 ) THEN

            CALL dlartgp( y2, y1, work(iv2tsn+imax-1-1),

     $                    work(iv2tcs+imax-1-1), r )

         ELSE IF( .NOT. restart12 .AND. restart22 ) THEN

            CALL dlartgp( b12bulge, b12d(imax-1), work(iv2tsn+imax-1-1),

     $                    work(iv2tcs+imax-1-1), r )

         ELSE IF( restart12 .AND. .NOT. restart22 ) THEN

            CALL dlartgp( b22bulge, b22d(imax-1), work(iv2tsn+imax-1-1),

     $                    work(iv2tcs+imax-1-1), r )

         ELSE IF( nu .LT. mu ) THEN

            CALL dlartgs( b12e(imax-1), b12d(imax), nu,

     $                    work(iv2tcs+imax-1-1), work(iv2tsn+imax-1-1) )

         ELSE

            CALL dlartgs( b22e(imax-1), b22d(imax), mu,

     $                    work(iv2tcs+imax-1-1), work(iv2tsn+imax-1-1) )

         END IF

*

         temp = work(iv2tcs+imax-1-1)*b12e(imax-1) +

     $          work(iv2tsn+imax-1-1)*b12d(imax)

         b12d(imax) = work(iv2tcs+imax-1-1)*b12d(imax) -

     $                work(iv2tsn+imax-1-1)*b12e(imax-1)

         b12e(imax-1) = temp

         temp = work(iv2tcs+imax-1-1)*b22e(imax-1) +

     $          work(iv2tsn+imax-1-1)*b22d(imax)

         b22d(imax) = work(iv2tcs+imax-1-1)*b22d(imax) -

     $                work(iv2tsn+imax-1-1)*b22e(imax-1)

         b22e(imax-1) = temp

*

*        Update singular vectors

*

         IF( wantu1 ) THEN

            IF( colmajor ) THEN

               CALL dlasr( 'R', 'V', 'F', p, imax-imin+1,

     $                     work(iu1cs+imin-1), work(iu1sn+imin-1),

     $                     u1(1,imin), ldu1 )

            ELSE

               CALL dlasr( 'L', 'V', 'F', imax-imin+1, p,

     $                     work(iu1cs+imin-1), work(iu1sn+imin-1),

     $                     u1(imin,1), ldu1 )

            END IF

         END IF

         IF( wantu2 ) THEN

            IF( colmajor ) THEN

               CALL dlasr( 'R', 'V', 'F', m-p, imax-imin+1,

     $                     work(iu2cs+imin-1), work(iu2sn+imin-1),

     $                     u2(1,imin), ldu2 )

            ELSE

               CALL dlasr( 'L', 'V', 'F', imax-imin+1, m-p,

     $                     work(iu2cs+imin-1), work(iu2sn+imin-1),

     $                     u2(imin,1), ldu2 )

            END IF

         END IF

         IF( wantv1t ) THEN

            IF( colmajor ) THEN

               CALL dlasr( 'L', 'V', 'F', imax-imin+1, q,

     $                     work(iv1tcs+imin-1), work(iv1tsn+imin-1),

     $                     v1t(imin,1), ldv1t )

            ELSE

               CALL dlasr( 'R', 'V', 'F', q, imax-imin+1,

     $                     work(iv1tcs+imin-1), work(iv1tsn+imin-1),

     $                     v1t(1,imin), ldv1t )

            END IF

         END IF

         IF( wantv2t ) THEN

            IF( colmajor ) THEN

               CALL dlasr( 'L', 'V', 'F', imax-imin+1, m-q,

     $                     work(iv2tcs+imin-1), work(iv2tsn+imin-1),

     $                     v2t(imin,1), ldv2t )

            ELSE

               CALL dlasr( 'R', 'V', 'F', m-q, imax-imin+1,

     $                     work(iv2tcs+imin-1), work(iv2tsn+imin-1),

     $                     v2t(1,imin), ldv2t )

            END IF

         END IF

*

*        Fix signs on B11(IMAX-1,IMAX) and B21(IMAX-1,IMAX)

*

         IF( b11e(imax-1)+b21e(imax-1) .GT. 0 ) THEN

            b11d(imax) = -b11d(imax)

            b21d(imax) = -b21d(imax)

            IF( wantv1t ) THEN

               IF( colmajor ) THEN

                  CALL dscal( q, negonecomplex, v1t(imax,1), ldv1t )

               ELSE

                  CALL dscal( q, negonecomplex, v1t(1,imax), 1 )

               END IF

            END IF

         END IF

*

*        Compute THETA(IMAX)

*

         x1 = cos(phi(imax-1))*b11d(imax) +

     $        sin(phi(imax-1))*b12e(imax-1)

         y1 = cos(phi(imax-1))*b21d(imax) +

     $        sin(phi(imax-1))*b22e(imax-1)

*

         theta(imax) = atan2( abs(y1), abs(x1) )

*

*        Fix signs on B11(IMAX,IMAX), B12(IMAX,IMAX-1), B21(IMAX,IMAX),

*        and B22(IMAX,IMAX-1)

*

         IF( b11d(imax)+b12e(imax-1) .LT. 0 ) THEN

            b12d(imax) = -b12d(imax)

            IF( wantu1 ) THEN

               IF( colmajor ) THEN

                  CALL dscal( p, negonecomplex, u1(1,imax), 1 )

               ELSE

                  CALL dscal( p, negonecomplex, u1(imax,1), ldu1 )

               END IF

            END IF

         END IF

         IF( b21d(imax)+b22e(imax-1) .GT. 0 ) THEN

            b22d(imax) = -b22d(imax)

            IF( wantu2 ) THEN

               IF( colmajor ) THEN

                  CALL dscal( m-p, negonecomplex, u2(1,imax), 1 )

               ELSE

                  CALL dscal( m-p, negonecomplex, u2(imax,1), ldu2 )

               END IF

            END IF

         END IF

*

*        Fix signs on B12(IMAX,IMAX) and B22(IMAX,IMAX)

*

         IF( b12d(imax)+b22d(imax) .LT. 0 ) THEN

            IF( wantv2t ) THEN

               IF( colmajor ) THEN

                  CALL dscal( m-q, negonecomplex, v2t(imax,1), ldv2t )

               ELSE

                  CALL dscal( m-q, negonecomplex, v2t(1,imax), 1 )

               END IF

            END IF

         END IF

*

*        Test for negligible sines or cosines

*

         DO i = imin, imax

            IF( theta(i) .LT. thresh ) THEN

               theta(i) = zero

            ELSE IF( theta(i) .GT. piover2-thresh ) THEN

               theta(i) = piover2

            END IF

         END DO

         DO i = imin, imax-1

            IF( phi(i) .LT. thresh ) THEN

               phi(i) = zero

            ELSE IF( phi(i) .GT. piover2-thresh ) THEN

               phi(i) = piover2

            END IF

         END DO

*

*        Deflate

*

         IF (imax .GT. 1) THEN

            DO WHILE( phi(imax-1) .EQ. zero )

               imax = imax - 1

               IF (imax .LE. 1) exit

            END DO

         END IF

         IF( imin .GT. imax - 1 )

     $      imin = imax - 1

         IF (imin .GT. 1) THEN

            DO WHILE (phi(imin-1) .NE. zero)

                imin = imin - 1

                IF (imin .LE. 1) exit

            END DO

         END IF

*

*        Repeat main iteration loop

*

      END DO

*

*     Postprocessing: order THETA from least to greatest

*

      DO i = 1, q

*

         mini = i

         thetamin = theta(i)

         DO j = i+1, q

            IF( theta(j) .LT. thetamin ) THEN

               mini = j

               thetamin = theta(j)

            END IF

         END DO

*

         IF( mini .NE. i ) THEN

            theta(mini) = theta(i)

            theta(i) = thetamin

            IF( colmajor ) THEN

               IF( wantu1 )

     $            CALL dswap( p, u1(1,i), 1, u1(1,mini), 1 )

               IF( wantu2 )

     $            CALL dswap( m-p, u2(1,i), 1, u2(1,mini), 1 )

               IF( wantv1t )

     $            CALL dswap( q, v1t(i,1), ldv1t, v1t(mini,1), ldv1t )

               IF( wantv2t )

     $            CALL dswap( m-q, v2t(i,1), ldv2t, v2t(mini,1),

     $               ldv2t )

            ELSE

               IF( wantu1 )

     $            CALL dswap( p, u1(i,1), ldu1, u1(mini,1), ldu1 )

               IF( wantu2 )

     $            CALL dswap( m-p, u2(i,1), ldu2, u2(mini,1), ldu2 )

               IF( wantv1t )

     $            CALL dswap( q, v1t(1,i), 1, v1t(1,mini), 1 )

               IF( wantv2t )

     $            CALL dswap( m-q, v2t(1,i), 1, v2t(1,mini), 1 )

            END IF

         END IF

*

      END DO

*

      return

*

*     End of DBBCSD

*

      END