SUBROUTINE CLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U,
     $                   LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR,
     $                   GIVCOL, LDGCOL, PERM, GIVNUM, C, S, RWORK,
     $                   IWORK, INFO )
*
*  -- LAPACK routine (version 3.1) --
*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
*     November 2006
*
*     .. Scalar Arguments ..
      INTEGER            ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS,
     $                   SMLSIZ
*     ..
*     .. Array Arguments ..
      INTEGER            GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ),
     $                   K( * ), PERM( LDGCOL, * )
      REAL               C( * ), DIFL( LDU, * ), DIFR( LDU, * ),
     $                   GIVNUM( LDU, * ), POLES( LDU, * ), RWORK( * ),
     $                   S( * ), U( LDU, * ), VT( LDU, * ), Z( LDU, * )
      COMPLEX            B( LDB, * ), BX( LDBX, * )
*     ..
*
*  Purpose
*  =======
*
*  CLALSA is an itermediate step in solving the least squares problem
*  by computing the SVD of the coefficient matrix in compact form (The
*  singular vectors are computed as products of simple orthorgonal
*  matrices.).
*
*  If ICOMPQ = 0, CLALSA applies the inverse of the left singular vector
*  matrix of an upper bidiagonal matrix to the right hand side; and if
*  ICOMPQ = 1, CLALSA applies the right singular vector matrix to the
*  right hand side. The singular vector matrices were generated in
*  compact form by CLALSA.
*
*  Arguments
*  =========
*
*  ICOMPQ (input) INTEGER
*         Specifies whether the left or the right singular vector
*         matrix is involved.
*         = 0: Left singular vector matrix
*         = 1: Right singular vector matrix
*
*  SMLSIZ (input) INTEGER
*         The maximum size of the subproblems at the bottom of the
*         computation tree.
*
*  N      (input) INTEGER
*         The row and column dimensions of the upper bidiagonal matrix.
*
*  NRHS   (input) INTEGER
*         The number of columns of B and BX. NRHS must be at least 1.
*
*  B      (input/output) COMPLEX array, dimension ( LDB, NRHS )
*         On input, B contains the right hand sides of the least
*         squares problem in rows 1 through M.
*         On output, B contains the solution X in rows 1 through N.
*
*  LDB    (input) INTEGER
*         The leading dimension of B in the calling subprogram.
*         LDB must be at least max(1,MAX( M, N ) ).
*
*  BX     (output) COMPLEX array, dimension ( LDBX, NRHS )
*         On exit, the result of applying the left or right singular
*         vector matrix to B.
*
*  LDBX   (input) INTEGER
*         The leading dimension of BX.
*
*  U      (input) REAL array, dimension ( LDU, SMLSIZ ).
*         On entry, U contains the left singular vector matrices of all
*         subproblems at the bottom level.
*
*  LDU    (input) INTEGER, LDU = > N.
*         The leading dimension of arrays U, VT, DIFL, DIFR,
*         POLES, GIVNUM, and Z.
*
*  VT     (input) REAL array, dimension ( LDU, SMLSIZ+1 ).
*         On entry, VT' contains the right singular vector matrices of
*         all subproblems at the bottom level.
*
*  K      (input) INTEGER array, dimension ( N ).
*
*  DIFL   (input) REAL array, dimension ( LDU, NLVL ).
*         where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1.
*
*  DIFR   (input) REAL array, dimension ( LDU, 2 * NLVL ).
*         On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record
*         distances between singular values on the I-th level and
*         singular values on the (I -1)-th level, and DIFR(*, 2 * I)
*         record the normalizing factors of the right singular vectors
*         matrices of subproblems on I-th level.
*
*  Z      (input) REAL array, dimension ( LDU, NLVL ).
*         On entry, Z(1, I) contains the components of the deflation-
*         adjusted updating row vector for subproblems on the I-th
*         level.
*
*  POLES  (input) REAL array, dimension ( LDU, 2 * NLVL ).
*         On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old
*         singular values involved in the secular equations on the I-th
*         level.
*
*  GIVPTR (input) INTEGER array, dimension ( N ).
*         On entry, GIVPTR( I ) records the number of Givens
*         rotations performed on the I-th problem on the computation
*         tree.
*
*  GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ).
*         On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the
*         locations of Givens rotations performed on the I-th level on
*         the computation tree.
*
*  LDGCOL (input) INTEGER, LDGCOL = > N.
*         The leading dimension of arrays GIVCOL and PERM.
*
*  PERM   (input) INTEGER array, dimension ( LDGCOL, NLVL ).
*         On entry, PERM(*, I) records permutations done on the I-th
*         level of the computation tree.
*
*  GIVNUM (input) REAL array, dimension ( LDU, 2 * NLVL ).
*         On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S-
*         values of Givens rotations performed on the I-th level on the
*         computation tree.
*
*  C      (input) REAL array, dimension ( N ).
*         On entry, if the I-th subproblem is not square,
*         C( I ) contains the C-value of a Givens rotation related to
*         the right null space of the I-th subproblem.
*
*  S      (input) REAL array, dimension ( N ).
*         On entry, if the I-th subproblem is not square,
*         S( I ) contains the S-value of a Givens rotation related to
*         the right null space of the I-th subproblem.
*
*  RWORK  (workspace) REAL array, dimension at least
*         max ( N, (SMLSZ+1)*NRHS*3 ).
*
*  IWORK  (workspace) INTEGER array.
*         The dimension must be at least 3 * N
*
*  INFO   (output) INTEGER
*          = 0:  successful exit.
*          < 0:  if INFO = -i, the i-th argument had an illegal value.
*
*  Further Details
*  ===============
*
*  Based on contributions by
*     Ming Gu and Ren-Cang Li, Computer Science Division, University of
*       California at Berkeley, USA
*     Osni Marques, LBNL/NERSC, USA
*
*  =====================================================================
*
*     .. Parameters ..
      REAL               ZERO, ONE
      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
*     ..
*     .. Local Scalars ..
      INTEGER            I, I1, IC, IM1, INODE, J, JCOL, JIMAG, JREAL,
     $                   JROW, LF, LL, LVL, LVL2, ND, NDB1, NDIML,
     $                   NDIMR, NL, NLF, NLP1, NLVL, NR, NRF, NRP1, SQRE
*     ..
*     .. External Subroutines ..
      EXTERNAL           CCOPY, CLALS0, SGEMM, SLASDT, XERBLA
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          AIMAG, CMPLX, REAL
*     ..
*     .. Executable Statements ..
*
*     Test the input parameters.
*
      INFO = 0
*
      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
         INFO = -1
      ELSE IF( SMLSIZ.LT.3 ) THEN
         INFO = -2
      ELSE IF( N.LT.SMLSIZ ) THEN
         INFO = -3
      ELSE IF( NRHS.LT.1 ) THEN
         INFO = -4
      ELSE IF( LDB.LT.N ) THEN
         INFO = -6
      ELSE IF( LDBX.LT.N ) THEN
         INFO = -8
      ELSE IF( LDU.LT.N ) THEN
         INFO = -10
      ELSE IF( LDGCOL.LT.N ) THEN
         INFO = -19
      END IF
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'CLALSA', -INFO )
         RETURN
      END IF
*
*     Book-keeping and  setting up the computation tree.
*
      INODE = 1
      NDIML = INODE + N
      NDIMR = NDIML + N
*
      CALL SLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ),
     $             IWORK( NDIMR ), SMLSIZ )
*
*     The following code applies back the left singular vector factors.
*     For applying back the right singular vector factors, go to 170.
*
      IF( ICOMPQ.EQ.1 ) THEN
         GO TO 170
      END IF
*
*     The nodes on the bottom level of the tree were solved
*     by SLASDQ. The corresponding left and right singular vector
*     matrices are in explicit form. First apply back the left
*     singular vector matrices.
*
      NDB1 = ( ND+1 ) / 2
      DO 130 I = NDB1, ND
*
*        IC : center row of each node
*        NL : number of rows of left  subproblem
*        NR : number of rows of right subproblem
*        NLF: starting row of the left   subproblem
*        NRF: starting row of the right  subproblem
*
         I1 = I - 1
         IC = IWORK( INODE+I1 )
         NL = IWORK( NDIML+I1 )
         NR = IWORK( NDIMR+I1 )
         NLF = IC - NL
         NRF = IC + 1
*
*        Since B and BX are complex, the following call to SGEMM
*        is performed in two steps (real and imaginary parts).
*
*        CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
*     $               B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
*
         J = NL*NRHS*2
         DO 20 JCOL = 1, NRHS
            DO 10 JROW = NLF, NLF + NL - 1
               J = J + 1
               RWORK( J ) = REAL( B( JROW, JCOL ) )
   10       CONTINUE
   20    CONTINUE
         CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
     $               RWORK( 1+NL*NRHS*2 ), NL, ZERO, RWORK( 1 ), NL )
         J = NL*NRHS*2
         DO 40 JCOL = 1, NRHS
            DO 30 JROW = NLF, NLF + NL - 1
               J = J + 1
               RWORK( J ) = AIMAG( B( JROW, JCOL ) )
   30       CONTINUE
   40    CONTINUE
         CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
     $               RWORK( 1+NL*NRHS*2 ), NL, ZERO, RWORK( 1+NL*NRHS ),
     $               NL )
         JREAL = 0
         JIMAG = NL*NRHS
         DO 60 JCOL = 1, NRHS
            DO 50 JROW = NLF, NLF + NL - 1
               JREAL = JREAL + 1
               JIMAG = JIMAG + 1
               BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ),
     $                            RWORK( JIMAG ) )
   50       CONTINUE
   60    CONTINUE
*
*        Since B and BX are complex, the following call to SGEMM
*        is performed in two steps (real and imaginary parts).
*
*        CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
*    $               B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
*
         J = NR*NRHS*2
         DO 80 JCOL = 1, NRHS
            DO 70 JROW = NRF, NRF + NR - 1
               J = J + 1
               RWORK( J ) = REAL( B( JROW, JCOL ) )
   70       CONTINUE
   80    CONTINUE
         CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
     $               RWORK( 1+NR*NRHS*2 ), NR, ZERO, RWORK( 1 ), NR )
         J = NR*NRHS*2
         DO 100 JCOL = 1, NRHS
            DO 90 JROW = NRF, NRF + NR - 1
               J = J + 1
               RWORK( J ) = AIMAG( B( JROW, JCOL ) )
   90       CONTINUE
  100    CONTINUE
         CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
     $               RWORK( 1+NR*NRHS*2 ), NR, ZERO, RWORK( 1+NR*NRHS ),
     $               NR )
         JREAL = 0
         JIMAG = NR*NRHS
         DO 120 JCOL = 1, NRHS
            DO 110 JROW = NRF, NRF + NR - 1
               JREAL = JREAL + 1
               JIMAG = JIMAG + 1
               BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ),
     $                            RWORK( JIMAG ) )
  110       CONTINUE
  120    CONTINUE
*
  130 CONTINUE
*
*     Next copy the rows of B that correspond to unchanged rows
*     in the bidiagonal matrix to BX.
*
      DO 140 I = 1, ND
         IC = IWORK( INODE+I-1 )
         CALL CCOPY( NRHS, B( IC, 1 ), LDB, BX( IC, 1 ), LDBX )
  140 CONTINUE
*
*     Finally go through the left singular vector matrices of all
*     the other subproblems bottom-up on the tree.
*
      J = 2**NLVL
      SQRE = 0
*
      DO 160 LVL = NLVL, 1, -1
         LVL2 = 2*LVL - 1
*
*        find the first node LF and last node LL on
*        the current level LVL
*
         IF( LVL.EQ.1 ) THEN
            LF = 1
            LL = 1
         ELSE
            LF = 2**( LVL-1 )
            LL = 2*LF - 1
         END IF
         DO 150 I = LF, LL
            IM1 = I - 1
            IC = IWORK( INODE+IM1 )
            NL = IWORK( NDIML+IM1 )
            NR = IWORK( NDIMR+IM1 )
            NLF = IC - NL
            NRF = IC + 1
            J = J - 1
            CALL CLALS0( ICOMPQ, NL, NR, SQRE, NRHS, BX( NLF, 1 ), LDBX,
     $                   B( NLF, 1 ), LDB, PERM( NLF, LVL ),
     $                   GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
     $                   GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ),
     $                   DIFL( NLF, LVL ), DIFR( NLF, LVL2 ),
     $                   Z( NLF, LVL ), K( J ), C( J ), S( J ), RWORK,
     $                   INFO )
  150    CONTINUE
  160 CONTINUE
      GO TO 330
*
*     ICOMPQ = 1: applying back the right singular vector factors.
*
  170 CONTINUE
*
*     First now go through the right singular vector matrices of all
*     the tree nodes top-down.
*
      J = 0
      DO 190 LVL = 1, NLVL
         LVL2 = 2*LVL - 1
*
*        Find the first node LF and last node LL on
*        the current level LVL.
*
         IF( LVL.EQ.1 ) THEN
            LF = 1
            LL = 1
         ELSE
            LF = 2**( LVL-1 )
            LL = 2*LF - 1
         END IF
         DO 180 I = LL, LF, -1
            IM1 = I - 1
            IC = IWORK( INODE+IM1 )
            NL = IWORK( NDIML+IM1 )
            NR = IWORK( NDIMR+IM1 )
            NLF = IC - NL
            NRF = IC + 1
            IF( I.EQ.LL ) THEN
               SQRE = 0
            ELSE
               SQRE = 1
            END IF
            J = J + 1
            CALL CLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B( NLF, 1 ), LDB,
     $                   BX( NLF, 1 ), LDBX, PERM( NLF, LVL ),
     $                   GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
     $                   GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ),
     $                   DIFL( NLF, LVL ), DIFR( NLF, LVL2 ),
     $                   Z( NLF, LVL ), K( J ), C( J ), S( J ), RWORK,
     $                   INFO )
  180    CONTINUE
  190 CONTINUE
*
*     The nodes on the bottom level of the tree were solved
*     by SLASDQ. The corresponding right singular vector
*     matrices are in explicit form. Apply them back.
*
      NDB1 = ( ND+1 ) / 2
      DO 320 I = NDB1, ND
         I1 = I - 1
         IC = IWORK( INODE+I1 )
         NL = IWORK( NDIML+I1 )
         NR = IWORK( NDIMR+I1 )
         NLP1 = NL + 1
         IF( I.EQ.ND ) THEN
            NRP1 = NR
         ELSE
            NRP1 = NR + 1
         END IF
         NLF = IC - NL
         NRF = IC + 1
*
*        Since B and BX are complex, the following call to SGEMM is
*        performed in two steps (real and imaginary parts).
*
*        CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
*    $               B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
*
         J = NLP1*NRHS*2
         DO 210 JCOL = 1, NRHS
            DO 200 JROW = NLF, NLF + NLP1 - 1
               J = J + 1
               RWORK( J ) = REAL( B( JROW, JCOL ) )
  200       CONTINUE
  210    CONTINUE
         CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
     $               RWORK( 1+NLP1*NRHS*2 ), NLP1, ZERO, RWORK( 1 ),
     $               NLP1 )
         J = NLP1*NRHS*2
         DO 230 JCOL = 1, NRHS
            DO 220 JROW = NLF, NLF + NLP1 - 1
               J = J + 1
               RWORK( J ) = AIMAG( B( JROW, JCOL ) )
  220       CONTINUE
  230    CONTINUE
         CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
     $               RWORK( 1+NLP1*NRHS*2 ), NLP1, ZERO,
     $               RWORK( 1+NLP1*NRHS ), NLP1 )
         JREAL = 0
         JIMAG = NLP1*NRHS
         DO 250 JCOL = 1, NRHS
            DO 240 JROW = NLF, NLF + NLP1 - 1
               JREAL = JREAL + 1
               JIMAG = JIMAG + 1
               BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ),
     $                            RWORK( JIMAG ) )
  240       CONTINUE
  250    CONTINUE
*
*        Since B and BX are complex, the following call to SGEMM is
*        performed in two steps (real and imaginary parts).
*
*        CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
*    $               B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
*
         J = NRP1*NRHS*2
         DO 270 JCOL = 1, NRHS
            DO 260 JROW = NRF, NRF + NRP1 - 1
               J = J + 1
               RWORK( J ) = REAL( B( JROW, JCOL ) )
  260       CONTINUE
  270    CONTINUE
         CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
     $               RWORK( 1+NRP1*NRHS*2 ), NRP1, ZERO, RWORK( 1 ),
     $               NRP1 )
         J = NRP1*NRHS*2
         DO 290 JCOL = 1, NRHS
            DO 280 JROW = NRF, NRF + NRP1 - 1
               J = J + 1
               RWORK( J ) = AIMAG( B( JROW, JCOL ) )
  280       CONTINUE
  290    CONTINUE
         CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
     $               RWORK( 1+NRP1*NRHS*2 ), NRP1, ZERO,
     $               RWORK( 1+NRP1*NRHS ), NRP1 )
         JREAL = 0
         JIMAG = NRP1*NRHS
         DO 310 JCOL = 1, NRHS
            DO 300 JROW = NRF, NRF + NRP1 - 1
               JREAL = JREAL + 1
               JIMAG = JIMAG + 1
               BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ),
     $                            RWORK( JIMAG ) )
  300       CONTINUE
  310    CONTINUE
*
  320 CONTINUE
*
  330 CONTINUE
*
      RETURN
*
*     End of CLALSA
*
      END