LAPACK  3.8.0
LAPACK: Linear Algebra PACKage

◆ sbbcsd()

subroutine sbbcsd ( character  JOBU1,
character  JOBU2,
character  JOBV1T,
character  JOBV2T,
character  TRANS,
integer  M,
integer  P,
integer  Q,
real, dimension( * )  THETA,
real, dimension( * )  PHI,
real, dimension( ldu1, * )  U1,
integer  LDU1,
real, dimension( ldu2, * )  U2,
integer  LDU2,
real, dimension( ldv1t, * )  V1T,
integer  LDV1T,
real, dimension( ldv2t, * )  V2T,
integer  LDV2T,
real, dimension( * )  B11D,
real, dimension( * )  B11E,
real, dimension( * )  B12D,
real, dimension( * )  B12E,
real, dimension( * )  B21D,
real, dimension( * )  B21E,
real, dimension( * )  B22D,
real, dimension( * )  B22E,
real, dimension( * )  WORK,
integer  LWORK,
integer  INFO 
)

SBBCSD

Download SBBCSD + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 SBBCSD computes the CS decomposition of an orthogonal matrix in
 bidiagonal-block form,


     [ B11 | B12 0  0 ]
     [  0  |  0 -I  0 ]
 X = [----------------]
     [ B21 | B22 0  0 ]
     [  0  |  0  0  I ]

                               [  C | -S  0  0 ]
                   [ U1 |    ] [  0 |  0 -I  0 ] [ V1 |    ]**T
                 = [---------] [---------------] [---------]   .
                   [    | U2 ] [  S |  C  0  0 ] [    | V2 ]
                               [  0 |  0  0  I ]

 X is M-by-M, its top-left block is P-by-Q, and Q must be no larger
 than P, M-P, or M-Q. (If Q is not the smallest index, then X must be
 transposed and/or permuted. This can be done in constant time using
 the TRANS and SIGNS options. See SORCSD for details.)

 The bidiagonal matrices B11, B12, B21, and B22 are represented
 implicitly by angles THETA(1:Q) and PHI(1:Q-1).

 The orthogonal matrices U1, U2, V1T, and V2T are input/output.
 The input matrices are pre- or post-multiplied by the appropriate
 singular vector matrices.
Parameters
[in]JOBU1
          JOBU1 is CHARACTER
          = 'Y':      U1 is updated;
          otherwise:  U1 is not updated.
[in]JOBU2
          JOBU2 is CHARACTER
          = 'Y':      U2 is updated;
          otherwise:  U2 is not updated.
[in]JOBV1T
          JOBV1T is CHARACTER
          = 'Y':      V1T is updated;
          otherwise:  V1T is not updated.
[in]JOBV2T
          JOBV2T is CHARACTER
          = 'Y':      V2T is updated;
          otherwise:  V2T is not updated.
[in]TRANS
          TRANS is CHARACTER
          = 'T':      X, U1, U2, V1T, and V2T are stored in row-major
                      order;
          otherwise:  X, U1, U2, V1T, and V2T are stored in column-
                      major order.
[in]M
          M is INTEGER
          The number of rows and columns in X, the orthogonal matrix in
          bidiagonal-block form.
[in]P
          P is INTEGER
          The number of rows in the top-left block of X. 0 <= P <= M.
[in]Q
          Q is INTEGER
          The number of columns in the top-left block of X.
          0 <= Q <= MIN(P,M-P,M-Q).
[in,out]THETA
          THETA is REAL array, dimension (Q)
          On entry, the angles THETA(1),...,THETA(Q) that, along with
          PHI(1), ...,PHI(Q-1), define the matrix in bidiagonal-block
          form. On exit, the angles whose cosines and sines define the
          diagonal blocks in the CS decomposition.
[in,out]PHI
          PHI is REAL array, dimension (Q-1)
          The angles PHI(1),...,PHI(Q-1) that, along with THETA(1),...,
          THETA(Q), define the matrix in bidiagonal-block form.
[in,out]U1
          U1 is REAL array, dimension (LDU1,P)
          On entry, a P-by-P matrix. On exit, U1 is postmultiplied
          by the left singular vector matrix common to [ B11 ; 0 ] and
          [ B12 0 0 ; 0 -I 0 0 ].
[in]LDU1
          LDU1 is INTEGER
          The leading dimension of the array U1, LDU1 >= MAX(1,P).
[in,out]U2
          U2 is REAL array, dimension (LDU2,M-P)
          On entry, an (M-P)-by-(M-P) matrix. On exit, U2 is
          postmultiplied by the left singular vector matrix common to
          [ B21 ; 0 ] and [ B22 0 0 ; 0 0 I ].
[in]LDU2
          LDU2 is INTEGER
          The leading dimension of the array U2, LDU2 >= MAX(1,M-P).
[in,out]V1T
          V1T is REAL array, dimension (LDV1T,Q)
          On entry, a Q-by-Q matrix. On exit, V1T is premultiplied
          by the transpose of the right singular vector
          matrix common to [ B11 ; 0 ] and [ B21 ; 0 ].
[in]LDV1T
          LDV1T is INTEGER
          The leading dimension of the array V1T, LDV1T >= MAX(1,Q).
[in,out]V2T
          V2T is REAL array, dimension (LDV2T,M-Q)
          On entry, an (M-Q)-by-(M-Q) matrix. On exit, V2T is
          premultiplied by the transpose of the right
          singular vector matrix common to [ B12 0 0 ; 0 -I 0 ] and
          [ B22 0 0 ; 0 0 I ].
[in]LDV2T
          LDV2T is INTEGER
          The leading dimension of the array V2T, LDV2T >= MAX(1,M-Q).
[out]B11D
          B11D is REAL array, dimension (Q)
          When SBBCSD converges, B11D contains the cosines of THETA(1),
          ..., THETA(Q). If SBBCSD fails to converge, then B11D
          contains the diagonal of the partially reduced top-left
          block.
[out]B11E
          B11E is REAL array, dimension (Q-1)
          When SBBCSD converges, B11E contains zeros. If SBBCSD fails
          to converge, then B11E contains the superdiagonal of the
          partially reduced top-left block.
[out]B12D
          B12D is REAL array, dimension (Q)
          When SBBCSD converges, B12D contains the negative sines of
          THETA(1), ..., THETA(Q). If SBBCSD fails to converge, then
          B12D contains the diagonal of the partially reduced top-right
          block.
[out]B12E
          B12E is REAL array, dimension (Q-1)
          When SBBCSD converges, B12E contains zeros. If SBBCSD fails
          to converge, then B12E contains the subdiagonal of the
          partially reduced top-right block.
[out]B21D
          B21D is REAL array, dimension (Q)
          When SBBCSD converges, B21D contains the negative sines of
          THETA(1), ..., THETA(Q). If SBBCSD fails to converge, then
          B21D contains the diagonal of the partially reduced bottom-left
          block.
[out]B21E
          B21E is REAL array, dimension (Q-1)
          When SBBCSD converges, B21E contains zeros. If SBBCSD fails
          to converge, then B21E contains the subdiagonal of the
          partially reduced bottom-left block.
[out]B22D
          B22D is REAL array, dimension (Q)
          When SBBCSD converges, B22D contains the negative sines of
          THETA(1), ..., THETA(Q). If SBBCSD fails to converge, then
          B22D contains the diagonal of the partially reduced bottom-right
          block.
[out]B22E
          B22E is REAL array, dimension (Q-1)
          When SBBCSD converges, B22E contains zeros. If SBBCSD fails
          to converge, then B22E contains the subdiagonal of the
          partially reduced bottom-right block.
[out]WORK
          WORK is REAL array, dimension (MAX(1,LWORK))
          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
[in]LWORK
          LWORK is INTEGER
          The dimension of the array WORK. LWORK >= MAX(1,8*Q).

          If LWORK = -1, then a workspace query is assumed; the
          routine only calculates the optimal size of the WORK array,
          returns this value as the first entry of the work array, and
          no error message related to LWORK is issued by XERBLA.
[out]INFO
          INFO is INTEGER
          = 0:  successful exit.
          < 0:  if INFO = -i, the i-th argument had an illegal value.
          > 0:  if SBBCSD did not converge, INFO specifies the number
                of nonzero entries in PHI, and B11D, B11E, etc.,
                contain the partially reduced matrix.
Internal Parameters:
  TOLMUL  REAL, default = MAX(10,MIN(100,EPS**(-1/8)))
          TOLMUL controls the convergence criterion of the QR loop.
          Angles THETA(i), PHI(i) are rounded to 0 or PI/2 when they
          are within TOLMUL*EPS of either bound.
References:
[1] Brian D. Sutton. Computing the complete CS decomposition. Numer. Algorithms, 50(1):33-65, 2009.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Date
June 2016

Definition at line 334 of file sbbcsd.f.

334 *
335 * -- LAPACK computational routine (version 3.7.1) --
336 * -- LAPACK is a software package provided by Univ. of Tennessee, --
337 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
338 * June 2016
339 *
340 * .. Scalar Arguments ..
341  CHARACTER jobu1, jobu2, jobv1t, jobv2t, trans
342  INTEGER info, ldu1, ldu2, ldv1t, ldv2t, lwork, m, p, q
343 * ..
344 * .. Array Arguments ..
345  REAL b11d( * ), b11e( * ), b12d( * ), b12e( * ),
346  $ b21d( * ), b21e( * ), b22d( * ), b22e( * ),
347  $ phi( * ), theta( * ), work( * )
348  REAL u1( ldu1, * ), u2( ldu2, * ), v1t( ldv1t, * ),
349  $ v2t( ldv2t, * )
350 * ..
351 *
352 * ===================================================================
353 *
354 * .. Parameters ..
355  INTEGER maxitr
356  parameter( maxitr = 6 )
357  REAL hundred, meighth, one, piover2, ten, zero
358  parameter( hundred = 100.0e0, meighth = -0.125e0,
359  $ one = 1.0e0, piover2 = 1.57079632679489662e0,
360  $ ten = 10.0e0, zero = 0.0e0 )
361  REAL negone
362  parameter( negone = -1.0e0 )
363 * ..
364 * .. Local Scalars ..
365  LOGICAL colmajor, lquery, restart11, restart12,
366  $ restart21, restart22, wantu1, wantu2, wantv1t,
367  $ wantv2t
368  INTEGER i, imin, imax, iter, iu1cs, iu1sn, iu2cs,
369  $ iu2sn, iv1tcs, iv1tsn, iv2tcs, iv2tsn, j,
370  $ lworkmin, lworkopt, maxit, mini
371  REAL b11bulge, b12bulge, b21bulge, b22bulge, dummy,
372  $ eps, mu, nu, r, sigma11, sigma21,
373  $ temp, thetamax, thetamin, thresh, tol, tolmul,
374  $ unfl, x1, x2, y1, y2
375 *
376 * .. External Subroutines ..
377  EXTERNAL slasr, sscal, sswap, slartgp, slartgs, slas2,
378  $ xerbla
379 * ..
380 * .. External Functions ..
381  REAL slamch
382  LOGICAL lsame
383  EXTERNAL lsame, slamch
384 * ..
385 * .. Intrinsic Functions ..
386  INTRINSIC abs, atan2, cos, max, min, sin, sqrt
387 * ..
388 * .. Executable Statements ..
389 *
390 * Test input arguments
391 *
392  info = 0
393  lquery = lwork .EQ. -1
394  wantu1 = lsame( jobu1, 'Y' )
395  wantu2 = lsame( jobu2, 'Y' )
396  wantv1t = lsame( jobv1t, 'Y' )
397  wantv2t = lsame( jobv2t, 'Y' )
398  colmajor = .NOT. lsame( trans, 'T' )
399 *
400  IF( m .LT. 0 ) THEN
401  info = -6
402  ELSE IF( p .LT. 0 .OR. p .GT. m ) THEN
403  info = -7
404  ELSE IF( q .LT. 0 .OR. q .GT. m ) THEN
405  info = -8
406  ELSE IF( q .GT. p .OR. q .GT. m-p .OR. q .GT. m-q ) THEN
407  info = -8
408  ELSE IF( wantu1 .AND. ldu1 .LT. p ) THEN
409  info = -12
410  ELSE IF( wantu2 .AND. ldu2 .LT. m-p ) THEN
411  info = -14
412  ELSE IF( wantv1t .AND. ldv1t .LT. q ) THEN
413  info = -16
414  ELSE IF( wantv2t .AND. ldv2t .LT. m-q ) THEN
415  info = -18
416  END IF
417 *
418 * Quick return if Q = 0
419 *
420  IF( info .EQ. 0 .AND. q .EQ. 0 ) THEN
421  lworkmin = 1
422  work(1) = lworkmin
423  RETURN
424  END IF
425 *
426 * Compute workspace
427 *
428  IF( info .EQ. 0 ) THEN
429  iu1cs = 1
430  iu1sn = iu1cs + q
431  iu2cs = iu1sn + q
432  iu2sn = iu2cs + q
433  iv1tcs = iu2sn + q
434  iv1tsn = iv1tcs + q
435  iv2tcs = iv1tsn + q
436  iv2tsn = iv2tcs + q
437  lworkopt = iv2tsn + q - 1
438  lworkmin = lworkopt
439  work(1) = lworkopt
440  IF( lwork .LT. lworkmin .AND. .NOT. lquery ) THEN
441  info = -28
442  END IF
443  END IF
444 *
445  IF( info .NE. 0 ) THEN
446  CALL xerbla( 'SBBCSD', -info )
447  RETURN
448  ELSE IF( lquery ) THEN
449  RETURN
450  END IF
451 *
452 * Get machine constants
453 *
454  eps = slamch( 'Epsilon' )
455  unfl = slamch( 'Safe minimum' )
456  tolmul = max( ten, min( hundred, eps**meighth ) )
457  tol = tolmul*eps
458  thresh = max( tol, maxitr*q*q*unfl )
459 *
460 * Test for negligible sines or cosines
461 *
462  DO i = 1, q
463  IF( theta(i) .LT. thresh ) THEN
464  theta(i) = zero
465  ELSE IF( theta(i) .GT. piover2-thresh ) THEN
466  theta(i) = piover2
467  END IF
468  END DO
469  DO i = 1, q-1
470  IF( phi(i) .LT. thresh ) THEN
471  phi(i) = zero
472  ELSE IF( phi(i) .GT. piover2-thresh ) THEN
473  phi(i) = piover2
474  END IF
475  END DO
476 *
477 * Initial deflation
478 *
479  imax = q
480  DO WHILE( imax .GT. 1 )
481  IF( phi(imax-1) .NE. zero ) THEN
482  EXIT
483  END IF
484  imax = imax - 1
485  END DO
486  imin = imax - 1
487  IF ( imin .GT. 1 ) THEN
488  DO WHILE( phi(imin-1) .NE. zero )
489  imin = imin - 1
490  IF ( imin .LE. 1 ) EXIT
491  END DO
492  END IF
493 *
494 * Initialize iteration counter
495 *
496  maxit = maxitr*q*q
497  iter = 0
498 *
499 * Begin main iteration loop
500 *
501  DO WHILE( imax .GT. 1 )
502 *
503 * Compute the matrix entries
504 *
505  b11d(imin) = cos( theta(imin) )
506  b21d(imin) = -sin( theta(imin) )
507  DO i = imin, imax - 1
508  b11e(i) = -sin( theta(i) ) * sin( phi(i) )
509  b11d(i+1) = cos( theta(i+1) ) * cos( phi(i) )
510  b12d(i) = sin( theta(i) ) * cos( phi(i) )
511  b12e(i) = cos( theta(i+1) ) * sin( phi(i) )
512  b21e(i) = -cos( theta(i) ) * sin( phi(i) )
513  b21d(i+1) = -sin( theta(i+1) ) * cos( phi(i) )
514  b22d(i) = cos( theta(i) ) * cos( phi(i) )
515  b22e(i) = -sin( theta(i+1) ) * sin( phi(i) )
516  END DO
517  b12d(imax) = sin( theta(imax) )
518  b22d(imax) = cos( theta(imax) )
519 *
520 * Abort if not converging; otherwise, increment ITER
521 *
522  IF( iter .GT. maxit ) THEN
523  info = 0
524  DO i = 1, q
525  IF( phi(i) .NE. zero )
526  $ info = info + 1
527  END DO
528  RETURN
529  END IF
530 *
531  iter = iter + imax - imin
532 *
533 * Compute shifts
534 *
535  thetamax = theta(imin)
536  thetamin = theta(imin)
537  DO i = imin+1, imax
538  IF( theta(i) > thetamax )
539  $ thetamax = theta(i)
540  IF( theta(i) < thetamin )
541  $ thetamin = theta(i)
542  END DO
543 *
544  IF( thetamax .GT. piover2 - thresh ) THEN
545 *
546 * Zero on diagonals of B11 and B22; induce deflation with a
547 * zero shift
548 *
549  mu = zero
550  nu = one
551 *
552  ELSE IF( thetamin .LT. thresh ) THEN
553 *
554 * Zero on diagonals of B12 and B22; induce deflation with a
555 * zero shift
556 *
557  mu = one
558  nu = zero
559 *
560  ELSE
561 *
562 * Compute shifts for B11 and B21 and use the lesser
563 *
564  CALL slas2( b11d(imax-1), b11e(imax-1), b11d(imax), sigma11,
565  $ dummy )
566  CALL slas2( b21d(imax-1), b21e(imax-1), b21d(imax), sigma21,
567  $ dummy )
568 *
569  IF( sigma11 .LE. sigma21 ) THEN
570  mu = sigma11
571  nu = sqrt( one - mu**2 )
572  IF( mu .LT. thresh ) THEN
573  mu = zero
574  nu = one
575  END IF
576  ELSE
577  nu = sigma21
578  mu = sqrt( 1.0 - nu**2 )
579  IF( nu .LT. thresh ) THEN
580  mu = one
581  nu = zero
582  END IF
583  END IF
584  END IF
585 *
586 * Rotate to produce bulges in B11 and B21
587 *
588  IF( mu .LE. nu ) THEN
589  CALL slartgs( b11d(imin), b11e(imin), mu,
590  $ work(iv1tcs+imin-1), work(iv1tsn+imin-1) )
591  ELSE
592  CALL slartgs( b21d(imin), b21e(imin), nu,
593  $ work(iv1tcs+imin-1), work(iv1tsn+imin-1) )
594  END IF
595 *
596  temp = work(iv1tcs+imin-1)*b11d(imin) +
597  $ work(iv1tsn+imin-1)*b11e(imin)
598  b11e(imin) = work(iv1tcs+imin-1)*b11e(imin) -
599  $ work(iv1tsn+imin-1)*b11d(imin)
600  b11d(imin) = temp
601  b11bulge = work(iv1tsn+imin-1)*b11d(imin+1)
602  b11d(imin+1) = work(iv1tcs+imin-1)*b11d(imin+1)
603  temp = work(iv1tcs+imin-1)*b21d(imin) +
604  $ work(iv1tsn+imin-1)*b21e(imin)
605  b21e(imin) = work(iv1tcs+imin-1)*b21e(imin) -
606  $ work(iv1tsn+imin-1)*b21d(imin)
607  b21d(imin) = temp
608  b21bulge = work(iv1tsn+imin-1)*b21d(imin+1)
609  b21d(imin+1) = work(iv1tcs+imin-1)*b21d(imin+1)
610 *
611 * Compute THETA(IMIN)
612 *
613  theta( imin ) = atan2( sqrt( b21d(imin)**2+b21bulge**2 ),
614  $ sqrt( b11d(imin)**2+b11bulge**2 ) )
615 *
616 * Chase the bulges in B11(IMIN+1,IMIN) and B21(IMIN+1,IMIN)
617 *
618  IF( b11d(imin)**2+b11bulge**2 .GT. thresh**2 ) THEN
619  CALL slartgp( b11bulge, b11d(imin), work(iu1sn+imin-1),
620  $ work(iu1cs+imin-1), r )
621  ELSE IF( mu .LE. nu ) THEN
622  CALL slartgs( b11e( imin ), b11d( imin + 1 ), mu,
623  $ work(iu1cs+imin-1), work(iu1sn+imin-1) )
624  ELSE
625  CALL slartgs( b12d( imin ), b12e( imin ), nu,
626  $ work(iu1cs+imin-1), work(iu1sn+imin-1) )
627  END IF
628  IF( b21d(imin)**2+b21bulge**2 .GT. thresh**2 ) THEN
629  CALL slartgp( b21bulge, b21d(imin), work(iu2sn+imin-1),
630  $ work(iu2cs+imin-1), r )
631  ELSE IF( nu .LT. mu ) THEN
632  CALL slartgs( b21e( imin ), b21d( imin + 1 ), nu,
633  $ work(iu2cs+imin-1), work(iu2sn+imin-1) )
634  ELSE
635  CALL slartgs( b22d(imin), b22e(imin), mu,
636  $ work(iu2cs+imin-1), work(iu2sn+imin-1) )
637  END IF
638  work(iu2cs+imin-1) = -work(iu2cs+imin-1)
639  work(iu2sn+imin-1) = -work(iu2sn+imin-1)
640 *
641  temp = work(iu1cs+imin-1)*b11e(imin) +
642  $ work(iu1sn+imin-1)*b11d(imin+1)
643  b11d(imin+1) = work(iu1cs+imin-1)*b11d(imin+1) -
644  $ work(iu1sn+imin-1)*b11e(imin)
645  b11e(imin) = temp
646  IF( imax .GT. imin+1 ) THEN
647  b11bulge = work(iu1sn+imin-1)*b11e(imin+1)
648  b11e(imin+1) = work(iu1cs+imin-1)*b11e(imin+1)
649  END IF
650  temp = work(iu1cs+imin-1)*b12d(imin) +
651  $ work(iu1sn+imin-1)*b12e(imin)
652  b12e(imin) = work(iu1cs+imin-1)*b12e(imin) -
653  $ work(iu1sn+imin-1)*b12d(imin)
654  b12d(imin) = temp
655  b12bulge = work(iu1sn+imin-1)*b12d(imin+1)
656  b12d(imin+1) = work(iu1cs+imin-1)*b12d(imin+1)
657  temp = work(iu2cs+imin-1)*b21e(imin) +
658  $ work(iu2sn+imin-1)*b21d(imin+1)
659  b21d(imin+1) = work(iu2cs+imin-1)*b21d(imin+1) -
660  $ work(iu2sn+imin-1)*b21e(imin)
661  b21e(imin) = temp
662  IF( imax .GT. imin+1 ) THEN
663  b21bulge = work(iu2sn+imin-1)*b21e(imin+1)
664  b21e(imin+1) = work(iu2cs+imin-1)*b21e(imin+1)
665  END IF
666  temp = work(iu2cs+imin-1)*b22d(imin) +
667  $ work(iu2sn+imin-1)*b22e(imin)
668  b22e(imin) = work(iu2cs+imin-1)*b22e(imin) -
669  $ work(iu2sn+imin-1)*b22d(imin)
670  b22d(imin) = temp
671  b22bulge = work(iu2sn+imin-1)*b22d(imin+1)
672  b22d(imin+1) = work(iu2cs+imin-1)*b22d(imin+1)
673 *
674 * Inner loop: chase bulges from B11(IMIN,IMIN+2),
675 * B12(IMIN,IMIN+1), B21(IMIN,IMIN+2), and B22(IMIN,IMIN+1) to
676 * bottom-right
677 *
678  DO i = imin+1, imax-1
679 *
680 * Compute PHI(I-1)
681 *
682  x1 = sin(theta(i-1))*b11e(i-1) + cos(theta(i-1))*b21e(i-1)
683  x2 = sin(theta(i-1))*b11bulge + cos(theta(i-1))*b21bulge
684  y1 = sin(theta(i-1))*b12d(i-1) + cos(theta(i-1))*b22d(i-1)
685  y2 = sin(theta(i-1))*b12bulge + cos(theta(i-1))*b22bulge
686 *
687  phi(i-1) = atan2( sqrt(x1**2+x2**2), sqrt(y1**2+y2**2) )
688 *
689 * Determine if there are bulges to chase or if a new direct
690 * summand has been reached
691 *
692  restart11 = b11e(i-1)**2 + b11bulge**2 .LE. thresh**2
693  restart21 = b21e(i-1)**2 + b21bulge**2 .LE. thresh**2
694  restart12 = b12d(i-1)**2 + b12bulge**2 .LE. thresh**2
695  restart22 = b22d(i-1)**2 + b22bulge**2 .LE. thresh**2
696 *
697 * If possible, chase bulges from B11(I-1,I+1), B12(I-1,I),
698 * B21(I-1,I+1), and B22(I-1,I). If necessary, restart bulge-
699 * chasing by applying the original shift again.
700 *
701  IF( .NOT. restart11 .AND. .NOT. restart21 ) THEN
702  CALL slartgp( x2, x1, work(iv1tsn+i-1), work(iv1tcs+i-1),
703  $ r )
704  ELSE IF( .NOT. restart11 .AND. restart21 ) THEN
705  CALL slartgp( b11bulge, b11e(i-1), work(iv1tsn+i-1),
706  $ work(iv1tcs+i-1), r )
707  ELSE IF( restart11 .AND. .NOT. restart21 ) THEN
708  CALL slartgp( b21bulge, b21e(i-1), work(iv1tsn+i-1),
709  $ work(iv1tcs+i-1), r )
710  ELSE IF( mu .LE. nu ) THEN
711  CALL slartgs( b11d(i), b11e(i), mu, work(iv1tcs+i-1),
712  $ work(iv1tsn+i-1) )
713  ELSE
714  CALL slartgs( b21d(i), b21e(i), nu, work(iv1tcs+i-1),
715  $ work(iv1tsn+i-1) )
716  END IF
717  work(iv1tcs+i-1) = -work(iv1tcs+i-1)
718  work(iv1tsn+i-1) = -work(iv1tsn+i-1)
719  IF( .NOT. restart12 .AND. .NOT. restart22 ) THEN
720  CALL slartgp( y2, y1, work(iv2tsn+i-1-1),
721  $ work(iv2tcs+i-1-1), r )
722  ELSE IF( .NOT. restart12 .AND. restart22 ) THEN
723  CALL slartgp( b12bulge, b12d(i-1), work(iv2tsn+i-1-1),
724  $ work(iv2tcs+i-1-1), r )
725  ELSE IF( restart12 .AND. .NOT. restart22 ) THEN
726  CALL slartgp( b22bulge, b22d(i-1), work(iv2tsn+i-1-1),
727  $ work(iv2tcs+i-1-1), r )
728  ELSE IF( nu .LT. mu ) THEN
729  CALL slartgs( b12e(i-1), b12d(i), nu, work(iv2tcs+i-1-1),
730  $ work(iv2tsn+i-1-1) )
731  ELSE
732  CALL slartgs( b22e(i-1), b22d(i), mu, work(iv2tcs+i-1-1),
733  $ work(iv2tsn+i-1-1) )
734  END IF
735 *
736  temp = work(iv1tcs+i-1)*b11d(i) + work(iv1tsn+i-1)*b11e(i)
737  b11e(i) = work(iv1tcs+i-1)*b11e(i) -
738  $ work(iv1tsn+i-1)*b11d(i)
739  b11d(i) = temp
740  b11bulge = work(iv1tsn+i-1)*b11d(i+1)
741  b11d(i+1) = work(iv1tcs+i-1)*b11d(i+1)
742  temp = work(iv1tcs+i-1)*b21d(i) + work(iv1tsn+i-1)*b21e(i)
743  b21e(i) = work(iv1tcs+i-1)*b21e(i) -
744  $ work(iv1tsn+i-1)*b21d(i)
745  b21d(i) = temp
746  b21bulge = work(iv1tsn+i-1)*b21d(i+1)
747  b21d(i+1) = work(iv1tcs+i-1)*b21d(i+1)
748  temp = work(iv2tcs+i-1-1)*b12e(i-1) +
749  $ work(iv2tsn+i-1-1)*b12d(i)
750  b12d(i) = work(iv2tcs+i-1-1)*b12d(i) -
751  $ work(iv2tsn+i-1-1)*b12e(i-1)
752  b12e(i-1) = temp
753  b12bulge = work(iv2tsn+i-1-1)*b12e(i)
754  b12e(i) = work(iv2tcs+i-1-1)*b12e(i)
755  temp = work(iv2tcs+i-1-1)*b22e(i-1) +
756  $ work(iv2tsn+i-1-1)*b22d(i)
757  b22d(i) = work(iv2tcs+i-1-1)*b22d(i) -
758  $ work(iv2tsn+i-1-1)*b22e(i-1)
759  b22e(i-1) = temp
760  b22bulge = work(iv2tsn+i-1-1)*b22e(i)
761  b22e(i) = work(iv2tcs+i-1-1)*b22e(i)
762 *
763 * Compute THETA(I)
764 *
765  x1 = cos(phi(i-1))*b11d(i) + sin(phi(i-1))*b12e(i-1)
766  x2 = cos(phi(i-1))*b11bulge + sin(phi(i-1))*b12bulge
767  y1 = cos(phi(i-1))*b21d(i) + sin(phi(i-1))*b22e(i-1)
768  y2 = cos(phi(i-1))*b21bulge + sin(phi(i-1))*b22bulge
769 *
770  theta(i) = atan2( sqrt(y1**2+y2**2), sqrt(x1**2+x2**2) )
771 *
772 * Determine if there are bulges to chase or if a new direct
773 * summand has been reached
774 *
775  restart11 = b11d(i)**2 + b11bulge**2 .LE. thresh**2
776  restart12 = b12e(i-1)**2 + b12bulge**2 .LE. thresh**2
777  restart21 = b21d(i)**2 + b21bulge**2 .LE. thresh**2
778  restart22 = b22e(i-1)**2 + b22bulge**2 .LE. thresh**2
779 *
780 * If possible, chase bulges from B11(I+1,I), B12(I+1,I-1),
781 * B21(I+1,I), and B22(I+1,I-1). If necessary, restart bulge-
782 * chasing by applying the original shift again.
783 *
784  IF( .NOT. restart11 .AND. .NOT. restart12 ) THEN
785  CALL slartgp( x2, x1, work(iu1sn+i-1), work(iu1cs+i-1),
786  $ r )
787  ELSE IF( .NOT. restart11 .AND. restart12 ) THEN
788  CALL slartgp( b11bulge, b11d(i), work(iu1sn+i-1),
789  $ work(iu1cs+i-1), r )
790  ELSE IF( restart11 .AND. .NOT. restart12 ) THEN
791  CALL slartgp( b12bulge, b12e(i-1), work(iu1sn+i-1),
792  $ work(iu1cs+i-1), r )
793  ELSE IF( mu .LE. nu ) THEN
794  CALL slartgs( b11e(i), b11d(i+1), mu, work(iu1cs+i-1),
795  $ work(iu1sn+i-1) )
796  ELSE
797  CALL slartgs( b12d(i), b12e(i), nu, work(iu1cs+i-1),
798  $ work(iu1sn+i-1) )
799  END IF
800  IF( .NOT. restart21 .AND. .NOT. restart22 ) THEN
801  CALL slartgp( y2, y1, work(iu2sn+i-1), work(iu2cs+i-1),
802  $ r )
803  ELSE IF( .NOT. restart21 .AND. restart22 ) THEN
804  CALL slartgp( b21bulge, b21d(i), work(iu2sn+i-1),
805  $ work(iu2cs+i-1), r )
806  ELSE IF( restart21 .AND. .NOT. restart22 ) THEN
807  CALL slartgp( b22bulge, b22e(i-1), work(iu2sn+i-1),
808  $ work(iu2cs+i-1), r )
809  ELSE IF( nu .LT. mu ) THEN
810  CALL slartgs( b21e(i), b21e(i+1), nu, work(iu2cs+i-1),
811  $ work(iu2sn+i-1) )
812  ELSE
813  CALL slartgs( b22d(i), b22e(i), mu, work(iu2cs+i-1),
814  $ work(iu2sn+i-1) )
815  END IF
816  work(iu2cs+i-1) = -work(iu2cs+i-1)
817  work(iu2sn+i-1) = -work(iu2sn+i-1)
818 *
819  temp = work(iu1cs+i-1)*b11e(i) + work(iu1sn+i-1)*b11d(i+1)
820  b11d(i+1) = work(iu1cs+i-1)*b11d(i+1) -
821  $ work(iu1sn+i-1)*b11e(i)
822  b11e(i) = temp
823  IF( i .LT. imax - 1 ) THEN
824  b11bulge = work(iu1sn+i-1)*b11e(i+1)
825  b11e(i+1) = work(iu1cs+i-1)*b11e(i+1)
826  END IF
827  temp = work(iu2cs+i-1)*b21e(i) + work(iu2sn+i-1)*b21d(i+1)
828  b21d(i+1) = work(iu2cs+i-1)*b21d(i+1) -
829  $ work(iu2sn+i-1)*b21e(i)
830  b21e(i) = temp
831  IF( i .LT. imax - 1 ) THEN
832  b21bulge = work(iu2sn+i-1)*b21e(i+1)
833  b21e(i+1) = work(iu2cs+i-1)*b21e(i+1)
834  END IF
835  temp = work(iu1cs+i-1)*b12d(i) + work(iu1sn+i-1)*b12e(i)
836  b12e(i) = work(iu1cs+i-1)*b12e(i) - work(iu1sn+i-1)*b12d(i)
837  b12d(i) = temp
838  b12bulge = work(iu1sn+i-1)*b12d(i+1)
839  b12d(i+1) = work(iu1cs+i-1)*b12d(i+1)
840  temp = work(iu2cs+i-1)*b22d(i) + work(iu2sn+i-1)*b22e(i)
841  b22e(i) = work(iu2cs+i-1)*b22e(i) - work(iu2sn+i-1)*b22d(i)
842  b22d(i) = temp
843  b22bulge = work(iu2sn+i-1)*b22d(i+1)
844  b22d(i+1) = work(iu2cs+i-1)*b22d(i+1)
845 *
846  END DO
847 *
848 * Compute PHI(IMAX-1)
849 *
850  x1 = sin(theta(imax-1))*b11e(imax-1) +
851  $ cos(theta(imax-1))*b21e(imax-1)
852  y1 = sin(theta(imax-1))*b12d(imax-1) +
853  $ cos(theta(imax-1))*b22d(imax-1)
854  y2 = sin(theta(imax-1))*b12bulge + cos(theta(imax-1))*b22bulge
855 *
856  phi(imax-1) = atan2( abs(x1), sqrt(y1**2+y2**2) )
857 *
858 * Chase bulges from B12(IMAX-1,IMAX) and B22(IMAX-1,IMAX)
859 *
860  restart12 = b12d(imax-1)**2 + b12bulge**2 .LE. thresh**2
861  restart22 = b22d(imax-1)**2 + b22bulge**2 .LE. thresh**2
862 *
863  IF( .NOT. restart12 .AND. .NOT. restart22 ) THEN
864  CALL slartgp( y2, y1, work(iv2tsn+imax-1-1),
865  $ work(iv2tcs+imax-1-1), r )
866  ELSE IF( .NOT. restart12 .AND. restart22 ) THEN
867  CALL slartgp( b12bulge, b12d(imax-1), work(iv2tsn+imax-1-1),
868  $ work(iv2tcs+imax-1-1), r )
869  ELSE IF( restart12 .AND. .NOT. restart22 ) THEN
870  CALL slartgp( b22bulge, b22d(imax-1), work(iv2tsn+imax-1-1),
871  $ work(iv2tcs+imax-1-1), r )
872  ELSE IF( nu .LT. mu ) THEN
873  CALL slartgs( b12e(imax-1), b12d(imax), nu,
874  $ work(iv2tcs+imax-1-1), work(iv2tsn+imax-1-1) )
875  ELSE
876  CALL slartgs( b22e(imax-1), b22d(imax), mu,
877  $ work(iv2tcs+imax-1-1), work(iv2tsn+imax-1-1) )
878  END IF
879 *
880  temp = work(iv2tcs+imax-1-1)*b12e(imax-1) +
881  $ work(iv2tsn+imax-1-1)*b12d(imax)
882  b12d(imax) = work(iv2tcs+imax-1-1)*b12d(imax) -
883  $ work(iv2tsn+imax-1-1)*b12e(imax-1)
884  b12e(imax-1) = temp
885  temp = work(iv2tcs+imax-1-1)*b22e(imax-1) +
886  $ work(iv2tsn+imax-1-1)*b22d(imax)
887  b22d(imax) = work(iv2tcs+imax-1-1)*b22d(imax) -
888  $ work(iv2tsn+imax-1-1)*b22e(imax-1)
889  b22e(imax-1) = temp
890 *
891 * Update singular vectors
892 *
893  IF( wantu1 ) THEN
894  IF( colmajor ) THEN
895  CALL slasr( 'R', 'V', 'F', p, imax-imin+1,
896  $ work(iu1cs+imin-1), work(iu1sn+imin-1),
897  $ u1(1,imin), ldu1 )
898  ELSE
899  CALL slasr( 'L', 'V', 'F', imax-imin+1, p,
900  $ work(iu1cs+imin-1), work(iu1sn+imin-1),
901  $ u1(imin,1), ldu1 )
902  END IF
903  END IF
904  IF( wantu2 ) THEN
905  IF( colmajor ) THEN
906  CALL slasr( 'R', 'V', 'F', m-p, imax-imin+1,
907  $ work(iu2cs+imin-1), work(iu2sn+imin-1),
908  $ u2(1,imin), ldu2 )
909  ELSE
910  CALL slasr( 'L', 'V', 'F', imax-imin+1, m-p,
911  $ work(iu2cs+imin-1), work(iu2sn+imin-1),
912  $ u2(imin,1), ldu2 )
913  END IF
914  END IF
915  IF( wantv1t ) THEN
916  IF( colmajor ) THEN
917  CALL slasr( 'L', 'V', 'F', imax-imin+1, q,
918  $ work(iv1tcs+imin-1), work(iv1tsn+imin-1),
919  $ v1t(imin,1), ldv1t )
920  ELSE
921  CALL slasr( 'R', 'V', 'F', q, imax-imin+1,
922  $ work(iv1tcs+imin-1), work(iv1tsn+imin-1),
923  $ v1t(1,imin), ldv1t )
924  END IF
925  END IF
926  IF( wantv2t ) THEN
927  IF( colmajor ) THEN
928  CALL slasr( 'L', 'V', 'F', imax-imin+1, m-q,
929  $ work(iv2tcs+imin-1), work(iv2tsn+imin-1),
930  $ v2t(imin,1), ldv2t )
931  ELSE
932  CALL slasr( 'R', 'V', 'F', m-q, imax-imin+1,
933  $ work(iv2tcs+imin-1), work(iv2tsn+imin-1),
934  $ v2t(1,imin), ldv2t )
935  END IF
936  END IF
937 *
938 * Fix signs on B11(IMAX-1,IMAX) and B21(IMAX-1,IMAX)
939 *
940  IF( b11e(imax-1)+b21e(imax-1) .GT. 0 ) THEN
941  b11d(imax) = -b11d(imax)
942  b21d(imax) = -b21d(imax)
943  IF( wantv1t ) THEN
944  IF( colmajor ) THEN
945  CALL sscal( q, negone, v1t(imax,1), ldv1t )
946  ELSE
947  CALL sscal( q, negone, v1t(1,imax), 1 )
948  END IF
949  END IF
950  END IF
951 *
952 * Compute THETA(IMAX)
953 *
954  x1 = cos(phi(imax-1))*b11d(imax) +
955  $ sin(phi(imax-1))*b12e(imax-1)
956  y1 = cos(phi(imax-1))*b21d(imax) +
957  $ sin(phi(imax-1))*b22e(imax-1)
958 *
959  theta(imax) = atan2( abs(y1), abs(x1) )
960 *
961 * Fix signs on B11(IMAX,IMAX), B12(IMAX,IMAX-1), B21(IMAX,IMAX),
962 * and B22(IMAX,IMAX-1)
963 *
964  IF( b11d(imax)+b12e(imax-1) .LT. 0 ) THEN
965  b12d(imax) = -b12d(imax)
966  IF( wantu1 ) THEN
967  IF( colmajor ) THEN
968  CALL sscal( p, negone, u1(1,imax), 1 )
969  ELSE
970  CALL sscal( p, negone, u1(imax,1), ldu1 )
971  END IF
972  END IF
973  END IF
974  IF( b21d(imax)+b22e(imax-1) .GT. 0 ) THEN
975  b22d(imax) = -b22d(imax)
976  IF( wantu2 ) THEN
977  IF( colmajor ) THEN
978  CALL sscal( m-p, negone, u2(1,imax), 1 )
979  ELSE
980  CALL sscal( m-p, negone, u2(imax,1), ldu2 )
981  END IF
982  END IF
983  END IF
984 *
985 * Fix signs on B12(IMAX,IMAX) and B22(IMAX,IMAX)
986 *
987  IF( b12d(imax)+b22d(imax) .LT. 0 ) THEN
988  IF( wantv2t ) THEN
989  IF( colmajor ) THEN
990  CALL sscal( m-q, negone, v2t(imax,1), ldv2t )
991  ELSE
992  CALL sscal( m-q, negone, v2t(1,imax), 1 )
993  END IF
994  END IF
995  END IF
996 *
997 * Test for negligible sines or cosines
998 *
999  DO i = imin, imax
1000  IF( theta(i) .LT. thresh ) THEN
1001  theta(i) = zero
1002  ELSE IF( theta(i) .GT. piover2-thresh ) THEN
1003  theta(i) = piover2
1004  END IF
1005  END DO
1006  DO i = imin, imax-1
1007  IF( phi(i) .LT. thresh ) THEN
1008  phi(i) = zero
1009  ELSE IF( phi(i) .GT. piover2-thresh ) THEN
1010  phi(i) = piover2
1011  END IF
1012  END DO
1013 *
1014 * Deflate
1015 *
1016  IF (imax .GT. 1) THEN
1017  DO WHILE( phi(imax-1) .EQ. zero )
1018  imax = imax - 1
1019  IF (imax .LE. 1) EXIT
1020  END DO
1021  END IF
1022  IF( imin .GT. imax - 1 )
1023  $ imin = imax - 1
1024  IF (imin .GT. 1) THEN
1025  DO WHILE (phi(imin-1) .NE. zero)
1026  imin = imin - 1
1027  IF (imin .LE. 1) EXIT
1028  END DO
1029  END IF
1030 *
1031 * Repeat main iteration loop
1032 *
1033  END DO
1034 *
1035 * Postprocessing: order THETA from least to greatest
1036 *
1037  DO i = 1, q
1038 *
1039  mini = i
1040  thetamin = theta(i)
1041  DO j = i+1, q
1042  IF( theta(j) .LT. thetamin ) THEN
1043  mini = j
1044  thetamin = theta(j)
1045  END IF
1046  END DO
1047 *
1048  IF( mini .NE. i ) THEN
1049  theta(mini) = theta(i)
1050  theta(i) = thetamin
1051  IF( colmajor ) THEN
1052  IF( wantu1 )
1053  $ CALL sswap( p, u1(1,i), 1, u1(1,mini), 1 )
1054  IF( wantu2 )
1055  $ CALL sswap( m-p, u2(1,i), 1, u2(1,mini), 1 )
1056  IF( wantv1t )
1057  $ CALL sswap( q, v1t(i,1), ldv1t, v1t(mini,1), ldv1t )
1058  IF( wantv2t )
1059  $ CALL sswap( m-q, v2t(i,1), ldv2t, v2t(mini,1),
1060  $ ldv2t )
1061  ELSE
1062  IF( wantu1 )
1063  $ CALL sswap( p, u1(i,1), ldu1, u1(mini,1), ldu1 )
1064  IF( wantu2 )
1065  $ CALL sswap( m-p, u2(i,1), ldu2, u2(mini,1), ldu2 )
1066  IF( wantv1t )
1067  $ CALL sswap( q, v1t(1,i), 1, v1t(1,mini), 1 )
1068  IF( wantv2t )
1069  $ CALL sswap( m-q, v2t(1,i), 1, v2t(1,mini), 1 )
1070  END IF
1071  END IF
1072 *
1073  END DO
1074 *
1075  RETURN
1076 *
1077 * End of SBBCSD
1078 *
subroutine slas2(F, G, H, SSMIN, SSMAX)
SLAS2 computes singular values of a 2-by-2 triangular matrix.
Definition: slas2.f:109
subroutine slasr(SIDE, PIVOT, DIRECT, M, N, C, S, A, LDA)
SLASR applies a sequence of plane rotations to a general rectangular matrix.
Definition: slasr.f:201
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
logical function lsame(CA, CB)
LSAME
Definition: lsame.f:55
real function slamch(CMACH)
SLAMCH
Definition: slamch.f:69
subroutine sscal(N, SA, SX, INCX)
SSCAL
Definition: sscal.f:81
subroutine slartgp(F, G, CS, SN, R)
SLARTGP generates a plane rotation so that the diagonal is nonnegative.
Definition: slartgp.f:97
subroutine sswap(N, SX, INCX, SY, INCY)
SSWAP
Definition: sswap.f:84
subroutine slartgs(X, Y, SIGMA, CS, SN)
SLARTGS generates a plane rotation designed to introduce a bulge in implicit QR iteration for the bid...
Definition: slartgs.f:92
Here is the call graph for this function:
Here is the caller graph for this function: