LAPACK  3.4.2
LAPACK: Linear Algebra PACKage
 All Files Functions Groups
sbbcsd.f
Go to the documentation of this file.
1 *> \brief \b SBBCSD
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download SBBCSD + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sbbcsd.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sbbcsd.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sbbcsd.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE SBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q,
22 * THETA, PHI, U1, LDU1, U2, LDU2, V1T, LDV1T,
23 * V2T, LDV2T, B11D, B11E, B12D, B12E, B21D, B21E,
24 * B22D, B22E, WORK, LWORK, INFO )
25 *
26 * .. Scalar Arguments ..
27 * CHARACTER JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS
28 * INTEGER INFO, LDU1, LDU2, LDV1T, LDV2T, LWORK, M, P, Q
29 * ..
30 * .. Array Arguments ..
31 * REAL B11D( * ), B11E( * ), B12D( * ), B12E( * ),
32 * $ B21D( * ), B21E( * ), B22D( * ), B22E( * ),
33 * $ PHI( * ), THETA( * ), WORK( * )
34 * REAL U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ),
35 * $ V2T( LDV2T, * )
36 * ..
37 *
38 *
39 *> \par Purpose:
40 * =============
41 *>
42 *> \verbatim
43 *>
44 *> SBBCSD computes the CS decomposition of an orthogonal matrix in
45 *> bidiagonal-block form,
46 *>
47 *>
48 *> [ B11 | B12 0 0 ]
49 *> [ 0 | 0 -I 0 ]
50 *> X = [----------------]
51 *> [ B21 | B22 0 0 ]
52 *> [ 0 | 0 0 I ]
53 *>
54 *> [ C | -S 0 0 ]
55 *> [ U1 | ] [ 0 | 0 -I 0 ] [ V1 | ]**T
56 *> = [---------] [---------------] [---------] .
57 *> [ | U2 ] [ S | C 0 0 ] [ | V2 ]
58 *> [ 0 | 0 0 I ]
59 *>
60 *> X is M-by-M, its top-left block is P-by-Q, and Q must be no larger
61 *> than P, M-P, or M-Q. (If Q is not the smallest index, then X must be
62 *> transposed and/or permuted. This can be done in constant time using
63 *> the TRANS and SIGNS options. See SORCSD for details.)
64 *>
65 *> The bidiagonal matrices B11, B12, B21, and B22 are represented
66 *> implicitly by angles THETA(1:Q) and PHI(1:Q-1).
67 *>
68 *> The orthogonal matrices U1, U2, V1T, and V2T are input/output.
69 *> The input matrices are pre- or post-multiplied by the appropriate
70 *> singular vector matrices.
71 *> \endverbatim
72 *
73 * Arguments:
74 * ==========
75 *
76 *> \param[in] JOBU1
77 *> \verbatim
78 *> JOBU1 is CHARACTER
79 *> = 'Y': U1 is updated;
80 *> otherwise: U1 is not updated.
81 *> \endverbatim
82 *>
83 *> \param[in] JOBU2
84 *> \verbatim
85 *> JOBU2 is CHARACTER
86 *> = 'Y': U2 is updated;
87 *> otherwise: U2 is not updated.
88 *> \endverbatim
89 *>
90 *> \param[in] JOBV1T
91 *> \verbatim
92 *> JOBV1T is CHARACTER
93 *> = 'Y': V1T is updated;
94 *> otherwise: V1T is not updated.
95 *> \endverbatim
96 *>
97 *> \param[in] JOBV2T
98 *> \verbatim
99 *> JOBV2T is CHARACTER
100 *> = 'Y': V2T is updated;
101 *> otherwise: V2T is not updated.
102 *> \endverbatim
103 *>
104 *> \param[in] TRANS
105 *> \verbatim
106 *> TRANS is CHARACTER
107 *> = 'T': X, U1, U2, V1T, and V2T are stored in row-major
108 *> order;
109 *> otherwise: X, U1, U2, V1T, and V2T are stored in column-
110 *> major order.
111 *> \endverbatim
112 *>
113 *> \param[in] M
114 *> \verbatim
115 *> M is INTEGER
116 *> The number of rows and columns in X, the orthogonal matrix in
117 *> bidiagonal-block form.
118 *> \endverbatim
119 *>
120 *> \param[in] P
121 *> \verbatim
122 *> P is INTEGER
123 *> The number of rows in the top-left block of X. 0 <= P <= M.
124 *> \endverbatim
125 *>
126 *> \param[in] Q
127 *> \verbatim
128 *> Q is INTEGER
129 *> The number of columns in the top-left block of X.
130 *> 0 <= Q <= MIN(P,M-P,M-Q).
131 *> \endverbatim
132 *>
133 *> \param[in,out] THETA
134 *> \verbatim
135 *> THETA is REAL array, dimension (Q)
136 *> On entry, the angles THETA(1),...,THETA(Q) that, along with
137 *> PHI(1), ...,PHI(Q-1), define the matrix in bidiagonal-block
138 *> form. On exit, the angles whose cosines and sines define the
139 *> diagonal blocks in the CS decomposition.
140 *> \endverbatim
141 *>
142 *> \param[in,out] PHI
143 *> \verbatim
144 *> PHI is REAL array, dimension (Q-1)
145 *> The angles PHI(1),...,PHI(Q-1) that, along with THETA(1),...,
146 *> THETA(Q), define the matrix in bidiagonal-block form.
147 *> \endverbatim
148 *>
149 *> \param[in,out] U1
150 *> \verbatim
151 *> U1 is REAL array, dimension (LDU1,P)
152 *> On entry, an LDU1-by-P matrix. On exit, U1 is postmultiplied
153 *> by the left singular vector matrix common to [ B11 ; 0 ] and
154 *> [ B12 0 0 ; 0 -I 0 0 ].
155 *> \endverbatim
156 *>
157 *> \param[in] LDU1
158 *> \verbatim
159 *> LDU1 is INTEGER
160 *> The leading dimension of the array U1.
161 *> \endverbatim
162 *>
163 *> \param[in,out] U2
164 *> \verbatim
165 *> U2 is REAL array, dimension (LDU2,M-P)
166 *> On entry, an LDU2-by-(M-P) matrix. On exit, U2 is
167 *> postmultiplied by the left singular vector matrix common to
168 *> [ B21 ; 0 ] and [ B22 0 0 ; 0 0 I ].
169 *> \endverbatim
170 *>
171 *> \param[in] LDU2
172 *> \verbatim
173 *> LDU2 is INTEGER
174 *> The leading dimension of the array U2.
175 *> \endverbatim
176 *>
177 *> \param[in,out] V1T
178 *> \verbatim
179 *> V1T is REAL array, dimension (LDV1T,Q)
180 *> On entry, a LDV1T-by-Q matrix. On exit, V1T is premultiplied
181 *> by the transpose of the right singular vector
182 *> matrix common to [ B11 ; 0 ] and [ B21 ; 0 ].
183 *> \endverbatim
184 *>
185 *> \param[in] LDV1T
186 *> \verbatim
187 *> LDV1T is INTEGER
188 *> The leading dimension of the array V1T.
189 *> \endverbatim
190 *>
191 *> \param[in,out] V2T
192 *> \verbatim
193 *> V2T is REAL array, dimenison (LDV2T,M-Q)
194 *> On entry, a LDV2T-by-(M-Q) matrix. On exit, V2T is
195 *> premultiplied by the transpose of the right
196 *> singular vector matrix common to [ B12 0 0 ; 0 -I 0 ] and
197 *> [ B22 0 0 ; 0 0 I ].
198 *> \endverbatim
199 *>
200 *> \param[in] LDV2T
201 *> \verbatim
202 *> LDV2T is INTEGER
203 *> The leading dimension of the array V2T.
204 *> \endverbatim
205 *>
206 *> \param[out] B11D
207 *> \verbatim
208 *> B11D is REAL array, dimension (Q)
209 *> When SBBCSD converges, B11D contains the cosines of THETA(1),
210 *> ..., THETA(Q). If SBBCSD fails to converge, then B11D
211 *> contains the diagonal of the partially reduced top-left
212 *> block.
213 *> \endverbatim
214 *>
215 *> \param[out] B11E
216 *> \verbatim
217 *> B11E is REAL array, dimension (Q-1)
218 *> When SBBCSD converges, B11E contains zeros. If SBBCSD fails
219 *> to converge, then B11E contains the superdiagonal of the
220 *> partially reduced top-left block.
221 *> \endverbatim
222 *>
223 *> \param[out] B12D
224 *> \verbatim
225 *> B12D is REAL array, dimension (Q)
226 *> When SBBCSD converges, B12D contains the negative sines of
227 *> THETA(1), ..., THETA(Q). If SBBCSD fails to converge, then
228 *> B12D contains the diagonal of the partially reduced top-right
229 *> block.
230 *> \endverbatim
231 *>
232 *> \param[out] B12E
233 *> \verbatim
234 *> B12E is REAL array, dimension (Q-1)
235 *> When SBBCSD converges, B12E contains zeros. If SBBCSD fails
236 *> to converge, then B12E contains the subdiagonal of the
237 *> partially reduced top-right block.
238 *> \endverbatim
239 *>
240 *> \param[out] B21D
241 *> \verbatim
242 *> B21D is REAL array, dimension (Q)
243 *> When CBBCSD converges, B21D contains the negative sines of
244 *> THETA(1), ..., THETA(Q). If CBBCSD fails to converge, then
245 *> B21D contains the diagonal of the partially reduced bottom-left
246 *> block.
247 *> \endverbatim
248 *>
249 *> \param[out] B21E
250 *> \verbatim
251 *> B21E is REAL array, dimension (Q-1)
252 *> When CBBCSD converges, B21E contains zeros. If CBBCSD fails
253 *> to converge, then B21E contains the subdiagonal of the
254 *> partially reduced bottom-left block.
255 *> \endverbatim
256 *>
257 *> \param[out] B22D
258 *> \verbatim
259 *> B22D is REAL array, dimension (Q)
260 *> When CBBCSD converges, B22D contains the negative sines of
261 *> THETA(1), ..., THETA(Q). If CBBCSD fails to converge, then
262 *> B22D contains the diagonal of the partially reduced bottom-right
263 *> block.
264 *> \endverbatim
265 *>
266 *> \param[out] B22E
267 *> \verbatim
268 *> B22E is REAL array, dimension (Q-1)
269 *> When CBBCSD converges, B22E contains zeros. If CBBCSD fails
270 *> to converge, then B22E contains the subdiagonal of the
271 *> partially reduced bottom-right block.
272 *> \endverbatim
273 *>
274 *> \param[out] WORK
275 *> \verbatim
276 *> WORK is REAL array, dimension (MAX(1,LWORK))
277 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
278 *> \endverbatim
279 *>
280 *> \param[in] LWORK
281 *> \verbatim
282 *> LWORK is INTEGER
283 *> The dimension of the array WORK. LWORK >= MAX(1,8*Q).
284 *>
285 *> If LWORK = -1, then a workspace query is assumed; the
286 *> routine only calculates the optimal size of the WORK array,
287 *> returns this value as the first entry of the work array, and
288 *> no error message related to LWORK is issued by XERBLA.
289 *> \endverbatim
290 *>
291 *> \param[out] INFO
292 *> \verbatim
293 *> INFO is INTEGER
294 *> = 0: successful exit.
295 *> < 0: if INFO = -i, the i-th argument had an illegal value.
296 *> > 0: if SBBCSD did not converge, INFO specifies the number
297 *> of nonzero entries in PHI, and B11D, B11E, etc.,
298 *> contain the partially reduced matrix.
299 *> \endverbatim
300 *
301 *> \par Internal Parameters:
302 * =========================
303 *>
304 *> \verbatim
305 *> TOLMUL REAL, default = MAX(10,MIN(100,EPS**(-1/8)))
306 *> TOLMUL controls the convergence criterion of the QR loop.
307 *> Angles THETA(i), PHI(i) are rounded to 0 or PI/2 when they
308 *> are within TOLMUL*EPS of either bound.
309 *> \endverbatim
310 *
311 *> \par References:
312 * ================
313 *>
314 *> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer.
315 *> Algorithms, 50(1):33-65, 2009.
316 *
317 * Authors:
318 * ========
319 *
320 *> \author Univ. of Tennessee
321 *> \author Univ. of California Berkeley
322 *> \author Univ. of Colorado Denver
323 *> \author NAG Ltd.
324 *
325 *> \date November 2011
326 *
327 *> \ingroup realOTHERcomputational
328 *
329 * =====================================================================
330  SUBROUTINE sbbcsd( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q,
331  $ theta, phi, u1, ldu1, u2, ldu2, v1t, ldv1t,
332  $ v2t, ldv2t, b11d, b11e, b12d, b12e, b21d, b21e,
333  $ b22d, b22e, work, lwork, info )
334 *
335 * -- LAPACK computational routine (version 3.4.0) --
336 * -- LAPACK is a software package provided by Univ. of Tennessee, --
337 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
338 * November 2011
339 *
340 * .. Scalar Arguments ..
341  CHARACTER jobu1, jobu2, jobv1t, jobv2t, trans
342  INTEGER info, ldu1, ldu2, ldv1t, ldv2t, lwork, m, p, q
343 * ..
344 * .. Array Arguments ..
345  REAL b11d( * ), b11e( * ), b12d( * ), b12e( * ),
346  $ b21d( * ), b21e( * ), b22d( * ), b22e( * ),
347  $ phi( * ), theta( * ), work( * )
348  REAL u1( ldu1, * ), u2( ldu2, * ), v1t( ldv1t, * ),
349  $ v2t( ldv2t, * )
350 * ..
351 *
352 * ===================================================================
353 *
354 * .. Parameters ..
355  INTEGER maxitr
356  parameter( maxitr = 6 )
357  REAL hundred, meighth, one, piover2, ten, zero
358  parameter( hundred = 100.0e0, meighth = -0.125e0,
359  $ one = 1.0e0, piover2 = 1.57079632679489662e0,
360  $ ten = 10.0e0, zero = 0.0e0 )
361  REAL negonecomplex
362  parameter( negonecomplex = -1.0e0 )
363 * ..
364 * .. Local Scalars ..
365  LOGICAL colmajor, lquery, restart11, restart12,
366  $ restart21, restart22, wantu1, wantu2, wantv1t,
367  $ wantv2t
368  INTEGER i, imin, imax, iter, iu1cs, iu1sn, iu2cs,
369  $ iu2sn, iv1tcs, iv1tsn, iv2tcs, iv2tsn, j,
370  $ lworkmin, lworkopt, maxit, mini
371  REAL b11bulge, b12bulge, b21bulge, b22bulge, dummy,
372  $ eps, mu, nu, r, sigma11, sigma21,
373  $ temp, thetamax, thetamin, thresh, tol, tolmul,
374  $ unfl, x1, x2, y1, y2
375 *
376 * .. External Subroutines ..
377  EXTERNAL slasr, sscal, sswap, slartgp, slartgs, slas2,
378  $ xerbla
379 * ..
380 * .. External Functions ..
381  REAL slamch
382  LOGICAL lsame
383  EXTERNAL lsame, slamch
384 * ..
385 * .. Intrinsic Functions ..
386  INTRINSIC abs, atan2, cos, max, min, sin, sqrt
387 * ..
388 * .. Executable Statements ..
389 *
390 * Test input arguments
391 *
392  info = 0
393  lquery = lwork .EQ. -1
394  wantu1 = lsame( jobu1, 'Y' )
395  wantu2 = lsame( jobu2, 'Y' )
396  wantv1t = lsame( jobv1t, 'Y' )
397  wantv2t = lsame( jobv2t, 'Y' )
398  colmajor = .NOT. lsame( trans, 'T' )
399 *
400  IF( m .LT. 0 ) THEN
401  info = -6
402  ELSE IF( p .LT. 0 .OR. p .GT. m ) THEN
403  info = -7
404  ELSE IF( q .LT. 0 .OR. q .GT. m ) THEN
405  info = -8
406  ELSE IF( q .GT. p .OR. q .GT. m-p .OR. q .GT. m-q ) THEN
407  info = -8
408  ELSE IF( wantu1 .AND. ldu1 .LT. p ) THEN
409  info = -12
410  ELSE IF( wantu2 .AND. ldu2 .LT. m-p ) THEN
411  info = -14
412  ELSE IF( wantv1t .AND. ldv1t .LT. q ) THEN
413  info = -16
414  ELSE IF( wantv2t .AND. ldv2t .LT. m-q ) THEN
415  info = -18
416  END IF
417 *
418 * Quick return if Q = 0
419 *
420  IF( info .EQ. 0 .AND. q .EQ. 0 ) THEN
421  lworkmin = 1
422  work(1) = lworkmin
423  return
424  END IF
425 *
426 * Compute workspace
427 *
428  IF( info .EQ. 0 ) THEN
429  iu1cs = 1
430  iu1sn = iu1cs + q
431  iu2cs = iu1sn + q
432  iu2sn = iu2cs + q
433  iv1tcs = iu2sn + q
434  iv1tsn = iv1tcs + q
435  iv2tcs = iv1tsn + q
436  iv2tsn = iv2tcs + q
437  lworkopt = iv2tsn + q - 1
438  lworkmin = lworkopt
439  work(1) = lworkopt
440  IF( lwork .LT. lworkmin .AND. .NOT. lquery ) THEN
441  info = -28
442  END IF
443  END IF
444 *
445  IF( info .NE. 0 ) THEN
446  CALL xerbla( 'SBBCSD', -info )
447  return
448  ELSE IF( lquery ) THEN
449  return
450  END IF
451 *
452 * Get machine constants
453 *
454  eps = slamch( 'Epsilon' )
455  unfl = slamch( 'Safe minimum' )
456  tolmul = max( ten, min( hundred, eps**meighth ) )
457  tol = tolmul*eps
458  thresh = max( tol, maxitr*q*q*unfl )
459 *
460 * Test for negligible sines or cosines
461 *
462  DO i = 1, q
463  IF( theta(i) .LT. thresh ) THEN
464  theta(i) = zero
465  ELSE IF( theta(i) .GT. piover2-thresh ) THEN
466  theta(i) = piover2
467  END IF
468  END DO
469  DO i = 1, q-1
470  IF( phi(i) .LT. thresh ) THEN
471  phi(i) = zero
472  ELSE IF( phi(i) .GT. piover2-thresh ) THEN
473  phi(i) = piover2
474  END IF
475  END DO
476 *
477 * Initial deflation
478 *
479  imax = q
480  DO WHILE( ( imax .GT. 1 ) .AND. ( phi(imax-1) .EQ. zero ) )
481  imax = imax - 1
482  END DO
483  imin = imax - 1
484  IF ( imin .GT. 1 ) THEN
485  DO WHILE( phi(imin-1) .NE. zero )
486  imin = imin - 1
487  IF ( imin .LE. 1 ) exit
488  END DO
489  END IF
490 *
491 * Initialize iteration counter
492 *
493  maxit = maxitr*q*q
494  iter = 0
495 *
496 * Begin main iteration loop
497 *
498  DO WHILE( imax .GT. 1 )
499 *
500 * Compute the matrix entries
501 *
502  b11d(imin) = cos( theta(imin) )
503  b21d(imin) = -sin( theta(imin) )
504  DO i = imin, imax - 1
505  b11e(i) = -sin( theta(i) ) * sin( phi(i) )
506  b11d(i+1) = cos( theta(i+1) ) * cos( phi(i) )
507  b12d(i) = sin( theta(i) ) * cos( phi(i) )
508  b12e(i) = cos( theta(i+1) ) * sin( phi(i) )
509  b21e(i) = -cos( theta(i) ) * sin( phi(i) )
510  b21d(i+1) = -sin( theta(i+1) ) * cos( phi(i) )
511  b22d(i) = cos( theta(i) ) * cos( phi(i) )
512  b22e(i) = -sin( theta(i+1) ) * sin( phi(i) )
513  END DO
514  b12d(imax) = sin( theta(imax) )
515  b22d(imax) = cos( theta(imax) )
516 *
517 * Abort if not converging; otherwise, increment ITER
518 *
519  IF( iter .GT. maxit ) THEN
520  info = 0
521  DO i = 1, q
522  IF( phi(i) .NE. zero )
523  $ info = info + 1
524  END DO
525  return
526  END IF
527 *
528  iter = iter + imax - imin
529 *
530 * Compute shifts
531 *
532  thetamax = theta(imin)
533  thetamin = theta(imin)
534  DO i = imin+1, imax
535  IF( theta(i) > thetamax )
536  $ thetamax = theta(i)
537  IF( theta(i) < thetamin )
538  $ thetamin = theta(i)
539  END DO
540 *
541  IF( thetamax .GT. piover2 - thresh ) THEN
542 *
543 * Zero on diagonals of B11 and B22; induce deflation with a
544 * zero shift
545 *
546  mu = zero
547  nu = one
548 *
549  ELSE IF( thetamin .LT. thresh ) THEN
550 *
551 * Zero on diagonals of B12 and B22; induce deflation with a
552 * zero shift
553 *
554  mu = one
555  nu = zero
556 *
557  ELSE
558 *
559 * Compute shifts for B11 and B21 and use the lesser
560 *
561  CALL slas2( b11d(imax-1), b11e(imax-1), b11d(imax), sigma11,
562  $ dummy )
563  CALL slas2( b21d(imax-1), b21e(imax-1), b21d(imax), sigma21,
564  $ dummy )
565 *
566  IF( sigma11 .LE. sigma21 ) THEN
567  mu = sigma11
568  nu = sqrt( one - mu**2 )
569  IF( mu .LT. thresh ) THEN
570  mu = zero
571  nu = one
572  END IF
573  ELSE
574  nu = sigma21
575  mu = sqrt( 1.0 - nu**2 )
576  IF( nu .LT. thresh ) THEN
577  mu = one
578  nu = zero
579  END IF
580  END IF
581  END IF
582 *
583 * Rotate to produce bulges in B11 and B21
584 *
585  IF( mu .LE. nu ) THEN
586  CALL slartgs( b11d(imin), b11e(imin), mu,
587  $ work(iv1tcs+imin-1), work(iv1tsn+imin-1) )
588  ELSE
589  CALL slartgs( b21d(imin), b21e(imin), nu,
590  $ work(iv1tcs+imin-1), work(iv1tsn+imin-1) )
591  END IF
592 *
593  temp = work(iv1tcs+imin-1)*b11d(imin) +
594  $ work(iv1tsn+imin-1)*b11e(imin)
595  b11e(imin) = work(iv1tcs+imin-1)*b11e(imin) -
596  $ work(iv1tsn+imin-1)*b11d(imin)
597  b11d(imin) = temp
598  b11bulge = work(iv1tsn+imin-1)*b11d(imin+1)
599  b11d(imin+1) = work(iv1tcs+imin-1)*b11d(imin+1)
600  temp = work(iv1tcs+imin-1)*b21d(imin) +
601  $ work(iv1tsn+imin-1)*b21e(imin)
602  b21e(imin) = work(iv1tcs+imin-1)*b21e(imin) -
603  $ work(iv1tsn+imin-1)*b21d(imin)
604  b21d(imin) = temp
605  b21bulge = work(iv1tsn+imin-1)*b21d(imin+1)
606  b21d(imin+1) = work(iv1tcs+imin-1)*b21d(imin+1)
607 *
608 * Compute THETA(IMIN)
609 *
610  theta( imin ) = atan2( sqrt( b21d(imin)**2+b21bulge**2 ),
611  $ sqrt( b11d(imin)**2+b11bulge**2 ) )
612 *
613 * Chase the bulges in B11(IMIN+1,IMIN) and B21(IMIN+1,IMIN)
614 *
615  IF( b11d(imin)**2+b11bulge**2 .GT. thresh**2 ) THEN
616  CALL slartgp( b11bulge, b11d(imin), work(iu1sn+imin-1),
617  $ work(iu1cs+imin-1), r )
618  ELSE IF( mu .LE. nu ) THEN
619  CALL slartgs( b11e( imin ), b11d( imin + 1 ), mu,
620  $ work(iu1cs+imin-1), work(iu1sn+imin-1) )
621  ELSE
622  CALL slartgs( b12d( imin ), b12e( imin ), nu,
623  $ work(iu1cs+imin-1), work(iu1sn+imin-1) )
624  END IF
625  IF( b21d(imin)**2+b21bulge**2 .GT. thresh**2 ) THEN
626  CALL slartgp( b21bulge, b21d(imin), work(iu2sn+imin-1),
627  $ work(iu2cs+imin-1), r )
628  ELSE IF( nu .LT. mu ) THEN
629  CALL slartgs( b21e( imin ), b21d( imin + 1 ), nu,
630  $ work(iu2cs+imin-1), work(iu2sn+imin-1) )
631  ELSE
632  CALL slartgs( b22d(imin), b22e(imin), mu,
633  $ work(iu2cs+imin-1), work(iu2sn+imin-1) )
634  END IF
635  work(iu2cs+imin-1) = -work(iu2cs+imin-1)
636  work(iu2sn+imin-1) = -work(iu2sn+imin-1)
637 *
638  temp = work(iu1cs+imin-1)*b11e(imin) +
639  $ work(iu1sn+imin-1)*b11d(imin+1)
640  b11d(imin+1) = work(iu1cs+imin-1)*b11d(imin+1) -
641  $ work(iu1sn+imin-1)*b11e(imin)
642  b11e(imin) = temp
643  IF( imax .GT. imin+1 ) THEN
644  b11bulge = work(iu1sn+imin-1)*b11e(imin+1)
645  b11e(imin+1) = work(iu1cs+imin-1)*b11e(imin+1)
646  END IF
647  temp = work(iu1cs+imin-1)*b12d(imin) +
648  $ work(iu1sn+imin-1)*b12e(imin)
649  b12e(imin) = work(iu1cs+imin-1)*b12e(imin) -
650  $ work(iu1sn+imin-1)*b12d(imin)
651  b12d(imin) = temp
652  b12bulge = work(iu1sn+imin-1)*b12d(imin+1)
653  b12d(imin+1) = work(iu1cs+imin-1)*b12d(imin+1)
654  temp = work(iu2cs+imin-1)*b21e(imin) +
655  $ work(iu2sn+imin-1)*b21d(imin+1)
656  b21d(imin+1) = work(iu2cs+imin-1)*b21d(imin+1) -
657  $ work(iu2sn+imin-1)*b21e(imin)
658  b21e(imin) = temp
659  IF( imax .GT. imin+1 ) THEN
660  b21bulge = work(iu2sn+imin-1)*b21e(imin+1)
661  b21e(imin+1) = work(iu2cs+imin-1)*b21e(imin+1)
662  END IF
663  temp = work(iu2cs+imin-1)*b22d(imin) +
664  $ work(iu2sn+imin-1)*b22e(imin)
665  b22e(imin) = work(iu2cs+imin-1)*b22e(imin) -
666  $ work(iu2sn+imin-1)*b22d(imin)
667  b22d(imin) = temp
668  b22bulge = work(iu2sn+imin-1)*b22d(imin+1)
669  b22d(imin+1) = work(iu2cs+imin-1)*b22d(imin+1)
670 *
671 * Inner loop: chase bulges from B11(IMIN,IMIN+2),
672 * B12(IMIN,IMIN+1), B21(IMIN,IMIN+2), and B22(IMIN,IMIN+1) to
673 * bottom-right
674 *
675  DO i = imin+1, imax-1
676 *
677 * Compute PHI(I-1)
678 *
679  x1 = sin(theta(i-1))*b11e(i-1) + cos(theta(i-1))*b21e(i-1)
680  x2 = sin(theta(i-1))*b11bulge + cos(theta(i-1))*b21bulge
681  y1 = sin(theta(i-1))*b12d(i-1) + cos(theta(i-1))*b22d(i-1)
682  y2 = sin(theta(i-1))*b12bulge + cos(theta(i-1))*b22bulge
683 *
684  phi(i-1) = atan2( sqrt(x1**2+x2**2), sqrt(y1**2+y2**2) )
685 *
686 * Determine if there are bulges to chase or if a new direct
687 * summand has been reached
688 *
689  restart11 = b11e(i-1)**2 + b11bulge**2 .LE. thresh**2
690  restart21 = b21e(i-1)**2 + b21bulge**2 .LE. thresh**2
691  restart12 = b12d(i-1)**2 + b12bulge**2 .LE. thresh**2
692  restart22 = b22d(i-1)**2 + b22bulge**2 .LE. thresh**2
693 *
694 * If possible, chase bulges from B11(I-1,I+1), B12(I-1,I),
695 * B21(I-1,I+1), and B22(I-1,I). If necessary, restart bulge-
696 * chasing by applying the original shift again.
697 *
698  IF( .NOT. restart11 .AND. .NOT. restart21 ) THEN
699  CALL slartgp( x2, x1, work(iv1tsn+i-1), work(iv1tcs+i-1),
700  $ r )
701  ELSE IF( .NOT. restart11 .AND. restart21 ) THEN
702  CALL slartgp( b11bulge, b11e(i-1), work(iv1tsn+i-1),
703  $ work(iv1tcs+i-1), r )
704  ELSE IF( restart11 .AND. .NOT. restart21 ) THEN
705  CALL slartgp( b21bulge, b21e(i-1), work(iv1tsn+i-1),
706  $ work(iv1tcs+i-1), r )
707  ELSE IF( mu .LE. nu ) THEN
708  CALL slartgs( b11d(i), b11e(i), mu, work(iv1tcs+i-1),
709  $ work(iv1tsn+i-1) )
710  ELSE
711  CALL slartgs( b21d(i), b21e(i), nu, work(iv1tcs+i-1),
712  $ work(iv1tsn+i-1) )
713  END IF
714  work(iv1tcs+i-1) = -work(iv1tcs+i-1)
715  work(iv1tsn+i-1) = -work(iv1tsn+i-1)
716  IF( .NOT. restart12 .AND. .NOT. restart22 ) THEN
717  CALL slartgp( y2, y1, work(iv2tsn+i-1-1),
718  $ work(iv2tcs+i-1-1), r )
719  ELSE IF( .NOT. restart12 .AND. restart22 ) THEN
720  CALL slartgp( b12bulge, b12d(i-1), work(iv2tsn+i-1-1),
721  $ work(iv2tcs+i-1-1), r )
722  ELSE IF( restart12 .AND. .NOT. restart22 ) THEN
723  CALL slartgp( b22bulge, b22d(i-1), work(iv2tsn+i-1-1),
724  $ work(iv2tcs+i-1-1), r )
725  ELSE IF( nu .LT. mu ) THEN
726  CALL slartgs( b12e(i-1), b12d(i), nu, work(iv2tcs+i-1-1),
727  $ work(iv2tsn+i-1-1) )
728  ELSE
729  CALL slartgs( b22e(i-1), b22d(i), mu, work(iv2tcs+i-1-1),
730  $ work(iv2tsn+i-1-1) )
731  END IF
732 *
733  temp = work(iv1tcs+i-1)*b11d(i) + work(iv1tsn+i-1)*b11e(i)
734  b11e(i) = work(iv1tcs+i-1)*b11e(i) -
735  $ work(iv1tsn+i-1)*b11d(i)
736  b11d(i) = temp
737  b11bulge = work(iv1tsn+i-1)*b11d(i+1)
738  b11d(i+1) = work(iv1tcs+i-1)*b11d(i+1)
739  temp = work(iv1tcs+i-1)*b21d(i) + work(iv1tsn+i-1)*b21e(i)
740  b21e(i) = work(iv1tcs+i-1)*b21e(i) -
741  $ work(iv1tsn+i-1)*b21d(i)
742  b21d(i) = temp
743  b21bulge = work(iv1tsn+i-1)*b21d(i+1)
744  b21d(i+1) = work(iv1tcs+i-1)*b21d(i+1)
745  temp = work(iv2tcs+i-1-1)*b12e(i-1) +
746  $ work(iv2tsn+i-1-1)*b12d(i)
747  b12d(i) = work(iv2tcs+i-1-1)*b12d(i) -
748  $ work(iv2tsn+i-1-1)*b12e(i-1)
749  b12e(i-1) = temp
750  b12bulge = work(iv2tsn+i-1-1)*b12e(i)
751  b12e(i) = work(iv2tcs+i-1-1)*b12e(i)
752  temp = work(iv2tcs+i-1-1)*b22e(i-1) +
753  $ work(iv2tsn+i-1-1)*b22d(i)
754  b22d(i) = work(iv2tcs+i-1-1)*b22d(i) -
755  $ work(iv2tsn+i-1-1)*b22e(i-1)
756  b22e(i-1) = temp
757  b22bulge = work(iv2tsn+i-1-1)*b22e(i)
758  b22e(i) = work(iv2tcs+i-1-1)*b22e(i)
759 *
760 * Compute THETA(I)
761 *
762  x1 = cos(phi(i-1))*b11d(i) + sin(phi(i-1))*b12e(i-1)
763  x2 = cos(phi(i-1))*b11bulge + sin(phi(i-1))*b12bulge
764  y1 = cos(phi(i-1))*b21d(i) + sin(phi(i-1))*b22e(i-1)
765  y2 = cos(phi(i-1))*b21bulge + sin(phi(i-1))*b22bulge
766 *
767  theta(i) = atan2( sqrt(y1**2+y2**2), sqrt(x1**2+x2**2) )
768 *
769 * Determine if there are bulges to chase or if a new direct
770 * summand has been reached
771 *
772  restart11 = b11d(i)**2 + b11bulge**2 .LE. thresh**2
773  restart12 = b12e(i-1)**2 + b12bulge**2 .LE. thresh**2
774  restart21 = b21d(i)**2 + b21bulge**2 .LE. thresh**2
775  restart22 = b22e(i-1)**2 + b22bulge**2 .LE. thresh**2
776 *
777 * If possible, chase bulges from B11(I+1,I), B12(I+1,I-1),
778 * B21(I+1,I), and B22(I+1,I-1). If necessary, restart bulge-
779 * chasing by applying the original shift again.
780 *
781  IF( .NOT. restart11 .AND. .NOT. restart12 ) THEN
782  CALL slartgp( x2, x1, work(iu1sn+i-1), work(iu1cs+i-1),
783  $ r )
784  ELSE IF( .NOT. restart11 .AND. restart12 ) THEN
785  CALL slartgp( b11bulge, b11d(i), work(iu1sn+i-1),
786  $ work(iu1cs+i-1), r )
787  ELSE IF( restart11 .AND. .NOT. restart12 ) THEN
788  CALL slartgp( b12bulge, b12e(i-1), work(iu1sn+i-1),
789  $ work(iu1cs+i-1), r )
790  ELSE IF( mu .LE. nu ) THEN
791  CALL slartgs( b11e(i), b11d(i+1), mu, work(iu1cs+i-1),
792  $ work(iu1sn+i-1) )
793  ELSE
794  CALL slartgs( b12d(i), b12e(i), nu, work(iu1cs+i-1),
795  $ work(iu1sn+i-1) )
796  END IF
797  IF( .NOT. restart21 .AND. .NOT. restart22 ) THEN
798  CALL slartgp( y2, y1, work(iu2sn+i-1), work(iu2cs+i-1),
799  $ r )
800  ELSE IF( .NOT. restart21 .AND. restart22 ) THEN
801  CALL slartgp( b21bulge, b21d(i), work(iu2sn+i-1),
802  $ work(iu2cs+i-1), r )
803  ELSE IF( restart21 .AND. .NOT. restart22 ) THEN
804  CALL slartgp( b22bulge, b22e(i-1), work(iu2sn+i-1),
805  $ work(iu2cs+i-1), r )
806  ELSE IF( nu .LT. mu ) THEN
807  CALL slartgs( b21e(i), b21e(i+1), nu, work(iu2cs+i-1),
808  $ work(iu2sn+i-1) )
809  ELSE
810  CALL slartgs( b22d(i), b22e(i), mu, work(iu2cs+i-1),
811  $ work(iu2sn+i-1) )
812  END IF
813  work(iu2cs+i-1) = -work(iu2cs+i-1)
814  work(iu2sn+i-1) = -work(iu2sn+i-1)
815 *
816  temp = work(iu1cs+i-1)*b11e(i) + work(iu1sn+i-1)*b11d(i+1)
817  b11d(i+1) = work(iu1cs+i-1)*b11d(i+1) -
818  $ work(iu1sn+i-1)*b11e(i)
819  b11e(i) = temp
820  IF( i .LT. imax - 1 ) THEN
821  b11bulge = work(iu1sn+i-1)*b11e(i+1)
822  b11e(i+1) = work(iu1cs+i-1)*b11e(i+1)
823  END IF
824  temp = work(iu2cs+i-1)*b21e(i) + work(iu2sn+i-1)*b21d(i+1)
825  b21d(i+1) = work(iu2cs+i-1)*b21d(i+1) -
826  $ work(iu2sn+i-1)*b21e(i)
827  b21e(i) = temp
828  IF( i .LT. imax - 1 ) THEN
829  b21bulge = work(iu2sn+i-1)*b21e(i+1)
830  b21e(i+1) = work(iu2cs+i-1)*b21e(i+1)
831  END IF
832  temp = work(iu1cs+i-1)*b12d(i) + work(iu1sn+i-1)*b12e(i)
833  b12e(i) = work(iu1cs+i-1)*b12e(i) - work(iu1sn+i-1)*b12d(i)
834  b12d(i) = temp
835  b12bulge = work(iu1sn+i-1)*b12d(i+1)
836  b12d(i+1) = work(iu1cs+i-1)*b12d(i+1)
837  temp = work(iu2cs+i-1)*b22d(i) + work(iu2sn+i-1)*b22e(i)
838  b22e(i) = work(iu2cs+i-1)*b22e(i) - work(iu2sn+i-1)*b22d(i)
839  b22d(i) = temp
840  b22bulge = work(iu2sn+i-1)*b22d(i+1)
841  b22d(i+1) = work(iu2cs+i-1)*b22d(i+1)
842 *
843  END DO
844 *
845 * Compute PHI(IMAX-1)
846 *
847  x1 = sin(theta(imax-1))*b11e(imax-1) +
848  $ cos(theta(imax-1))*b21e(imax-1)
849  y1 = sin(theta(imax-1))*b12d(imax-1) +
850  $ cos(theta(imax-1))*b22d(imax-1)
851  y2 = sin(theta(imax-1))*b12bulge + cos(theta(imax-1))*b22bulge
852 *
853  phi(imax-1) = atan2( abs(x1), sqrt(y1**2+y2**2) )
854 *
855 * Chase bulges from B12(IMAX-1,IMAX) and B22(IMAX-1,IMAX)
856 *
857  restart12 = b12d(imax-1)**2 + b12bulge**2 .LE. thresh**2
858  restart22 = b22d(imax-1)**2 + b22bulge**2 .LE. thresh**2
859 *
860  IF( .NOT. restart12 .AND. .NOT. restart22 ) THEN
861  CALL slartgp( y2, y1, work(iv2tsn+imax-1-1),
862  $ work(iv2tcs+imax-1-1), r )
863  ELSE IF( .NOT. restart12 .AND. restart22 ) THEN
864  CALL slartgp( b12bulge, b12d(imax-1), work(iv2tsn+imax-1-1),
865  $ work(iv2tcs+imax-1-1), r )
866  ELSE IF( restart12 .AND. .NOT. restart22 ) THEN
867  CALL slartgp( b22bulge, b22d(imax-1), work(iv2tsn+imax-1-1),
868  $ work(iv2tcs+imax-1-1), r )
869  ELSE IF( nu .LT. mu ) THEN
870  CALL slartgs( b12e(imax-1), b12d(imax), nu,
871  $ work(iv2tcs+imax-1-1), work(iv2tsn+imax-1-1) )
872  ELSE
873  CALL slartgs( b22e(imax-1), b22d(imax), mu,
874  $ work(iv2tcs+imax-1-1), work(iv2tsn+imax-1-1) )
875  END IF
876 *
877  temp = work(iv2tcs+imax-1-1)*b12e(imax-1) +
878  $ work(iv2tsn+imax-1-1)*b12d(imax)
879  b12d(imax) = work(iv2tcs+imax-1-1)*b12d(imax) -
880  $ work(iv2tsn+imax-1-1)*b12e(imax-1)
881  b12e(imax-1) = temp
882  temp = work(iv2tcs+imax-1-1)*b22e(imax-1) +
883  $ work(iv2tsn+imax-1-1)*b22d(imax)
884  b22d(imax) = work(iv2tcs+imax-1-1)*b22d(imax) -
885  $ work(iv2tsn+imax-1-1)*b22e(imax-1)
886  b22e(imax-1) = temp
887 *
888 * Update singular vectors
889 *
890  IF( wantu1 ) THEN
891  IF( colmajor ) THEN
892  CALL slasr( 'R', 'V', 'F', p, imax-imin+1,
893  $ work(iu1cs+imin-1), work(iu1sn+imin-1),
894  $ u1(1,imin), ldu1 )
895  ELSE
896  CALL slasr( 'L', 'V', 'F', imax-imin+1, p,
897  $ work(iu1cs+imin-1), work(iu1sn+imin-1),
898  $ u1(imin,1), ldu1 )
899  END IF
900  END IF
901  IF( wantu2 ) THEN
902  IF( colmajor ) THEN
903  CALL slasr( 'R', 'V', 'F', m-p, imax-imin+1,
904  $ work(iu2cs+imin-1), work(iu2sn+imin-1),
905  $ u2(1,imin), ldu2 )
906  ELSE
907  CALL slasr( 'L', 'V', 'F', imax-imin+1, m-p,
908  $ work(iu2cs+imin-1), work(iu2sn+imin-1),
909  $ u2(imin,1), ldu2 )
910  END IF
911  END IF
912  IF( wantv1t ) THEN
913  IF( colmajor ) THEN
914  CALL slasr( 'L', 'V', 'F', imax-imin+1, q,
915  $ work(iv1tcs+imin-1), work(iv1tsn+imin-1),
916  $ v1t(imin,1), ldv1t )
917  ELSE
918  CALL slasr( 'R', 'V', 'F', q, imax-imin+1,
919  $ work(iv1tcs+imin-1), work(iv1tsn+imin-1),
920  $ v1t(1,imin), ldv1t )
921  END IF
922  END IF
923  IF( wantv2t ) THEN
924  IF( colmajor ) THEN
925  CALL slasr( 'L', 'V', 'F', imax-imin+1, m-q,
926  $ work(iv2tcs+imin-1), work(iv2tsn+imin-1),
927  $ v2t(imin,1), ldv2t )
928  ELSE
929  CALL slasr( 'R', 'V', 'F', m-q, imax-imin+1,
930  $ work(iv2tcs+imin-1), work(iv2tsn+imin-1),
931  $ v2t(1,imin), ldv2t )
932  END IF
933  END IF
934 *
935 * Fix signs on B11(IMAX-1,IMAX) and B21(IMAX-1,IMAX)
936 *
937  IF( b11e(imax-1)+b21e(imax-1) .GT. 0 ) THEN
938  b11d(imax) = -b11d(imax)
939  b21d(imax) = -b21d(imax)
940  IF( wantv1t ) THEN
941  IF( colmajor ) THEN
942  CALL sscal( q, negonecomplex, v1t(imax,1), ldv1t )
943  ELSE
944  CALL sscal( q, negonecomplex, v1t(1,imax), 1 )
945  END IF
946  END IF
947  END IF
948 *
949 * Compute THETA(IMAX)
950 *
951  x1 = cos(phi(imax-1))*b11d(imax) +
952  $ sin(phi(imax-1))*b12e(imax-1)
953  y1 = cos(phi(imax-1))*b21d(imax) +
954  $ sin(phi(imax-1))*b22e(imax-1)
955 *
956  theta(imax) = atan2( abs(y1), abs(x1) )
957 *
958 * Fix signs on B11(IMAX,IMAX), B12(IMAX,IMAX-1), B21(IMAX,IMAX),
959 * and B22(IMAX,IMAX-1)
960 *
961  IF( b11d(imax)+b12e(imax-1) .LT. 0 ) THEN
962  b12d(imax) = -b12d(imax)
963  IF( wantu1 ) THEN
964  IF( colmajor ) THEN
965  CALL sscal( p, negonecomplex, u1(1,imax), 1 )
966  ELSE
967  CALL sscal( p, negonecomplex, u1(imax,1), ldu1 )
968  END IF
969  END IF
970  END IF
971  IF( b21d(imax)+b22e(imax-1) .GT. 0 ) THEN
972  b22d(imax) = -b22d(imax)
973  IF( wantu2 ) THEN
974  IF( colmajor ) THEN
975  CALL sscal( m-p, negonecomplex, u2(1,imax), 1 )
976  ELSE
977  CALL sscal( m-p, negonecomplex, u2(imax,1), ldu2 )
978  END IF
979  END IF
980  END IF
981 *
982 * Fix signs on B12(IMAX,IMAX) and B22(IMAX,IMAX)
983 *
984  IF( b12d(imax)+b22d(imax) .LT. 0 ) THEN
985  IF( wantv2t ) THEN
986  IF( colmajor ) THEN
987  CALL sscal( m-q, negonecomplex, v2t(imax,1), ldv2t )
988  ELSE
989  CALL sscal( m-q, negonecomplex, v2t(1,imax), 1 )
990  END IF
991  END IF
992  END IF
993 *
994 * Test for negligible sines or cosines
995 *
996  DO i = imin, imax
997  IF( theta(i) .LT. thresh ) THEN
998  theta(i) = zero
999  ELSE IF( theta(i) .GT. piover2-thresh ) THEN
1000  theta(i) = piover2
1001  END IF
1002  END DO
1003  DO i = imin, imax-1
1004  IF( phi(i) .LT. thresh ) THEN
1005  phi(i) = zero
1006  ELSE IF( phi(i) .GT. piover2-thresh ) THEN
1007  phi(i) = piover2
1008  END IF
1009  END DO
1010 *
1011 * Deflate
1012 *
1013  IF (imax .GT. 1) THEN
1014  DO WHILE( phi(imax-1) .EQ. zero )
1015  imax = imax - 1
1016  IF (imax .LE. 1) exit
1017  END DO
1018  END IF
1019  IF( imin .GT. imax - 1 )
1020  $ imin = imax - 1
1021  IF (imin .GT. 1) THEN
1022  DO WHILE (phi(imin-1) .NE. zero)
1023  imin = imin - 1
1024  IF (imin .LE. 1) exit
1025  END DO
1026  END IF
1027 *
1028 * Repeat main iteration loop
1029 *
1030  END DO
1031 *
1032 * Postprocessing: order THETA from least to greatest
1033 *
1034  DO i = 1, q
1035 *
1036  mini = i
1037  thetamin = theta(i)
1038  DO j = i+1, q
1039  IF( theta(j) .LT. thetamin ) THEN
1040  mini = j
1041  thetamin = theta(j)
1042  END IF
1043  END DO
1044 *
1045  IF( mini .NE. i ) THEN
1046  theta(mini) = theta(i)
1047  theta(i) = thetamin
1048  IF( colmajor ) THEN
1049  IF( wantu1 )
1050  $ CALL sswap( p, u1(1,i), 1, u1(1,mini), 1 )
1051  IF( wantu2 )
1052  $ CALL sswap( m-p, u2(1,i), 1, u2(1,mini), 1 )
1053  IF( wantv1t )
1054  $ CALL sswap( q, v1t(i,1), ldv1t, v1t(mini,1), ldv1t )
1055  IF( wantv2t )
1056  $ CALL sswap( m-q, v2t(i,1), ldv2t, v2t(mini,1),
1057  $ ldv2t )
1058  ELSE
1059  IF( wantu1 )
1060  $ CALL sswap( p, u1(i,1), ldu1, u1(mini,1), ldu1 )
1061  IF( wantu2 )
1062  $ CALL sswap( m-p, u2(i,1), ldu2, u2(mini,1), ldu2 )
1063  IF( wantv1t )
1064  $ CALL sswap( q, v1t(1,i), 1, v1t(1,mini), 1 )
1065  IF( wantv2t )
1066  $ CALL sswap( m-q, v2t(1,i), 1, v2t(1,mini), 1 )
1067  END IF
1068  END IF
1069 *
1070  END DO
1071 *
1072  return
1073 *
1074 * End of SBBCSD
1075 *
1076  END
1077