LAPACK  3.5.0
LAPACK: Linear Algebra PACKage
 All Classes Files Functions Variables Typedefs Macros
slasd3.f
Go to the documentation of this file.
1 *> \brief \b SLASD3 finds all square roots of the roots of the secular equation, as defined by the values in D and Z, and then updates the singular vectors by matrix multiplication. Used by sbdsdc.
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download SLASD3 + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slasd3.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slasd3.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slasd3.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE SLASD3( NL, NR, SQRE, K, D, Q, LDQ, DSIGMA, U, LDU, U2,
22 * LDU2, VT, LDVT, VT2, LDVT2, IDXC, CTOT, Z,
23 * INFO )
24 *
25 * .. Scalar Arguments ..
26 * INTEGER INFO, K, LDQ, LDU, LDU2, LDVT, LDVT2, NL, NR,
27 * $ SQRE
28 * ..
29 * .. Array Arguments ..
30 * INTEGER CTOT( * ), IDXC( * )
31 * REAL D( * ), DSIGMA( * ), Q( LDQ, * ), U( LDU, * ),
32 * $ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ),
33 * $ Z( * )
34 * ..
35 *
36 *
37 *> \par Purpose:
38 * =============
39 *>
40 *> \verbatim
41 *>
42 *> SLASD3 finds all the square roots of the roots of the secular
43 *> equation, as defined by the values in D and Z. It makes the
44 *> appropriate calls to SLASD4 and then updates the singular
45 *> vectors by matrix multiplication.
46 *>
47 *> This code makes very mild assumptions about floating point
48 *> arithmetic. It will work on machines with a guard digit in
49 *> add/subtract, or on those binary machines without guard digits
50 *> which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2.
51 *> It could conceivably fail on hexadecimal or decimal machines
52 *> without guard digits, but we know of none.
53 *>
54 *> SLASD3 is called from SLASD1.
55 *> \endverbatim
56 *
57 * Arguments:
58 * ==========
59 *
60 *> \param[in] NL
61 *> \verbatim
62 *> NL is INTEGER
63 *> The row dimension of the upper block. NL >= 1.
64 *> \endverbatim
65 *>
66 *> \param[in] NR
67 *> \verbatim
68 *> NR is INTEGER
69 *> The row dimension of the lower block. NR >= 1.
70 *> \endverbatim
71 *>
72 *> \param[in] SQRE
73 *> \verbatim
74 *> SQRE is INTEGER
75 *> = 0: the lower block is an NR-by-NR square matrix.
76 *> = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
77 *>
78 *> The bidiagonal matrix has N = NL + NR + 1 rows and
79 *> M = N + SQRE >= N columns.
80 *> \endverbatim
81 *>
82 *> \param[in] K
83 *> \verbatim
84 *> K is INTEGER
85 *> The size of the secular equation, 1 =< K = < N.
86 *> \endverbatim
87 *>
88 *> \param[out] D
89 *> \verbatim
90 *> D is REAL array, dimension(K)
91 *> On exit the square roots of the roots of the secular equation,
92 *> in ascending order.
93 *> \endverbatim
94 *>
95 *> \param[out] Q
96 *> \verbatim
97 *> Q is REAL array,
98 *> dimension at least (LDQ,K).
99 *> \endverbatim
100 *>
101 *> \param[in] LDQ
102 *> \verbatim
103 *> LDQ is INTEGER
104 *> The leading dimension of the array Q. LDQ >= K.
105 *> \endverbatim
106 *>
107 *> \param[in,out] DSIGMA
108 *> \verbatim
109 *> DSIGMA is REAL array, dimension(K)
110 *> The first K elements of this array contain the old roots
111 *> of the deflated updating problem. These are the poles
112 *> of the secular equation.
113 *> \endverbatim
114 *>
115 *> \param[out] U
116 *> \verbatim
117 *> U is REAL array, dimension (LDU, N)
118 *> The last N - K columns of this matrix contain the deflated
119 *> left singular vectors.
120 *> \endverbatim
121 *>
122 *> \param[in] LDU
123 *> \verbatim
124 *> LDU is INTEGER
125 *> The leading dimension of the array U. LDU >= N.
126 *> \endverbatim
127 *>
128 *> \param[in] U2
129 *> \verbatim
130 *> U2 is REAL array, dimension (LDU2, N)
131 *> The first K columns of this matrix contain the non-deflated
132 *> left singular vectors for the split problem.
133 *> \endverbatim
134 *>
135 *> \param[in] LDU2
136 *> \verbatim
137 *> LDU2 is INTEGER
138 *> The leading dimension of the array U2. LDU2 >= N.
139 *> \endverbatim
140 *>
141 *> \param[out] VT
142 *> \verbatim
143 *> VT is REAL array, dimension (LDVT, M)
144 *> The last M - K columns of VT**T contain the deflated
145 *> right singular vectors.
146 *> \endverbatim
147 *>
148 *> \param[in] LDVT
149 *> \verbatim
150 *> LDVT is INTEGER
151 *> The leading dimension of the array VT. LDVT >= N.
152 *> \endverbatim
153 *>
154 *> \param[in,out] VT2
155 *> \verbatim
156 *> VT2 is REAL array, dimension (LDVT2, N)
157 *> The first K columns of VT2**T contain the non-deflated
158 *> right singular vectors for the split problem.
159 *> \endverbatim
160 *>
161 *> \param[in] LDVT2
162 *> \verbatim
163 *> LDVT2 is INTEGER
164 *> The leading dimension of the array VT2. LDVT2 >= N.
165 *> \endverbatim
166 *>
167 *> \param[in] IDXC
168 *> \verbatim
169 *> IDXC is INTEGER array, dimension (N)
170 *> The permutation used to arrange the columns of U (and rows of
171 *> VT) into three groups: the first group contains non-zero
172 *> entries only at and above (or before) NL +1; the second
173 *> contains non-zero entries only at and below (or after) NL+2;
174 *> and the third is dense. The first column of U and the row of
175 *> VT are treated separately, however.
176 *>
177 *> The rows of the singular vectors found by SLASD4
178 *> must be likewise permuted before the matrix multiplies can
179 *> take place.
180 *> \endverbatim
181 *>
182 *> \param[in] CTOT
183 *> \verbatim
184 *> CTOT is INTEGER array, dimension (4)
185 *> A count of the total number of the various types of columns
186 *> in U (or rows in VT), as described in IDXC. The fourth column
187 *> type is any column which has been deflated.
188 *> \endverbatim
189 *>
190 *> \param[in,out] Z
191 *> \verbatim
192 *> Z is REAL array, dimension (K)
193 *> The first K elements of this array contain the components
194 *> of the deflation-adjusted updating row vector.
195 *> \endverbatim
196 *>
197 *> \param[out] INFO
198 *> \verbatim
199 *> INFO is INTEGER
200 *> = 0: successful exit.
201 *> < 0: if INFO = -i, the i-th argument had an illegal value.
202 *> > 0: if INFO = 1, a singular value did not converge
203 *> \endverbatim
204 *
205 * Authors:
206 * ========
207 *
208 *> \author Univ. of Tennessee
209 *> \author Univ. of California Berkeley
210 *> \author Univ. of Colorado Denver
211 *> \author NAG Ltd.
212 *
213 *> \date September 2012
214 *
215 *> \ingroup auxOTHERauxiliary
216 *
217 *> \par Contributors:
218 * ==================
219 *>
220 *> Ming Gu and Huan Ren, Computer Science Division, University of
221 *> California at Berkeley, USA
222 *>
223 * =====================================================================
224  SUBROUTINE slasd3( NL, NR, SQRE, K, D, Q, LDQ, DSIGMA, U, LDU, U2,
225  $ ldu2, vt, ldvt, vt2, ldvt2, idxc, ctot, z,
226  $ info )
227 *
228 * -- LAPACK auxiliary routine (version 3.4.2) --
229 * -- LAPACK is a software package provided by Univ. of Tennessee, --
230 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
231 * September 2012
232 *
233 * .. Scalar Arguments ..
234  INTEGER info, k, ldq, ldu, ldu2, ldvt, ldvt2, nl, nr,
235  $ sqre
236 * ..
237 * .. Array Arguments ..
238  INTEGER ctot( * ), idxc( * )
239  REAL d( * ), dsigma( * ), q( ldq, * ), u( ldu, * ),
240  $ u2( ldu2, * ), vt( ldvt, * ), vt2( ldvt2, * ),
241  $ z( * )
242 * ..
243 *
244 * =====================================================================
245 *
246 * .. Parameters ..
247  REAL one, zero, negone
248  parameter( one = 1.0e+0, zero = 0.0e+0,
249  $ negone = -1.0e+0 )
250 * ..
251 * .. Local Scalars ..
252  INTEGER ctemp, i, j, jc, ktemp, m, n, nlp1, nlp2, nrp1
253  REAL rho, temp
254 * ..
255 * .. External Functions ..
256  REAL slamc3, snrm2
257  EXTERNAL slamc3, snrm2
258 * ..
259 * .. External Subroutines ..
260  EXTERNAL scopy, sgemm, slacpy, slascl, slasd4, xerbla
261 * ..
262 * .. Intrinsic Functions ..
263  INTRINSIC abs, sign, sqrt
264 * ..
265 * .. Executable Statements ..
266 *
267 * Test the input parameters.
268 *
269  info = 0
270 *
271  IF( nl.LT.1 ) THEN
272  info = -1
273  ELSE IF( nr.LT.1 ) THEN
274  info = -2
275  ELSE IF( ( sqre.NE.1 ) .AND. ( sqre.NE.0 ) ) THEN
276  info = -3
277  END IF
278 *
279  n = nl + nr + 1
280  m = n + sqre
281  nlp1 = nl + 1
282  nlp2 = nl + 2
283 *
284  IF( ( k.LT.1 ) .OR. ( k.GT.n ) ) THEN
285  info = -4
286  ELSE IF( ldq.LT.k ) THEN
287  info = -7
288  ELSE IF( ldu.LT.n ) THEN
289  info = -10
290  ELSE IF( ldu2.LT.n ) THEN
291  info = -12
292  ELSE IF( ldvt.LT.m ) THEN
293  info = -14
294  ELSE IF( ldvt2.LT.m ) THEN
295  info = -16
296  END IF
297  IF( info.NE.0 ) THEN
298  CALL xerbla( 'SLASD3', -info )
299  RETURN
300  END IF
301 *
302 * Quick return if possible
303 *
304  IF( k.EQ.1 ) THEN
305  d( 1 ) = abs( z( 1 ) )
306  CALL scopy( m, vt2( 1, 1 ), ldvt2, vt( 1, 1 ), ldvt )
307  IF( z( 1 ).GT.zero ) THEN
308  CALL scopy( n, u2( 1, 1 ), 1, u( 1, 1 ), 1 )
309  ELSE
310  DO 10 i = 1, n
311  u( i, 1 ) = -u2( i, 1 )
312  10 CONTINUE
313  END IF
314  RETURN
315  END IF
316 *
317 * Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can
318 * be computed with high relative accuracy (barring over/underflow).
319 * This is a problem on machines without a guard digit in
320 * add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
321 * The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I),
322 * which on any of these machines zeros out the bottommost
323 * bit of DSIGMA(I) if it is 1; this makes the subsequent
324 * subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation
325 * occurs. On binary machines with a guard digit (almost all
326 * machines) it does not change DSIGMA(I) at all. On hexadecimal
327 * and decimal machines with a guard digit, it slightly
328 * changes the bottommost bits of DSIGMA(I). It does not account
329 * for hexadecimal or decimal machines without guard digits
330 * (we know of none). We use a subroutine call to compute
331 * 2*DSIGMA(I) to prevent optimizing compilers from eliminating
332 * this code.
333 *
334  DO 20 i = 1, k
335  dsigma( i ) = slamc3( dsigma( i ), dsigma( i ) ) - dsigma( i )
336  20 CONTINUE
337 *
338 * Keep a copy of Z.
339 *
340  CALL scopy( k, z, 1, q, 1 )
341 *
342 * Normalize Z.
343 *
344  rho = snrm2( k, z, 1 )
345  CALL slascl( 'G', 0, 0, rho, one, k, 1, z, k, info )
346  rho = rho*rho
347 *
348 * Find the new singular values.
349 *
350  DO 30 j = 1, k
351  CALL slasd4( k, j, dsigma, z, u( 1, j ), rho, d( j ),
352  $ vt( 1, j ), info )
353 *
354 * If the zero finder fails, the computation is terminated.
355 *
356  IF( info.NE.0 ) THEN
357  RETURN
358  END IF
359  30 CONTINUE
360 *
361 * Compute updated Z.
362 *
363  DO 60 i = 1, k
364  z( i ) = u( i, k )*vt( i, k )
365  DO 40 j = 1, i - 1
366  z( i ) = z( i )*( u( i, j )*vt( i, j ) /
367  $ ( dsigma( i )-dsigma( j ) ) /
368  $ ( dsigma( i )+dsigma( j ) ) )
369  40 CONTINUE
370  DO 50 j = i, k - 1
371  z( i ) = z( i )*( u( i, j )*vt( i, j ) /
372  $ ( dsigma( i )-dsigma( j+1 ) ) /
373  $ ( dsigma( i )+dsigma( j+1 ) ) )
374  50 CONTINUE
375  z( i ) = sign( sqrt( abs( z( i ) ) ), q( i, 1 ) )
376  60 CONTINUE
377 *
378 * Compute left singular vectors of the modified diagonal matrix,
379 * and store related information for the right singular vectors.
380 *
381  DO 90 i = 1, k
382  vt( 1, i ) = z( 1 ) / u( 1, i ) / vt( 1, i )
383  u( 1, i ) = negone
384  DO 70 j = 2, k
385  vt( j, i ) = z( j ) / u( j, i ) / vt( j, i )
386  u( j, i ) = dsigma( j )*vt( j, i )
387  70 CONTINUE
388  temp = snrm2( k, u( 1, i ), 1 )
389  q( 1, i ) = u( 1, i ) / temp
390  DO 80 j = 2, k
391  jc = idxc( j )
392  q( j, i ) = u( jc, i ) / temp
393  80 CONTINUE
394  90 CONTINUE
395 *
396 * Update the left singular vector matrix.
397 *
398  IF( k.EQ.2 ) THEN
399  CALL sgemm( 'N', 'N', n, k, k, one, u2, ldu2, q, ldq, zero, u,
400  $ ldu )
401  go to 100
402  END IF
403  IF( ctot( 1 ).GT.0 ) THEN
404  CALL sgemm( 'N', 'N', nl, k, ctot( 1 ), one, u2( 1, 2 ), ldu2,
405  $ q( 2, 1 ), ldq, zero, u( 1, 1 ), ldu )
406  IF( ctot( 3 ).GT.0 ) THEN
407  ktemp = 2 + ctot( 1 ) + ctot( 2 )
408  CALL sgemm( 'N', 'N', nl, k, ctot( 3 ), one, u2( 1, ktemp ),
409  $ ldu2, q( ktemp, 1 ), ldq, one, u( 1, 1 ), ldu )
410  END IF
411  ELSE IF( ctot( 3 ).GT.0 ) THEN
412  ktemp = 2 + ctot( 1 ) + ctot( 2 )
413  CALL sgemm( 'N', 'N', nl, k, ctot( 3 ), one, u2( 1, ktemp ),
414  $ ldu2, q( ktemp, 1 ), ldq, zero, u( 1, 1 ), ldu )
415  ELSE
416  CALL slacpy( 'F', nl, k, u2, ldu2, u, ldu )
417  END IF
418  CALL scopy( k, q( 1, 1 ), ldq, u( nlp1, 1 ), ldu )
419  ktemp = 2 + ctot( 1 )
420  ctemp = ctot( 2 ) + ctot( 3 )
421  CALL sgemm( 'N', 'N', nr, k, ctemp, one, u2( nlp2, ktemp ), ldu2,
422  $ q( ktemp, 1 ), ldq, zero, u( nlp2, 1 ), ldu )
423 *
424 * Generate the right singular vectors.
425 *
426  100 CONTINUE
427  DO 120 i = 1, k
428  temp = snrm2( k, vt( 1, i ), 1 )
429  q( i, 1 ) = vt( 1, i ) / temp
430  DO 110 j = 2, k
431  jc = idxc( j )
432  q( i, j ) = vt( jc, i ) / temp
433  110 CONTINUE
434  120 CONTINUE
435 *
436 * Update the right singular vector matrix.
437 *
438  IF( k.EQ.2 ) THEN
439  CALL sgemm( 'N', 'N', k, m, k, one, q, ldq, vt2, ldvt2, zero,
440  $ vt, ldvt )
441  RETURN
442  END IF
443  ktemp = 1 + ctot( 1 )
444  CALL sgemm( 'N', 'N', k, nlp1, ktemp, one, q( 1, 1 ), ldq,
445  $ vt2( 1, 1 ), ldvt2, zero, vt( 1, 1 ), ldvt )
446  ktemp = 2 + ctot( 1 ) + ctot( 2 )
447  IF( ktemp.LE.ldvt2 )
448  $ CALL sgemm( 'N', 'N', k, nlp1, ctot( 3 ), one, q( 1, ktemp ),
449  $ ldq, vt2( ktemp, 1 ), ldvt2, one, vt( 1, 1 ),
450  $ ldvt )
451 *
452  ktemp = ctot( 1 ) + 1
453  nrp1 = nr + sqre
454  IF( ktemp.GT.1 ) THEN
455  DO 130 i = 1, k
456  q( i, ktemp ) = q( i, 1 )
457  130 CONTINUE
458  DO 140 i = nlp2, m
459  vt2( ktemp, i ) = vt2( 1, i )
460  140 CONTINUE
461  END IF
462  ctemp = 1 + ctot( 2 ) + ctot( 3 )
463  CALL sgemm( 'N', 'N', k, nrp1, ctemp, one, q( 1, ktemp ), ldq,
464  $ vt2( ktemp, nlp2 ), ldvt2, zero, vt( 1, nlp2 ), ldvt )
465 *
466  RETURN
467 *
468 * End of SLASD3
469 *
470  END