LAPACK  3.6.0
LAPACK: Linear Algebra PACKage
dlasd3.f
Go to the documentation of this file.
1 *> \brief \b DLASD3 finds all square roots of the roots of the secular equation, as defined by the values in D and Z, and then updates the singular vectors by matrix multiplication. Used by sbdsdc.
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download DLASD3 + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasd3.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasd3.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasd3.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE DLASD3( NL, NR, SQRE, K, D, Q, LDQ, DSIGMA, U, LDU, U2,
22 * LDU2, VT, LDVT, VT2, LDVT2, IDXC, CTOT, Z,
23 * INFO )
24 *
25 * .. Scalar Arguments ..
26 * INTEGER INFO, K, LDQ, LDU, LDU2, LDVT, LDVT2, NL, NR,
27 * $ SQRE
28 * ..
29 * .. Array Arguments ..
30 * INTEGER CTOT( * ), IDXC( * )
31 * DOUBLE PRECISION D( * ), DSIGMA( * ), Q( LDQ, * ), U( LDU, * ),
32 * $ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ),
33 * $ Z( * )
34 * ..
35 *
36 *
37 *> \par Purpose:
38 * =============
39 *>
40 *> \verbatim
41 *>
42 *> DLASD3 finds all the square roots of the roots of the secular
43 *> equation, as defined by the values in D and Z. It makes the
44 *> appropriate calls to DLASD4 and then updates the singular
45 *> vectors by matrix multiplication.
46 *>
47 *> This code makes very mild assumptions about floating point
48 *> arithmetic. It will work on machines with a guard digit in
49 *> add/subtract, or on those binary machines without guard digits
50 *> which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2.
51 *> It could conceivably fail on hexadecimal or decimal machines
52 *> without guard digits, but we know of none.
53 *>
54 *> DLASD3 is called from DLASD1.
55 *> \endverbatim
56 *
57 * Arguments:
58 * ==========
59 *
60 *> \param[in] NL
61 *> \verbatim
62 *> NL is INTEGER
63 *> The row dimension of the upper block. NL >= 1.
64 *> \endverbatim
65 *>
66 *> \param[in] NR
67 *> \verbatim
68 *> NR is INTEGER
69 *> The row dimension of the lower block. NR >= 1.
70 *> \endverbatim
71 *>
72 *> \param[in] SQRE
73 *> \verbatim
74 *> SQRE is INTEGER
75 *> = 0: the lower block is an NR-by-NR square matrix.
76 *> = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
77 *>
78 *> The bidiagonal matrix has N = NL + NR + 1 rows and
79 *> M = N + SQRE >= N columns.
80 *> \endverbatim
81 *>
82 *> \param[in] K
83 *> \verbatim
84 *> K is INTEGER
85 *> The size of the secular equation, 1 =< K = < N.
86 *> \endverbatim
87 *>
88 *> \param[out] D
89 *> \verbatim
90 *> D is DOUBLE PRECISION array, dimension(K)
91 *> On exit the square roots of the roots of the secular equation,
92 *> in ascending order.
93 *> \endverbatim
94 *>
95 *> \param[out] Q
96 *> \verbatim
97 *> Q is DOUBLE PRECISION array,
98 *> dimension at least (LDQ,K).
99 *> \endverbatim
100 *>
101 *> \param[in] LDQ
102 *> \verbatim
103 *> LDQ is INTEGER
104 *> The leading dimension of the array Q. LDQ >= K.
105 *> \endverbatim
106 *>
107 *> \param[in] DSIGMA
108 *> \verbatim
109 *> DSIGMA is DOUBLE PRECISION array, dimension(K)
110 *> The first K elements of this array contain the old roots
111 *> of the deflated updating problem. These are the poles
112 *> of the secular equation.
113 *> \endverbatim
114 *>
115 *> \param[out] U
116 *> \verbatim
117 *> U is DOUBLE PRECISION array, dimension (LDU, N)
118 *> The last N - K columns of this matrix contain the deflated
119 *> left singular vectors.
120 *> \endverbatim
121 *>
122 *> \param[in] LDU
123 *> \verbatim
124 *> LDU is INTEGER
125 *> The leading dimension of the array U. LDU >= N.
126 *> \endverbatim
127 *>
128 *> \param[in,out] U2
129 *> \verbatim
130 *> U2 is DOUBLE PRECISION array, dimension (LDU2, N)
131 *> The first K columns of this matrix contain the non-deflated
132 *> left singular vectors for the split problem.
133 *> \endverbatim
134 *>
135 *> \param[in] LDU2
136 *> \verbatim
137 *> LDU2 is INTEGER
138 *> The leading dimension of the array U2. LDU2 >= N.
139 *> \endverbatim
140 *>
141 *> \param[out] VT
142 *> \verbatim
143 *> VT is DOUBLE PRECISION array, dimension (LDVT, M)
144 *> The last M - K columns of VT**T contain the deflated
145 *> right singular vectors.
146 *> \endverbatim
147 *>
148 *> \param[in] LDVT
149 *> \verbatim
150 *> LDVT is INTEGER
151 *> The leading dimension of the array VT. LDVT >= N.
152 *> \endverbatim
153 *>
154 *> \param[in,out] VT2
155 *> \verbatim
156 *> VT2 is DOUBLE PRECISION array, dimension (LDVT2, N)
157 *> The first K columns of VT2**T contain the non-deflated
158 *> right singular vectors for the split problem.
159 *> \endverbatim
160 *>
161 *> \param[in] LDVT2
162 *> \verbatim
163 *> LDVT2 is INTEGER
164 *> The leading dimension of the array VT2. LDVT2 >= N.
165 *> \endverbatim
166 *>
167 *> \param[in] IDXC
168 *> \verbatim
169 *> IDXC is INTEGER array, dimension ( N )
170 *> The permutation used to arrange the columns of U (and rows of
171 *> VT) into three groups: the first group contains non-zero
172 *> entries only at and above (or before) NL +1; the second
173 *> contains non-zero entries only at and below (or after) NL+2;
174 *> and the third is dense. The first column of U and the row of
175 *> VT are treated separately, however.
176 *>
177 *> The rows of the singular vectors found by DLASD4
178 *> must be likewise permuted before the matrix multiplies can
179 *> take place.
180 *> \endverbatim
181 *>
182 *> \param[in] CTOT
183 *> \verbatim
184 *> CTOT is INTEGER array, dimension ( 4 )
185 *> A count of the total number of the various types of columns
186 *> in U (or rows in VT), as described in IDXC. The fourth column
187 *> type is any column which has been deflated.
188 *> \endverbatim
189 *>
190 *> \param[in] Z
191 *> \verbatim
192 *> Z is DOUBLE PRECISION array, dimension (K)
193 *> The first K elements of this array contain the components
194 *> of the deflation-adjusted updating row vector.
195 *> \endverbatim
196 *>
197 *> \param[out] INFO
198 *> \verbatim
199 *> INFO is INTEGER
200 *> = 0: successful exit.
201 *> < 0: if INFO = -i, the i-th argument had an illegal value.
202 *> > 0: if INFO = 1, a singular value did not converge
203 *> \endverbatim
204 *
205 * Authors:
206 * ========
207 *
208 *> \author Univ. of Tennessee
209 *> \author Univ. of California Berkeley
210 *> \author Univ. of Colorado Denver
211 *> \author NAG Ltd.
212 *
213 *> \date November 2015
214 *
215 *> \ingroup auxOTHERauxiliary
216 *
217 *> \par Contributors:
218 * ==================
219 *>
220 *> Ming Gu and Huan Ren, Computer Science Division, University of
221 *> California at Berkeley, USA
222 *>
223 * =====================================================================
224  SUBROUTINE dlasd3( NL, NR, SQRE, K, D, Q, LDQ, DSIGMA, U, LDU, U2,
225  $ ldu2, vt, ldvt, vt2, ldvt2, idxc, ctot, z,
226  $ info )
227 *
228 * -- LAPACK auxiliary routine (version 3.6.0) --
229 * -- LAPACK is a software package provided by Univ. of Tennessee, --
230 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
231 * November 2015
232 *
233 * .. Scalar Arguments ..
234  INTEGER INFO, K, LDQ, LDU, LDU2, LDVT, LDVT2, NL, NR,
235  $ sqre
236 * ..
237 * .. Array Arguments ..
238  INTEGER CTOT( * ), IDXC( * )
239  DOUBLE PRECISION D( * ), DSIGMA( * ), Q( ldq, * ), U( ldu, * ),
240  $ u2( ldu2, * ), vt( ldvt, * ), vt2( ldvt2, * ),
241  $ z( * )
242 * ..
243 *
244 * =====================================================================
245 *
246 * .. Parameters ..
247  DOUBLE PRECISION ONE, ZERO, NEGONE
248  parameter( one = 1.0d+0, zero = 0.0d+0,
249  $ negone = -1.0d+0 )
250 * ..
251 * .. Local Scalars ..
252  INTEGER CTEMP, I, J, JC, KTEMP, M, N, NLP1, NLP2, NRP1
253  DOUBLE PRECISION RHO, TEMP
254 * ..
255 * .. External Functions ..
256  DOUBLE PRECISION DLAMC3, DNRM2
257  EXTERNAL dlamc3, dnrm2
258 * ..
259 * .. External Subroutines ..
260  EXTERNAL dcopy, dgemm, dlacpy, dlascl, dlasd4, xerbla
261 * ..
262 * .. Intrinsic Functions ..
263  INTRINSIC abs, sign, sqrt
264 * ..
265 * .. Executable Statements ..
266 *
267 * Test the input parameters.
268 *
269  info = 0
270 *
271  IF( nl.LT.1 ) THEN
272  info = -1
273  ELSE IF( nr.LT.1 ) THEN
274  info = -2
275  ELSE IF( ( sqre.NE.1 ) .AND. ( sqre.NE.0 ) ) THEN
276  info = -3
277  END IF
278 *
279  n = nl + nr + 1
280  m = n + sqre
281  nlp1 = nl + 1
282  nlp2 = nl + 2
283 *
284  IF( ( k.LT.1 ) .OR. ( k.GT.n ) ) THEN
285  info = -4
286  ELSE IF( ldq.LT.k ) THEN
287  info = -7
288  ELSE IF( ldu.LT.n ) THEN
289  info = -10
290  ELSE IF( ldu2.LT.n ) THEN
291  info = -12
292  ELSE IF( ldvt.LT.m ) THEN
293  info = -14
294  ELSE IF( ldvt2.LT.m ) THEN
295  info = -16
296  END IF
297  IF( info.NE.0 ) THEN
298  CALL xerbla( 'DLASD3', -info )
299  RETURN
300  END IF
301 *
302 * Quick return if possible
303 *
304  IF( k.EQ.1 ) THEN
305  d( 1 ) = abs( z( 1 ) )
306  CALL dcopy( m, vt2( 1, 1 ), ldvt2, vt( 1, 1 ), ldvt )
307  IF( z( 1 ).GT.zero ) THEN
308  CALL dcopy( n, u2( 1, 1 ), 1, u( 1, 1 ), 1 )
309  ELSE
310  DO 10 i = 1, n
311  u( i, 1 ) = -u2( i, 1 )
312  10 CONTINUE
313  END IF
314  RETURN
315  END IF
316 *
317 * Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can
318 * be computed with high relative accuracy (barring over/underflow).
319 * This is a problem on machines without a guard digit in
320 * add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
321 * The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I),
322 * which on any of these machines zeros out the bottommost
323 * bit of DSIGMA(I) if it is 1; this makes the subsequent
324 * subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation
325 * occurs. On binary machines with a guard digit (almost all
326 * machines) it does not change DSIGMA(I) at all. On hexadecimal
327 * and decimal machines with a guard digit, it slightly
328 * changes the bottommost bits of DSIGMA(I). It does not account
329 * for hexadecimal or decimal machines without guard digits
330 * (we know of none). We use a subroutine call to compute
331 * 2*DSIGMA(I) to prevent optimizing compilers from eliminating
332 * this code.
333 *
334  DO 20 i = 1, k
335  dsigma( i ) = dlamc3( dsigma( i ), dsigma( i ) ) - dsigma( i )
336  20 CONTINUE
337 *
338 * Keep a copy of Z.
339 *
340  CALL dcopy( k, z, 1, q, 1 )
341 *
342 * Normalize Z.
343 *
344  rho = dnrm2( k, z, 1 )
345  CALL dlascl( 'G', 0, 0, rho, one, k, 1, z, k, info )
346  rho = rho*rho
347 *
348 * Find the new singular values.
349 *
350  DO 30 j = 1, k
351  CALL dlasd4( k, j, dsigma, z, u( 1, j ), rho, d( j ),
352  $ vt( 1, j ), info )
353 *
354 * If the zero finder fails, report the convergence failure.
355 *
356  IF( info.NE.0 ) THEN
357  RETURN
358  END IF
359  30 CONTINUE
360 *
361 * Compute updated Z.
362 *
363  DO 60 i = 1, k
364  z( i ) = u( i, k )*vt( i, k )
365  DO 40 j = 1, i - 1
366  z( i ) = z( i )*( u( i, j )*vt( i, j ) /
367  $ ( dsigma( i )-dsigma( j ) ) /
368  $ ( dsigma( i )+dsigma( j ) ) )
369  40 CONTINUE
370  DO 50 j = i, k - 1
371  z( i ) = z( i )*( u( i, j )*vt( i, j ) /
372  $ ( dsigma( i )-dsigma( j+1 ) ) /
373  $ ( dsigma( i )+dsigma( j+1 ) ) )
374  50 CONTINUE
375  z( i ) = sign( sqrt( abs( z( i ) ) ), q( i, 1 ) )
376  60 CONTINUE
377 *
378 * Compute left singular vectors of the modified diagonal matrix,
379 * and store related information for the right singular vectors.
380 *
381  DO 90 i = 1, k
382  vt( 1, i ) = z( 1 ) / u( 1, i ) / vt( 1, i )
383  u( 1, i ) = negone
384  DO 70 j = 2, k
385  vt( j, i ) = z( j ) / u( j, i ) / vt( j, i )
386  u( j, i ) = dsigma( j )*vt( j, i )
387  70 CONTINUE
388  temp = dnrm2( k, u( 1, i ), 1 )
389  q( 1, i ) = u( 1, i ) / temp
390  DO 80 j = 2, k
391  jc = idxc( j )
392  q( j, i ) = u( jc, i ) / temp
393  80 CONTINUE
394  90 CONTINUE
395 *
396 * Update the left singular vector matrix.
397 *
398  IF( k.EQ.2 ) THEN
399  CALL dgemm( 'N', 'N', n, k, k, one, u2, ldu2, q, ldq, zero, u,
400  $ ldu )
401  GO TO 100
402  END IF
403  IF( ctot( 1 ).GT.0 ) THEN
404  CALL dgemm( 'N', 'N', nl, k, ctot( 1 ), one, u2( 1, 2 ), ldu2,
405  $ q( 2, 1 ), ldq, zero, u( 1, 1 ), ldu )
406  IF( ctot( 3 ).GT.0 ) THEN
407  ktemp = 2 + ctot( 1 ) + ctot( 2 )
408  CALL dgemm( 'N', 'N', nl, k, ctot( 3 ), one, u2( 1, ktemp ),
409  $ ldu2, q( ktemp, 1 ), ldq, one, u( 1, 1 ), ldu )
410  END IF
411  ELSE IF( ctot( 3 ).GT.0 ) THEN
412  ktemp = 2 + ctot( 1 ) + ctot( 2 )
413  CALL dgemm( 'N', 'N', nl, k, ctot( 3 ), one, u2( 1, ktemp ),
414  $ ldu2, q( ktemp, 1 ), ldq, zero, u( 1, 1 ), ldu )
415  ELSE
416  CALL dlacpy( 'F', nl, k, u2, ldu2, u, ldu )
417  END IF
418  CALL dcopy( k, q( 1, 1 ), ldq, u( nlp1, 1 ), ldu )
419  ktemp = 2 + ctot( 1 )
420  ctemp = ctot( 2 ) + ctot( 3 )
421  CALL dgemm( 'N', 'N', nr, k, ctemp, one, u2( nlp2, ktemp ), ldu2,
422  $ q( ktemp, 1 ), ldq, zero, u( nlp2, 1 ), ldu )
423 *
424 * Generate the right singular vectors.
425 *
426  100 CONTINUE
427  DO 120 i = 1, k
428  temp = dnrm2( k, vt( 1, i ), 1 )
429  q( i, 1 ) = vt( 1, i ) / temp
430  DO 110 j = 2, k
431  jc = idxc( j )
432  q( i, j ) = vt( jc, i ) / temp
433  110 CONTINUE
434  120 CONTINUE
435 *
436 * Update the right singular vector matrix.
437 *
438  IF( k.EQ.2 ) THEN
439  CALL dgemm( 'N', 'N', k, m, k, one, q, ldq, vt2, ldvt2, zero,
440  $ vt, ldvt )
441  RETURN
442  END IF
443  ktemp = 1 + ctot( 1 )
444  CALL dgemm( 'N', 'N', k, nlp1, ktemp, one, q( 1, 1 ), ldq,
445  $ vt2( 1, 1 ), ldvt2, zero, vt( 1, 1 ), ldvt )
446  ktemp = 2 + ctot( 1 ) + ctot( 2 )
447  IF( ktemp.LE.ldvt2 )
448  $ CALL dgemm( 'N', 'N', k, nlp1, ctot( 3 ), one, q( 1, ktemp ),
449  $ ldq, vt2( ktemp, 1 ), ldvt2, one, vt( 1, 1 ),
450  $ ldvt )
451 *
452  ktemp = ctot( 1 ) + 1
453  nrp1 = nr + sqre
454  IF( ktemp.GT.1 ) THEN
455  DO 130 i = 1, k
456  q( i, ktemp ) = q( i, 1 )
457  130 CONTINUE
458  DO 140 i = nlp2, m
459  vt2( ktemp, i ) = vt2( 1, i )
460  140 CONTINUE
461  END IF
462  ctemp = 1 + ctot( 2 ) + ctot( 3 )
463  CALL dgemm( 'N', 'N', k, nrp1, ctemp, one, q( 1, ktemp ), ldq,
464  $ vt2( ktemp, nlp2 ), ldvt2, zero, vt( 1, nlp2 ), ldvt )
465 *
466  RETURN
467 *
468 * End of DLASD3
469 *
470  END
subroutine dlacpy(UPLO, M, N, A, LDA, B, LDB)
DLACPY copies all or part of one two-dimensional array to another.
Definition: dlacpy.f:105
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
subroutine dgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
DGEMM
Definition: dgemm.f:189
subroutine dlascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
DLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: dlascl.f:141
subroutine dlasd3(NL, NR, SQRE, K, D, Q, LDQ, DSIGMA, U, LDU, U2, LDU2, VT, LDVT, VT2, LDVT2, IDXC, CTOT, Z, INFO)
DLASD3 finds all square roots of the roots of the secular equation, as defined by the values in D and...
Definition: dlasd3.f:227
subroutine dlasd4(N, I, D, Z, DELTA, RHO, SIGMA, WORK, INFO)
DLASD4 computes the square root of the i-th updated eigenvalue of a positive symmetric rank-one modif...
Definition: dlasd4.f:155
subroutine dcopy(N, DX, INCX, DY, INCY)
DCOPY
Definition: dcopy.f:53