sgejsv.f
1 *> \brief \b SGEJSV
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download SGEJSV + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgejsv.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgejsv.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgejsv.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE SGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP,
22 * M, N, A, LDA, SVA, U, LDU, V, LDV,
23 * WORK, LWORK, IWORK, INFO )
24 *
25 * .. Scalar Arguments ..
26 * IMPLICIT NONE
27 * INTEGER INFO, LDA, LDU, LDV, LWORK, M, N
28 * ..
29 * .. Array Arguments ..
30 * REAL A( LDA, * ), SVA( N ), U( LDU, * ), V( LDV, * ),
31 * $ WORK( LWORK )
32 * INTEGER IWORK( * )
33 * CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV
34 * ..
35 *
36 *
37 *> \par Purpose:
38 * =============
39 *>
40 *> \verbatim
41 *>
42 *> SGEJSV computes the singular value decomposition (SVD) of a real M-by-N
43 *> matrix [A], where M >= N. The SVD of [A] is written as
44 *>
45 *> [A] = [U] * [SIGMA] * [V]^t,
46 *>
47 *> where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N
48 *> diagonal elements, [U] is an M-by-N (or M-by-M) orthonormal matrix, and
49 *> [V] is an N-by-N orthogonal matrix. The diagonal elements of [SIGMA] are
50 *> the singular values of [A]. The columns of [U] and [V] are the left and
51 *> the right singular vectors of [A], respectively. The matrices [U] and [V]
52 *> are computed and stored in the arrays U and V, respectively. The diagonal
53 *> of [SIGMA] is computed and stored in the array SVA.
54 *> SGEJSV can sometimes compute tiny singular values and their singular vectors much
55 *> more accurately than other SVD routines; see Further Details and the example below.
56 
57 *> \endverbatim
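*>
*> As an illustration, the sketch below calls SGEJSV with the 'C' accuracy
*> option and the recommended restricted range ('R') to compute the full SVD
*> of a small matrix. The workspace length follows the minimal formula for
*> this job given in the description of LWORK, and the singular values are
*> recovered as described under SVA and WORK; the program name, the fixed
*> sizes and the test matrix are arbitrary choices for the example.
*>
*> \verbatim
*>       PROGRAM EXJSV
*>       IMPLICIT NONE
*>       INTEGER            M, N, LDA, LDU, LDV, LWORK
*>       PARAMETER          ( M = 4, N = 3, LDA = M, LDU = M, LDV = N )
*>       PARAMETER          ( LWORK = 2*M+N + 6*N + 2*N*N )
*>       REAL               A( LDA, N ), SVA( N ), U( LDU, N ),
*>      $                   V( LDV, N ), WORK( LWORK )
*>       INTEGER            IWORK( M + 3*N ), INFO, I
*>       EXTERNAL           SGEJSV
*>       DATA               A / 1.0E0, 2.0E0, 3.0E0, 4.0E0,
*>      $                       2.0E0, 1.0E0, 0.0E0, 1.0E0,
*>      $                       0.0E0, 1.0E0, 1.0E0, 2.0E0 /
*>       CALL SGEJSV( 'C', 'U', 'V', 'R', 'N', 'N',
*>      $             M, N, A, LDA, SVA, U, LDU, V, LDV,
*>      $             WORK, LWORK, IWORK, INFO )
*>       IF ( INFO .EQ. 0 ) THEN
*>          WRITE(*,*) ( ( WORK(1) / WORK(2) ) * SVA(I), I = 1, N )
*>       END IF
*>       END
*> \endverbatim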
58 *
59 * Arguments:
60 * ==========
61 *
62 *> \param[in] JOBA
63 *> \verbatim
64 *> JOBA is CHARACTER*1
65 *> Specifies the level of accuracy:
66 *> = 'C': This option works well (high relative accuracy) if A = B * D,
67 *> with well-conditioned B and arbitrary diagonal matrix D.
68 *> The accuracy cannot be spoiled by COLUMN scaling. The
69 *> accuracy of the computed output depends on the condition of
70 *> B, and the procedure aims at the best theoretical accuracy.
71 *> The relative error max_{i=1:N}|d sigma_i| / sigma_i is
72 *> bounded by f(M,N)*epsilon* cond(B), independent of D.
73 *> The input matrix is preprocessed with the QRF with column
74 *> pivoting. This initial preprocessing and preconditioning by
75 *> a rank revealing QR factorization is common for all values of
76 *> JOBA. Additional actions are specified as follows:
77 *> = 'E': Computation as with 'C' with an additional estimate of the
78 *> condition number of B. It provides a realistic error bound.
79 *> = 'F': If A = D1 * C * D2 with ill-conditioned diagonal scalings
80 *> D1, D2, and well-conditioned matrix C, this option gives
81 *> higher accuracy than the 'C' option. If the structure of the
82 *> input matrix is not known, and relative accuracy is
83 *> desirable, then this option is advisable. The input matrix A
84 *> is preprocessed with QR factorization with FULL (row and
85 *> column) pivoting.
86 *> = 'G': Computation as with 'F' with an additional estimate of the
87 *> condition number of B, where A=D*B. If A has heavily weighted
88 *> rows, then using this condition number gives an overly pessimistic
89 *> error bound.
90 *> = 'A': Small singular values are the noise and the matrix is treated
91 *> as numerically rank deficient. The error in the computed
92 *> singular values is bounded by f(m,n)*epsilon*||A||.
93 *> The computed SVD A = U * S * V^t restores A up to
94 *> f(m,n)*epsilon*||A||.
95 *> This gives the procedure the licence to discard (set to zero)
96 *> all singular values below N*epsilon*||A||.
97 *> = 'R': Similar to 'A'. The rank revealing property of the initial
98 *> QR factorization is used to reveal (using the triangular factor)
99 *> a gap sigma_{r+1} < epsilon * sigma_r in which case the
100 *> numerical RANK is declared to be r. The SVD is computed with
101 *> absolute error bounds, but more accurately than with 'A'.
102 *> \endverbatim
103 *>
104 *> \param[in] JOBU
105 *> \verbatim
106 *> JOBU is CHARACTER*1
107 *> Specifies whether to compute the columns of U:
108 *> = 'U': N columns of U are returned in the array U.
109 *> = 'F': the full set of M left singular vectors is returned in the array U.
110 *> = 'W': U may be used as workspace of length M*N. See the description
111 *> of U.
112 *> = 'N': U is not computed.
113 *> \endverbatim
114 *>
115 *> \param[in] JOBV
116 *> \verbatim
117 *> JOBV is CHARACTER*1
118 *> Specifies whether to compute the matrix V:
119 *> = 'V': N columns of V are returned in the array V; Jacobi rotations
120 *> are not explicitly accumulated.
121 *> = 'J': N columns of V are returned in the array V, but they are
122 *> computed as the product of Jacobi rotations. This option is
123 *> allowed only if JOBU .NE. 'N', i.e. in computing the full SVD.
124 *> = 'W': V may be used as workspace of length N*N. See the description
125 *> of V.
126 *> = 'N': V is not computed.
127 *> \endverbatim
128 *>
129 *> \param[in] JOBR
130 *> \verbatim
131 *> JOBR is CHARACTER*1
132 *> Specifies the RANGE for the singular values. Issues the licence to
133 *> set to zero small positive singular values if they are outside
134 *> specified range. If A .NE. 0 is scaled so that the largest singular
135 *> value of c*A is around SQRT(BIG), BIG=SLAMCH('O'), then JOBR issues
136 *> the licence to kill columns of A whose norm in c*A is less than
137 *> SQRT(SFMIN) (for JOBR.EQ.'R'), or less than SMALL=SFMIN/EPSLN,
138 *> where SFMIN=SLAMCH('S'), EPSLN=SLAMCH('E').
139 *> = 'N': Do not kill small columns of c*A. This option assumes that
140 *> BLAS and QR factorizations and triangular solvers are
141 *> implemented to work in that range. If the condition of A
142 *> is greater than BIG, use SGESVJ.
143 *> = 'R': RESTRICTED range for sigma(c*A) is [SQRT(SFMIN), SQRT(BIG)]
144 *> (roughly, as described above). This option is recommended.
145 *> ===========================
146 *> For computing the singular values in the FULL range [SFMIN,BIG]
147 *> use SGESVJ.
148 *> \endverbatim
149 *>
150 *> \param[in] JOBT
151 *> \verbatim
152 *> JOBT is CHARACTER*1
153 *> If the matrix is square then the procedure may determine to use
154 *> transposed A if A^t seems to be better with respect to convergence.
155 *> If the matrix is not square, JOBT is ignored. This is subject to
156 *> changes in the future.
157 *> The decision is based on two values of entropy over the adjoint
158 *> orbit of A^t * A. See the descriptions of WORK(6) and WORK(7).
159 *> = 'T': transpose if entropy test indicates possibly faster
160 *> convergence of Jacobi process if A^t is taken as input. If A is
161 *> replaced with A^t, then the row pivoting is included automatically.
162 *> = 'N': do not speculate.
163 *> This option can be used to compute only the singular values, or the
164 *> full SVD (U, SIGMA and V). For only one set of singular vectors
165 *> (U or V), the caller should provide both U and V, as one of the
166 *> matrices is used as workspace if the matrix A is transposed (see the sketch below).
167 *> The implementer can easily remove this constraint and make the
168 *> code more complicated. See the descriptions of U and V.
169 *> \endverbatim
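*>
*> For example (a sketch; array sizes are assumed to satisfy the
*> requirements stated in the argument descriptions below), to use the
*> transpose speculation for a square A while requesting only the right
*> singular vectors, U can be reserved as workspace via JOBU = 'W':
*>
*> \verbatim
*>       CALL SGEJSV( 'C', 'W', 'V', 'R', 'T', 'N',
*>      $             N, N, A, LDA, SVA, U, LDU, V, LDV,
*>      $             WORK, LWORK, IWORK, INFO )
*> \endverbatim
*>
*> Here U must provide at least N*N workspace (see the descriptions of
*> JOBU and U).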
170 *>
171 *> \param[in] JOBP
172 *> \verbatim
173 *> JOBP is CHARACTER*1
174 *> Issues the licence to introduce structured perturbations to drown
175 *> denormalized numbers. This licence should be active if the
176 *> denormals are poorly implemented, causing slow computation,
177 *> especially in cases of fast convergence (!). For details see [1,2].
178 *> For the sake of simplicity, these perturbations are included only
179 *> when the full SVD or only the singular values are requested. The
180 *> implementer/user can easily add the perturbation for the cases of
181 *> computing one set of singular vectors.
182 *> = 'P': introduce perturbation
183 *> = 'N': do not perturb
184 *> \endverbatim
185 *>
186 *> \param[in] M
187 *> \verbatim
188 *> M is INTEGER
189 *> The number of rows of the input matrix A. M >= 0.
190 *> \endverbatim
191 *>
192 *> \param[in] N
193 *> \verbatim
194 *> N is INTEGER
195 *> The number of columns of the input matrix A. M >= N >= 0.
196 *> \endverbatim
197 *>
198 *> \param[in,out] A
199 *> \verbatim
200 *> A is REAL array, dimension (LDA,N)
201 *> On entry, the M-by-N matrix A.
202 *> \endverbatim
203 *>
204 *> \param[in] LDA
205 *> \verbatim
206 *> LDA is INTEGER
207 *> The leading dimension of the array A. LDA >= max(1,M).
208 *> \endverbatim
209 *>
210 *> \param[out] SVA
211 *> \verbatim
212 *> SVA is REAL array, dimension (N)
213 *> On exit,
214 *> - For WORK(1)/WORK(2) = ONE: The singular values of A. During the
215 *> computation SVA contains Euclidean column norms of the
216 *> iterated matrices in the array A.
217 *> - For WORK(1) .NE. WORK(2): The singular values of A are
218 *> (WORK(1)/WORK(2)) * SVA(1:N). This factored form is used if
219 *> sigma_max(A) overflows or if small singular values have been
220 *> saved from underflow by scaling the input matrix A.
221 *> - If JOBR='R' then some of the singular values may be returned
222 *> as exact zeros obtained by "set to zero" because they are
223 *> below the numerical rank threshold or are denormalized numbers.
224 *> \endverbatim
225 *>
226 *> \param[out] U
227 *> \verbatim
228 *> U is REAL array, dimension ( LDU, N )
229 *> If JOBU = 'U', then U contains on exit the M-by-N matrix of
230 *> the left singular vectors.
231 *> If JOBU = 'F', then U contains on exit the M-by-M matrix of
232 *> the left singular vectors, including an ONB
233 *> of the orthogonal complement of the Range(A).
234 *> If JOBU = 'W' .AND. (JOBV.EQ.'V' .AND. JOBT.EQ.'T' .AND. M.EQ.N),
235 *> then U is used as workspace if the procedure
236 *> replaces A with A^t. In that case, [V] is computed
237 *> in U as left singular vectors of A^t and then
238 *> copied back to the V array. This 'W' option is just
239 *> a reminder to the caller that in this case U is
240 *> reserved as workspace of length N*N.
241 *> If JOBU = 'N' U is not referenced.
242 *> \endverbatim
243 *>
244 *> \param[in] LDU
245 *> \verbatim
246 *> LDU is INTEGER
247 *> The leading dimension of the array U, LDU >= 1.
248 *> IF JOBU = 'U' or 'F' or 'W', then LDU >= M.
249 *> \endverbatim
250 *>
251 *> \param[out] V
252 *> \verbatim
253 *> V is REAL array, dimension ( LDV, N )
254 *> If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of
255 *> the right singular vectors;
256 *> If JOBV = 'W', AND (JOBU.EQ.'U' AND JOBT.EQ.'T' AND M.EQ.N),
257 *> then V is used as workspace if the procedure
258 *> replaces A with A^t. In that case, [U] is computed
259 *> in V as right singular vectors of A^t and then
260 *> copied back to the U array. This 'W' option is just
261 *> a reminder to the caller that in this case V is
262 *> reserved as workspace of length N*N.
263 *> If JOBV = 'N' V is not referenced.
264 *> \endverbatim
265 *>
266 *> \param[in] LDV
267 *> \verbatim
268 *> LDV is INTEGER
269 *> The leading dimension of the array V, LDV >= 1.
270 *> If JOBV = 'V' or 'J' or 'W', then LDV >= N.
271 *> \endverbatim
272 *>
273 *> \param[out] WORK
274 *> \verbatim
275 *> WORK is REAL array, dimension at least LWORK.
276 *> On exit,
277 *> WORK(1) = SCALE = WORK(2) / WORK(1) is the scaling factor such
278 *> that SCALE*SVA(1:N) are the computed singular values
279 *> of A. (See the description of SVA().)
280 *> WORK(2) = See the description of WORK(1).
281 *> WORK(3) = SCONDA is an estimate for the condition number of
282 *> column equilibrated A. (If JOBA .EQ. 'E' or 'G')
283 *> SCONDA is an estimate of SQRT(||(R^t * R)^(-1)||_1).
284 *> It is computed using SPOCON. It holds
285 *> N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
286 *> where R is the triangular factor from the QRF of A.
287 *> However, if R is truncated and the numerical rank is
288 *> determined to be strictly smaller than N, SCONDA is
289 *> returned as -1, thus indicating that the smallest
290 *> singular values might be lost.
291 *>
292 *> If full SVD is needed, the following two condition numbers are
293 *> useful for the analysis of the algorithm. They are provided for
294 *> a developer/implementer who is familiar with the details of
295 *> the method.
296 *>
297 *> WORK(4) = an estimate of the scaled condition number of the
298 *> triangular factor in the first QR factorization.
299 *> WORK(5) = an estimate of the scaled condition number of the
300 *> triangular factor in the second QR factorization.
301 *> The following two parameters are computed if JOBT .EQ. 'T'.
302 *> They are provided for a developer/implementer who is familiar
303 *> with the details of the method.
304 *>
305 *> WORK(6) = the entropy of A^t*A :: this is the Shannon entropy
306 *> of diag(A^t*A) / Trace(A^t*A) taken as a point in the
307 *> probability simplex.
308 *> WORK(7) = the entropy of A*A^t.
309 *> \endverbatim
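*>
*> For instance (a rough sketch that ignores the moderate factor f(M,N)
*> mentioned under JOBA, with EPSLN and ERRBD as local variables of the
*> caller), the estimate WORK(3) = SCONDA returned for JOBA = 'E' or 'G'
*> can be turned into an approximate relative error bound for the computed
*> nonzero singular values:
*>
*> \verbatim
*>       EPSLN = SLAMCH( 'Epsilon' )
*>       IF ( WORK(3) .GT. 0.0E0 ) ERRBD = EPSLN * WORK(3)
*> \endverbatim
*>
*> Recall that SCONDA is returned as -1 if the numerical rank is determined
*> to be strictly smaller than N, in which case no such bound is available.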
310 *>
311 *> \param[in] LWORK
312 *> \verbatim
313 *> LWORK is INTEGER
314 *> The length of the array WORK.
315 *> LWORK depends on the job:
316 *>
317 *> If only SIGMA is needed ( JOBU.EQ.'N', JOBV.EQ.'N' ) and
318 *> -> .. no scaled condition estimate required (JOBA.NE.'E' .AND. JOBA.NE.'G'):
319 *> LWORK >= max(2*M+N,4*N+1,7). This is the minimal requirement.
320 *> ->> For optimal performance (blocked code) the optimal value
321 *> is LWORK >= max(2*M+N,3*N+(N+1)*NB,7). Here NB is the optimal
322 *> block size for SGEQP3 and SGEQRF.
323 *> In general, optimal LWORK is computed as
324 *> LWORK >= max(2*M+N,N+LWORK(SGEQP3),N+LWORK(SGEQRF), 7).
325 *> -> .. an estimate of the scaled condition number of A is
326 *> required (JOBA='E', 'G'). In this case, LWORK is the maximum
327 *> of the above and N*N+4*N, i.e. LWORK >= max(2*M+N,N*N+4*N,7).
328 *> ->> For optimal performance (blocked code) the optimal value
329 *> is LWORK >= max(2*M+N,3*N+(N+1)*NB, N*N+4*N, 7).
330 *> In general, the optimal length LWORK is computed as
331 *> LWORK >= max(2*M+N,N+LWORK(SGEQP3),N+LWORK(SGEQRF),
332 *> N+N*N+LWORK(SPOCON),7).
333 *>
334 *> If SIGMA and the right singular vectors are needed (JOBV.EQ.'V'),
335 *> -> the minimal requirement is LWORK >= max(2*M+N,4*N+1,7).
336 *> -> For optimal performance, LWORK >= max(2*M+N,3*N+(N+1)*NB,7),
337 *> where NB is the optimal block size for SGEQP3, SGEQRF, SGELQF,
338 *> SORMLQ. In general, the optimal length LWORK is computed as
339 *> LWORK >= max(2*M+N,N+LWORK(SGEQP3), N+LWORK(SPOCON),
340 *> N+LWORK(SGELQF), 2*N+LWORK(SGEQRF), N+LWORK(SORMLQ)).
341 *>
342 *> If SIGMA and the left singular vectors are needed
343 *> -> the minimal requirement is LWORK >= max(2*M+N,4*N+1,7).
344 *> -> For optimal performance:
345 *> if JOBU.EQ.'U' :: LWORK >= max(2*M+N,3*N+(N+1)*NB,7),
346 *> if JOBU.EQ.'F' :: LWORK >= max(2*M+N,3*N+(N+1)*NB,N+M*NB,7),
347 *> where NB is the optimal block size for SGEQP3, SGEQRF, SORMQR.
348 *> In general, the optimal length LWORK is computed as
349 *> LWORK >= max(2*M+N,N+LWORK(SGEQP3),N+LWORK(SPOCON),
350 *> 2*N+LWORK(SGEQRF), N+LWORK(SORMQR)).
351 *> Here LWORK(SORMQR) equals N*NB (for JOBU.EQ.'U') or
352 *> M*NB (for JOBU.EQ.'F').
353 *>
354 *> If the full SVD is needed: (JOBU.EQ.'U' or JOBU.EQ.'F') and
355 *> -> if JOBV.EQ.'V'
356 *> the minimal requirement is LWORK >= max(2*M+N,6*N+2*N*N).
357 *> -> if JOBV.EQ.'J' the minimal requirement is
358 *> LWORK >= max(2*M+N, 4*N+N*N,2*N+N*N+6).
359 *> -> For optimal performance, LWORK should be additionally
360 *> larger than N+M*NB, where NB is the optimal block size
361 *> for SORMQR.
362 *> \endverbatim
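*>
*> As a sketch using only the minimal formulas above (LSAME is the standard
*> LAPACK character comparison; the blocked-code formulas should be preferred
*> for performance), a caller could select LWORK as follows:
*>
*> \verbatim
*>       IF ( LSAME(JOBU,'N') .AND. LSAME(JOBV,'N') ) THEN
*>          LWORK = MAX( 2*M+N, 4*N+1, 7 )
*>          IF ( LSAME(JOBA,'E') .OR. LSAME(JOBA,'G') )
*>      $      LWORK = MAX( LWORK, N*N+4*N )
*>       ELSE IF ( LSAME(JOBU,'U') .OR. LSAME(JOBU,'F') ) THEN
*>          IF ( LSAME(JOBV,'V') ) THEN
*>             LWORK = MAX( 2*M+N, 6*N+2*N*N )
*>          ELSE IF ( LSAME(JOBV,'J') ) THEN
*>             LWORK = MAX( 2*M+N, 4*N+N*N, 2*N+N*N+6 )
*>          ELSE
*>             LWORK = MAX( 2*M+N, 4*N+1, 7 )
*>          END IF
*>       ELSE
*>          LWORK = MAX( 2*M+N, 4*N+1, 7 )
*>       END IF
*> \endverbatim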
363 *>
364 *> \param[out] IWORK
365 *> \verbatim
366 *> IWORK is INTEGER array, dimension M+3*N.
367 *> On exit,
368 *> IWORK(1) = the numerical rank determined after the initial
369 *> QR factorization with pivoting. See the descriptions
370 *> of JOBA and JOBR.
371 *> IWORK(2) = the number of the computed nonzero singular values
372 *> IWORK(3) = if nonzero, a warning message:
373 *> If IWORK(3).EQ.1 then some of the column norms of A
374 *> were denormalized floats. The requested high accuracy
375 *> is not warranted by the data.
376 *> \endverbatim
377 *>
378 *> \param[out] INFO
379 *> \verbatim
380 *> INFO is INTEGER
381 *> < 0 : if INFO = -i, then the i-th argument had an illegal value.
382 *> = 0 : successful exit;
383 *> > 0 : SGEJSV did not converge in the maximal allowed number
384 *> of sweeps. The computed values may be inaccurate.
385 *> \endverbatim
386 *
387 * Authors:
388 * ========
389 *
390 *> \author Univ. of Tennessee
391 *> \author Univ. of California Berkeley
392 *> \author Univ. of Colorado Denver
393 *> \author NAG Ltd.
394 *
395 *> \date November 2015
396 *
397 *> \ingroup realGEsing
398 *
399 *> \par Further Details:
400 * =====================
401 *>
402 *> \verbatim
403 *>
404 *> SGEJSV implements a preconditioned Jacobi SVD algorithm. It uses SGEQP3,
405 *> SGEQRF, and SGELQF as preprocessors and preconditioners. Optionally, an
406 *> additional row pivoting can be used as a preprocessor, which in some
407 *> cases results in much higher accuracy. An example is matrix A with the
408 *> structure A = D1 * C * D2, where D1, D2 are arbitrarily ill-conditioned
409 *> diagonal matrices and C is a well-conditioned matrix. In that case, complete
410 *> pivoting in the first QR factorization provides accuracy dependent on the
411 *> condition number of C, and independent of D1, D2. Such higher accuracy is
412 *> not completely understood theoretically, but it works well in practice.
413 *> Further, if A can be written as A = B*D, with well-conditioned B and some
414 *> diagonal D, then the high accuracy is guaranteed, both theoretically and
415 *> in software, independent of D. For more details see [1], [2].
416 *> The computational range for the singular values can be the full range
417 *> ( UNDERFLOW,OVERFLOW ), provided that the machine arithmetic and the BLAS
418 *> & LAPACK routines called by SGEJSV are implemented to work in that range.
419 *> If that is not the case, then the restriction for safe computation with
420 *> the singular values in the range of normalized IEEE numbers is that the
421 *> spectral condition number kappa(A)=sigma_max(A)/sigma_min(A) does not
422 *> overflow. This code (SGEJSV) is best used in this restricted range,
423 *> meaning that singular values of magnitude below ||A||_2 / SLAMCH('O') are
424 *> returned as zeros. See JOBR for details on this.
425 *> Further, this implementation is somewhat slower than the one described
426 *> in [1,2] due to replacement of some non-LAPACK components, and because
427 *> the choice of some tuning parameters in the iterative part (SGESVJ) is
428 *> left to the implementer on a particular machine.
429 *> The rank revealing QR factorization (in this code: SGEQP3) should be
430 *> implemented as in [3]. We have a new version of SGEQP3 under development
431 *> that is more robust than the current one in LAPACK, with a cleaner cut in
432 *> rank deficient cases. It will be available in the SIGMA library [4].
433 *> If M is much larger than N, it is obvious that the initial QRF with
434 *> column pivoting can be preprocessed by the QRF without pivoting. That
435 *> well known trick is not used in SGEJSV because in some cases heavy row
436 *> weighting can be treated with complete pivoting. The overhead in cases
437 *> M much larger than N is then only due to pivoting, but the benefits in
438 *> terms of accuracy have prevailed. The implementer/user can incorporate
439 *> this extra QRF step easily. The implementer can also improve data movement
440 *> (matrix transpose, matrix copy, matrix transposed copy) - this
441 *> implementation of SGEJSV uses only the simplest, naive data movement.
442 *> \endverbatim
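*>
*> To illustrate the A = D1 * C * D2 structure discussed above, such a test
*> matrix can be assembled from a well-conditioned C and diagonal scalings of
*> widely varying magnitude (a sketch; D1, D2 and C are caller-supplied arrays,
*> and the scaling range is an arbitrary choice within single precision):
*>
*> \verbatim
*>       DO 20 J = 1, N
*>          DO 10 I = 1, M
*>             A( I, J ) = D1( I ) * C( I, J ) * D2( J )
*>    10    CONTINUE
*>    20 CONTINUE
*> \endverbatim
*>
*> For such inputs, JOBA = 'F' or 'G' (full pivoting in the first QRF) is the
*> appropriate choice, as described above.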
443 *
444 *> \par Contributors:
445 * ==================
446 *>
447 *> Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany)
448 *
449 *> \par References:
450 * ================
451 *>
452 *> \verbatim
453 *>
454 *> [1] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I.
455 *> SIAM J. Matrix Anal. Appl. Vol. 29, No. 4 (2008), pp. 1322-1342.
456 *> LAPACK Working note 169.
457 *> [2] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II.
458 *> SIAM J. Matrix Anal. Appl. Vol. 29, No. 4 (2008), pp. 1343-1362.
459 *> LAPACK Working note 170.
460 *> [3] Z. Drmac and Z. Bujanovic: On the failure of rank-revealing QR
461 *> factorization software - a case study.
462 *> ACM Trans. Math. Softw. Vol. 35, No. 2 (2008), pp. 1-28.
463 *> LAPACK Working note 176.
464 *> [4] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV,
465 *> QSVD, (H,K)-SVD computations.
466 *> Department of Mathematics, University of Zagreb, 2008.
467 *> \endverbatim
468 *
469 *> \par Bugs, examples and comments:
470 * =================================
471 *>
472 *> Please report all bugs and send interesting examples and/or comments to
473 *> drmac@math.hr. Thank you.
474 *>
475 * =====================================================================
476  SUBROUTINE sgejsv( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP,
477  $ m, n, a, lda, sva, u, ldu, v, ldv,
478  $ work, lwork, iwork, info )
479 *
480 * -- LAPACK computational routine (version 3.6.0) --
481 * -- LAPACK is a software package provided by Univ. of Tennessee, --
482 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. --
483 * November 2015
484 *
485 * .. Scalar Arguments ..
486  IMPLICIT NONE
487  INTEGER INFO, LDA, LDU, LDV, LWORK, M, N
488 * ..
489 * .. Array Arguments ..
490  REAL A( lda, * ), SVA( n ), U( ldu, * ), V( ldv, * ),
491  $ work( lwork )
492  INTEGER IWORK( * )
493  CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV
494 * ..
495 *
496 * ===========================================================================
497 *
498 * .. Local Parameters ..
499  REAL ZERO, ONE
500  parameter( zero = 0.0e0, one = 1.0e0 )
501 * ..
502 * .. Local Scalars ..
503  REAL AAPP, AAQQ, AATMAX, AATMIN, BIG, BIG1, COND_OK,
504  $ condr1, condr2, entra, entrat, epsln, maxprj, scalem,
505  $ sconda, sfmin, small, temp1, uscal1, uscal2, xsc
506  INTEGER IERR, N1, NR, NUMRANK, p, q, WARNING
507  LOGICAL ALMORT, DEFR, ERREST, GOSCAL, JRACC, KILL, LSVEC,
508  $ l2aber, l2kill, l2pert, l2rank, l2tran,
509  $ noscal, rowpiv, rsvec, transp
510 * ..
511 * .. Intrinsic Functions ..
512  INTRINSIC abs, alog, max, min, float, nint, sign, sqrt
513 * ..
514 * .. External Functions ..
515  REAL SLAMCH, SNRM2
516  INTEGER ISAMAX
517  LOGICAL LSAME
518  EXTERNAL isamax, lsame, slamch, snrm2
519 * ..
520 * .. External Subroutines ..
521  EXTERNAL scopy, sgelqf, sgeqp3, sgeqrf, slacpy, slascl,
522  $ slaset, slassq, slaswp, sorgqr, sormlq,
523  $ sormqr, spocon, sscal, sswap, strsm, xerbla
524 *
525  EXTERNAL sgesvj
526 * ..
527 *
528 * Test the input arguments
529 *
530  lsvec = lsame( jobu, 'U' ) .OR. lsame( jobu, 'F' )
531  jracc = lsame( jobv, 'J' )
532  rsvec = lsame( jobv, 'V' ) .OR. jracc
533  rowpiv = lsame( joba, 'F' ) .OR. lsame( joba, 'G' )
534  l2rank = lsame( joba, 'R' )
535  l2aber = lsame( joba, 'A' )
536  errest = lsame( joba, 'E' ) .OR. lsame( joba, 'G' )
537  l2tran = lsame( jobt, 'T' )
538  l2kill = lsame( jobr, 'R' )
539  defr = lsame( jobr, 'N' )
540  l2pert = lsame( jobp, 'P' )
541 *
542  IF ( .NOT.(rowpiv .OR. l2rank .OR. l2aber .OR.
543  $ errest .OR. lsame( joba, 'C' ) )) THEN
544  info = - 1
545  ELSE IF ( .NOT.( lsvec .OR. lsame( jobu, 'N' ) .OR.
546  $ lsame( jobu, 'W' )) ) THEN
547  info = - 2
548  ELSE IF ( .NOT.( rsvec .OR. lsame( jobv, 'N' ) .OR.
549  $ lsame( jobv, 'W' )) .OR. ( jracc .AND. (.NOT.lsvec) ) ) THEN
550  info = - 3
551  ELSE IF ( .NOT. ( l2kill .OR. defr ) ) THEN
552  info = - 4
553  ELSE IF ( .NOT. ( l2tran .OR. lsame( jobt, 'N' ) ) ) THEN
554  info = - 5
555  ELSE IF ( .NOT. ( l2pert .OR. lsame( jobp, 'N' ) ) ) THEN
556  info = - 6
557  ELSE IF ( m .LT. 0 ) THEN
558  info = - 7
559  ELSE IF ( ( n .LT. 0 ) .OR. ( n .GT. m ) ) THEN
560  info = - 8
561  ELSE IF ( lda .LT. m ) THEN
562  info = - 10
563  ELSE IF ( lsvec .AND. ( ldu .LT. m ) ) THEN
564  info = - 13
565  ELSE IF ( rsvec .AND. ( ldv .LT. n ) ) THEN
566  info = - 14
567  ELSE IF ( (.NOT.(lsvec .OR. rsvec .OR. errest).AND.
568  $ (lwork .LT. max(7,4*n+1,2*m+n))) .OR.
569  $ (.NOT.(lsvec .OR. rsvec) .AND. errest .AND.
570  $ (lwork .LT. max(7,4*n+n*n,2*m+n))) .OR.
571  $ (lsvec .AND. (.NOT.rsvec) .AND. (lwork .LT. max(7,2*m+n,4*n+1)))
572  $ .OR.
573  $ (rsvec .AND. (.NOT.lsvec) .AND. (lwork .LT. max(7,2*m+n,4*n+1)))
574  $ .OR.
575  $ (lsvec .AND. rsvec .AND. (.NOT.jracc) .AND.
576  $ (lwork.LT.max(2*m+n,6*n+2*n*n)))
577  $ .OR. (lsvec .AND. rsvec .AND. jracc .AND.
578  $ lwork.LT.max(2*m+n,4*n+n*n,2*n+n*n+6)))
579  $ THEN
580  info = - 17
581  ELSE
582 * #:)
583  info = 0
584  END IF
585 *
586  IF ( info .NE. 0 ) THEN
587 * #:(
588  CALL xerbla( 'SGEJSV', - info )
589  RETURN
590  END IF
591 *
592 * Quick return for void matrix (Y3K safe)
593 * #:)
594  IF ( ( m .EQ. 0 ) .OR. ( n .EQ. 0 ) ) RETURN
595 *
596 * Determine whether the matrix U should be M x N or M x M
597 *
598  IF ( lsvec ) THEN
599  n1 = n
600  IF ( lsame( jobu, 'F' ) ) n1 = m
601  END IF
602 *
603 * Set numerical parameters
604 *
605 *! NOTE: Make sure SLAMCH() does not fail on the target architecture.
606 *
607  epsln = slamch('Epsilon')
608  sfmin = slamch('SafeMinimum')
609  small = sfmin / epsln
610  big = slamch('O')
611 * BIG = ONE / SFMIN
612 *
613 * Initialize SVA(1:N) = diag( ||A e_i||_2 )_1^N
614 *
615 *(!) If necessary, scale SVA() to protect the largest norm from
616 * overflow. It is possible that this scaling pushes the smallest
617 * column norm below the underflow threshold (extreme case).
618 *
619  scalem = one / sqrt(float(m)*float(n))
620  noscal = .true.
621  goscal = .true.
622  DO 1874 p = 1, n
623  aapp = zero
624  aaqq = one
625  CALL slassq( m, a(1,p), 1, aapp, aaqq )
626  IF ( aapp .GT. big ) THEN
627  info = - 9
628  CALL xerbla( 'SGEJSV', -info )
629  RETURN
630  END IF
631  aaqq = sqrt(aaqq)
632  IF ( ( aapp .LT. (big / aaqq) ) .AND. noscal ) THEN
633  sva(p) = aapp * aaqq
634  ELSE
635  noscal = .false.
636  sva(p) = aapp * ( aaqq * scalem )
637  IF ( goscal ) THEN
638  goscal = .false.
639  CALL sscal( p-1, scalem, sva, 1 )
640  END IF
641  END IF
642  1874 CONTINUE
643 *
644  IF ( noscal ) scalem = one
645 *
646  aapp = zero
647  aaqq = big
648  DO 4781 p = 1, n
649  aapp = max( aapp, sva(p) )
650  IF ( sva(p) .NE. zero ) aaqq = min( aaqq, sva(p) )
651  4781 CONTINUE
652 *
653 * Quick return for zero M x N matrix
654 * #:)
655  IF ( aapp .EQ. zero ) THEN
656  IF ( lsvec ) CALL slaset( 'G', m, n1, zero, one, u, ldu )
657  IF ( rsvec ) CALL slaset( 'G', n, n, zero, one, v, ldv )
658  work(1) = one
659  work(2) = one
660  IF ( errest ) work(3) = one
661  IF ( lsvec .AND. rsvec ) THEN
662  work(4) = one
663  work(5) = one
664  END IF
665  IF ( l2tran ) THEN
666  work(6) = zero
667  work(7) = zero
668  END IF
669  iwork(1) = 0
670  iwork(2) = 0
671  iwork(3) = 0
672  RETURN
673  END IF
674 *
675 * Issue warning if denormalized column norms detected. Override the
676 * high relative accuracy request. Issue licence to kill columns
677 * (set them to zero) whose norm is less than sigma_max / BIG (roughly).
678 * #:(
679  warning = 0
680  IF ( aaqq .LE. sfmin ) THEN
681  l2rank = .true.
682  l2kill = .true.
683  warning = 1
684  END IF
685 *
686 * Quick return for one-column matrix
687 * #:)
688  IF ( n .EQ. 1 ) THEN
689 *
690  IF ( lsvec ) THEN
691  CALL slascl( 'G',0,0,sva(1),scalem, m,1,a(1,1),lda,ierr )
692  CALL slacpy( 'A', m, 1, a, lda, u, ldu )
693 * computing all M left singular vectors of the M x 1 matrix
694  IF ( n1 .NE. n ) THEN
695  CALL sgeqrf( m, n, u,ldu, work, work(n+1),lwork-n,ierr )
696  CALL sorgqr( m,n1,1, u,ldu,work,work(n+1),lwork-n,ierr )
697  CALL scopy( m, a(1,1), 1, u(1,1), 1 )
698  END IF
699  END IF
700  IF ( rsvec ) THEN
701  v(1,1) = one
702  END IF
703  IF ( sva(1) .LT. (big*scalem) ) THEN
704  sva(1) = sva(1) / scalem
705  scalem = one
706  END IF
707  work(1) = one / scalem
708  work(2) = one
709  IF ( sva(1) .NE. zero ) THEN
710  iwork(1) = 1
711  IF ( ( sva(1) / scalem) .GE. sfmin ) THEN
712  iwork(2) = 1
713  ELSE
714  iwork(2) = 0
715  END IF
716  ELSE
717  iwork(1) = 0
718  iwork(2) = 0
719  END IF
720  IF ( errest ) work(3) = one
721  IF ( lsvec .AND. rsvec ) THEN
722  work(4) = one
723  work(5) = one
724  END IF
725  IF ( l2tran ) THEN
726  work(6) = zero
727  work(7) = zero
728  END IF
729  RETURN
730 *
731  END IF
732 *
733  transp = .false.
734  l2tran = l2tran .AND. ( m .EQ. n )
735 *
736  aatmax = -one
737  aatmin = big
738  IF ( rowpiv .OR. l2tran ) THEN
739 *
740 * Compute the row norms, needed to determine row pivoting sequence
741 * (in the case of heavily row weighted A, row pivoting is strongly
742 * advised) and to collect information needed to compare the
743 * structures of A * A^t and A^t * A (in the case L2TRAN.EQ..TRUE.).
744 *
745  IF ( l2tran ) THEN
746  DO 1950 p = 1, m
747  xsc = zero
748  temp1 = one
749  CALL slassq( n, a(p,1), lda, xsc, temp1 )
750 * SLASSQ gets both the ell_2 and the ell_infinity norm
751 * in one pass through the vector
752  work(m+n+p) = xsc * scalem
753  work(n+p) = xsc * (scalem*sqrt(temp1))
754  aatmax = max( aatmax, work(n+p) )
755  IF (work(n+p) .NE. zero) aatmin = min(aatmin,work(n+p))
756  1950 CONTINUE
757  ELSE
758  DO 1904 p = 1, m
759  work(m+n+p) = scalem*abs( a(p,isamax(n,a(p,1),lda)) )
760  aatmax = max( aatmax, work(m+n+p) )
761  aatmin = min( aatmin, work(m+n+p) )
762  1904 CONTINUE
763  END IF
764 *
765  END IF
766 *
767 * For a square matrix A, try to determine whether A^t would be better
768 * input for the preconditioned Jacobi SVD, with faster convergence.
769 * The decision is based on an O(N) function of the vector of column
770 * and row norms of A, based on the Shannon entropy. This should give
771 * the right choice in most cases when the difference actually matters.
772 * It may fail and pick the slower converging side.
773 *
774  entra = zero
775  entrat = zero
776  IF ( l2tran ) THEN
777 *
778  xsc = zero
779  temp1 = one
780  CALL slassq( n, sva, 1, xsc, temp1 )
781  temp1 = one / temp1
782 *
783  entra = zero
784  DO 1113 p = 1, n
785  big1 = ( ( sva(p) / xsc )**2 ) * temp1
786  IF ( big1 .NE. zero ) entra = entra + big1 * alog(big1)
787  1113 CONTINUE
788  entra = - entra / alog(float(n))
789 *
790 * Now, SVA().^2/Trace(A^t * A) is a point in the probability simplex.
791 * It is derived from the diagonal of A^t * A. Do the same with the
792 * diagonal of A * A^t, compute the entropy of the corresponding
793 * probability distribution. Note that A * A^t and A^t * A have the
794 * same trace.
795 *
796  entrat = zero
797  DO 1114 p = n+1, n+m
798  big1 = ( ( work(p) / xsc )**2 ) * temp1
799  IF ( big1 .NE. zero ) entrat = entrat + big1 * alog(big1)
800  1114 CONTINUE
801  entrat = - entrat / alog(float(m))
802 *
803 * Analyze the entropies and decide A or A^t. Smaller entropy
804 * usually means better input for the algorithm.
805 *
806  transp = ( entrat .LT. entra )
807 *
808 * If A^t is better than A, transpose A.
809 *
810  IF ( transp ) THEN
811 * In an optimal implementation, this trivial transpose
812 * should be replaced with a faster transpose.
813  DO 1115 p = 1, n - 1
814  DO 1116 q = p + 1, n
815  temp1 = a(q,p)
816  a(q,p) = a(p,q)
817  a(p,q) = temp1
818  1116 CONTINUE
819  1115 CONTINUE
820  DO 1117 p = 1, n
821  work(m+n+p) = sva(p)
822  sva(p) = work(n+p)
823  1117 CONTINUE
824  temp1 = aapp
825  aapp = aatmax
826  aatmax = temp1
827  temp1 = aaqq
828  aaqq = aatmin
829  aatmin = temp1
830  kill = lsvec
831  lsvec = rsvec
832  rsvec = kill
833  IF ( lsvec ) n1 = n
834 *
835  rowpiv = .true.
836  END IF
837 *
838  END IF
839 * END IF L2TRAN
840 *
841 * Scale the matrix so that its maximal singular value remains less
842 * than SQRT(BIG) -- the matrix is scaled so that its maximal column
843 * has Euclidean norm equal to SQRT(BIG/N). The only reason to keep
844 * SQRT(BIG) instead of BIG is the fact that SGEJSV uses LAPACK and
845 * BLAS routines that, in some implementations, are not capable of
846 * working in the full interval [SFMIN,BIG] and that they may provoke
847 * overflows in the intermediate results. If the singular values spread
848 * from SFMIN to BIG, then SGESVJ will compute them. So, in that case,
849 * one should use SGESVJ instead of SGEJSV.
850 *
851  big1 = sqrt( big )
852  temp1 = sqrt( big / float(n) )
853 *
854  CALL slascl( 'G', 0, 0, aapp, temp1, n, 1, sva, n, ierr )
855  IF ( aaqq .GT. (aapp * sfmin) ) THEN
856  aaqq = ( aaqq / aapp ) * temp1
857  ELSE
858  aaqq = ( aaqq * temp1 ) / aapp
859  END IF
860  temp1 = temp1 * scalem
861  CALL slascl( 'G', 0, 0, aapp, temp1, m, n, a, lda, ierr )
862 *
863 * To undo scaling at the end of this procedure, multiply the
864 * computed singular values with USCAL2 / USCAL1.
865 *
866  uscal1 = temp1
867  uscal2 = aapp
868 *
869  IF ( l2kill ) THEN
870 * L2KILL enforces computation of nonzero singular values in
871 * the restricted range of condition number of the initial A,
872 * sigma_max(A) / sigma_min(A) approx. SQRT(BIG)/SQRT(SFMIN).
873  xsc = sqrt( sfmin )
874  ELSE
875  xsc = small
876 *
877 * Now, if the condition number of A is too big,
878 * sigma_max(A) / sigma_min(A) .GT. SQRT(BIG/N) * EPSLN / SFMIN,
879 * as a precautionary measure, the full SVD is computed using SGESVJ
880 * with accumulated Jacobi rotations. This provides numerically
881 * more robust computation, at the cost of slightly increased run
882 * time. Depending on the concrete implementation of BLAS and LAPACK
883 * (i.e. how they behave in presence of extreme ill-conditioning) the
884 * implementer may decide to remove this switch.
885  IF ( ( aaqq.LT.sqrt(sfmin) ) .AND. lsvec .AND. rsvec ) THEN
886  jracc = .true.
887  END IF
888 *
889  END IF
890  IF ( aaqq .LT. xsc ) THEN
891  DO 700 p = 1, n
892  IF ( sva(p) .LT. xsc ) THEN
893  CALL slaset( 'A', m, 1, zero, zero, a(1,p), lda )
894  sva(p) = zero
895  END IF
896  700 CONTINUE
897  END IF
898 *
899 * Preconditioning using QR factorization with pivoting
900 *
901  IF ( rowpiv ) THEN
902 * Optional row permutation (Bjoerck row pivoting):
903 * A result by Cox and Higham shows that Bjoerck's
904 * row pivoting combined with standard column pivoting
905 * has a similar effect to Powell-Reid complete pivoting.
906 * The ell-infinity norms of A are made nonincreasing.
907  DO 1952 p = 1, m - 1
908  q = isamax( m-p+1, work(m+n+p), 1 ) + p - 1
909  iwork(2*n+p) = q
910  IF ( p .NE. q ) THEN
911  temp1 = work(m+n+p)
912  work(m+n+p) = work(m+n+q)
913  work(m+n+q) = temp1
914  END IF
915  1952 CONTINUE
916  CALL slaswp( n, a, lda, 1, m-1, iwork(2*n+1), 1 )
917  END IF
918 *
919 * End of the preparation phase (scaling, optional sorting and
920 * transposing, optional flushing of small columns).
921 *
922 * Preconditioning
923 *
924 * If the full SVD is needed, the right singular vectors are computed
925 * from a matrix equation, and for that we need theoretical analysis
926 * of the Businger-Golub pivoting. So we use SGEQP3 as the first RR QRF.
927 * In all other cases the first RR QRF can be chosen by other criteria
928 * (e.g. speed, by replacing global with restricted window pivoting, such
929 * as in SGEQPX from TOMS # 782). Good results will be obtained using
930 * SGEQPX with properly (!) chosen numerical parameters.
931 * Any improvement of SGEQP3 improves the overall performance of SGEJSV.
932 *
933 * A * P1 = Q1 * [ R1^t 0]^t:
934  DO 1963 p = 1, n
935 * .. all columns are free columns
936  iwork(p) = 0
937  1963 CONTINUE
938  CALL sgeqp3( m,n,a,lda, iwork,work, work(n+1),lwork-n, ierr )
939 *
940 * The upper triangular matrix R1 from the first QRF is inspected for
941 * rank deficiency and possibilities for deflation, or possible
942 * ill-conditioning. Depending on the user specified flag L2RANK,
943 * the procedure explores possibilities to reduce the numerical
944 * rank by inspecting the computed upper triangular factor. If
945 * L2RANK or L2ABER are up, then SGEJSV will compute the SVD of
946 * A + dA, where ||dA|| <= f(M,N)*EPSLN.
947 *
948  nr = 1
949  IF ( l2aber ) THEN
950 * Standard absolute error bound suffices. All sigma_i with
951 * sigma_i < N*EPSLN*||A|| are flushed to zero. This is an
952 * aggressive enforcement of lower numerical rank by introducing a
953 * backward error of the order of N*EPSLN*||A||.
954  temp1 = sqrt(float(n))*epsln
955  DO 3001 p = 2, n
956  IF ( abs(a(p,p)) .GE. (temp1*abs(a(1,1))) ) THEN
957  nr = nr + 1
958  ELSE
959  GO TO 3002
960  END IF
961  3001 CONTINUE
962  3002 CONTINUE
963  ELSE IF ( l2rank ) THEN
964 * .. similar to the above, only slightly more gentle (less aggressive).
965 * A sudden drop on the diagonal of R1 is used as the criterion for
966 * close-to-rank-deficiency.
967  temp1 = sqrt(sfmin)
968  DO 3401 p = 2, n
969  IF ( ( abs(a(p,p)) .LT. (epsln*abs(a(p-1,p-1))) ) .OR.
970  $ ( abs(a(p,p)) .LT. small ) .OR.
971  $ ( l2kill .AND. (abs(a(p,p)) .LT. temp1) ) ) GO TO 3402
972  nr = nr + 1
973  3401 CONTINUE
974  3402 CONTINUE
975 *
976  ELSE
977 * The goal is high relative accuracy. However, if the matrix
978 * has high scaled condition number the relative accuracy is in
979 * general not feasible. Later on, a condition number estimator
980 * will be deployed to estimate the scaled condition number.
981 * Here we just remove the underflowed part of the triangular
982 * factor. This prevents the situation in which the code is
983 * working hard to get the accuracy not warranted by the data.
984  temp1 = sqrt(sfmin)
985  DO 3301 p = 2, n
986  IF ( ( abs(a(p,p)) .LT. small ) .OR.
987  $ ( l2kill .AND. (abs(a(p,p)) .LT. temp1) ) ) GO TO 3302
988  nr = nr + 1
989  3301 CONTINUE
990  3302 CONTINUE
991 *
992  END IF
993 *
994  almort = .false.
995  IF ( nr .EQ. n ) THEN
996  maxprj = one
997  DO 3051 p = 2, n
998  temp1 = abs(a(p,p)) / sva(iwork(p))
999  maxprj = min( maxprj, temp1 )
1000  3051 CONTINUE
1001  IF ( maxprj**2 .GE. one - float(n)*epsln ) almort = .true.
1002  END IF
1003 *
1004 *
1005  sconda = - one
1006  condr1 = - one
1007  condr2 = - one
1008 *
1009  IF ( errest ) THEN
1010  IF ( n .EQ. nr ) THEN
1011  IF ( rsvec ) THEN
1012 * .. V is available as workspace
1013  CALL slacpy( 'U', n, n, a, lda, v, ldv )
1014  DO 3053 p = 1, n
1015  temp1 = sva(iwork(p))
1016  CALL sscal( p, one/temp1, v(1,p), 1 )
1017  3053 CONTINUE
1018  CALL spocon( 'U', n, v, ldv, one, temp1,
1019  $ work(n+1), iwork(2*n+m+1), ierr )
1020  ELSE IF ( lsvec ) THEN
1021 * .. U is available as workspace
1022  CALL slacpy( 'U', n, n, a, lda, u, ldu )
1023  DO 3054 p = 1, n
1024  temp1 = sva(iwork(p))
1025  CALL sscal( p, one/temp1, u(1,p), 1 )
1026  3054 CONTINUE
1027  CALL spocon( 'U', n, u, ldu, one, temp1,
1028  $ work(n+1), iwork(2*n+m+1), ierr )
1029  ELSE
1030  CALL slacpy( 'U', n, n, a, lda, work(n+1), n )
1031  DO 3052 p = 1, n
1032  temp1 = sva(iwork(p))
1033  CALL sscal( p, one/temp1, work(n+(p-1)*n+1), 1 )
1034  3052 CONTINUE
1035 * .. the columns of R are scaled to have unit Euclidean lengths.
1036  CALL spocon( 'U', n, work(n+1), n, one, temp1,
1037  $ work(n+n*n+1), iwork(2*n+m+1), ierr )
1038  END IF
1039  sconda = one / sqrt(temp1)
1040 * SCONDA is an estimate of SQRT(||(R^t * R)^(-1)||_1).
1041 * N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
1042  ELSE
1043  sconda = - one
1044  END IF
1045  END IF
1046 *
1047  l2pert = l2pert .AND. ( abs( a(1,1)/a(nr,nr) ) .GT. sqrt(big1) )
1048 * If there is no violent scaling, artificial perturbation is not needed.
1049 *
1050 * Phase 3:
1051 *
1052  IF ( .NOT. ( rsvec .OR. lsvec ) ) THEN
1053 *
1054 * Singular Values only
1055 *
1056 * .. transpose A(1:NR,1:N)
1057  DO 1946 p = 1, min( n-1, nr )
1058  CALL scopy( n-p, a(p,p+1), lda, a(p+1,p), 1 )
1059  1946 CONTINUE
1060 *
1061 * The following two DO-loops introduce small relative perturbation
1062 * into the strict upper triangle of the lower triangular matrix.
1063 * Small entries below the main diagonal are also changed.
1064 * This modification is useful if the computing environment does not
1065 * provide/allow FLUSH TO ZERO underflow, for it prevents many
1066 * annoying denormalized numbers in case of strongly scaled matrices.
1067 * The perturbation is structured so that it does not introduce any
1068 * new perturbation of the singular values, and it does not destroy
1069 * the job done by the preconditioner.
1070 * The licence for this perturbation is in the variable L2PERT, which
1071 * should be .FALSE. if FLUSH TO ZERO underflow is active.
1072 *
1073  IF ( .NOT. almort ) THEN
1074 *
1075  IF ( l2pert ) THEN
1076 * XSC = SQRT(SMALL)
1077  xsc = epsln / float(n)
1078  DO 4947 q = 1, nr
1079  temp1 = xsc*abs(a(q,q))
1080  DO 4949 p = 1, n
1081  IF ( ( (p.GT.q) .AND. (abs(a(p,q)).LE.temp1) )
1082  $ .OR. ( p .LT. q ) )
1083  $ a(p,q) = sign( temp1, a(p,q) )
1084  4949 CONTINUE
1085  4947 CONTINUE
1086  ELSE
1087  CALL slaset( 'U', nr-1,nr-1, zero,zero, a(1,2),lda )
1088  END IF
1089 *
1090 * .. second preconditioning using the QR factorization
1091 *
1092  CALL sgeqrf( n,nr, a,lda, work, work(n+1),lwork-n, ierr )
1093 *
1094 * .. and transpose upper to lower triangular
1095  DO 1948 p = 1, nr - 1
1096  CALL scopy( nr-p, a(p,p+1), lda, a(p+1,p), 1 )
1097  1948 CONTINUE
1098 *
1099  END IF
1100 *
1101 * Row-cyclic Jacobi SVD algorithm with column pivoting
1102 *
1103 * .. again some perturbation (a "background noise") is added
1104 * to drown denormals
1105  IF ( l2pert ) THEN
1106 * XSC = SQRT(SMALL)
1107  xsc = epsln / float(n)
1108  DO 1947 q = 1, nr
1109  temp1 = xsc*abs(a(q,q))
1110  DO 1949 p = 1, nr
1111  IF ( ( (p.GT.q) .AND. (abs(a(p,q)).LE.temp1) )
1112  $ .OR. ( p .LT. q ) )
1113  $ a(p,q) = sign( temp1, a(p,q) )
1114  1949 CONTINUE
1115  1947 CONTINUE
1116  ELSE
1117  CALL slaset( 'U', nr-1, nr-1, zero, zero, a(1,2), lda )
1118  END IF
1119 *
1120 * .. and one-sided Jacobi rotations are started on a lower
1121 * triangular matrix (plus perturbation which is ignored in
1122 * the part which destroys triangular form (confusing?!))
1123 *
1124  CALL sgesvj( 'L', 'NoU', 'NoV', nr, nr, a, lda, sva,
1125  $ n, v, ldv, work, lwork, info )
1126 *
1127  scalem = work(1)
1128  numrank = nint(work(2))
1129 *
1130 *
1131  ELSE IF ( rsvec .AND. ( .NOT. lsvec ) ) THEN
1132 *
1133 * -> Singular Values and Right Singular Vectors <-
1134 *
1135  IF ( almort ) THEN
1136 *
1137 * .. in this case NR equals N
1138  DO 1998 p = 1, nr
1139  CALL scopy( n-p+1, a(p,p), lda, v(p,p), 1 )
1140  1998 CONTINUE
1141  CALL slaset( 'Upper', nr-1, nr-1, zero, zero, v(1,2), ldv )
1142 *
1143  CALL sgesvj( 'L','U','N', n, nr, v,ldv, sva, nr, a,lda,
1144  $ work, lwork, info )
1145  scalem = work(1)
1146  numrank = nint(work(2))
1147 
1148  ELSE
1149 *
1150 * .. two more QR factorizations ( one QRF is not enough, two require
1151 * accumulated product of Jacobi rotations, three are perfect )
1152 *
1153  CALL slaset( 'Lower', nr-1, nr-1, zero, zero, a(2,1), lda )
1154  CALL sgelqf( nr, n, a, lda, work, work(n+1), lwork-n, ierr)
1155  CALL slacpy( 'Lower', nr, nr, a, lda, v, ldv )
1156  CALL slaset( 'Upper', nr-1, nr-1, zero, zero, v(1,2), ldv )
1157  CALL sgeqrf( nr, nr, v, ldv, work(n+1), work(2*n+1),
1158  $ lwork-2*n, ierr )
1159  DO 8998 p = 1, nr
1160  CALL scopy( nr-p+1, v(p,p), ldv, v(p,p), 1 )
1161  8998 CONTINUE
1162  CALL slaset( 'Upper', nr-1, nr-1, zero, zero, v(1,2), ldv )
1163 *
1164  CALL sgesvj( 'Lower', 'U','N', nr, nr, v,ldv, sva, nr, u,
1165  $ ldu, work(n+1), lwork-n, info )
1166  scalem = work(n+1)
1167  numrank = nint(work(n+2))
1168  IF ( nr .LT. n ) THEN
1169  CALL slaset( 'A',n-nr, nr, zero,zero, v(nr+1,1), ldv )
1170  CALL slaset( 'A',nr, n-nr, zero,zero, v(1,nr+1), ldv )
1171  CALL slaset( 'A',n-nr,n-nr,zero,one, v(nr+1,nr+1), ldv )
1172  END IF
1173 *
1174  CALL sormlq( 'Left', 'Transpose', n, n, nr, a, lda, work,
1175  $ v, ldv, work(n+1), lwork-n, ierr )
1176 *
1177  END IF
1178 *
1179  DO 8991 p = 1, n
1180  CALL scopy( n, v(p,1), ldv, a(iwork(p),1), lda )
1181  8991 CONTINUE
1182  CALL slacpy( 'All', n, n, a, lda, v, ldv )
1183 *
1184  IF ( transp ) THEN
1185  CALL slacpy( 'All', n, n, v, ldv, u, ldu )
1186  END IF
1187 *
1188  ELSE IF ( lsvec .AND. ( .NOT. rsvec ) ) THEN
1189 *
1190 * .. Singular Values and Left Singular Vectors ..
1191 *
1192 * .. second preconditioning step to avoid need to accumulate
1193 * Jacobi rotations in the Jacobi iterations.
1194  DO 1965 p = 1, nr
1195  CALL scopy( n-p+1, a(p,p), lda, u(p,p), 1 )
1196  1965 CONTINUE
1197  CALL slaset( 'Upper', nr-1, nr-1, zero, zero, u(1,2), ldu )
1198 *
1199  CALL sgeqrf( n, nr, u, ldu, work(n+1), work(2*n+1),
1200  $ lwork-2*n, ierr )
1201 *
1202  DO 1967 p = 1, nr - 1
1203  CALL scopy( nr-p, u(p,p+1), ldu, u(p+1,p), 1 )
1204  1967 CONTINUE
1205  CALL slaset( 'Upper', nr-1, nr-1, zero, zero, u(1,2), ldu )
1206 *
1207  CALL sgesvj( 'Lower', 'U', 'N', nr,nr, u, ldu, sva, nr, a,
1208  $ lda, work(n+1), lwork-n, info )
1209  scalem = work(n+1)
1210  numrank = nint(work(n+2))
1211 *
1212  IF ( nr .LT. m ) THEN
1213  CALL slaset( 'A', m-nr, nr,zero, zero, u(nr+1,1), ldu )
1214  IF ( nr .LT. n1 ) THEN
1215  CALL slaset( 'A',nr, n1-nr, zero, zero, u(1,nr+1), ldu )
1216  CALL slaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu )
1217  END IF
1218  END IF
1219 *
1220  CALL sormqr( 'Left', 'No Tr', m, n1, n, a, lda, work, u,
1221  $ ldu, work(n+1), lwork-n, ierr )
1222 *
1223  IF ( rowpiv )
1224  $ CALL slaswp( n1, u, ldu, 1, m-1, iwork(2*n+1), -1 )
1225 *
1226  DO 1974 p = 1, n1
1227  xsc = one / snrm2( m, u(1,p), 1 )
1228  CALL sscal( m, xsc, u(1,p), 1 )
1229  1974 CONTINUE
1230 *
1231  IF ( transp ) THEN
1232  CALL slacpy( 'All', n, n, u, ldu, v, ldv )
1233  END IF
1234 *
1235  ELSE
1236 *
1237 * .. Full SVD ..
1238 *
1239  IF ( .NOT. jracc ) THEN
1240 *
1241  IF ( .NOT. almort ) THEN
1242 *
1243 * Second Preconditioning Step (QRF [with pivoting])
1244 * Note that the composition of TRANSPOSE, QRF and TRANSPOSE is
1245 * equivalent to an LQF CALL. Since in many libraries the QRF
1246 * seems to be better optimized than the LQF, we do explicit
1247 * transpose and use the QRF. This is subject to changes in an
1248 * optimized implementation of SGEJSV.
1249 *
1250  DO 1968 p = 1, nr
1251  CALL scopy( n-p+1, a(p,p), lda, v(p,p), 1 )
1252  1968 CONTINUE
1253 *
1254 * .. the following two loops perturb small entries to avoid
1255 * denormals in the second QR factorization, where they are
1256 * as good as zeros. This is done to avoid painfully slow
1257 * computation with denormals. The relative size of the perturbation
1258 * is a parameter that can be changed by the implementer.
1259 * This perturbation device will be obsolete on machines with
1260 * properly implemented arithmetic.
1261 * To switch it off, set L2PERT=.FALSE. To remove it from the
1262 * code, remove the action under L2PERT=.TRUE., leave the ELSE part.
1263 * The following two loops should be blocked and fused with the
1264 * transposed copy above.
1265 *
1266  IF ( l2pert ) THEN
1267  xsc = sqrt(small)
1268  DO 2969 q = 1, nr
1269  temp1 = xsc*abs( v(q,q) )
1270  DO 2968 p = 1, n
1271  IF ( ( p .GT. q ) .AND. ( abs(v(p,q)) .LE. temp1 )
1272  $ .OR. ( p .LT. q ) )
1273  $ v(p,q) = sign( temp1, v(p,q) )
1274  IF ( p .LT. q ) v(p,q) = - v(p,q)
1275  2968 CONTINUE
1276  2969 CONTINUE
1277  ELSE
1278  CALL slaset( 'U', nr-1, nr-1, zero, zero, v(1,2), ldv )
1279  END IF
1280 *
1281 * Estimate the row scaled condition number of R1
1282 * (If R1 is rectangular, N > NR, then the condition number
1283 * of the leading NR x NR submatrix is estimated.)
1284 *
1285  CALL slacpy( 'L', nr, nr, v, ldv, work(2*n+1), nr )
1286  DO 3950 p = 1, nr
1287  temp1 = snrm2(nr-p+1,work(2*n+(p-1)*nr+p),1)
1288  CALL sscal(nr-p+1,one/temp1,work(2*n+(p-1)*nr+p),1)
1289  3950 CONTINUE
1290  CALL spocon('Lower',nr,work(2*n+1),nr,one,temp1,
1291  $ work(2*n+nr*nr+1),iwork(m+2*n+1),ierr)
1292  condr1 = one / sqrt(temp1)
1293 * .. here we need a second opinion on the condition number
1294 * .. then assume the worst-case scenario
1295 * R1 is OK for inverse <=> CONDR1 .LT. FLOAT(N)
1296 * more conservative <=> CONDR1 .LT. SQRT(FLOAT(N))
1297 *
1298  cond_ok = sqrt(float(nr))
1299 *[TP] COND_OK is a tuning parameter.
1300 
1301  IF ( condr1 .LT. cond_ok ) THEN
1302 * .. the second QRF without pivoting. Note: in an optimized
1303 * implementation, this QRF should be implemented as the QRF
1304 * of a lower triangular matrix.
1305 * R1^t = Q2 * R2
1306  CALL sgeqrf( n, nr, v, ldv, work(n+1), work(2*n+1),
1307  $ lwork-2*n, ierr )
1308 *
1309  IF ( l2pert ) THEN
1310  xsc = sqrt(small)/epsln
1311  DO 3959 p = 2, nr
1312  DO 3958 q = 1, p - 1
1313  temp1 = xsc * min(abs(v(p,p)),abs(v(q,q)))
1314  IF ( abs(v(q,p)) .LE. temp1 )
1315  $ v(q,p) = sign( temp1, v(q,p) )
1316  3958 CONTINUE
1317  3959 CONTINUE
1318  END IF
1319 *
1320  IF ( nr .NE. n )
1321  $ CALL slacpy( 'A', n, nr, v, ldv, work(2*n+1), n )
1322 * .. save ...
1323 *
1324 * .. this transposed copy should be better than naive
1325  DO 1969 p = 1, nr - 1
1326  CALL scopy( nr-p, v(p,p+1), ldv, v(p+1,p), 1 )
1327  1969 CONTINUE
1328 *
1329  condr2 = condr1
1330 *
1331  ELSE
1332 *
1333 * .. ill-conditioned case: second QRF with pivoting
1334 * Note that windowed pivoting would be equally good
1335 * numerically, and more run-time efficient. So, in
1336 * an optimal implementation, the next call to SGEQP3
1337 * should be replaced with, e.g., CALL SGEQPX (ACM TOMS #782)
1338 * with properly (carefully) chosen parameters.
1339 *
1340 * R1^t * P2 = Q2 * R2
1341  DO 3003 p = 1, nr
1342  iwork(n+p) = 0
1343  3003 CONTINUE
1344  CALL sgeqp3( n, nr, v, ldv, iwork(n+1), work(n+1),
1345  $ work(2*n+1), lwork-2*n, ierr )
1346 ** CALL SGEQRF( N, NR, V, LDV, WORK(N+1), WORK(2*N+1),
1347 ** $ LWORK-2*N, IERR )
1348  IF ( l2pert ) THEN
1349  xsc = sqrt(small)
1350  DO 3969 p = 2, nr
1351  DO 3968 q = 1, p - 1
1352  temp1 = xsc * min(abs(v(p,p)),abs(v(q,q)))
1353  IF ( abs(v(q,p)) .LE. temp1 )
1354  $ v(q,p) = sign( temp1, v(q,p) )
1355  3968 CONTINUE
1356  3969 CONTINUE
1357  END IF
1358 *
1359  CALL slacpy( 'A', n, nr, v, ldv, work(2*n+1), n )
1360 *
1361  IF ( l2pert ) THEN
1362  xsc = sqrt(small)
1363  DO 8970 p = 2, nr
1364  DO 8971 q = 1, p - 1
1365  temp1 = xsc * min(abs(v(p,p)),abs(v(q,q)))
1366  v(p,q) = - sign( temp1, v(q,p) )
1367  8971 CONTINUE
1368  8970 CONTINUE
1369  ELSE
1370  CALL slaset( 'L',nr-1,nr-1,zero,zero,v(2,1),ldv )
1371  END IF
1372 * Now, compute R2 = L3 * Q3, the LQ factorization.
1373  CALL sgelqf( nr, nr, v, ldv, work(2*n+n*nr+1),
1374  $ work(2*n+n*nr+nr+1), lwork-2*n-n*nr-nr, ierr )
1375 * .. and estimate the condition number
1376  CALL slacpy( 'L',nr,nr,v,ldv,work(2*n+n*nr+nr+1),nr )
1377  DO 4950 p = 1, nr
1378  temp1 = snrm2( p, work(2*n+n*nr+nr+p), nr )
1379  CALL sscal( p, one/temp1, work(2*n+n*nr+nr+p), nr )
1380  4950 CONTINUE
1381  CALL spocon( 'L',nr,work(2*n+n*nr+nr+1),nr,one,temp1,
1382  $ work(2*n+n*nr+nr+nr*nr+1),iwork(m+2*n+1),ierr )
1383  condr2 = one / sqrt(temp1)
1384 *
1385  IF ( condr2 .GE. cond_ok ) THEN
1386 * .. save the Householder vectors used for Q3
1387 * (this overwrites the copy of R2, as it will not be
1388 * needed in this branch, but it does not overwrite the
1389 * Householder vectors of Q2).
1390  CALL slacpy( 'U', nr, nr, v, ldv, work(2*n+1), n )
1391 * .. and the rest of the information on Q3 is in
1392 * WORK(2*N+N*NR+1:2*N+N*NR+N)
1393  END IF
1394 *
1395  END IF
1396 *
1397  IF ( l2pert ) THEN
1398  xsc = sqrt(small)
1399  DO 4968 q = 2, nr
1400  temp1 = xsc * v(q,q)
1401  DO 4969 p = 1, q - 1
1402 * V(p,q) = - SIGN( TEMP1, V(q,p) )
1403  v(p,q) = - sign( temp1, v(p,q) )
1404  4969 CONTINUE
1405  4968 CONTINUE
1406  ELSE
1407  CALL slaset( 'U', nr-1,nr-1, zero,zero, v(1,2), ldv )
1408  END IF
1409 *
1410 * Second preconditioning finished; continue with Jacobi SVD
1411 * The input matrix is lower triangular.
1412 *
1413 * Recover the right singular vectors as solution of a well
1414 * conditioned triangular matrix equation.
1415 *
1416  IF ( condr1 .LT. cond_ok ) THEN
1417 *
1418  CALL sgesvj( 'L','U','N',nr,nr,v,ldv,sva,nr,u,
1419  $ ldu,work(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,info )
1420  scalem = work(2*n+n*nr+nr+1)
1421  numrank = nint(work(2*n+n*nr+nr+2))
1422  DO 3970 p = 1, nr
1423  CALL scopy( nr, v(1,p), 1, u(1,p), 1 )
1424  CALL sscal( nr, sva(p), v(1,p), 1 )
1425  3970 CONTINUE
1426 
1427 * .. pick the right matrix equation and solve it
1428 *
1429  IF ( nr .EQ. n ) THEN
1430 * :)) .. best case, R1 is inverted. The solution of this matrix
1431 * equation is Q2*V2 = the product of the Jacobi rotations
1432 * used in SGESVJ, premultiplied with the orthogonal matrix
1433 * from the second QR factorization.
1434  CALL strsm( 'L','U','N','N', nr,nr,one, a,lda, v,ldv )
1435  ELSE
1436 * .. R1 is well conditioned, but non-square. Transpose(R2)
1437 * is inverted to get the product of the Jacobi rotations
1438 * used in SGESVJ. The Q-factor from the second QR
1439 * factorization is then built in explicitly.
1440  CALL strsm('L','U','T','N',nr,nr,one,work(2*n+1),
1441  $ n,v,ldv)
1442  IF ( nr .LT. n ) THEN
1443  CALL slaset('A',n-nr,nr,zero,zero,v(nr+1,1),ldv)
1444  CALL slaset('A',nr,n-nr,zero,zero,v(1,nr+1),ldv)
1445  CALL slaset('A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv)
1446  END IF
1447  CALL sormqr('L','N',n,n,nr,work(2*n+1),n,work(n+1),
1448  $ v,ldv,work(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr)
1449  END IF
1450 *
1451  ELSE IF ( condr2 .LT. cond_ok ) THEN
1452 *
1453 * :) .. the input matrix A is very likely a relative of
1454 * the Kahan matrix :)
1455 * The matrix R2 is inverted. The solution of the matrix equation
1456 * is Q3^T*V3 = the product of the Jacobi rotations (applied to
1457 * the lower triangular L3 from the LQ factorization of
1458 * R2=L3*Q3), pre-multiplied with the transposed Q3.
1459  CALL sgesvj( 'L', 'U', 'N', nr, nr, v, ldv, sva, nr, u,
1460  $ ldu, work(2*n+n*nr+nr+1), lwork-2*n-n*nr-nr, info )
1461  scalem = work(2*n+n*nr+nr+1)
1462  numrank = nint(work(2*n+n*nr+nr+2))
1463  DO 3870 p = 1, nr
1464  CALL scopy( nr, v(1,p), 1, u(1,p), 1 )
1465  CALL sscal( nr, sva(p), u(1,p), 1 )
1466  3870 CONTINUE
1467  CALL strsm('L','U','N','N',nr,nr,one,work(2*n+1),n,u,ldu)
1468 * .. apply the permutation from the second QR factorization
1469  DO 873 q = 1, nr
1470  DO 872 p = 1, nr
1471  work(2*n+n*nr+nr+iwork(n+p)) = u(p,q)
1472  872 CONTINUE
1473  DO 874 p = 1, nr
1474  u(p,q) = work(2*n+n*nr+nr+p)
1475  874 CONTINUE
1476  873 CONTINUE
1477  IF ( nr .LT. n ) THEN
1478  CALL slaset( 'A',n-nr,nr,zero,zero,v(nr+1,1),ldv )
1479  CALL slaset( 'A',nr,n-nr,zero,zero,v(1,nr+1),ldv )
1480  CALL slaset( 'A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv )
1481  END IF
1482  CALL sormqr( 'L','N',n,n,nr,work(2*n+1),n,work(n+1),
1483  $ v,ldv,work(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr )
1484  ELSE
1485 * Last line of defense.
1486 * #:( This is a rather pathological case: no scaled condition
1487 * improvement after two pivoted QR factorizations. Another
1488 * possibility is that the rank revealing QR factorization
1489 * or the condition estimator has failed, or the COND_OK
1490 * is set very close to ONE (which is unnecessary). Normally,
1491 * this branch should never be executed, but in rare cases of
1492 * failure of the RRQR or condition estimator, the last line of
1493 * defense ensures that SGEJSV completes the task.
1494 * Compute the full SVD of L3 using SGESVJ with explicit
1495 * accumulation of Jacobi rotations.
1496  CALL sgesvj( 'L', 'U', 'V', nr, nr, v, ldv, sva, nr, u,
1497  $ ldu, work(2*n+n*nr+nr+1), lwork-2*n-n*nr-nr, info )
1498  scalem = work(2*n+n*nr+nr+1)
1499  numrank = nint(work(2*n+n*nr+nr+2))
1500  IF ( nr .LT. n ) THEN
1501  CALL slaset( 'A',n-nr,nr,zero,zero,v(nr+1,1),ldv )
1502  CALL slaset( 'A',nr,n-nr,zero,zero,v(1,nr+1),ldv )
1503  CALL slaset( 'A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv )
1504  END IF
1505  CALL sormqr( 'L','N',n,n,nr,work(2*n+1),n,work(n+1),
1506  $ v,ldv,work(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr )
1507 *
1508  CALL sormlq( 'L', 'T', nr, nr, nr, work(2*n+1), n,
1509  $ work(2*n+n*nr+1), u, ldu, work(2*n+n*nr+nr+1),
1510  $ lwork-2*n-n*nr-nr, ierr )
1511  DO 773 q = 1, nr
1512  DO 772 p = 1, nr
1513  work(2*n+n*nr+nr+iwork(n+p)) = u(p,q)
1514  772 CONTINUE
1515  DO 774 p = 1, nr
1516  u(p,q) = work(2*n+n*nr+nr+p)
1517  774 CONTINUE
1518  773 CONTINUE
1519 *
1520  END IF
1521 *
1522 * Permute the rows of V using the (column) permutation from the
1523 * first QRF. Also, rescale the columns so that each has unit
1524 * Euclidean norm. This applies to all cases.
1525 *
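*           (IWORK(1:N) holds the column pivots of the initial pivoted QRF;
*            each column of V is routed through scratch workspace to undo
*            that permutation, and a column is rescaled to unit length only
*            if its norm has drifted from one by more than about
*            sqrt(N)*EPSLN.)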
1526  temp1 = sqrt(float(n)) * epsln
1527  DO 1972 q = 1, n
1528  DO 972 p = 1, n
1529  work(2*n+n*nr+nr+iwork(p)) = v(p,q)
1530  972 CONTINUE
1531  DO 973 p = 1, n
1532  v(p,q) = work(2*n+n*nr+nr+p)
1533  973 CONTINUE
1534  xsc = one / snrm2( n, v(1,q), 1 )
1535  IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
1536  $ CALL sscal( n, xsc, v(1,q), 1 )
1537  1972 CONTINUE
1538 * At this moment, V contains the right singular vectors of A.
1539 * Next, assemble the left singular vector matrix U (M x N).
1540  IF ( nr .LT. m ) THEN
1541  CALL slaset( 'A', m-nr, nr, zero, zero, u(nr+1,1), ldu )
1542  IF ( nr .LT. n1 ) THEN
1543  CALL slaset('A',nr,n1-nr,zero,zero,u(1,nr+1),ldu)
1544  CALL slaset('A',m-nr,n1-nr,zero,one,u(nr+1,nr+1),ldu)
1545  END IF
1546  END IF
1547 *
1548 * The Q matrix from the first QRF is built into the left singular
1549 * matrix U. This applies to all cases.
1550 *
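*           (The Householder vectors of the initial QRF are still stored in
*            A, with the scalar factors in WORK(1:N); SORMQR applies that
*            Q factor to the first N1 columns of U.)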
1551  CALL sormqr( 'Left', 'No_Tr', m, n1, n, a, lda, work, u,
1552  $ ldu, work(n+1), lwork-n, ierr )
1553 
1554 * The columns of U are normalized. The cost is O(M*N) flops.
1555  temp1 = sqrt(float(m)) * epsln
1556  DO 1973 p = 1, nr
1557  xsc = one / snrm2( m, u(1,p), 1 )
1558  IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
1559  $ CALL sscal( m, xsc, u(1,p), 1 )
1560  1973 CONTINUE
1561 *
1562 * If the initial QRF is computed with row pivoting, the left
1563 * singular vectors must be adjusted.
1564 *
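*           (IWORK(2*N+1) onwards is assumed to record the row interchanges
*            of the row-pivoted initial QRF; SLASWP with INCX = -1 applies
*            them to U in reverse order.)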
1565  IF ( rowpiv )
1566  $ CALL slaswp( n1, u, ldu, 1, m-1, iwork(2*n+1), -1 )
1567 *
1568  ELSE
1569 *
1570 * .. the initial matrix A has almost orthogonal columns and
1571 * the second QRF is not needed
1572 *
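*           (R1 from the initial QRF is copied from the upper triangle of A
*            into WORK(N+1) as an N-by-N array with leading dimension N; its
*            strict lower triangle is perturbed (if L2PERT) or zeroed, and
*            SGESVJ is applied to this triangular matrix directly.)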
1573  CALL slacpy( 'Upper', n, n, a, lda, work(n+1), n )
1574  IF ( l2pert ) THEN
1575  xsc = sqrt(small)
1576  DO 5970 p = 2, n
1577  temp1 = xsc * work( n + (p-1)*n + p )
1578  DO 5971 q = 1, p - 1
1579  work(n+(q-1)*n+p)=-sign(temp1,work(n+(p-1)*n+q))
1580  5971 CONTINUE
1581  5970 CONTINUE
1582  ELSE
1583  CALL slaset( 'Lower',n-1,n-1,zero,zero,work(n+2),n )
1584  END IF
1585 *
1586  CALL sgesvj( 'Upper', 'U', 'N', n, n, work(n+1), n, sva,
1587  $ n, u, ldu, work(n+n*n+1), lwork-n-n*n, info )
1588 *
1589  scalem = work(n+n*n+1)
1590  numrank = nint(work(n+n*n+2))
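*           (The left singular vectors overwriting WORK(N+1) are copied into
*            U; the workspace copy is then scaled column by column into
*            U*SIGMA, back-substitution with R1 (still in the upper triangle
*            of A) yields the right singular vectors, and their rows are
*            scattered into V according to the pivots in IWORK(1:N).)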
1591  DO 6970 p = 1, n
1592  CALL scopy( n, work(n+(p-1)*n+1), 1, u(1,p), 1 )
1593  CALL sscal( n, sva(p), work(n+(p-1)*n+1), 1 )
1594  6970 CONTINUE
1595 *
1596  CALL strsm( 'Left', 'Upper', 'NoTrans', 'No UD', n, n,
1597  $ one, a, lda, work(n+1), n )
1598  DO 6972 p = 1, n
1599  CALL scopy( n, work(n+p), n, v(iwork(p),1), ldv )
1600  6972 CONTINUE
1601  temp1 = sqrt(float(n))*epsln
1602  DO 6971 p = 1, n
1603  xsc = one / snrm2( n, v(1,p), 1 )
1604  IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
1605  $ CALL sscal( n, xsc, v(1,p), 1 )
1606  6971 CONTINUE
1607 *
1608 * Assemble the left singular vector matrix U (M x N).
1609 *
1610  IF ( n .LT. m ) THEN
1611  CALL slaset( 'A', m-n, n, zero, zero, u(n+1,1), ldu )
1612  IF ( n .LT. n1 ) THEN
1613  CALL slaset( 'A',n, n1-n, zero, zero, u(1,n+1),ldu )
1614  CALL slaset( 'A',m-n,n1-n, zero, one,u(n+1,n+1),ldu )
1615  END IF
1616  END IF
1617  CALL sormqr( 'Left', 'No Tr', m, n1, n, a, lda, work, u,
1618  $ ldu, work(n+1), lwork-n, ierr )
1619  temp1 = sqrt(float(m))*epsln
1620  DO 6973 p = 1, n1
1621  xsc = one / snrm2( m, u(1,p), 1 )
1622  IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
1623  $ CALL sscal( m, xsc, u(1,p), 1 )
1624  6973 CONTINUE
1625 *
1626  IF ( rowpiv )
1627  $ CALL slaswp( n1, u, ldu, 1, m-1, iwork(2*n+1), -1 )
1628 *
1629  END IF
1630 *
1631 * end of the >> almost orthogonal case << in the full SVD
1632 *
1633  ELSE
1634 *
1635 * This branch deploys a preconditioned Jacobi SVD with explicitly
1636 * accumulated rotations. It is included as an option, mainly for
1637 * experimental purposes. It performs well and can also be used.
1638 * In this implementation, this branch will be automatically activated
1639 * if the condition number sigma_max(A) / sigma_min(A) is predicted
1640 * to be greater than the overflow threshold. This is because the
1641 * a posteriori computation of the singular vectors assumes a robust
1642 * implementation of BLAS and some LAPACK procedures, capable of working
1643 * in the presence of extreme values. Since that is not always the case, ...
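*           (R from the initial QRF, held in the upper triangle of A, is
*            transposed into V; a QRF of that N-by-NR matrix is computed and
*            saved in WORK(2*N+1); SGESVJ then runs on the transposed
*            triangular factor placed in U, with the right singular vectors
*            accumulated in V, and the Q factors are applied afterwards
*            with SORMQR.)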
1644 *
1645  DO 7968 p = 1, nr
1646  CALL scopy( n-p+1, a(p,p), lda, v(p,p), 1 )
1647  7968 CONTINUE
1648 *
1649  IF ( l2pert ) THEN
1650  xsc = sqrt(small/epsln)
1651  DO 5969 q = 1, nr
1652  temp1 = xsc*abs( v(q,q) )
1653  DO 5968 p = 1, n
1654  IF ( ( p .GT. q ) .AND. ( abs(v(p,q)) .LE. temp1 )
1655  $ .OR. ( p .LT. q ) )
1656  $ v(p,q) = sign( temp1, v(p,q) )
1657  IF ( p .LT. q ) v(p,q) = - v(p,q)
1658  5968 CONTINUE
1659  5969 CONTINUE
1660  ELSE
1661  CALL slaset( 'U', nr-1, nr-1, zero, zero, v(1,2), ldv )
1662  END IF
1663 
1664  CALL sgeqrf( n, nr, v, ldv, work(n+1), work(2*n+1),
1665  $ lwork-2*n, ierr )
1666  CALL slacpy( 'L', n, nr, v, ldv, work(2*n+1), n )
1667 *
1668  DO 7969 p = 1, nr
1669  CALL scopy( nr-p+1, v(p,p), ldv, u(p,p), 1 )
1670  7969 CONTINUE
1671 
1672  IF ( l2pert ) THEN
1673  xsc = sqrt(small/epsln)
1674  DO 9970 q = 2, nr
1675  DO 9971 p = 1, q - 1
1676  temp1 = xsc * min(abs(u(p,p)),abs(u(q,q)))
1677  u(p,q) = - sign( temp1, u(q,p) )
1678  9971 CONTINUE
1679  9970 CONTINUE
1680  ELSE
1681  CALL slaset('U', nr-1, nr-1, zero, zero, u(1,2), ldu )
1682  END IF
1683 
1684  CALL sgesvj( 'L', 'U', 'V', nr, nr, u, ldu, sva,
1685  $ n, v, ldv, work(2*n+n*nr+1), lwork-2*n-n*nr, info )
1686  scalem = work(2*n+n*nr+1)
1687  numrank = nint(work(2*n+n*nr+2))
1688 
1689  IF ( nr .LT. n ) THEN
1690  CALL slaset( 'A',n-nr,nr,zero,zero,v(nr+1,1),ldv )
1691  CALL slaset( 'A',nr,n-nr,zero,zero,v(1,nr+1),ldv )
1692  CALL slaset( 'A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv )
1693  END IF
1694 
1695  CALL sormqr( 'L','N',n,n,nr,work(2*n+1),n,work(n+1),
1696  $ v,ldv,work(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr )
1697 *
1698 * Permute the rows of V using the (column) permutation from the
1699 * first QRF. Also, rescale the columns so that each has unit
1700 * Euclidean norm. This applies to all cases.
1701 *
1702  temp1 = sqrt(float(n)) * epsln
1703  DO 7972 q = 1, n
1704  DO 8972 p = 1, n
1705  work(2*n+n*nr+nr+iwork(p)) = v(p,q)
1706  8972 CONTINUE
1707  DO 8973 p = 1, n
1708  v(p,q) = work(2*n+n*nr+nr+p)
1709  8973 CONTINUE
1710  xsc = one / snrm2( n, v(1,q), 1 )
1711  IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
1712  $ CALL sscal( n, xsc, v(1,q), 1 )
1713  7972 CONTINUE
1714 *
1715 * At this moment, V contains the right singular vectors of A.
1716 * Next, assemble the left singular vector matrix U (M x N).
1717 *
1718  IF ( nr .LT. m ) THEN
1719  CALL slaset( 'A', m-nr, nr, zero, zero, u(nr+1,1), ldu )
1720  IF ( nr .LT. n1 ) THEN
1721  CALL slaset( 'A',nr, n1-nr, zero, zero, u(1,nr+1),ldu )
1722  CALL slaset( 'A',m-nr,n1-nr, zero, one,u(nr+1,nr+1),ldu )
1723  END IF
1724  END IF
1725 *
1726  CALL sormqr( 'Left', 'No Tr', m, n1, n, a, lda, work, u,
1727  $ ldu, work(n+1), lwork-n, ierr )
1728 *
1729  IF ( rowpiv )
1730  $ CALL slaswp( n1, u, ldu, 1, m-1, iwork(2*n+1), -1 )
1731 *
1732 *
1733  END IF
1734  IF ( transp ) THEN
1735 * .. swap U and V because the procedure worked on A^t
1736  DO 6974 p = 1, n
1737  CALL sswap( n, u(1,p), 1, v(1,p), 1 )
1738  6974 CONTINUE
1739  END IF
1740 *
1741  END IF
1742 * end of the full SVD
1743 *
1744 * Undo scaling, if necessary (and possible)
1745 *
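*           (The singular values were computed for a scaled copy of A. If
*            SVA can absorb the factor USCAL2/USCAL1 without overflow, it is
*            folded in by SLASCL and the returned scale factors reduce to
*            ONE; otherwise the caller recovers it from WORK(1:2).)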
1746  IF ( uscal2 .LE. (big/sva(1))*uscal1 ) THEN
1747  CALL slascl( 'G', 0, 0, uscal1, uscal2, nr, 1, sva, n, ierr )
1748  uscal1 = one
1749  uscal2 = one
1750  END IF
1751 *
1752  IF ( nr .LT. n ) THEN
1753  DO 3004 p = nr+1, n
1754  sva(p) = zero
1755  3004 CONTINUE
1756  END IF
1757 *
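*           (Pack the remaining output: WORK(1:2) are scale factors such that
*            WORK(1)/WORK(2)*SVA(1:N) are the singular values of A, WORK(3)
*            is the condition estimate SCONDA if requested, WORK(4:5) are the
*            scaled condition numbers CONDR1 and CONDR2 of the preconditioners,
*            WORK(6:7) are the entropy measures used in the transpose test,
*            and IWORK(1:3) return the two rank estimates and the warning
*            flag.)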
1758  work(1) = uscal2 * scalem
1759  work(2) = uscal1
1760  IF ( errest ) work(3) = sconda
1761  IF ( lsvec .AND. rsvec ) THEN
1762  work(4) = condr1
1763  work(5) = condr2
1764  END IF
1765  IF ( l2tran ) THEN
1766  work(6) = entra
1767  work(7) = entrat
1768  END IF
1769 *
1770  iwork(1) = nr
1771  iwork(2) = numrank
1772  iwork(3) = warning
1773 *
1774  RETURN
1775 * ..
1776 * .. END OF SGEJSV
1777 * ..
1778  END
1779 *