001:       SUBROUTINE SGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK,
002:      $                   WORK, LWORK, INFO )
003: *
004: *  -- LAPACK driver routine (version 3.2) --
005: *     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
006: *     November 2006
007: *
008: *     .. Scalar Arguments ..
009:       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
010:       REAL               RCOND
011: *     ..
012: *     .. Array Arguments ..
013:       INTEGER            JPVT( * )
014:       REAL               A( LDA, * ), B( LDB, * ), WORK( * )
015: *     ..
016: *
017: *  Purpose
018: *  =======
019: *
020: *  SGELSY computes the minimum-norm solution to a real linear least
021: *  squares problem:
022: *      minimize || A * X - B ||
023: *  using a complete orthogonal factorization of A.  A is an M-by-N
024: *  matrix which may be rank-deficient.
025: *
026: *  Several right hand side vectors b and solution vectors x can be
027: *  handled in a single call; they are stored as the columns of the
028: *  M-by-NRHS right hand side matrix B and the N-by-NRHS solution
029: *  matrix X.
030: *
031: *  The routine first computes a QR factorization with column pivoting:
032: *      A * P = Q * [ R11 R12 ]
033: *                  [  0  R22 ]
034: *  with R11 defined as the largest leading submatrix whose estimated
035: *  condition number is less than 1/RCOND.  The order of R11, RANK,
036: *  is the effective rank of A.
037: *
038: *  Then, R22 is considered to be negligible, and R12 is annihilated
039: *  by orthogonal transformations from the right, arriving at the
040: *  complete orthogonal factorization:
041: *     A * P = Q * [ T11 0 ] * Z
042: *                 [  0  0 ]
043: *  The minimum-norm solution is then
044: *     X = P * Z' [ inv(T11)*Q1'*B ]
045: *                [        0       ]
046: *  where Q1 consists of the first RANK columns of Q.
047: *
048: *  This routine is basically identical to the original xGELSX except
049: *  three differences:
050: *    o The call to the subroutine xGEQPF has been substituted by the
051: *      the call to the subroutine xGEQP3. This subroutine is a Blas-3
052: *      version of the QR factorization with column pivoting.
053: *    o Matrix B (the right hand side) is updated with Blas-3.
054: *    o The permutation of matrix B (the right hand side) is faster and
055: *      more simple.
056: *
057: *  Arguments
058: *  =========
059: *
060: *  M       (input) INTEGER
061: *          The number of rows of the matrix A.  M >= 0.
062: *
063: *  N       (input) INTEGER
064: *          The number of columns of the matrix A.  N >= 0.
065: *
066: *  NRHS    (input) INTEGER
067: *          The number of right hand sides, i.e., the number of
068: *          columns of matrices B and X. NRHS >= 0.
069: *
070: *  A       (input/output) REAL array, dimension (LDA,N)
071: *          On entry, the M-by-N matrix A.
072: *          On exit, A has been overwritten by details of its
073: *          complete orthogonal factorization.
074: *
075: *  LDA     (input) INTEGER
076: *          The leading dimension of the array A.  LDA >= max(1,M).
077: *
078: *  B       (input/output) REAL array, dimension (LDB,NRHS)
079: *          On entry, the M-by-NRHS right hand side matrix B.
080: *          On exit, the N-by-NRHS solution matrix X.
081: *
082: *  LDB     (input) INTEGER
083: *          The leading dimension of the array B. LDB >= max(1,M,N).
084: *
085: *  JPVT    (input/output) INTEGER array, dimension (N)
086: *          On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted
087: *          to the front of AP, otherwise column i is a free column.
088: *          On exit, if JPVT(i) = k, then the i-th column of AP
089: *          was the k-th column of A.
090: *
091: *  RCOND   (input) REAL
092: *          RCOND is used to determine the effective rank of A, which
093: *          is defined as the order of the largest leading triangular
094: *          submatrix R11 in the QR factorization with pivoting of A,
095: *          whose estimated condition number < 1/RCOND.
096: *
097: *  RANK    (output) INTEGER
098: *          The effective rank of A, i.e., the order of the submatrix
099: *          R11.  This is the same as the order of the submatrix T11
100: *          in the complete orthogonal factorization of A.
101: *
102: *  WORK    (workspace/output) REAL array, dimension (MAX(1,LWORK))
103: *          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
104: *
105: *  LWORK   (input) INTEGER
106: *          The dimension of the array WORK.
107: *          The unblocked strategy requires that:
108: *             LWORK >= MAX( MN+3*N+1, 2*MN+NRHS ),
109: *          where MN = min( M, N ).
110: *          The block algorithm requires that:
111: *             LWORK >= MAX( MN+2*N+NB*(N+1), 2*MN+NB*NRHS ),
112: *          where NB is an upper bound on the blocksize returned
113: *          by ILAENV for the routines SGEQP3, STZRZF, STZRQF, SORMQR,
114: *          and SORMRZ.
115: *
116: *          If LWORK = -1, then a workspace query is assumed; the routine
117: *          only calculates the optimal size of the WORK array, returns
118: *          this value as the first entry of the WORK array, and no error
119: *          message related to LWORK is issued by XERBLA.
120: *
121: *  INFO    (output) INTEGER
122: *          = 0: successful exit
123: *          < 0: If INFO = -i, the i-th argument had an illegal value.
124: *
125: *  Further Details
126: *  ===============
127: *
128: *  Based on contributions by
129: *    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
130: *    E. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain
131: *    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain
132: *
133: *  =====================================================================
134: *
135: *     .. Parameters ..
136:       INTEGER            IMAX, IMIN
137:       PARAMETER          ( IMAX = 1, IMIN = 2 )
138:       REAL               ZERO, ONE
139:       PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
140: *     ..
141: *     .. Local Scalars ..
142:       LOGICAL            LQUERY
143:       INTEGER            I, IASCL, IBSCL, ISMAX, ISMIN, J, LWKMIN,
144:      $                   LWKOPT, MN, NB, NB1, NB2, NB3, NB4
145:       REAL               ANRM, BIGNUM, BNRM, C1, C2, S1, S2, SMAX,
146:      $                   SMAXPR, SMIN, SMINPR, SMLNUM, WSIZE
147: *     ..
148: *     .. External Functions ..
149:       INTEGER            ILAENV
150:       REAL               SLAMCH, SLANGE
151:       EXTERNAL           ILAENV, SLAMCH, SLANGE
152: *     ..
153: *     .. External Subroutines ..
154:       EXTERNAL           SCOPY, SGEQP3, SLABAD, SLAIC1, SLASCL, SLASET,
155:      $                   SORMQR, SORMRZ, STRSM, STZRZF, XERBLA
156: *     ..
157: *     .. Intrinsic Functions ..
158:       INTRINSIC          ABS, MAX, MIN
159: *     ..
160: *     .. Executable Statements ..
161: *
162:       MN = MIN( M, N )
163:       ISMIN = MN + 1
164:       ISMAX = 2*MN + 1
165: *
166: *     Test the input arguments.
167: *
168:       INFO = 0
169:       LQUERY = ( LWORK.EQ.-1 )
170:       IF( M.LT.0 ) THEN
171:          INFO = -1
172:       ELSE IF( N.LT.0 ) THEN
173:          INFO = -2
174:       ELSE IF( NRHS.LT.0 ) THEN
175:          INFO = -3
176:       ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
177:          INFO = -5
178:       ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN
179:          INFO = -7
180:       END IF
181: *
182: *     Figure out optimal block size
183: *
184:       IF( INFO.EQ.0 ) THEN
185:          IF( MN.EQ.0 .OR. NRHS.EQ.0 ) THEN
186:             LWKMIN = 1
187:             LWKOPT = 1
188:          ELSE
189:             NB1 = ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
190:             NB2 = ILAENV( 1, 'SGERQF', ' ', M, N, -1, -1 )
191:             NB3 = ILAENV( 1, 'SORMQR', ' ', M, N, NRHS, -1 )
192:             NB4 = ILAENV( 1, 'SORMRQ', ' ', M, N, NRHS, -1 )
193:             NB = MAX( NB1, NB2, NB3, NB4 )
194:             LWKMIN = MN + MAX( 2*MN, N + 1, MN + NRHS )
195:             LWKOPT = MAX( LWKMIN,
196:      $                    MN + 2*N + NB*( N + 1 ), 2*MN + NB*NRHS )
197:          END IF
198:          WORK( 1 ) = LWKOPT
199: *
200:          IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN
201:             INFO = -12
202:          END IF
203:       END IF
204: *
205:       IF( INFO.NE.0 ) THEN
206:          CALL XERBLA( 'SGELSY', -INFO )
207:          RETURN
208:       ELSE IF( LQUERY ) THEN
209:          RETURN
210:       END IF
211: *
212: *     Quick return if possible
213: *
214:       IF( MN.EQ.0 .OR. NRHS.EQ.0 ) THEN
215:          RANK = 0
216:          RETURN
217:       END IF
218: *
219: *     Get machine parameters
220: *
221:       SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' )
222:       BIGNUM = ONE / SMLNUM
223:       CALL SLABAD( SMLNUM, BIGNUM )
224: *
225: *     Scale A, B if max entries outside range [SMLNUM,BIGNUM]
226: *
227:       ANRM = SLANGE( 'M', M, N, A, LDA, WORK )
228:       IASCL = 0
229:       IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
230: *
231: *        Scale matrix norm up to SMLNUM
232: *
233:          CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
234:          IASCL = 1
235:       ELSE IF( ANRM.GT.BIGNUM ) THEN
236: *
237: *        Scale matrix norm down to BIGNUM
238: *
239:          CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
240:          IASCL = 2
241:       ELSE IF( ANRM.EQ.ZERO ) THEN
242: *
243: *        Matrix all zero. Return zero solution.
244: *
245:          CALL SLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
246:          RANK = 0
247:          GO TO 70
248:       END IF
249: *
250:       BNRM = SLANGE( 'M', M, NRHS, B, LDB, WORK )
251:       IBSCL = 0
252:       IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
253: *
254: *        Scale matrix norm up to SMLNUM
255: *
256:          CALL SLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
257:          IBSCL = 1
258:       ELSE IF( BNRM.GT.BIGNUM ) THEN
259: *
260: *        Scale matrix norm down to BIGNUM
261: *
262:          CALL SLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
263:          IBSCL = 2
264:       END IF
265: *
266: *     Compute QR factorization with column pivoting of A:
267: *        A * P = Q * R
268: *
269:       CALL SGEQP3( M, N, A, LDA, JPVT, WORK( 1 ), WORK( MN+1 ),
270:      $             LWORK-MN, INFO )
271:       WSIZE = MN + WORK( MN+1 )
272: *
273: *     workspace: MN+2*N+NB*(N+1).
274: *     Details of Householder rotations stored in WORK(1:MN).
275: *
276: *     Determine RANK using incremental condition estimation
277: *
278:       WORK( ISMIN ) = ONE
279:       WORK( ISMAX ) = ONE
280:       SMAX = ABS( A( 1, 1 ) )
281:       SMIN = SMAX
282:       IF( ABS( A( 1, 1 ) ).EQ.ZERO ) THEN
283:          RANK = 0
284:          CALL SLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
285:          GO TO 70
286:       ELSE
287:          RANK = 1
288:       END IF
289: *
290:    10 CONTINUE
291:       IF( RANK.LT.MN ) THEN
292:          I = RANK + 1
293:          CALL SLAIC1( IMIN, RANK, WORK( ISMIN ), SMIN, A( 1, I ),
294:      $                A( I, I ), SMINPR, S1, C1 )
295:          CALL SLAIC1( IMAX, RANK, WORK( ISMAX ), SMAX, A( 1, I ),
296:      $                A( I, I ), SMAXPR, S2, C2 )
297: *
298:          IF( SMAXPR*RCOND.LE.SMINPR ) THEN
299:             DO 20 I = 1, RANK
300:                WORK( ISMIN+I-1 ) = S1*WORK( ISMIN+I-1 )
301:                WORK( ISMAX+I-1 ) = S2*WORK( ISMAX+I-1 )
302:    20       CONTINUE
303:             WORK( ISMIN+RANK ) = C1
304:             WORK( ISMAX+RANK ) = C2
305:             SMIN = SMINPR
306:             SMAX = SMAXPR
307:             RANK = RANK + 1
308:             GO TO 10
309:          END IF
310:       END IF
311: *
312: *     workspace: 3*MN.
313: *
314: *     Logically partition R = [ R11 R12 ]
315: *                             [  0  R22 ]
316: *     where R11 = R(1:RANK,1:RANK)
317: *
318: *     [R11,R12] = [ T11, 0 ] * Y
319: *
320:       IF( RANK.LT.N )
321:      $   CALL STZRZF( RANK, N, A, LDA, WORK( MN+1 ), WORK( 2*MN+1 ),
322:      $                LWORK-2*MN, INFO )
323: *
324: *     workspace: 2*MN.
325: *     Details of Householder rotations stored in WORK(MN+1:2*MN)
326: *
327: *     B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS)
328: *
329:       CALL SORMQR( 'Left', 'Transpose', M, NRHS, MN, A, LDA, WORK( 1 ),
330:      $             B, LDB, WORK( 2*MN+1 ), LWORK-2*MN, INFO )
331:       WSIZE = MAX( WSIZE, 2*MN+WORK( 2*MN+1 ) )
332: *
333: *     workspace: 2*MN+NB*NRHS.
334: *
335: *     B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS)
336: *
337:       CALL STRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', RANK,
338:      $            NRHS, ONE, A, LDA, B, LDB )
339: *
340:       DO 40 J = 1, NRHS
341:          DO 30 I = RANK + 1, N
342:             B( I, J ) = ZERO
343:    30    CONTINUE
344:    40 CONTINUE
345: *
346: *     B(1:N,1:NRHS) := Y' * B(1:N,1:NRHS)
347: *
348:       IF( RANK.LT.N ) THEN
349:          CALL SORMRZ( 'Left', 'Transpose', N, NRHS, RANK, N-RANK, A,
350:      $                LDA, WORK( MN+1 ), B, LDB, WORK( 2*MN+1 ),
351:      $                LWORK-2*MN, INFO )
352:       END IF
353: *
354: *     workspace: 2*MN+NRHS.
355: *
356: *     B(1:N,1:NRHS) := P * B(1:N,1:NRHS)
357: *
358:       DO 60 J = 1, NRHS
359:          DO 50 I = 1, N
360:             WORK( JPVT( I ) ) = B( I, J )
361:    50    CONTINUE
362:          CALL SCOPY( N, WORK( 1 ), 1, B( 1, J ), 1 )
363:    60 CONTINUE
364: *
365: *     workspace: N.
366: *
367: *     Undo scaling
368: *
369:       IF( IASCL.EQ.1 ) THEN
370:          CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
371:          CALL SLASCL( 'U', 0, 0, SMLNUM, ANRM, RANK, RANK, A, LDA,
372:      $                INFO )
373:       ELSE IF( IASCL.EQ.2 ) THEN
374:          CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
375:          CALL SLASCL( 'U', 0, 0, BIGNUM, ANRM, RANK, RANK, A, LDA,
376:      $                INFO )
377:       END IF
378:       IF( IBSCL.EQ.1 ) THEN
379:          CALL SLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
380:       ELSE IF( IBSCL.EQ.2 ) THEN
381:          CALL SLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
382:       END IF
383: *
384:    70 CONTINUE
385:       WORK( 1 ) = LWKOPT
386: *
387:       RETURN
388: *
389: *     End of SGELSY
390: *
391:       END
392: