001:       SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
002:      $                   Q2, INDX, INDXC, INDXP, COLTYP, INFO )
003: *
004: *  -- LAPACK routine (version 3.2) --
005: *     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
006: *     November 2006
007: *
008: *     .. Scalar Arguments ..
009:       INTEGER            INFO, K, LDQ, N, N1
010:       DOUBLE PRECISION   RHO
011: *     ..
012: *     .. Array Arguments ..
013:       INTEGER            COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ),
014:      $                   INDXQ( * )
015:       DOUBLE PRECISION   D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
016:      $                   W( * ), Z( * )
017: *     ..
018: *
019: *  Purpose
020: *  =======
021: *
022: *  DLAED2 merges the two sets of eigenvalues together into a single
023: *  sorted set.  Then it tries to deflate the size of the problem.
024: *  There are two ways in which deflation can occur:  when two or more
025: *  eigenvalues are close together or if there is a tiny entry in the
026: *  Z vector.  For each such occurrence the order of the related secular
027: *  equation problem is reduced by one.
028: *
029: *  Arguments
030: *  =========
031: *
032: *  K      (output) INTEGER
033: *         The number of non-deflated eigenvalues, and the order of the
034: *         related secular equation. 0 <= K <=N.
035: *
036: *  N      (input) INTEGER
037: *         The dimension of the symmetric tridiagonal matrix.  N >= 0.
038: *
039: *  N1     (input) INTEGER
040: *         The location of the last eigenvalue in the leading sub-matrix.
041: *         min(1,N) <= N1 <= N/2.
042: *
043: *  D      (input/output) DOUBLE PRECISION array, dimension (N)
044: *         On entry, D contains the eigenvalues of the two submatrices to
045: *         be combined.
046: *         On exit, D contains the trailing (N-K) updated eigenvalues
047: *         (those which were deflated) sorted into increasing order.
048: *
049: *  Q      (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
050: *         On entry, Q contains the eigenvectors of two submatrices in
051: *         the two square blocks with corners at (1,1), (N1,N1)
052: *         and (N1+1, N1+1), (N,N).
053: *         On exit, Q contains the trailing (N-K) updated eigenvectors
054: *         (those which were deflated) in its last N-K columns.
055: *
056: *  LDQ    (input) INTEGER
057: *         The leading dimension of the array Q.  LDQ >= max(1,N).
058: *
059: *  INDXQ  (input/output) INTEGER array, dimension (N)
060: *         The permutation which separately sorts the two sub-problems
061: *         in D into ascending order.  Note that elements in the second
062: *         half of this permutation must first have N1 added to their
063: *         values. Destroyed on exit.
064: *
065: *  RHO    (input/output) DOUBLE PRECISION
066: *         On entry, the off-diagonal element associated with the rank-1
067: *         cut which originally split the two submatrices which are now
068: *         being recombined.
069: *         On exit, RHO has been modified to the value required by
070: *         DLAED3.
071: *
072: *  Z      (input) DOUBLE PRECISION array, dimension (N)
073: *         On entry, Z contains the updating vector (the last
074: *         row of the first sub-eigenvector matrix and the first row of
075: *         the second sub-eigenvector matrix).
076: *         On exit, the contents of Z have been destroyed by the updating
077: *         process.
078: *
079: *  DLAMDA (output) DOUBLE PRECISION array, dimension (N)
080: *         A copy of the first K eigenvalues which will be used by
081: *         DLAED3 to form the secular equation.
082: *
083: *  W      (output) DOUBLE PRECISION array, dimension (N)
084: *         The first k values of the final deflation-altered z-vector
085: *         which will be passed to DLAED3.
086: *
087: *  Q2     (output) DOUBLE PRECISION array, dimension (N1**2+(N-N1)**2)
088: *         A copy of the first K eigenvectors which will be used by
089: *         DLAED3 in a matrix multiply (DGEMM) to solve for the new
090: *         eigenvectors.
091: *
092: *  INDX   (workspace) INTEGER array, dimension (N)
093: *         The permutation used to sort the contents of DLAMDA into
094: *         ascending order.
095: *
096: *  INDXC  (output) INTEGER array, dimension (N)
097: *         The permutation used to arrange the columns of the deflated
098: *         Q matrix into three groups:  the first group contains non-zero
099: *         elements only at and above N1, the second contains
100: *         non-zero elements only below N1, and the third is dense.
101: *
102: *  INDXP  (workspace) INTEGER array, dimension (N)
103: *         The permutation used to place deflated values of D at the end
104: *         of the array.  INDXP(1:K) points to the nondeflated D-values
105: *         and INDXP(K+1:N) points to the deflated eigenvalues.
106: *
107: *  COLTYP (workspace/output) INTEGER array, dimension (N)
108: *         During execution, a label which will indicate which of the
109: *         following types a column in the Q2 matrix is:
110: *         1 : non-zero in the upper half only;
111: *         2 : dense;
112: *         3 : non-zero in the lower half only;
113: *         4 : deflated.
114: *         On exit, COLTYP(i) is the number of columns of type i,
115: *         for i=1 to 4 only.
116: *
117: *  INFO   (output) INTEGER
118: *          = 0:  successful exit.
119: *          < 0:  if INFO = -i, the i-th argument had an illegal value.
120: *
121: *  Further Details
122: *  ===============
123: *
124: *  Based on contributions by
125: *     Jeff Rutter, Computer Science Division, University of California
126: *     at Berkeley, USA
127: *  Modified by Francoise Tisseur, University of Tennessee.
128: *
129: *  =====================================================================
130: *
131: *     .. Parameters ..
132:       DOUBLE PRECISION   MONE, ZERO, ONE, TWO, EIGHT
133:       PARAMETER          ( MONE = -1.0D0, ZERO = 0.0D0, ONE = 1.0D0,
134:      $                   TWO = 2.0D0, EIGHT = 8.0D0 )
135: *     ..
136: *     .. Local Arrays ..
137:       INTEGER            CTOT( 4 ), PSM( 4 )
138: *     ..
139: *     .. Local Scalars ..
140:       INTEGER            CT, I, IMAX, IQ1, IQ2, J, JMAX, JS, K2, N1P1,
141:      $                   N2, NJ, PJ
142:       DOUBLE PRECISION   C, EPS, S, T, TAU, TOL
143: *     ..
144: *     .. External Functions ..
145:       INTEGER            IDAMAX
146:       DOUBLE PRECISION   DLAMCH, DLAPY2
147:       EXTERNAL           IDAMAX, DLAMCH, DLAPY2
148: *     ..
149: *     .. External Subroutines ..
150:       EXTERNAL           DCOPY, DLACPY, DLAMRG, DROT, DSCAL, XERBLA
151: *     ..
152: *     .. Intrinsic Functions ..
153:       INTRINSIC          ABS, MAX, MIN, SQRT
154: *     ..
155: *     .. Executable Statements ..
156: *
157: *     Test the input parameters.
158: *
159:       INFO = 0
160: *
161:       IF( N.LT.0 ) THEN
162:          INFO = -2
163:       ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
164:          INFO = -6
165:       ELSE IF( MIN( 1, ( N / 2 ) ).GT.N1 .OR. ( N / 2 ).LT.N1 ) THEN
166:          INFO = -3
167:       END IF
168:       IF( INFO.NE.0 ) THEN
169:          CALL XERBLA( 'DLAED2', -INFO )
170:          RETURN
171:       END IF
172: *
173: *     Quick return if possible
174: *
175:       IF( N.EQ.0 )
176:      $   RETURN
177: *
178:       N2 = N - N1
179:       N1P1 = N1 + 1
180: *
181:       IF( RHO.LT.ZERO ) THEN
182:          CALL DSCAL( N2, MONE, Z( N1P1 ), 1 )
183:       END IF
184: *
185: *     Normalize z so that norm(z) = 1.  Since z is the concatenation of
186: *     two normalized vectors, norm2(z) = sqrt(2).
187: *
188:       T = ONE / SQRT( TWO )
189:       CALL DSCAL( N, T, Z, 1 )
190: *
191: *     RHO = ABS( norm(z)**2 * RHO )
192: *
193:       RHO = ABS( TWO*RHO )
194: *
195: *     Sort the eigenvalues into increasing order
196: *
197:       DO 10 I = N1P1, N
198:          INDXQ( I ) = INDXQ( I ) + N1
199:    10 CONTINUE
200: *
201: *     re-integrate the deflated parts from the last pass
202: *
203:       DO 20 I = 1, N
204:          DLAMDA( I ) = D( INDXQ( I ) )
205:    20 CONTINUE
206:       CALL DLAMRG( N1, N2, DLAMDA, 1, 1, INDXC )
207:       DO 30 I = 1, N
208:          INDX( I ) = INDXQ( INDXC( I ) )
209:    30 CONTINUE
210: *
211: *     Calculate the allowable deflation tolerance
212: *
213:       IMAX = IDAMAX( N, Z, 1 )
214:       JMAX = IDAMAX( N, D, 1 )
215:       EPS = DLAMCH( 'Epsilon' )
216:       TOL = EIGHT*EPS*MAX( ABS( D( JMAX ) ), ABS( Z( IMAX ) ) )
217: *
218: *     If the rank-1 modifier is small enough, no more needs to be done
219: *     except to reorganize Q so that its columns correspond with the
220: *     elements in D.
221: *
222:       IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN
223:          K = 0
224:          IQ2 = 1
225:          DO 40 J = 1, N
226:             I = INDX( J )
227:             CALL DCOPY( N, Q( 1, I ), 1, Q2( IQ2 ), 1 )
228:             DLAMDA( J ) = D( I )
229:             IQ2 = IQ2 + N
230:    40    CONTINUE
231:          CALL DLACPY( 'A', N, N, Q2, N, Q, LDQ )
232:          CALL DCOPY( N, DLAMDA, 1, D, 1 )
233:          GO TO 190
234:       END IF
235: *
236: *     If there are multiple eigenvalues then the problem deflates.  Here
237: *     the number of equal eigenvalues are found.  As each equal
238: *     eigenvalue is found, an elementary reflector is computed to rotate
239: *     the corresponding eigensubspace so that the corresponding
240: *     components of Z are zero in this new basis.
241: *
242:       DO 50 I = 1, N1
243:          COLTYP( I ) = 1
244:    50 CONTINUE
245:       DO 60 I = N1P1, N
246:          COLTYP( I ) = 3
247:    60 CONTINUE
248: *
249: *
250:       K = 0
251:       K2 = N + 1
252:       DO 70 J = 1, N
253:          NJ = INDX( J )
254:          IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN
255: *
256: *           Deflate due to small z component.
257: *
258:             K2 = K2 - 1
259:             COLTYP( NJ ) = 4
260:             INDXP( K2 ) = NJ
261:             IF( J.EQ.N )
262:      $         GO TO 100
263:          ELSE
264:             PJ = NJ
265:             GO TO 80
266:          END IF
267:    70 CONTINUE
268:    80 CONTINUE
269:       J = J + 1
270:       NJ = INDX( J )
271:       IF( J.GT.N )
272:      $   GO TO 100
273:       IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN
274: *
275: *        Deflate due to small z component.
276: *
277:          K2 = K2 - 1
278:          COLTYP( NJ ) = 4
279:          INDXP( K2 ) = NJ
280:       ELSE
281: *
282: *        Check if eigenvalues are close enough to allow deflation.
283: *
284:          S = Z( PJ )
285:          C = Z( NJ )
286: *
287: *        Find sqrt(a**2+b**2) without overflow or
288: *        destructive underflow.
289: *
290:          TAU = DLAPY2( C, S )
291:          T = D( NJ ) - D( PJ )
292:          C = C / TAU
293:          S = -S / TAU
294:          IF( ABS( T*C*S ).LE.TOL ) THEN
295: *
296: *           Deflation is possible.
297: *
298:             Z( NJ ) = TAU
299:             Z( PJ ) = ZERO
300:             IF( COLTYP( NJ ).NE.COLTYP( PJ ) )
301:      $         COLTYP( NJ ) = 2
302:             COLTYP( PJ ) = 4
303:             CALL DROT( N, Q( 1, PJ ), 1, Q( 1, NJ ), 1, C, S )
304:             T = D( PJ )*C**2 + D( NJ )*S**2
305:             D( NJ ) = D( PJ )*S**2 + D( NJ )*C**2
306:             D( PJ ) = T
307:             K2 = K2 - 1
308:             I = 1
309:    90       CONTINUE
310:             IF( K2+I.LE.N ) THEN
311:                IF( D( PJ ).LT.D( INDXP( K2+I ) ) ) THEN
312:                   INDXP( K2+I-1 ) = INDXP( K2+I )
313:                   INDXP( K2+I ) = PJ
314:                   I = I + 1
315:                   GO TO 90
316:                ELSE
317:                   INDXP( K2+I-1 ) = PJ
318:                END IF
319:             ELSE
320:                INDXP( K2+I-1 ) = PJ
321:             END IF
322:             PJ = NJ
323:          ELSE
324:             K = K + 1
325:             DLAMDA( K ) = D( PJ )
326:             W( K ) = Z( PJ )
327:             INDXP( K ) = PJ
328:             PJ = NJ
329:          END IF
330:       END IF
331:       GO TO 80
332:   100 CONTINUE
333: *
334: *     Record the last eigenvalue.
335: *
336:       K = K + 1
337:       DLAMDA( K ) = D( PJ )
338:       W( K ) = Z( PJ )
339:       INDXP( K ) = PJ
340: *
341: *     Count up the total number of the various types of columns, then
342: *     form a permutation which positions the four column types into
343: *     four uniform groups (although one or more of these groups may be
344: *     empty).
345: *
346:       DO 110 J = 1, 4
347:          CTOT( J ) = 0
348:   110 CONTINUE
349:       DO 120 J = 1, N
350:          CT = COLTYP( J )
351:          CTOT( CT ) = CTOT( CT ) + 1
352:   120 CONTINUE
353: *
354: *     PSM(*) = Position in SubMatrix (of types 1 through 4)
355: *
356:       PSM( 1 ) = 1
357:       PSM( 2 ) = 1 + CTOT( 1 )
358:       PSM( 3 ) = PSM( 2 ) + CTOT( 2 )
359:       PSM( 4 ) = PSM( 3 ) + CTOT( 3 )
360:       K = N - CTOT( 4 )
361: *
362: *     Fill out the INDXC array so that the permutation which it induces
363: *     will place all type-1 columns first, all type-2 columns next,
364: *     then all type-3's, and finally all type-4's.
365: *
366:       DO 130 J = 1, N
367:          JS = INDXP( J )
368:          CT = COLTYP( JS )
369:          INDX( PSM( CT ) ) = JS
370:          INDXC( PSM( CT ) ) = J
371:          PSM( CT ) = PSM( CT ) + 1
372:   130 CONTINUE
373: *
374: *     Sort the eigenvalues and corresponding eigenvectors into DLAMDA
375: *     and Q2 respectively.  The eigenvalues/vectors which were not
376: *     deflated go into the first K slots of DLAMDA and Q2 respectively,
377: *     while those which were deflated go into the last N - K slots.
378: *
379:       I = 1
380:       IQ1 = 1
381:       IQ2 = 1 + ( CTOT( 1 )+CTOT( 2 ) )*N1
382:       DO 140 J = 1, CTOT( 1 )
383:          JS = INDX( I )
384:          CALL DCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 )
385:          Z( I ) = D( JS )
386:          I = I + 1
387:          IQ1 = IQ1 + N1
388:   140 CONTINUE
389: *
390:       DO 150 J = 1, CTOT( 2 )
391:          JS = INDX( I )
392:          CALL DCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 )
393:          CALL DCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 )
394:          Z( I ) = D( JS )
395:          I = I + 1
396:          IQ1 = IQ1 + N1
397:          IQ2 = IQ2 + N2
398:   150 CONTINUE
399: *
400:       DO 160 J = 1, CTOT( 3 )
401:          JS = INDX( I )
402:          CALL DCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 )
403:          Z( I ) = D( JS )
404:          I = I + 1
405:          IQ2 = IQ2 + N2
406:   160 CONTINUE
407: *
408:       IQ1 = IQ2
409:       DO 170 J = 1, CTOT( 4 )
410:          JS = INDX( I )
411:          CALL DCOPY( N, Q( 1, JS ), 1, Q2( IQ2 ), 1 )
412:          IQ2 = IQ2 + N
413:          Z( I ) = D( JS )
414:          I = I + 1
415:   170 CONTINUE
416: *
417: *     The deflated eigenvalues and their corresponding vectors go back
418: *     into the last N - K slots of D and Q respectively.
419: *
420:       CALL DLACPY( 'A', N, CTOT( 4 ), Q2( IQ1 ), N, Q( 1, K+1 ), LDQ )
421:       CALL DCOPY( N-K, Z( K+1 ), 1, D( K+1 ), 1 )
422: *
423: *     Copy CTOT into COLTYP for referencing in DLAED3.
424: *
425:       DO 180 J = 1, 4
426:          COLTYP( J ) = CTOT( J )
427:   180 CONTINUE
428: *
429:   190 CONTINUE
430:       RETURN
431: *
432: *     End of DLAED2
433: *
434:       END
435: