ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdlaed2.f
Go to the documentation of this file.
1  SUBROUTINE pdlaed2( ICTXT, K, N, N1, NB, D, DROW, DCOL, Q, LDQ,
2  $ RHO, Z, W, DLAMDA, Q2, LDQ2, QBUF, CTOT, PSM,
3  $ NPCOL, INDX, INDXC, INDXP, INDCOL, COLTYP, NN,
4  $ NN1, NN2, IB1, IB2 )
5 *
6 * -- ScaLAPACK auxiliary routine (version 1.7) --
7 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8 * and University of California, Berkeley.
9 * December 31, 1998
10 *
11 * .. Scalar Arguments ..
12  INTEGER DCOL, DROW, IB1, IB2, ICTXT, K, LDQ, LDQ2, N,
13  $ N1, NB, NN, NN1, NN2, NPCOL
14  DOUBLE PRECISION RHO
15 * ..
16 * .. Array Arguments ..
17  INTEGER COLTYP( * ), CTOT( 0: NPCOL-1, 4 ),
18  $ INDCOL( N ), INDX( * ), INDXC( * ), INDXP( * ),
19  $ PSM( 0: NPCOL-1, 4 )
20  DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ),
21  $ Q2( LDQ2, * ), QBUF( * ), W( * ), Z( * )
22 * ..
23 *
24 * Purpose
25 * =======
26 *
27 * PDLAED2 sorts the two sets of eigenvalues together into a single
28 * sorted set. Then it tries to deflate the size of the problem.
29 * There are two ways in which deflation can occur: when two or more
30 * eigenvalues are close together or if there is a tiny entry in the
31 * Z vector. For each such occurrence the order of the related secular
32 * equation problem is reduced by one.
33 *
34 * Arguments
35 * =========
36 *
37 * ICTXT (global input) INTEGER
38 * The BLACS context handle, indicating the global context of
39 * the operation on the matrix. The context itself is global.
40 *
41 * K (output) INTEGER
42 * The number of non-deflated eigenvalues, and the order of the
43 * related secular equation. 0 <= K <=N.
44 *
45 * N (input) INTEGER
46 * The dimension of the symmetric tridiagonal matrix. N >= 0.
47 *
48 * N1 (input) INTEGER
49 * The location of the last eigenvalue in the leading sub-matrix.
50 * min(1,N) < N1 < N.
51 *
52 * NB (global input) INTEGER
53 * The blocking factor used to distribute the columns of the
54 * matrix. NB >= 1.
55 *
56 * D (input/output) DOUBLE PRECISION array, dimension (N)
57 * On entry, D contains the eigenvalues of the two submatrices to
58 * be combined.
59 * On exit, D contains the trailing (N-K) updated eigenvalues
60 * (those which were deflated) sorted into increasing order.
61 *
62 * DROW (global input) INTEGER
63 * The process row over which the first row of the matrix D is
64 * distributed. 0 <= DROW < NPROW.
65 *
66 * DCOL (global input) INTEGER
67 * The process column over which the first column of the
68 * matrix D is distributed. 0 <= DCOL < NPCOL.
69 *
70 * Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
71 * On entry, Q contains the eigenvectors of two submatrices in
72 * the two square blocks with corners at (1,1), (N1,N1)
73 * and (N1+1, N1+1), (N,N).
74 * On exit, Q contains the trailing (N-K) updated eigenvectors
75 * (those which were deflated) in its last N-K columns.
76 *
77 * LDQ (input) INTEGER
78 * The leading dimension of the array Q. LDQ >= max(1,NQ).
79 *
80 * RHO (global input/output) DOUBLE PRECISION
81 * On entry, the off-diagonal element associated with the rank-1
82 * cut which originally split the two submatrices which are now
83 * being recombined.
84 * On exit, RHO has been modified to the value required by
85 * PDLAED3.
86 *
87 * Z (global input) DOUBLE PRECISION array, dimension (N)
88 * On entry, Z contains the updating vector (the last
89 * row of the first sub-eigenvector matrix and the first row of
90 * the second sub-eigenvector matrix).
91 * On exit, the contents of Z have been destroyed by the updating
92 * process.
93 *
94 * DLAMDA (global output) DOUBLE PRECISION array, dimension (N)
95 * A copy of the first K eigenvalues which will be used by
96 * SLAED3 to form the secular equation.
97 *
98 * W (global output) DOUBLE PRECISION array, dimension (N)
99 * The first k values of the final deflation-altered z-vector
100 * which will be passed to SLAED3.
101 *
102 * Q2 (output) DOUBLE PRECISION array, dimension (LDQ2, NQ)
103 * A copy of the first K eigenvectors which will be used by
104 *
105 * LDQ2 (input) INTEGER
106 * The leading dimension of the array Q2.
107 *
108 * QBUF (workspace) DOUBLE PRECISION array, dimension 3*N
109 *
110 * CTOT (workspace) INTEGER array, dimension( NPCOL, 4)
111 *
112 * PSM (workspace) INTEGER array, dimension( NPCOL, 4)
113 *
114 * NPCOL (global input) INTEGER
115 * The total number of columns over which the distributed
116 * submatrix is distributed.
117 *
118 * INDX (workspace) INTEGER array, dimension (N)
119 * The permutation used to sort the contents of DLAMDA into
120 * ascending order.
121 *
122 * INDXC (output) INTEGER array, dimension (N)
123 * The permutation used to arrange the columns of the deflated
124 * Q matrix into three groups: the first group contains non-zero
125 * elements only at and above N1, the second contains
126 * non-zero elements only below N1, and the third is dense.
127 *
128 * INDXP (workspace) INTEGER array, dimension (N)
129 * The permutation used to place deflated values of D at the end
130 * of the array. INDXP(1:K) points to the nondeflated D-values
131 * and INDXP(K+1:N) points to the deflated eigenvalues.
132 *
133 * INDCOL (workspace) INTEGER array, dimension (N)
134 *
135 * COLTYP (workspace/output) INTEGER array, dimension (N)
136 * During execution, a label which will indicate which of the
137 * following types a column in the Q2 matrix is:
138 * 1 : non-zero in the upper half only;
139 * 2 : dense;
140 * 3 : non-zero in the lower half only;
141 * 4 : deflated.
142 *
143 * NN (global output) INTEGER, the order of matrix U, (PDLAED1).
144 * NN1 (global output) INTEGER, the order of matrix Q1, (PDLAED1).
145 * NN2 (global output) INTEGER, the order of matrix Q2, (PDLAED1).
146 * IB1 (global output) INTEGER, pointeur on Q1, (PDLAED1).
147 * IB2 (global output) INTEGER, pointeur on Q2, (PDLAED1).
148 *
149 * =====================================================================
150 *
151 * .. Parameters ..
152  DOUBLE PRECISION MONE, ZERO, ONE, TWO, EIGHT
153  PARAMETER ( MONE = -1.0d0, zero = 0.0d0, one = 1.0d0,
154  $ two = 2.0d0, eight = 8.0d0 )
155 * ..
156 * .. Local Scalars ..
157  INTEGER COL, CT, I, IAM, IE1, IE2, IMAX, INFO, J, JJQ2,
158  $ JJS, JMAX, JS, K2, MYCOL, MYROW, N1P1, N2, NJ,
159  $ NJCOL, NJJ, NP, NPROCS, NPROW, PJ, PJCOL, PJJ
160  DOUBLE PRECISION C, EPS, S, T, TAU, TOL
161 * ..
162 * .. External Functions ..
163  INTEGER IDAMAX, INDXG2L, INDXL2G, NUMROC
164  DOUBLE PRECISION DLAPY2, PDLAMCH
165  EXTERNAL IDAMAX, INDXG2L, INDXL2G, NUMROC, PDLAMCH,
166  $ dlapy2
167 * ..
168 * .. External Subroutines ..
169  EXTERNAL blacs_gridinfo, blacs_pinfo, dcopy, dgerv2d,
170  $ dgesd2d, dlapst, drot, dscal, infog1l
171 * ..
172 * .. Intrinsic Functions ..
173  INTRINSIC abs, max, min, mod, sqrt
174 * ..
175 * .. External Functions ..
176 * ..
177 * .. Local Arrays ..
178  INTEGER PTT( 4 )
179 * ..
180 * .. Executable Statements ..
181 *
182 * Quick return if possible
183 *
184  IF( n.EQ.0 )
185  $ RETURN
186 *
187  CALL blacs_pinfo( iam, nprocs )
188  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
189  np = numroc( n, nb, myrow, drow, nprow )
190 *
191  n2 = n - n1
192  n1p1 = n1 + 1
193 *
194  IF( rho.LT.zero ) THEN
195  CALL dscal( n2, mone, z( n1p1 ), 1 )
196  END IF
197 *
198 * Normalize z so that norm(z) = 1. Since z is the concatenation of
199 * two normalized vectors, norm2(z) = sqrt(2).
200 *
201  t = one / sqrt( two )
202  CALL dscal( n, t, z, 1 )
203 *
204 * RHO = ABS( norm(z)**2 * RHO )
205 *
206  rho = abs( two*rho )
207 *
208 * Calculate the allowable deflation tolerance
209 *
210  imax = idamax( n, z, 1 )
211  jmax = idamax( n, d, 1 )
212  eps = pdlamch( ictxt, 'Epsilon' )
213  tol = eight*eps*max( abs( d( jmax ) ), abs( z( imax ) ) )
214 *
215 * If the rank-1 modifier is small enough, no more needs to be done
216 * except to reorganize Q so that its columns correspond with the
217 * elements in D.
218 *
219  IF( rho*abs( z( imax ) ).LE.tol ) THEN
220  k = 0
221  GO TO 220
222  END IF
223 *
224 * If there are multiple eigenvalues then the problem deflates. Here
225 * the number of equal eigenvalues are found. As each equal
226 * eigenvalue is found, an elementary reflector is computed to rotate
227 * the corresponding eigensubspace so that the corresponding
228 * components of Z are zero in this new basis.
229 *
230 *
231  CALL dlapst( 'I', n, d, indx, info )
232 *
233  DO 10 i = 1, n1
234  coltyp( i ) = 1
235  10 CONTINUE
236  DO 20 i = n1p1, n
237  coltyp( i ) = 3
238  20 CONTINUE
239  col = dcol
240  DO 40 i = 1, n, nb
241  DO 30 j = 0, nb - 1
242  IF( i+j.LE.n )
243  $ indcol( i+j ) = col
244  30 CONTINUE
245  col = mod( col+1, npcol )
246  40 CONTINUE
247 *
248  k = 0
249  k2 = n + 1
250  DO 50 j = 1, n
251  nj = indx( j )
252  IF( rho*abs( z( nj ) ).LE.tol ) THEN
253 *
254 * Deflate due to small z component.
255 *
256  k2 = k2 - 1
257  coltyp( nj ) = 4
258  indxp( k2 ) = nj
259  IF( j.EQ.n )
260  $ GO TO 80
261  ELSE
262  pj = nj
263  GO TO 60
264  END IF
265  50 CONTINUE
266  60 CONTINUE
267  j = j + 1
268  nj = indx( j )
269  IF( j.GT.n )
270  $ GO TO 80
271  IF( rho*abs( z( nj ) ).LE.tol ) THEN
272 *
273 * Deflate due to small z component.
274 *
275  k2 = k2 - 1
276  coltyp( nj ) = 4
277  indxp( k2 ) = nj
278  ELSE
279 *
280 * Check if eigenvalues are close enough to allow deflation.
281 *
282  s = z( pj )
283  c = z( nj )
284 *
285 * Find sqrt(a**2+b**2) without overflow or
286 * destructive underflow.
287 *
288  tau = dlapy2( c, s )
289  t = d( nj ) - d( pj )
290  c = c / tau
291  s = -s / tau
292  IF( abs( t*c*s ).LE.tol ) THEN
293 *
294 * Deflation is possible.
295 *
296  z( nj ) = tau
297  z( pj ) = zero
298  IF( coltyp( nj ).NE.coltyp( pj ) )
299  $ coltyp( nj ) = 2
300  coltyp( pj ) = 4
301  CALL infog1l( nj, nb, npcol, mycol, dcol, njj, njcol )
302  CALL infog1l( pj, nb, npcol, mycol, dcol, pjj, pjcol )
303  IF( indcol( pj ).EQ.indcol( nj ) .AND. mycol.EQ.njcol ) THEN
304  CALL drot( np, q( 1, pjj ), 1, q( 1, njj ), 1, c, s )
305  ELSE IF( mycol.EQ.pjcol ) THEN
306  CALL dgesd2d( ictxt, np, 1, q( 1, pjj ), np, myrow,
307  $ njcol )
308  CALL dgerv2d( ictxt, np, 1, qbuf, np, myrow, njcol )
309  CALL drot( np, q( 1, pjj ), 1, qbuf, 1, c, s )
310  ELSE IF( mycol.EQ.njcol ) THEN
311  CALL dgesd2d( ictxt, np, 1, q( 1, njj ), np, myrow,
312  $ pjcol )
313  CALL dgerv2d( ictxt, np, 1, qbuf, np, myrow, pjcol )
314  CALL drot( np, qbuf, 1, q( 1, njj ), 1, c, s )
315  END IF
316  t = d( pj )*c**2 + d( nj )*s**2
317  d( nj ) = d( pj )*s**2 + d( nj )*c**2
318  d( pj ) = t
319  k2 = k2 - 1
320  i = 1
321  70 CONTINUE
322  IF( k2+i.LE.n ) THEN
323  IF( d( pj ).LT.d( indxp( k2+i ) ) ) THEN
324  indxp( k2+i-1 ) = indxp( k2+i )
325  indxp( k2+i ) = pj
326  i = i + 1
327  GO TO 70
328  ELSE
329  indxp( k2+i-1 ) = pj
330  END IF
331  ELSE
332  indxp( k2+i-1 ) = pj
333  END IF
334  pj = nj
335  ELSE
336  k = k + 1
337  dlamda( k ) = d( pj )
338  w( k ) = z( pj )
339  indxp( k ) = pj
340  pj = nj
341  END IF
342  END IF
343  GO TO 60
344  80 CONTINUE
345 *
346 * Record the last eigenvalue.
347 *
348  k = k + 1
349  dlamda( k ) = d( pj )
350  w( k ) = z( pj )
351  indxp( k ) = pj
352 *
353 * Count up the total number of the various types of columns, then
354 * form a permutation which positions the four column types into
355 * four uniform groups (although one or more of these groups may be
356 * empty).
357 *
358  DO 100 j = 1, 4
359  DO 90 i = 0, npcol - 1
360  ctot( i, j ) = 0
361  90 CONTINUE
362  ptt( j ) = 0
363  100 CONTINUE
364  DO 110 j = 1, n
365  ct = coltyp( j )
366  col = indcol( j )
367  ctot( col, ct ) = ctot( col, ct ) + 1
368  110 CONTINUE
369 *
370 * PSM(*) = Position in SubMatrix (of types 1 through 4)
371 *
372  DO 120 col = 0, npcol - 1
373  psm( col, 1 ) = 1
374  psm( col, 2 ) = 1 + ctot( col, 1 )
375  psm( col, 3 ) = psm( col, 2 ) + ctot( col, 2 )
376  psm( col, 4 ) = psm( col, 3 ) + ctot( col, 3 )
377  120 CONTINUE
378  ptt( 1 ) = 1
379  DO 140 i = 2, 4
380  ct = 0
381  DO 130 j = 0, npcol - 1
382  ct = ct + ctot( j, i-1 )
383  130 CONTINUE
384  ptt( i ) = ptt( i-1 ) + ct
385  140 CONTINUE
386 *
387 * Fill out the INDXC array so that the permutation which it induces
388 * will place all type-1 columns first, all type-2 columns next,
389 * then all type-3's, and finally all type-4's.
390 *
391  DO 150 j = 1, n
392  js = indxp( j )
393  col = indcol( js )
394  ct = coltyp( js )
395  i = indxl2g( psm( col, ct ), nb, col, dcol, npcol )
396  indx( j ) = i
397  indxc( ptt( ct ) ) = i
398  psm( col, ct ) = psm( col, ct ) + 1
399  ptt( ct ) = ptt( ct ) + 1
400  150 CONTINUE
401 *
402 *
403  DO 160 j = 1, n
404  js = indxp( j )
405  jjs = indxg2l( js, nb, j, j, npcol )
406  col = indcol( js )
407  IF( col.EQ.mycol ) THEN
408  i = indx( j )
409  jjq2 = indxg2l( i, nb, j, j, npcol )
410  CALL dcopy( np, q( 1, jjs ), 1, q2( 1, jjq2 ), 1 )
411  END IF
412  160 CONTINUE
413 *
414 *
415 * The deflated eigenvalues and their corresponding vectors go back
416 * into the last N - K slots of D and Q respectively.
417 *
418  CALL dcopy( n, d, 1, z, 1 )
419  DO 170 j = k + 1, n
420  js = indxp( j )
421  i = indx( j )
422  d( i ) = z( js )
423  170 CONTINUE
424 *
425  ptt( 1 ) = 1
426  DO 190 i = 2, 4
427  ct = 0
428  DO 180 j = 0, npcol - 1
429  ct = ct + ctot( j, i-1 )
430  180 CONTINUE
431  ptt( i ) = ptt( i-1 ) + ct
432  190 CONTINUE
433 *
434 *
435  ib1 = indxc( 1 )
436  ie1 = ib1
437  ib2 = indxc( ptt( 2 ) )
438  ie2 = ib2
439  DO 200 i = 2, ptt( 3 ) - 1
440  ib1 = min( ib1, indxc( i ) )
441  ie1 = max( ie1, indxc( i ) )
442  200 CONTINUE
443  DO 210 i = ptt( 2 ), ptt( 4 ) - 1
444  ib2 = min( ib2, indxc( i ) )
445  ie2 = max( ie2, indxc( i ) )
446  210 CONTINUE
447  nn1 = ie1 - ib1 + 1
448  nn2 = ie2 - ib2 + 1
449  nn = max( ie1, ie2 ) - min( ib1, ib2 ) + 1
450  220 CONTINUE
451  RETURN
452 *
453 * End of PDLAED2
454 *
455  END
max
#define max(A, B)
Definition: pcgemr.c:180
infog1l
subroutine infog1l(GINDX, NB, NPROCS, MYROC, ISRCPROC, LINDX, ROCSRC)
Definition: infog1l.f:3
dlapst
subroutine dlapst(ID, N, D, INDX, INFO)
Definition: dlapst.f:2
pdlaed2
subroutine pdlaed2(ICTXT, K, N, N1, NB, D, DROW, DCOL, Q, LDQ, RHO, Z, W, DLAMDA, Q2, LDQ2, QBUF, CTOT, PSM, NPCOL, INDX, INDXC, INDXP, INDCOL, COLTYP, NN, NN1, NN2, IB1, IB2)
Definition: pdlaed2.f:5
min
#define min(A, B)
Definition: pcgemr.c:181