SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ pdlaed2()

subroutine pdlaed2 ( integer  ictxt,
integer  k,
integer  n,
integer  n1,
integer  nb,
double precision, dimension( * )  d,
integer  drow,
integer  dcol,
double precision, dimension( ldq, * )  q,
integer  ldq,
double precision  rho,
double precision, dimension( * )  z,
double precision, dimension( * )  w,
double precision, dimension( * )  dlamda,
double precision, dimension( ldq2, * )  q2,
integer  ldq2,
double precision, dimension( * )  qbuf,
integer, dimension( 0: npcol-1, 4 )  ctot,
integer, dimension( 0: npcol-1, 4 )  psm,
integer  npcol,
integer, dimension( * )  indx,
integer, dimension( * )  indxc,
integer, dimension( * )  indxp,
integer, dimension( n )  indcol,
integer, dimension( * )  coltyp,
integer  nn,
integer  nn1,
integer  nn2,
integer  ib1,
integer  ib2 
)

Definition at line 1 of file pdlaed2.f.

5*
6* -- ScaLAPACK auxiliary routine (version 1.7) --
7* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8* and University of California, Berkeley.
9* December 31, 1998
10*
11* .. Scalar Arguments ..
12 INTEGER DCOL, DROW, IB1, IB2, ICTXT, K, LDQ, LDQ2, N,
13 $ N1, NB, NN, NN1, NN2, NPCOL
14 DOUBLE PRECISION RHO
15* ..
16* .. Array Arguments ..
17 INTEGER COLTYP( * ), CTOT( 0: NPCOL-1, 4 ),
18 $ INDCOL( N ), INDX( * ), INDXC( * ), INDXP( * ),
19 $ PSM( 0: NPCOL-1, 4 )
20 DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ),
21 $ Q2( LDQ2, * ), QBUF( * ), W( * ), Z( * )
22* ..
23*
24* Purpose
25* =======
26*
27* PDLAED2 sorts the two sets of eigenvalues together into a single
28* sorted set. Then it tries to deflate the size of the problem.
29* There are two ways in which deflation can occur: when two or more
30* eigenvalues are close together or if there is a tiny entry in the
31* Z vector. For each such occurrence the order of the related secular
32* equation problem is reduced by one.
33*
34* Arguments
35* =========
36*
37* ICTXT (global input) INTEGER
38* The BLACS context handle, indicating the global context of
39* the operation on the matrix. The context itself is global.
40*
41* K (output) INTEGER
42* The number of non-deflated eigenvalues, and the order of the
43* related secular equation. 0 <= K <=N.
44*
45* N (input) INTEGER
46* The dimension of the symmetric tridiagonal matrix. N >= 0.
47*
48* N1 (input) INTEGER
49* The location of the last eigenvalue in the leading sub-matrix.
50* min(1,N) < N1 < N.
51*
52* NB (global input) INTEGER
53* The blocking factor used to distribute the columns of the
54* matrix. NB >= 1.
55*
56* D (input/output) DOUBLE PRECISION array, dimension (N)
57* On entry, D contains the eigenvalues of the two submatrices to
58* be combined.
59* On exit, D contains the trailing (N-K) updated eigenvalues
60* (those which were deflated) sorted into increasing order.
61*
62* DROW (global input) INTEGER
63* The process row over which the first row of the matrix D is
64* distributed. 0 <= DROW < NPROW.
65*
66* DCOL (global input) INTEGER
67* The process column over which the first column of the
68* matrix D is distributed. 0 <= DCOL < NPCOL.
69*
70* Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
71* On entry, Q contains the eigenvectors of two submatrices in
72* the two square blocks with corners at (1,1), (N1,N1)
73* and (N1+1, N1+1), (N,N).
74* On exit, Q contains the trailing (N-K) updated eigenvectors
75* (those which were deflated) in its last N-K columns.
76*
77* LDQ (input) INTEGER
78* The leading dimension of the array Q. LDQ >= max(1,NQ).
79*
80* RHO (global input/output) DOUBLE PRECISION
81* On entry, the off-diagonal element associated with the rank-1
82* cut which originally split the two submatrices which are now
83* being recombined.
84* On exit, RHO has been modified to the value required by
85* PDLAED3.
86*
87* Z (global input) DOUBLE PRECISION array, dimension (N)
88* On entry, Z contains the updating vector (the last
89* row of the first sub-eigenvector matrix and the first row of
90* the second sub-eigenvector matrix).
91* On exit, the contents of Z have been destroyed by the updating
92* process.
93*
94* DLAMDA (global output) DOUBLE PRECISION array, dimension (N)
95* A copy of the first K eigenvalues which will be used by
96* SLAED3 to form the secular equation.
97*
98* W (global output) DOUBLE PRECISION array, dimension (N)
99* The first k values of the final deflation-altered z-vector
100* which will be passed to SLAED3.
101*
102* Q2 (output) DOUBLE PRECISION array, dimension (LDQ2, NQ)
103* A copy of the first K eigenvectors which will be used by
104*
105* LDQ2 (input) INTEGER
106* The leading dimension of the array Q2.
107*
108* QBUF (workspace) DOUBLE PRECISION array, dimension 3*N
109*
110* CTOT (workspace) INTEGER array, dimension( NPCOL, 4)
111*
112* PSM (workspace) INTEGER array, dimension( NPCOL, 4)
113*
114* NPCOL (global input) INTEGER
115* The total number of columns over which the distributed
116* submatrix is distributed.
117*
118* INDX (workspace) INTEGER array, dimension (N)
119* The permutation used to sort the contents of DLAMDA into
120* ascending order.
121*
122* INDXC (output) INTEGER array, dimension (N)
123* The permutation used to arrange the columns of the deflated
124* Q matrix into three groups: the first group contains non-zero
125* elements only at and above N1, the second contains
126* non-zero elements only below N1, and the third is dense.
127*
128* INDXP (workspace) INTEGER array, dimension (N)
129* The permutation used to place deflated values of D at the end
130* of the array. INDXP(1:K) points to the nondeflated D-values
131* and INDXP(K+1:N) points to the deflated eigenvalues.
132*
133* INDCOL (workspace) INTEGER array, dimension (N)
134*
135* COLTYP (workspace/output) INTEGER array, dimension (N)
136* During execution, a label which will indicate which of the
137* following types a column in the Q2 matrix is:
138* 1 : non-zero in the upper half only;
139* 2 : dense;
140* 3 : non-zero in the lower half only;
141* 4 : deflated.
142*
143* NN (global output) INTEGER, the order of matrix U, (PDLAED1).
144* NN1 (global output) INTEGER, the order of matrix Q1, (PDLAED1).
145* NN2 (global output) INTEGER, the order of matrix Q2, (PDLAED1).
146* IB1 (global output) INTEGER, pointeur on Q1, (PDLAED1).
147* IB2 (global output) INTEGER, pointeur on Q2, (PDLAED1).
148*
149* =====================================================================
150*
151* .. Parameters ..
152 DOUBLE PRECISION MONE, ZERO, ONE, TWO, EIGHT
153 parameter( mone = -1.0d0, zero = 0.0d0, one = 1.0d0,
154 $ two = 2.0d0, eight = 8.0d0 )
155* ..
156* .. Local Scalars ..
157 INTEGER COL, CT, I, IAM, IE1, IE2, IMAX, INFO, J, JJQ2,
158 $ JJS, JMAX, JS, K2, MYCOL, MYROW, N1P1, N2, NJ,
159 $ NJCOL, NJJ, NP, NPROCS, NPROW, PJ, PJCOL, PJJ
160 DOUBLE PRECISION C, EPS, S, T, TAU, TOL
161* ..
162* .. External Functions ..
163 INTEGER IDAMAX, INDXG2L, INDXL2G, NUMROC
164 DOUBLE PRECISION DLAPY2, PDLAMCH
165 EXTERNAL idamax, indxg2l, indxl2g, numroc, pdlamch,
166 $ dlapy2
167* ..
168* .. External Subroutines ..
169 EXTERNAL blacs_gridinfo, blacs_pinfo, dcopy, dgerv2d,
170 $ dgesd2d, dlapst, drot, dscal, infog1l
171* ..
172* .. Intrinsic Functions ..
173 INTRINSIC abs, max, min, mod, sqrt
174* ..
175* .. External Functions ..
176* ..
177* .. Local Arrays ..
178 INTEGER PTT( 4 )
179* ..
180* .. Executable Statements ..
181*
182* Quick return if possible
183*
184 IF( n.EQ.0 )
185 $ RETURN
186*
187 CALL blacs_pinfo( iam, nprocs )
188 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
189 np = numroc( n, nb, myrow, drow, nprow )
190*
191 n2 = n - n1
192 n1p1 = n1 + 1
193*
194 IF( rho.LT.zero ) THEN
195 CALL dscal( n2, mone, z( n1p1 ), 1 )
196 END IF
197*
198* Normalize z so that norm(z) = 1. Since z is the concatenation of
199* two normalized vectors, norm2(z) = sqrt(2).
200*
201 t = one / sqrt( two )
202 CALL dscal( n, t, z, 1 )
203*
204* RHO = ABS( norm(z)**2 * RHO )
205*
206 rho = abs( two*rho )
207*
208* Calculate the allowable deflation tolerance
209*
210 imax = idamax( n, z, 1 )
211 jmax = idamax( n, d, 1 )
212 eps = pdlamch( ictxt, 'Epsilon' )
213 tol = eight*eps*max( abs( d( jmax ) ), abs( z( imax ) ) )
214*
215* If the rank-1 modifier is small enough, no more needs to be done
216* except to reorganize Q so that its columns correspond with the
217* elements in D.
218*
219 IF( rho*abs( z( imax ) ).LE.tol ) THEN
220 k = 0
221 GO TO 220
222 END IF
223*
224* If there are multiple eigenvalues then the problem deflates. Here
225* the number of equal eigenvalues are found. As each equal
226* eigenvalue is found, an elementary reflector is computed to rotate
227* the corresponding eigensubspace so that the corresponding
228* components of Z are zero in this new basis.
229*
230*
231 CALL dlapst( 'I', n, d, indx, info )
232*
233 DO 10 i = 1, n1
234 coltyp( i ) = 1
235 10 CONTINUE
236 DO 20 i = n1p1, n
237 coltyp( i ) = 3
238 20 CONTINUE
239 col = dcol
240 DO 40 i = 1, n, nb
241 DO 30 j = 0, nb - 1
242 IF( i+j.LE.n )
243 $ indcol( i+j ) = col
244 30 CONTINUE
245 col = mod( col+1, npcol )
246 40 CONTINUE
247*
248 k = 0
249 k2 = n + 1
250 DO 50 j = 1, n
251 nj = indx( j )
252 IF( rho*abs( z( nj ) ).LE.tol ) THEN
253*
254* Deflate due to small z component.
255*
256 k2 = k2 - 1
257 coltyp( nj ) = 4
258 indxp( k2 ) = nj
259 IF( j.EQ.n )
260 $ GO TO 80
261 ELSE
262 pj = nj
263 GO TO 60
264 END IF
265 50 CONTINUE
266 60 CONTINUE
267 j = j + 1
268 nj = indx( j )
269 IF( j.GT.n )
270 $ GO TO 80
271 IF( rho*abs( z( nj ) ).LE.tol ) THEN
272*
273* Deflate due to small z component.
274*
275 k2 = k2 - 1
276 coltyp( nj ) = 4
277 indxp( k2 ) = nj
278 ELSE
279*
280* Check if eigenvalues are close enough to allow deflation.
281*
282 s = z( pj )
283 c = z( nj )
284*
285* Find sqrt(a**2+b**2) without overflow or
286* destructive underflow.
287*
288 tau = dlapy2( c, s )
289 t = d( nj ) - d( pj )
290 c = c / tau
291 s = -s / tau
292 IF( abs( t*c*s ).LE.tol ) THEN
293*
294* Deflation is possible.
295*
296 z( nj ) = tau
297 z( pj ) = zero
298 IF( coltyp( nj ).NE.coltyp( pj ) )
299 $ coltyp( nj ) = 2
300 coltyp( pj ) = 4
301 CALL infog1l( nj, nb, npcol, mycol, dcol, njj, njcol )
302 CALL infog1l( pj, nb, npcol, mycol, dcol, pjj, pjcol )
303 IF( indcol( pj ).EQ.indcol( nj ) .AND. mycol.EQ.njcol ) THEN
304 CALL drot( np, q( 1, pjj ), 1, q( 1, njj ), 1, c, s )
305 ELSE IF( mycol.EQ.pjcol ) THEN
306 CALL dgesd2d( ictxt, np, 1, q( 1, pjj ), np, myrow,
307 $ njcol )
308 CALL dgerv2d( ictxt, np, 1, qbuf, np, myrow, njcol )
309 CALL drot( np, q( 1, pjj ), 1, qbuf, 1, c, s )
310 ELSE IF( mycol.EQ.njcol ) THEN
311 CALL dgesd2d( ictxt, np, 1, q( 1, njj ), np, myrow,
312 $ pjcol )
313 CALL dgerv2d( ictxt, np, 1, qbuf, np, myrow, pjcol )
314 CALL drot( np, qbuf, 1, q( 1, njj ), 1, c, s )
315 END IF
316 t = d( pj )*c**2 + d( nj )*s**2
317 d( nj ) = d( pj )*s**2 + d( nj )*c**2
318 d( pj ) = t
319 k2 = k2 - 1
320 i = 1
321 70 CONTINUE
322 IF( k2+i.LE.n ) THEN
323 IF( d( pj ).LT.d( indxp( k2+i ) ) ) THEN
324 indxp( k2+i-1 ) = indxp( k2+i )
325 indxp( k2+i ) = pj
326 i = i + 1
327 GO TO 70
328 ELSE
329 indxp( k2+i-1 ) = pj
330 END IF
331 ELSE
332 indxp( k2+i-1 ) = pj
333 END IF
334 pj = nj
335 ELSE
336 k = k + 1
337 dlamda( k ) = d( pj )
338 w( k ) = z( pj )
339 indxp( k ) = pj
340 pj = nj
341 END IF
342 END IF
343 GO TO 60
344 80 CONTINUE
345*
346* Record the last eigenvalue.
347*
348 k = k + 1
349 dlamda( k ) = d( pj )
350 w( k ) = z( pj )
351 indxp( k ) = pj
352*
353* Count up the total number of the various types of columns, then
354* form a permutation which positions the four column types into
355* four uniform groups (although one or more of these groups may be
356* empty).
357*
358 DO 100 j = 1, 4
359 DO 90 i = 0, npcol - 1
360 ctot( i, j ) = 0
361 90 CONTINUE
362 ptt( j ) = 0
363 100 CONTINUE
364 DO 110 j = 1, n
365 ct = coltyp( j )
366 col = indcol( j )
367 ctot( col, ct ) = ctot( col, ct ) + 1
368 110 CONTINUE
369*
370* PSM(*) = Position in SubMatrix (of types 1 through 4)
371*
372 DO 120 col = 0, npcol - 1
373 psm( col, 1 ) = 1
374 psm( col, 2 ) = 1 + ctot( col, 1 )
375 psm( col, 3 ) = psm( col, 2 ) + ctot( col, 2 )
376 psm( col, 4 ) = psm( col, 3 ) + ctot( col, 3 )
377 120 CONTINUE
378 ptt( 1 ) = 1
379 DO 140 i = 2, 4
380 ct = 0
381 DO 130 j = 0, npcol - 1
382 ct = ct + ctot( j, i-1 )
383 130 CONTINUE
384 ptt( i ) = ptt( i-1 ) + ct
385 140 CONTINUE
386*
387* Fill out the INDXC array so that the permutation which it induces
388* will place all type-1 columns first, all type-2 columns next,
389* then all type-3's, and finally all type-4's.
390*
391 DO 150 j = 1, n
392 js = indxp( j )
393 col = indcol( js )
394 ct = coltyp( js )
395 i = indxl2g( psm( col, ct ), nb, col, dcol, npcol )
396 indx( j ) = i
397 indxc( ptt( ct ) ) = i
398 psm( col, ct ) = psm( col, ct ) + 1
399 ptt( ct ) = ptt( ct ) + 1
400 150 CONTINUE
401*
402*
403 DO 160 j = 1, n
404 js = indxp( j )
405 jjs = indxg2l( js, nb, j, j, npcol )
406 col = indcol( js )
407 IF( col.EQ.mycol ) THEN
408 i = indx( j )
409 jjq2 = indxg2l( i, nb, j, j, npcol )
410 CALL dcopy( np, q( 1, jjs ), 1, q2( 1, jjq2 ), 1 )
411 END IF
412 160 CONTINUE
413*
414*
415* The deflated eigenvalues and their corresponding vectors go back
416* into the last N - K slots of D and Q respectively.
417*
418 CALL dcopy( n, d, 1, z, 1 )
419 DO 170 j = k + 1, n
420 js = indxp( j )
421 i = indx( j )
422 d( i ) = z( js )
423 170 CONTINUE
424*
425 ptt( 1 ) = 1
426 DO 190 i = 2, 4
427 ct = 0
428 DO 180 j = 0, npcol - 1
429 ct = ct + ctot( j, i-1 )
430 180 CONTINUE
431 ptt( i ) = ptt( i-1 ) + ct
432 190 CONTINUE
433*
434*
435 ib1 = indxc( 1 )
436 ie1 = ib1
437 ib2 = indxc( ptt( 2 ) )
438 ie2 = ib2
439 DO 200 i = 2, ptt( 3 ) - 1
440 ib1 = min( ib1, indxc( i ) )
441 ie1 = max( ie1, indxc( i ) )
442 200 CONTINUE
443 DO 210 i = ptt( 2 ), ptt( 4 ) - 1
444 ib2 = min( ib2, indxc( i ) )
445 ie2 = max( ie2, indxc( i ) )
446 210 CONTINUE
447 nn1 = ie1 - ib1 + 1
448 nn2 = ie2 - ib2 + 1
449 nn = max( ie1, ie2 ) - min( ib1, ib2 ) + 1
450 220 CONTINUE
451 RETURN
452*
453* End of PDLAED2
454*
subroutine dlapst(id, n, d, indx, info)
Definition dlapst.f:2
integer function indxg2l(indxglob, nb, iproc, isrcproc, nprocs)
Definition indxg2l.f:2
integer function indxl2g(indxloc, nb, iproc, isrcproc, nprocs)
Definition indxl2g.f:2
subroutine infog1l(gindx, nb, nprocs, myroc, isrcproc, lindx, rocsrc)
Definition infog1l.f:3
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
double precision function pdlamch(ictxt, cmach)
Definition pdblastst.f:6769
Here is the call graph for this function:
Here is the caller graph for this function: