ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdlaed3.f
Go to the documentation of this file.
1  SUBROUTINE pdlaed3( ICTXT, K, N, NB, D, DROW, DCOL, RHO, DLAMDA,
2  $ W, Z, U, LDU, BUF, INDX, INDCOL, INDROW,
3  $ INDXR, INDXC, CTOT, NPCOL, INFO )
4 *
5 * -- ScaLAPACK auxiliary routine (version 1.7) --
6 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
7 * and University of California, Berkeley.
8 * December 31, 1998
9 *
10 * .. Scalar Arguments ..
11  INTEGER DCOL, DROW, ICTXT, INFO, K, LDU, N, NB, NPCOL
12  DOUBLE PRECISION RHO
13 * ..
14 * .. Array Arguments ..
15  INTEGER CTOT( 0: NPCOL-1, 4 ), INDCOL( * ),
16  $ INDROW( * ), INDX( * ), INDXC( * ), INDXR( * )
17  DOUBLE PRECISION BUF( * ), D( * ), DLAMDA( * ), U( LDU, * ),
18  $ W( * ), Z( * )
19 * ..
20 *
21 * Purpose
22 * =======
23 *
24 * PDLAED3 finds the roots of the secular equation, as defined by the
25 * values in D, W, and RHO, between 1 and K. It makes the
26 * appropriate calls to SLAED4
27 *
28 * This code makes very mild assumptions about floating point
29 * arithmetic. It will work on machines with a guard digit in
30 * add/subtract, or on those binary machines without guard digits
31 * which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
32 * It could conceivably fail on hexadecimal or decimal machines
33 * without guard digits, but we know of none.
34 *
35 * Arguments
36 * =========
37 *
38 * ICTXT (global input) INTEGER
39 * The BLACS context handle, indicating the global context of
40 * the operation on the matrix. The context itself is global.
41 *
42 * K (output) INTEGER
43 * The number of non-deflated eigenvalues, and the order of the
44 * related secular equation. 0 <= K <=N.
45 *
46 * N (input) INTEGER
47 * The dimension of the symmetric tridiagonal matrix. N >= 0.
48 *
49 * NB (global input) INTEGER
50 * The blocking factor used to distribute the columns of the
51 * matrix. NB >= 1.
52 *
53 * D (input/output) DOUBLE PRECISION array, dimension (N)
54 * On entry, D contains the eigenvalues of the two submatrices to
55 * be combined.
56 * On exit, D contains the trailing (N-K) updated eigenvalues
57 * (those which were deflated) sorted into increasing order.
58 *
59 * DROW (global input) INTEGER
60 * The process row over which the first row of the matrix D is
61 * distributed. 0 <= DROW < NPROW.
62 *
63 * DCOL (global input) INTEGER
64 * The process column over which the first column of the
65 * matrix D is distributed. 0 <= DCOL < NPCOL.
66 *
67 * RHO (global input/output) DOUBLE PRECISION
68 * On entry, the off-diagonal element associated with the rank-1
69 * cut which originally split the two submatrices which are now
70 * being recombined.
71 * On exit, RHO has been modified to the value required by
72 * PDLAED3.
73 *
74 * DLAMDA (global output) DOUBLE PRECISION array, dimension (N)
75 * A copy of the first K eigenvalues which will be used by
76 * DLAED4 to form the secular equation.
77 *
78 * W (global output) DOUBLE PRECISION array, dimension (N)
79 * The first k values of the final deflation-altered z-vector
80 * which will be passed to DLAED4.
81 *
82 * Z (global input) DOUBLE PRECISION array, dimension (N)
83 * On entry, Z contains the updating vector (the last
84 * row of the first sub-eigenvector matrix and the first row of
85 * the second sub-eigenvector matrix).
86 * On exit, the contents of Z have been destroyed by the updating
87 * process.
88 *
89 * U (global output) DOUBLE PRECISION array
90 * global dimension (N, N), local dimension (LDU, NQ).
91 * (See PDLAED0 for definition of NQ.)
92 * Q contains the orthonormal eigenvectors of the symmetric
93 * tridiagonal matrix.
94 *
95 * LDU (input) INTEGER
96 * The leading dimension of the array U.
97 *
98 * BUF (workspace) DOUBLE PRECISION array, dimension 3*N
99 *
100 *
101 * INDX (workspace) INTEGER array, dimension (N)
102 * The permutation used to sort the contents of DLAMDA into
103 * ascending order.
104 *
105 * INDCOL (workspace) INTEGER array, dimension (N)
106 *
107 *
108 * INDROW (workspace) INTEGER array, dimension (N)
109 *
110 *
111 * INDXR (workspace) INTEGER array, dimension (N)
112 *
113 *
114 * INDXC (workspace) INTEGER array, dimension (N)
115 *
116 * CTOT (workspace) INTEGER array, dimension( NPCOL, 4)
117 *
118 * NPCOL (global input) INTEGER
119 * The total number of columns over which the distributed
120 * submatrix is distributed.
121 *
122 * INFO (output) INTEGER
123 * = 0: successful exit.
124 * < 0: if INFO = -i, the i-th argument had an illegal value.
125 * > 0: The algorithm failed to compute the ith eigenvalue.
126 *
127 * =====================================================================
128 *
129 * .. Parameters ..
130  DOUBLE PRECISION ONE
131  PARAMETER ( ONE = 1.0d+0 )
132 * ..
133 * .. Local Scalars ..
134  INTEGER COL, GI, I, IINFO, IIU, IPD, IU, J, JJU, JU,
135  $ KK, KL, KLC, KLR, MYCOL, MYKL, MYKLR, MYROW,
136  $ nprow, pdc, pdr, row
137  DOUBLE PRECISION AUX, TEMP
138 * ..
139 * .. External Functions ..
140  INTEGER INDXG2L
141  DOUBLE PRECISION DLAMC3, DNRM2
142  EXTERNAL indxg2l, dlamc3, dnrm2
143 * ..
144 * .. External Subroutines ..
145  EXTERNAL blacs_gridinfo, dcopy, dgebr2d, dgebs2d,
146  $ dgerv2d, dgesd2d, dlaed4
147 * ..
148 * .. Intrinsic Functions ..
149  INTRINSIC mod, sign, sqrt
150 * ..
151 * .. Executable Statements ..
152 *
153 * Test the input parameters.
154 *
155  info = 0
156 *
157 * Quick return if possible
158 *
159  IF( k.EQ.0 )
160  $ RETURN
161 *
162  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
163 *
164  row = drow
165  col = dcol
166  DO 20 i = 1, n, nb
167  DO 10 j = 0, nb - 1
168  IF( i+j.LE.n ) THEN
169  indrow( i+j ) = row
170  indcol( i+j ) = col
171  END IF
172  10 CONTINUE
173  row = mod( row+1, nprow )
174  col = mod( col+1, npcol )
175  20 CONTINUE
176 *
177  mykl = ctot( mycol, 1 ) + ctot( mycol, 2 ) + ctot( mycol, 3 )
178  klr = mykl / nprow
179  IF( myrow.EQ.drow ) THEN
180  myklr = klr + mod( mykl, nprow )
181  ELSE
182  myklr = klr
183  END IF
184  pdc = 1
185  col = dcol
186  30 CONTINUE
187  IF( mycol.NE.col ) THEN
188  pdc = pdc + ctot( col, 1 ) + ctot( col, 2 ) + ctot( col, 3 )
189  col = mod( col+1, npcol )
190  GO TO 30
191  END IF
192  pdr = pdc
193  kl = klr + mod( mykl, nprow )
194  row = drow
195  40 CONTINUE
196  IF( myrow.NE.row ) THEN
197  pdr = pdr + kl
198  kl = klr
199  row = mod( row+1, nprow )
200  GO TO 40
201  END IF
202 *
203  DO 50 i = 1, k
204  dlamda( i ) = dlamc3( dlamda( i ), dlamda( i ) ) - dlamda( i )
205  z( i ) = one
206  50 CONTINUE
207  IF( myklr.GT.0 ) THEN
208  kk = pdr
209  DO 80 i = 1, myklr
210  CALL dlaed4( k, kk, dlamda, w, buf, rho, buf( k+i ), iinfo )
211  IF( iinfo.NE.0 ) THEN
212  info = kk
213  END IF
214 *
215 * ..Compute part of z
216 *
217  DO 60 j = 1, kk - 1
218  z( j ) = z( j )*( buf( j ) /
219  $ ( dlamda( j )-dlamda( kk ) ) )
220  60 CONTINUE
221  z( kk ) = z( kk )*buf( kk )
222  DO 70 j = kk + 1, k
223  z( j ) = z( j )*( buf( j ) /
224  $ ( dlamda( j )-dlamda( kk ) ) )
225  70 CONTINUE
226  kk = kk + 1
227  80 CONTINUE
228 *
229  IF( myrow.NE.drow ) THEN
230  CALL dcopy( k, z, 1, buf, 1 )
231  CALL dgesd2d( ictxt, k+myklr, 1, buf, k+myklr, drow, mycol )
232  ELSE
233  ipd = 2*k + 1
234  CALL dcopy( myklr, buf( k+1 ), 1, buf( ipd ), 1 )
235  IF( klr.GT.0 ) THEN
236  ipd = myklr + ipd
237  row = mod( drow+1, nprow )
238  DO 100 i = 1, nprow - 1
239  CALL dgerv2d( ictxt, k+klr, 1, buf, k+klr, row,
240  $ mycol )
241  CALL dcopy( klr, buf( k+1 ), 1, buf( ipd ), 1 )
242  DO 90 j = 1, k
243  z( j ) = z( j )*buf( j )
244  90 CONTINUE
245  ipd = ipd + klr
246  row = mod( row+1, nprow )
247  100 CONTINUE
248  END IF
249  END IF
250  END IF
251 *
252  IF( myrow.EQ.drow ) THEN
253  IF( mycol.NE.dcol .AND. mykl.NE.0 ) THEN
254  CALL dcopy( k, z, 1, buf, 1 )
255  CALL dcopy( mykl, buf( 2*k+1 ), 1, buf( k+1 ), 1 )
256  CALL dgesd2d( ictxt, k+mykl, 1, buf, k+mykl, myrow, dcol )
257  ELSE IF( mycol.EQ.dcol ) THEN
258  ipd = 2*k + 1
259  col = dcol
260  kl = mykl
261  DO 120 i = 1, npcol - 1
262  ipd = ipd + kl
263  col = mod( col+1, npcol )
264  kl = ctot( col, 1 ) + ctot( col, 2 ) + ctot( col, 3 )
265  IF( kl.NE.0 ) THEN
266  CALL dgerv2d( ictxt, k+kl, 1, buf, k+kl, myrow, col )
267  CALL dcopy( kl, buf( k+1 ), 1, buf( ipd ), 1 )
268  DO 110 j = 1, k
269  z( j ) = z( j )*buf( j )
270  110 CONTINUE
271  END IF
272  120 CONTINUE
273  DO 130 i = 1, k
274  z( i ) = sign( sqrt( -z( i ) ), w( i ) )
275  130 CONTINUE
276 *
277  END IF
278  END IF
279 *
280 * Diffusion
281 *
282  IF( myrow.EQ.drow .AND. mycol.EQ.dcol ) THEN
283  CALL dcopy( k, z, 1, buf, 1 )
284  CALL dcopy( k, buf( 2*k+1 ), 1, buf( k+1 ), 1 )
285  CALL dgebs2d( ictxt, 'All', ' ', 2*k, 1, buf, 2*k )
286  ELSE
287  CALL dgebr2d( ictxt, 'All', ' ', 2*k, 1, buf, 2*k, drow, dcol )
288  CALL dcopy( k, buf, 1, z, 1 )
289  END IF
290 *
291 * Copy of D at the good place
292 *
293  klc = 0
294  klr = 0
295  DO 140 i = 1, k
296  gi = indx( i )
297  d( gi ) = buf( k+i )
298  col = indcol( gi )
299  row = indrow( gi )
300  IF( col.EQ.mycol ) THEN
301  klc = klc + 1
302  indxc( klc ) = i
303  END IF
304  IF( row.EQ.myrow ) THEN
305  klr = klr + 1
306  indxr( klr ) = i
307  END IF
308  140 CONTINUE
309 *
310 * Compute eigenvectors of the modified rank-1 modification.
311 *
312  IF( mykl.NE.0 ) THEN
313  DO 180 j = 1, mykl
314  kk = indxc( j )
315  ju = indx( kk )
316  jju = indxg2l( ju, nb, j, j, npcol )
317  CALL dlaed4( k, kk, dlamda, w, buf, rho, aux, iinfo )
318  IF( iinfo.NE.0 ) THEN
319  info = kk
320  END IF
321  IF( k.EQ.1 .OR. k.EQ.2 ) THEN
322  DO 150 i = 1, klr
323  kk = indxr( i )
324  iu = indx( kk )
325  iiu = indxg2l( iu, nb, j, j, nprow )
326  u( iiu, jju ) = buf( kk )
327  150 CONTINUE
328  GO TO 180
329  END IF
330 *
331  DO 160 i = 1, k
332  buf( i ) = z( i ) / buf( i )
333  160 CONTINUE
334  temp = dnrm2( k, buf, 1 )
335  DO 170 i = 1, klr
336  kk = indxr( i )
337  iu = indx( kk )
338  iiu = indxg2l( iu, nb, j, j, nprow )
339  u( iiu, jju ) = buf( kk ) / temp
340  170 CONTINUE
341 *
342  180 CONTINUE
343  END IF
344 *
345  190 CONTINUE
346 *
347  RETURN
348 *
349 * End of PDLAED3
350 *
351  END
pdlaed3
subroutine pdlaed3(ICTXT, K, N, NB, D, DROW, DCOL, RHO, DLAMDA, W, Z, U, LDU, BUF, INDX, INDCOL, INDROW, INDXR, INDXC, CTOT, NPCOL, INFO)
Definition: pdlaed3.f:4