ScaLAPACK 2.1  2.1 ScaLAPACK: Scalable Linear Algebra PACKage
dlar1va.f
Go to the documentation of this file.
1  SUBROUTINE dlar1va(N, B1, BN, LAMBDA, D, L, LD, LLD,
2  \$ PIVMIN, GAPTOL, Z, WANTNC, NEGCNT, ZTZ, MINGMA,
3  \$ R, ISUPPZ, NRMINV, RESID, RQCORR, WORK )
4 *
5  IMPLICIT NONE
6 *
7 * -- ScaLAPACK computational routine (version 2.0) --
8 * Univ. of Tennessee, Univ. of California Berkeley, Univ of Colorado Denver
9 * July 4, 2010
10 *
11 * .. Scalar Arguments ..
12  LOGICAL WANTNC
13  INTEGER B1, BN, N, NEGCNT, R
14  DOUBLE PRECISION GAPTOL, LAMBDA, MINGMA, NRMINV, PIVMIN, RESID,
15  \$ rqcorr, ztz
16 * ..
17 * .. Array Arguments ..
18  INTEGER ISUPPZ( * )
19  DOUBLE PRECISION D( * ), L( * ), LD( * ), LLD( * ),
20  \$ work( * )
21  DOUBLE PRECISION Z( * )
22 *
23 * Purpose
24 * =======
25 *
26 * DLAR1VA computes the (scaled) r-th column of the inverse of
27 * the sumbmatrix in rows B1 through BN of the tridiagonal matrix
28 * L D L^T - sigma I. When sigma is close to an eigenvalue, the
29 * computed vector is an accurate eigenvector. Usually, r corresponds
30 * to the index where the eigenvector is largest in magnitude.
31 * The following steps accomplish this computation :
32 * (a) Stationary qd transform, L D L^T - sigma I = L(+) D(+) L(+)^T,
33 * (b) Progressive qd transform, L D L^T - sigma I = U(-) D(-) U(-)^T,
34 * (c) Computation of the diagonal elements of the inverse of
35 * L D L^T - sigma I by combining the above transforms, and choosing
36 * r as the index where the diagonal of the inverse is (one of the)
37 * largest in magnitude.
38 * (d) Computation of the (scaled) r-th column of the inverse using the
39 * twisted factorization obtained by combining the top part of the
40 * the stationary and the bottom part of the progressive transform.
41 *
42 * Arguments
43 * =========
44 *
45 * N (input) INTEGER
46 * The order of the matrix L D L^T.
47 *
48 * B1 (input) INTEGER
49 * First index of the submatrix of L D L^T.
50 *
51 * BN (input) INTEGER
52 * Last index of the submatrix of L D L^T.
53 *
54 * LAMBDA (input) DOUBLE PRECISION
55 * The shift. In order to compute an accurate eigenvector,
56 * LAMBDA should be a good approximation to an eigenvalue
57 * of L D L^T.
58 *
59 * L (input) DOUBLE PRECISION array, dimension (N-1)
60 * The (n-1) subdiagonal elements of the unit bidiagonal matrix
61 * L, in elements 1 to N-1.
62 *
63 * D (input) DOUBLE PRECISION array, dimension (N)
64 * The n diagonal elements of the diagonal matrix D.
65 *
66 * LD (input) DOUBLE PRECISION array, dimension (N-1)
67 * The n-1 elements L(i)*D(i).
68 *
69 * LLD (input) DOUBLE PRECISION array, dimension (N-1)
70 * The n-1 elements L(i)*L(i)*D(i).
71 *
72 * PIVMIN (input) DOUBLE PRECISION
73 * The minimum pivot in the Sturm sequence.
74 *
75 * GAPTOL (input) DOUBLE PRECISION
76 * Tolerance that indicates when eigenvector entries are negligible
77 * w.r.t. their contribution to the residual.
78 *
79 * Z (input/output) DOUBLE PRECISION array, dimension (N)
80 * On input, all entries of Z must be set to 0.
81 * On output, Z contains the (scaled) r-th column of the
82 * inverse. The scaling is such that Z(R) equals 1.
83 *
84 * WANTNC (input) LOGICAL
85 * Specifies whether NEGCNT has to be computed.
86 *
87 * NEGCNT (output) INTEGER
88 * If WANTNC is .TRUE. then NEGCNT = the number of pivots < pivmin
89 * in the matrix factorization L D L^T, and NEGCNT = -1 otherwise.
90 *
91 * ZTZ (output) DOUBLE PRECISION
92 * The square of the 2-norm of Z.
93 *
94 * MINGMA (output) DOUBLE PRECISION
95 * The reciprocal of the largest (in magnitude) diagonal
96 * element of the inverse of L D L^T - sigma I.
97 *
98 * R (input/output) INTEGER
99 * The twist index for the twisted factorization used to
100 * compute Z.
101 * On input, 0 <= R <= N. If R is input as 0, R is set to
102 * the index where (L D L^T - sigma I)^{-1} is largest
103 * in magnitude. If 1 <= R <= N, R is unchanged.
104 * On output, R contains the twist index used to compute Z.
105 * Ideally, R designates the position of the maximum entry in the
106 * eigenvector.
107 *
108 * ISUPPZ (output) INTEGER array, dimension (2)
109 * The support of the vector in Z, i.e., the vector Z is
110 * nonzero only in elements ISUPPZ(1) through ISUPPZ( 2 ).
111 *
112 * NRMINV (output) DOUBLE PRECISION
113 * NRMINV = 1/SQRT( ZTZ )
114 *
115 * RESID (output) DOUBLE PRECISION
116 * The residual of the FP vector.
117 * RESID = ABS( MINGMA )/SQRT( ZTZ )
118 *
119 * RQCORR (output) DOUBLE PRECISION
120 * The Rayleigh Quotient correction to LAMBDA.
121 * RQCORR = MINGMA*TMP
122 *
123 * WORK (workspace) DOUBLE PRECISION array, dimension (4*N)
124 *
125 * Further Details
126 * ===============
127 *
128 * Based on contributions by
129 * Beresford Parlett, University of California, Berkeley, USA
130 * Jim Demmel, University of California, Berkeley, USA
131 * Inderjit Dhillon, University of Texas, Austin, USA
132 * Osni Marques, LBNL/NERSC, USA
133 * Christof Voemel, University of California, Berkeley, USA
134 *
135 * =====================================================================
136 *
137 * .. Parameters ..
138  INTEGER BLKLEN
139  PARAMETER ( BLKLEN = 16 )
140  DOUBLE PRECISION ZERO, ONE
141  parameter( zero = 0.0d0, one = 1.0d0 )
142
143 * ..
144 * .. Local Scalars ..
145  LOGICAL SAWNAN1, SAWNAN2
146  INTEGER BI, I, INDLPL, INDP, INDS, INDUMN, NB, NEG1,
147  \$ neg2, nx, r1, r2, to
148  DOUBLE PRECISION ABSZCUR, ABSZPREV, DMINUS, DPLUS, EPS,
149  \$ S, TMP, ZPREV
150 * ..
151 * .. External Functions ..
152  LOGICAL DISNAN
153  DOUBLE PRECISION DLAMCH
154  EXTERNAL disnan, dlamch
155 * ..
156 * .. Intrinsic Functions ..
157  INTRINSIC abs, max, min, dble
158 * ..
159 * .. Executable Statements ..
160 *
161  eps = dlamch( 'Precision' )
162
163
164  IF( r.EQ.0 ) THEN
165  r1 = b1
166  r2 = bn
167  ELSE
168  r1 = r
169  r2 = r
170  END IF
171
172 * Storage for LPLUS
173  indlpl = 0
174 * Storage for UMINUS
175  indumn = n
176  inds = 2*n + 1
177  indp = 3*n + 1
178
179  IF( b1.EQ.1 ) THEN
180  work( inds ) = zero
181  ELSE
182  work( inds+b1-1 ) = lld( b1-1 )
183  END IF
184
185 *
186 * Compute the stationary transform (using the differential form)
187 * until the index R2.
188 *
189  sawnan1 = .false.
190  neg1 = 0
191  s = work( inds+b1-1 ) - lambda
192  DO 50 i = b1, r1 - 1
193  dplus = d( i ) + s
194  work( indlpl+i ) = ld( i ) / dplus
195  IF(dplus.LT.zero) neg1 = neg1 + 1
196  work( inds+i ) = s*work( indlpl+i )*l( i )
197  s = work( inds+i ) - lambda
198  50 CONTINUE
199  sawnan1 = disnan( s )
200  IF( sawnan1 ) GOTO 60
201  DO 51 i = r1, r2 - 1
202  dplus = d( i ) + s
203  work( indlpl+i ) = ld( i ) / dplus
204  work( inds+i ) = s*work( indlpl+i )*l( i )
205  s = work( inds+i ) - lambda
206  51 CONTINUE
207  sawnan1 = disnan( s )
208 *
209  60 CONTINUE
210  IF( sawnan1 ) THEN
211 * Runs a slower version of the above loop if a NaN is detected
212  neg1 = 0
213  s = work( inds+b1-1 ) - lambda
214  DO 70 i = b1, r1 - 1
215  dplus = d( i ) + s
216  IF(abs(dplus).LT.pivmin) dplus = -pivmin
217  work( indlpl+i ) = ld( i ) / dplus
218  IF(dplus.LT.zero) neg1 = neg1 + 1
219  work( inds+i ) = s*work( indlpl+i )*l( i )
220  IF( work( indlpl+i ).EQ.zero )
221  \$ work( inds+i ) = lld( i )
222  s = work( inds+i ) - lambda
223  70 CONTINUE
224  DO 71 i = r1, r2 - 1
225  dplus = d( i ) + s
226  IF(abs(dplus).LT.pivmin) dplus = -pivmin
227  work( indlpl+i ) = ld( i ) / dplus
228  work( inds+i ) = s*work( indlpl+i )*l( i )
229  IF( work( indlpl+i ).EQ.zero )
230  \$ work( inds+i ) = lld( i )
231  s = work( inds+i ) - lambda
232  71 CONTINUE
233  END IF
234 *
235 * Compute the progressive transform (using the differential form)
236 * until the index R1
237 *
238  sawnan2 = .false.
239  neg2 = 0
240  work( indp+bn-1 ) = d( bn ) - lambda
241  DO 80 i = bn - 1, r1, -1
242  dminus = lld( i ) + work( indp+i )
243  tmp = d( i ) / dminus
244  IF(dminus.LT.zero) neg2 = neg2 + 1
245  work( indumn+i ) = l( i )*tmp
246  work( indp+i-1 ) = work( indp+i )*tmp - lambda
247  80 CONTINUE
248  tmp = work( indp+r1-1 )
249  sawnan2 = disnan( tmp )
250  IF( sawnan2 ) THEN
251 * Runs a slower version of the above loop if a NaN is detected
252  neg2 = 0
253  DO 100 i = bn-1, r1, -1
254  dminus = lld( i ) + work( indp+i )
255  IF(abs(dminus).LT.pivmin) dminus = -pivmin
256  tmp = d( i ) / dminus
257  IF(dminus.LT.zero) neg2 = neg2 + 1
258  work( indumn+i ) = l( i )*tmp
259  work( indp+i-1 ) = work( indp+i )*tmp - lambda
260  IF( tmp.EQ.zero )
261  \$ work( indp+i-1 ) = d( i ) - lambda
262  100 CONTINUE
263  END IF
264 *
265 * Find the index (from R1 to R2) of the largest (in magnitude)
266 * diagonal element of the inverse
267 *
268  mingma = work( inds+r1-1 ) + work( indp+r1-1 )
269  IF( mingma.LT.zero ) neg1 = neg1 + 1
270  IF( wantnc ) THEN
271  negcnt = neg1 + neg2
272  ELSE
273  negcnt = -1
274  ENDIF
275  IF( abs(mingma).EQ.zero )
276  \$ mingma = eps*work( inds+r1-1 )
277  r = r1
278  DO 110 i = r1, r2 - 1
279  tmp = work( inds+i ) + work( indp+i )
280  IF( tmp.EQ.zero )
281  \$ tmp = eps*work( inds+i )
282  IF( abs( tmp ).LE.abs( mingma ) ) THEN
283  mingma = tmp
284  r = i + 1
285  END IF
286  110 CONTINUE
287 *
288 * Compute the FP vector: solve N^T v = e_r
289 *
290  isuppz( 1 ) = b1
291  isuppz( 2 ) = bn
292  z( r ) = one
293  ztz = one
294 *
295 * Compute the FP vector upwards from R
296 *
297  nb = int((r-b1)/blklen)
298  nx = r-nb*blklen
299  IF( .NOT.sawnan1 ) THEN
300  DO 210 bi = r-1, nx, -blklen
301  to = bi-blklen+1
302  DO 205 i = bi, to, -1
303  z( i ) = -( work(indlpl+i)*z(i+1) )
304  ztz = ztz + z( i )*z( i )
305  205 CONTINUE
306  IF( abs(z(to)).LT.eps .AND.
307  \$ abs(z(to+1)).LT.eps ) THEN
308  isuppz(1) = to
309  GOTO 220
310  ENDIF
311  210 CONTINUE
312  DO 215 i = nx-1, b1, -1
313  z( i ) = -( work(indlpl+i)*z(i+1) )
314  ztz = ztz + z( i )*z( i )
315  215 CONTINUE
316  220 CONTINUE
317  ELSE
318 * Run slower loop if NaN occurred.
319  DO 230 bi = r-1, nx, -blklen
320  to = bi-blklen+1
321  DO 225 i = bi, to, -1
322  IF( z( i+1 ).EQ.zero ) THEN
323  z( i ) = -( ld( i+1 ) / ld( i ) )*z( i+2 )
324  ELSE
325  z( i ) = -( work( indlpl+i )*z( i+1 ) )
326  END IF
327  ztz = ztz + z( i )*z( i )
328  225 CONTINUE
329  IF( abs(z(to)).LT.eps .AND.
330  \$ abs(z(to+1)).LT.eps ) THEN
331  isuppz(1) = to
332  GOTO 240
333  ENDIF
334  230 CONTINUE
335  DO 235 i = nx-1, b1, -1
336  IF( z( i+1 ).EQ.zero ) THEN
337  z( i ) = -( ld( i+1 ) / ld( i ) )*z( i+2 )
338  ELSE
339  z( i ) = -( work( indlpl+i )*z( i+1 ) )
340  END IF
341  ztz = ztz + z( i )*z( i )
342  235 CONTINUE
343  240 CONTINUE
344  ENDIF
345  DO 245 i= b1, (isuppz(1)-1)
346  z(i) = zero
347  245 CONTINUE
348
349 * Compute the FP vector downwards from R in blocks of size BLKLEN
350  IF( .NOT.sawnan2 ) THEN
351  DO 260 bi = r+1, bn, blklen
352  to = bi+blklen-1
353  IF ( to.LE.bn ) THEN
354  DO 250 i = bi, to
355  z(i) = -(work(indumn+i-1)*z(i-1))
356  ztz = ztz + z( i )*z( i )
357  250 CONTINUE
358  IF( abs(z(to)).LE.eps .AND.
359  \$ abs(z(to-1)).LE.eps ) THEN
360  isuppz(2) = to
361  GOTO 265
362  ENDIF
363  ELSE
364  DO 255 i = bi, bn
365  z(i) = -(work(indumn+i-1)*z(i-1))
366  ztz = ztz + z( i )*z( i )
367  255 CONTINUE
368  ENDIF
369  260 CONTINUE
370  265 CONTINUE
371  ELSE
372 * Run slower loop if NaN occurred.
373  DO 280 bi = r+1, bn, blklen
374  to = bi+blklen-1
375  IF ( to.LE.bn ) THEN
376  DO 270 i = bi, to
377  zprev = z(i-1)
378  abszprev = abs(zprev)
379  IF( zprev.NE.zero ) THEN
380  z(i)= -(work(indumn+i-1)*zprev)
381  ELSE
382  z(i)= -(ld(i-2)/ld(i-1))*z(i-2)
383  END IF
384  abszcur = abs(z(i))
385  ztz = ztz + abszcur**2
386  270 CONTINUE
387  IF( abszcur.LT.eps .AND.
388  \$ abszprev.LT.eps ) THEN
389  isuppz(2) = i
390  GOTO 285
391  ENDIF
392  ELSE
393  DO 275 i = bi, bn
394  zprev = z(i-1)
395  abszprev = abs(zprev)
396  IF( zprev.NE.zero ) THEN
397  z(i)= -(work(indumn+i-1)*zprev)
398  ELSE
399  z(i)= -(ld(i-2)/ld(i-1))*z(i-2)
400  END IF
401  abszcur = abs(z(i))
402  ztz = ztz + abszcur**2
403  275 CONTINUE
404  ENDIF
405  280 CONTINUE
406  285 CONTINUE
407  END IF
408  DO 290 i= isuppz(2)+1,bn
409  z(i) = zero
410  290 CONTINUE
411 *
412 * Compute quantities for convergence test
413 *
414  tmp = one / ztz
415  nrminv = sqrt( tmp )
416  resid = abs( mingma )*nrminv
417  rqcorr = mingma*tmp
418 *
419  RETURN
420 *
421 * End of DLAR1VA
422 *
423  END
max
#define max(A, B)
Definition: pcgemr.c:180
dlar1va
subroutine dlar1va(N, B1, BN, LAMBDA, D, L, LD, LLD, PIVMIN, GAPTOL, Z, WANTNC, NEGCNT, ZTZ, MINGMA, R, ISUPPZ, NRMINV, RESID, RQCORR, WORK)
Definition: dlar1va.f:4
min
#define min(A, B)
Definition: pcgemr.c:181