ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
slarrf2.f
Go to the documentation of this file.
1  SUBROUTINE slarrf2( N, D, L, LD, CLSTRT, CLEND,
2  $ CLMID1, CLMID2, W, WGAP, WERR, TRYMID,
3  $ SPDIAM, CLGAPL, CLGAPR, PIVMIN, SIGMA,
4  $ DPLUS, LPLUS, WORK, INFO )
5 *
6 * -- ScaLAPACK computational routine (version 2.0) --
7 * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
8 * July 4, 2010
9 *
10  IMPLICIT NONE
11 *
12 * .. Scalar Arguments ..
13  INTEGER CLSTRT, CLEND, CLMID1, CLMID2, INFO, N
14  REAL CLGAPL, CLGAPR, PIVMIN, SIGMA, SPDIAM
15  LOGICAL TRYMID
16 * ..
17 * .. Array Arguments ..
18  REAL D( * ), DPLUS( * ), L( * ), LD( * ),
19  $ LPLUS( * ), W( * ), WGAP( * ), WERR( * ), WORK( * )
20 * ..
21 *
22 * Purpose
23 * =======
24 *
25 * Given the initial representation L D L^T and its cluster of close
26 * eigenvalues (in a relative measure), W( CLSTRT ), W( CLSTRT+1 ), ...
27 * W( CLEND ), SLARRF2 finds a new relatively robust representation
28 * L D L^T - SIGMA I = L(+) D(+) L(+)^T such that at least one of the
29 * eigenvalues of L(+) D(+) L(+)^T is relatively isolated.
30 *
31 * This is an enhanced version of SLARRF that also tries shifts in
32 * the middle of the cluster, should there be a large gap, in order to
33 * break large clusters into at least two pieces.
34 *
35 * Arguments
36 * =========
37 *
38 * N (input) INTEGER
39 * The order of the matrix (subblock, if the matrix splitted).
40 *
41 * D (input) REAL array, dimension (N)
42 * The N diagonal elements of the diagonal matrix D.
43 *
44 * L (input) REAL array, dimension (N-1)
45 * The (N-1) subdiagonal elements of the unit bidiagonal
46 * matrix L.
47 *
48 * LD (input) REAL array, dimension (N-1)
49 * The (N-1) elements L(i)*D(i).
50 *
51 * CLSTRT (input) INTEGER
52 * The index of the first eigenvalue in the cluster.
53 *
54 * CLEND (input) INTEGER
55 * The index of the last eigenvalue in the cluster.
56 *
57 * CLMID1,2(input) INTEGER
58 * The index of a middle eigenvalue pair with large gap
59 *
60 * W (input) REAL array, dimension >= (CLEND-CLSTRT+1)
61 * The eigenvalue APPROXIMATIONS of L D L^T in ascending order.
62 * W( CLSTRT ) through W( CLEND ) form the cluster of relatively
63 * close eigenalues.
64 *
65 * WGAP (input/output) REAL array, dimension >= (CLEND-CLSTRT+1)
66 * The separation from the right neighbor eigenvalue in W.
67 *
68 * WERR (input) REAL array, dimension >= (CLEND-CLSTRT+1)
69 * WERR contain the semiwidth of the uncertainty
70 * interval of the corresponding eigenvalue APPROXIMATION in W
71 *
72 * SPDIAM (input) estimate of the spectral diameter obtained from the
73 * Gerschgorin intervals
74 *
75 * CLGAPL, CLGAPR (input) absolute gap on each end of the cluster.
76 * Set by the calling routine to protect against shifts too close
77 * to eigenvalues outside the cluster.
78 *
79 * PIVMIN (input) DOUBLE PRECISION
80 * The minimum pivot allowed in the sturm sequence.
81 *
82 * SIGMA (output) REAL
83 * The shift used to form L(+) D(+) L(+)^T.
84 *
85 * DPLUS (output) REAL array, dimension (N)
86 * The N diagonal elements of the diagonal matrix D(+).
87 *
88 * LPLUS (output) REAL array, dimension (N-1)
89 * The first (N-1) elements of LPLUS contain the subdiagonal
90 * elements of the unit bidiagonal matrix L(+).
91 *
92 * WORK (workspace) REAL array, dimension (2*N)
93 * Workspace.
94 *
95 * Further Details
96 * ===============
97 *
98 * Based on contributions by
99 * Beresford Parlett, University of California, Berkeley, USA
100 * Jim Demmel, University of California, Berkeley, USA
101 * Inderjit Dhillon, University of Texas, Austin, USA
102 * Osni Marques, LBNL/NERSC, USA
103 * Christof Voemel, University of California, Berkeley, USA
104 *
105 * =====================================================================
106 *
107 * .. Parameters ..
108  REAL FOUR, MAXGROWTH1, MAXGROWTH2, ONE, QUART, TWO
109  PARAMETER ( ONE = 1.0e0, two = 2.0e0,
110  $ four = 4.0e0, quart = 0.25e0,
111  $ maxgrowth1 = 8.e0,
112  $ maxgrowth2 = 8.e0 )
113 * ..
114 * .. Local Scalars ..
115  LOGICAL DORRR1, NOFAIL, SAWNAN1, SAWNAN2, TRYRRR1
116  INTEGER BI,I,J,KTRY,KTRYMAX,SLEFT,SRIGHT,SMID,SHIFT
117  PARAMETER ( KTRYMAX = 1, smid =0, sleft = 1, sright = 2 )
118 
119 * DSTQDS loops will be blocked to detect NaNs earlier if they occur
120  INTEGER BLKLEN
121  PARAMETER ( BLKLEN = 512 )
122 
123 
124  REAL AVGAP, BESTSHIFT, CLWDTH, EPS, FACT, FAIL,
125  $ FAIL2, GROWTHBOUND, LDELTA, LDMAX, LEASTGROWTH,
126  $ LSIGMA, MAX1, MAX2, MINGAP, MSIGMA1, MSIGMA2,
127  $ oldp, prod, rdelta, rdmax, rrr1, rrr2, rsigma,
128  $ s, tmp, znm2
129 * ..
130 * .. External Functions ..
131  LOGICAL SISNAN
132  REAL SLAMCH
133  EXTERNAL SISNAN, SLAMCH
134 * ..
135 * .. External Subroutines ..
136  EXTERNAL scopy
137 * ..
138 * .. Intrinsic Functions ..
139  INTRINSIC abs
140 * ..
141 * .. Executable Statements ..
142 *
143  info = 0
144  fact = real(2**ktrymax)
145  eps = slamch( 'Precision' )
146  shift = 0
147 
148 * Decide whether the code should accept the best among all
149 * representations despite large element growth or signal INFO=1
150  nofail = .true.
151 *
152 
153 * Compute the average gap length of the cluster
154  clwdth = abs(w(clend)-w(clstrt)) + werr(clend) + werr(clstrt)
155  avgap = clwdth / real(clend-clstrt)
156  mingap = min(clgapl, clgapr)
157 
158 * Initial values for shifts to both ends of cluster
159  lsigma = min(w( clstrt ),w( clend )) - werr( clstrt )
160  rsigma = max(w( clstrt ),w( clend )) + werr( clend )
161  msigma1 = w( clmid1 ) + werr( clmid1 )
162  msigma2 = w( clmid2 ) - werr( clmid2 )
163 
164 * Use a small fudge to make sure that we really shift to the outside
165  lsigma = lsigma - abs(lsigma)* two * eps
166  rsigma = rsigma + abs(rsigma)* two * eps
167 
168 * Compute upper bounds for how much to back off the initial shifts
169  ldmax = quart * mingap + two * pivmin
170  rdmax = quart * mingap + two * pivmin
171 
172  ldelta = max(avgap,wgap( clstrt ))/fact
173  rdelta = max(avgap,wgap( clend-1 ))/fact
174 *
175 * Initialize the record of the best representation found
176 *
177  s = slamch( 'S' )
178  leastgrowth = one / s
179  fail = real(n-1)*mingap/(spdiam*eps)
180  fail2 = real(n-1)*mingap/(spdiam*sqrt(eps))
181  growthbound = maxgrowth1*spdiam
182 
183 *
184 * Set default best shift
185 *
186  bestshift = lsigma
187 
188 
189  IF(.NOT.trymid) GOTO 4
190 *
191 * Try shifts in the middle
192 *
193  shift = smid
194 
195  DO 3 j=1,2
196  sawnan1 = .false.
197  IF(j.EQ.1) THEN
198 * Try left middle point
199  sigma = msigma1
200  ELSE
201 * Try left middle point
202  sigma = msigma2
203  ENDIF
204 
205  s = -sigma
206  dplus( 1 ) = d( 1 ) + s
207  max1 = abs( dplus( 1 ) )
208  DO 2 bi = 1, n-1, blklen
209  DO 1 i = bi, min( bi+blklen-1, n-1)
210  lplus( i ) = ld( i ) / dplus( i )
211  s = s*lplus( i )*l( i ) - sigma
212  dplus( i+1 ) = d( i+1 ) + s
213  max1 = max( max1,abs(dplus(i+1)) )
214  1 CONTINUE
215  sawnan1=sawnan1 .OR. sisnan(max1)
216  IF (sawnan1) GOTO 3
217  2 CONTINUE
218 
219  IF( .NOT.sawnan1 ) THEN
220  IF( max1.LE.growthbound ) THEN
221  GOTO 100
222  ELSE IF( max1.LE.leastgrowth ) THEN
223  leastgrowth = max1
224  bestshift = sigma
225  ENDIF
226  ENDIF
227  3 CONTINUE
228 
229 
230  4 CONTINUE
231 *
232 * Shifts in the middle not tried or not succeeded
233 * Find best shift on the outside of the cluster
234 *
235 * while (KTRY <= KTRYMAX)
236  ktry = 0
237 *
238 *
239 *
240  5 CONTINUE
241 
242 * Compute element growth when shifting to both ends of the cluster
243 * accept shift if there is no element growth at one of the two ends
244 
245 * Left end
246  sawnan1 = .false.
247  s = -lsigma
248  dplus( 1 ) = d( 1 ) + s
249  max1 = abs( dplus( 1 ) )
250  DO 12 bi = 1, n-1, blklen
251  DO 11 i = bi, min( bi+blklen-1, n-1)
252  lplus( i ) = ld( i ) / dplus( i )
253  s = s*lplus( i )*l( i ) - lsigma
254  dplus( i+1 ) = d( i+1 ) + s
255  max1 = max( max1,abs(dplus(i+1)) )
256  11 CONTINUE
257  sawnan1=sawnan1 .OR. sisnan(max1)
258  IF (sawnan1) GOTO 13
259  12 CONTINUE
260  IF( .NOT.sawnan1 ) THEN
261  IF( max1.LE.growthbound ) THEN
262  sigma = lsigma
263  shift = sleft
264  GOTO 100
265  ELSE IF( max1.LE.leastgrowth ) THEN
266  leastgrowth = max1
267  bestshift = lsigma
268  ENDIF
269  ENDIF
270  13 CONTINUE
271 
272 * Right end
273  sawnan2 = .false.
274  s = -rsigma
275  work( 1 ) = d( 1 ) + s
276  max2 = abs( work( 1 ) )
277  DO 22 bi = 1, n-1, blklen
278  DO 21 i = bi, min( bi+blklen-1, n-1)
279  work( n+i ) = ld( i ) / work( i )
280  s = s*work( n+i )*l( i ) - rsigma
281  work( i+1 ) = d( i+1 ) + s
282  max2 = max( max2,abs(work(i+1)) )
283  21 CONTINUE
284  sawnan2=sawnan2 .OR. sisnan(max2)
285  IF (sawnan2) GOTO 23
286  22 CONTINUE
287  IF( .NOT.sawnan2 ) THEN
288  IF( max2.LE.growthbound ) THEN
289  sigma = rsigma
290  shift = sright
291  GOTO 100
292  ELSE IF( max2.LE.leastgrowth ) THEN
293  leastgrowth = max2
294  bestshift = rsigma
295  ENDIF
296  ENDIF
297  23 CONTINUE
298 
299 * If we are at this point, both shifts led to too much element growth
300 
301  50 CONTINUE
302 
303  IF (ktry.LT.ktrymax) THEN
304 * If we are here, both shifts failed also the RRR test.
305 * Back off to the outside
306  lsigma = max( lsigma - ldelta,
307  $ lsigma - ldmax)
308  rsigma = min( rsigma + rdelta,
309  $ rsigma + rdmax )
310  ldelta = two * ldelta
311  rdelta = two * rdelta
312 * Ensure that we do not back off too much of the initial shifts
313  ldelta = min(ldmax,ldelta)
314  rdelta = min(rdmax,rdelta)
315  ktry = ktry + 1
316  GOTO 5
317  ELSE
318 * None of the representations investigated satisfied our
319 * criteria. Take the best one we found.
320  IF((leastgrowth.LT.fail).OR.nofail) THEN
321  lsigma = bestshift
322  sawnan1 = .false.
323  s = -lsigma
324  dplus( 1 ) = d( 1 ) + s
325  DO 6 i = 1, n - 1
326  lplus( i ) = ld( i ) / dplus( i )
327  s = s*lplus( i )*l( i ) - lsigma
328  dplus( i+1 ) = d( i+1 ) + s
329  IF(abs(dplus(i+1)).LT.pivmin) THEN
330  dplus(i+1) = -pivmin
331  ENDIF
332  6 CONTINUE
333  sigma = lsigma
334  shift = sleft
335  GOTO 100
336  ELSE
337  info = 1
338  RETURN
339  ENDIF
340  END IF
341 
342  100 CONTINUE
343  IF (shift.EQ.sleft .OR. shift.EQ.smid ) THEN
344  ELSEIF (shift.EQ.sright) THEN
345 * store new L and D back into DPLUS, LPLUS
346  CALL scopy( n, work, 1, dplus, 1 )
347  CALL scopy( n-1, work(n+1), 1, lplus, 1 )
348  ENDIF
349 
350  RETURN
351 *
352 * End of SLARRF2
353 *
354  END
max
#define max(A, B)
Definition: pcgemr.c:180
slarrf2
subroutine slarrf2(N, D, L, LD, CLSTRT, CLEND, CLMID1, CLMID2, W, WGAP, WERR, TRYMID, SPDIAM, CLGAPL, CLGAPR, PIVMIN, SIGMA, DPLUS, LPLUS, WORK, INFO)
Definition: slarrf2.f:5
min
#define min(A, B)
Definition: pcgemr.c:181