ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
slarrb2.f
Go to the documentation of this file.
1  SUBROUTINE slarrb2( N, D, LLD, IFIRST, ILAST, RTOL1,
2  $ RTOL2, OFFSET, W, WGAP, WERR, WORK, IWORK,
3  $ PIVMIN, LGPVMN, LGSPDM, TWIST, INFO )
4 *
5 * -- ScaLAPACK auxiliary routine (version 2.0) --
6 * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
7 * July 4, 2010
8 *
9  IMPLICIT NONE
10 *
11 * .. Scalar Arguments ..
12  INTEGER IFIRST, ILAST, INFO, N, OFFSET, TWIST
13  REAL LGPVMN, LGSPDM, PIVMIN,
14  $ rtol1, rtol2
15 * ..
16 * .. Array Arguments ..
17  INTEGER IWORK( * )
18  REAL D( * ), LLD( * ), W( * ),
19  $ werr( * ), wgap( * ), work( * )
20 * ..
21 *
22 * Purpose
23 * =======
24 *
25 * Given the relatively robust representation(RRR) L D L^T, SLARRB2
26 * does "limited" bisection to refine the eigenvalues of L D L^T,
27 * W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial
28 * guesses for these eigenvalues are input in W, the corresponding estimate
29 * of the error in these guesses and their gaps are input in WERR
30 * and WGAP, respectively. During bisection, intervals
31 * [left, right] are maintained by storing their mid-points and
32 * semi-widths in the arrays W and WERR respectively.
33 *
34 * NOTE:
35 * There are very few minor differences between SLARRB from LAPACK
36 * and this current subroutine SLARRB2.
37 * The most important reason for creating this nearly identical copy
38 * is profiling: in the ScaLAPACK MRRR algorithm, eigenvalue computation
39 * using SLARRB2 is used for refinement in the construction of
40 * the representation tree, as opposed to the initial computation of the
41 * eigenvalues for the root RRR which uses SLARRB. When profiling,
42 * this allows an easy quantification of refinement work vs. computing
43 * eigenvalues of the root.
44 *
45 * Arguments
46 * =========
47 *
48 * N (input) INTEGER
49 * The order of the matrix.
50 *
51 * D (input) REAL array, dimension (N)
52 * The N diagonal elements of the diagonal matrix D.
53 *
54 * LLD (input) REAL array, dimension (N-1)
55 * The (N-1) elements L(i)*L(i)*D(i).
56 *
57 * IFIRST (input) INTEGER
58 * The index of the first eigenvalue to be computed.
59 *
60 * ILAST (input) INTEGER
61 * The index of the last eigenvalue to be computed.
62 *
63 * RTOL1 (input) REAL
64 * RTOL2 (input) REAL
65 * Tolerance for the convergence of the bisection intervals.
66 * An interval [LEFT,RIGHT] has converged if
67 * RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) )
68 * where GAP is the (estimated) distance to the nearest
69 * eigenvalue.
70 *
71 * OFFSET (input) INTEGER
72 * Offset for the arrays W, WGAP and WERR, i.e., the IFIRST-OFFSET
73 * through ILAST-OFFSET elements of these arrays are to be used.
74 *
75 * W (input/output) REAL array, dimension (N)
76 * On input, W( IFIRST-OFFSET ) through W( ILAST-OFFSET ) are
77 * estimates of the eigenvalues of L D L^T indexed IFIRST through ILAST.
78 * On output, these estimates are refined.
79 *
80 * WGAP (input/output) REAL array, dimension (N-1)
81 * On input, the (estimated) gaps between consecutive
82 * eigenvalues of L D L^T, i.e., WGAP(I-OFFSET) is the gap between
83 * eigenvalues I and I+1. Note that if IFIRST.EQ.ILAST
84 * then WGAP(IFIRST-OFFSET) must be set to ZERO.
85 * On output, these gaps are refined.
86 *
87 * WERR (input/output) REAL array, dimension (N)
88 * On input, WERR( IFIRST-OFFSET ) through WERR( ILAST-OFFSET ) are
89 * the errors in the estimates of the corresponding elements in W.
90 * On output, these errors are refined.
91 *
92 * WORK (workspace) REAL array, dimension (4*N)
93 * Workspace.
94 *
95 * IWORK (workspace) INTEGER array, dimension (2*N)
96 * Workspace.
97 *
98 * PIVMIN (input) REAL
99 * The minimum pivot in the sturm sequence.
100 *
101 * LGPVMN (input) REAL
102 * Logarithm of PIVMIN, precomputed.
103 *
104 * LGSPDM (input) REAL
105 * Logarithm of the spectral diameter, precomputed.
106 *
107 * TWIST (input) INTEGER
108 * The twist index for the twisted factorization that is used
109 * for the negcount.
110 * TWIST = N: Compute negcount from L D L^T - LAMBDA I = L+ D+ L+^T
111 * TWIST = 1: Compute negcount from L D L^T - LAMBDA I = U- D- U-^T
112 * TWIST = R: Compute negcount from L D L^T - LAMBDA I = N(r) D(r) N(r)
113 *
114 * INFO (output) INTEGER
115 * Error flag.
116 *
117 * .. Parameters ..
118  REAL ZERO, TWO, HALF
119  PARAMETER ( ZERO = 0.0e0, two = 2.0e0,
120  $ half = 0.5e0 )
121  INTEGER MAXITR
122 * ..
123 * .. Local Scalars ..
124  INTEGER I, I1, II, INDLLD, IP, ITER, J, K, NEGCNT,
125  $ NEXT, NINT, OLNINT, PREV, R
126  REAL BACK, CVRGD, GAP, LEFT, LGAP, MID, MNWDTH,
127  $ RGAP, RIGHT, SAVGAP, TMP, WIDTH
128  LOGICAL PARANOID
129 * ..
130 * .. External Functions ..
131  LOGICAL SISNAN
132  REAL SLAMCH
133  INTEGER SLANEG2A
134  EXTERNAL sisnan, slamch,
135  $ slaneg2a
136 *
137 * ..
138 * .. Intrinsic Functions ..
139  INTRINSIC abs, max, min
140 * ..
141 * .. Executable Statements ..
142 *
143  info = 0
144 *
145 * Turn on paranoid check for rounding errors
146 * invalidating uncertainty intervals of eigenvalues
147 *
148  paranoid = .true.
149 *
150  maxitr = int( ( lgspdm - lgpvmn ) / log( two ) ) + 2
151  mnwdth = two * pivmin
152 *
153  r = twist
154 *
155  indlld = 2*n
156  DO 5 j = 1, n-1
157  i=2*j
158  work(indlld+i-1) = d(j)
159  work(indlld+i) = lld(j)
160  5 CONTINUE
161  work(indlld+2*n-1) = d(n)
162 *
163  IF((r.LT.1).OR.(r.GT.n)) r = n
164 *
165 * Initialize unconverged intervals in [ WORK(2*I-1), WORK(2*I) ].
166 * The Sturm Count, Count( WORK(2*I-1) ) is arranged to be I-1, while
167 * Count( WORK(2*I) ) is stored in IWORK( 2*I ). The integer IWORK( 2*I-1 )
168 * for an unconverged interval is set to the index of the next unconverged
169 * interval, and is -1 or 0 for a converged interval. Thus a linked
170 * list of unconverged intervals is set up.
171 *
172  i1 = ifirst
173 * The number of unconverged intervals
174  nint = 0
175 * The last unconverged interval found
176  prev = 0
177 
178  rgap = wgap( i1-offset )
179  DO 75 i = i1, ilast
180  k = 2*i
181  ii = i - offset
182  left = w( ii ) - werr( ii )
183  right = w( ii ) + werr( ii )
184  lgap = rgap
185  rgap = wgap( ii )
186  gap = min( lgap, rgap )
187 
188  IF((abs(left).LE.16*pivmin).OR.(abs(right).LE.16*pivmin))
189  $ THEN
190  info = -1
191  RETURN
192  ENDIF
193 
194  IF( paranoid ) THEN
195 * Make sure that [LEFT,RIGHT] contains the desired eigenvalue
196 * Compute negcount from dstqds facto L+D+L+^T = L D L^T - LEFT
197 *
198 * Do while( NEGCNT(LEFT).GT.I-1 )
199 *
200  back = werr( ii )
201  20 CONTINUE
202  negcnt = slaneg2a( n, work(indlld+1), left, pivmin, r )
203  IF( negcnt.GT.i-1 ) THEN
204  left = left - back
205  back = two*back
206  GO TO 20
207  END IF
208 *
209 * Do while( NEGCNT(RIGHT).LT.I )
210 * Compute negcount from dstqds facto L+D+L+^T = L D L^T - RIGHT
211 *
212  back = werr( ii )
213  50 CONTINUE
214  negcnt = slaneg2a( n, work(indlld+1),right, pivmin, r )
215 
216  IF( negcnt.LT.i ) THEN
217  right = right + back
218  back = two*back
219  GO TO 50
220  END IF
221  ENDIF
222 
223  width = half*abs( left - right )
224  tmp = max( abs( left ), abs( right ) )
225  cvrgd = max(rtol1*gap,rtol2*tmp)
226  IF( width.LE.cvrgd .OR. width.LE.mnwdth ) THEN
227 * This interval has already converged and does not need refinement.
228 * (Note that the gaps might change through refining the
229 * eigenvalues, however, they can only get bigger.)
230 * Remove it from the list.
231  iwork( k-1 ) = -1
232 * Make sure that I1 always points to the first unconverged interval
233  IF((i.EQ.i1).AND.(i.LT.ilast)) i1 = i + 1
234  IF((prev.GE.i1).AND.(i.LE.ilast)) iwork( 2*prev-1 ) = i + 1
235  ELSE
236 * unconverged interval found
237  prev = i
238  nint = nint + 1
239  iwork( k-1 ) = i + 1
240  iwork( k ) = negcnt
241  END IF
242  work( k-1 ) = left
243  work( k ) = right
244  75 CONTINUE
245 
246 *
247 * Do while( NINT.GT.0 ), i.e. there are still unconverged intervals
248 * and while (ITER.LT.MAXITR)
249 *
250  iter = 0
251  80 CONTINUE
252  prev = i1 - 1
253  i = i1
254  olnint = nint
255 
256  DO 100 ip = 1, olnint
257  k = 2*i
258  ii = i - offset
259  rgap = wgap( ii )
260  lgap = rgap
261  IF(ii.GT.1) lgap = wgap( ii-1 )
262  gap = min( lgap, rgap )
263  next = iwork( k-1 )
264  left = work( k-1 )
265  right = work( k )
266  mid = half*( left + right )
267 * semiwidth of interval
268  width = right - mid
269  tmp = max( abs( left ), abs( right ) )
270  cvrgd = max(rtol1*gap,rtol2*tmp)
271  IF( ( width.LE.cvrgd ) .OR. ( width.LE.mnwdth ).OR.
272  $ ( iter.EQ.maxitr ) )THEN
273 * reduce number of unconverged intervals
274  nint = nint - 1
275 * Mark interval as converged.
276  iwork( k-1 ) = 0
277  IF( i1.EQ.i ) THEN
278  i1 = next
279  ELSE
280 * Prev holds the last unconverged interval previously examined
281  IF(prev.GE.i1) iwork( 2*prev-1 ) = next
282  END IF
283  i = next
284  GO TO 100
285  END IF
286  prev = i
287 *
288 * Perform one bisection step
289 *
290  negcnt = slaneg2a( n, work(indlld+1), mid, pivmin, r )
291  IF( negcnt.LE.i-1 ) THEN
292  work( k-1 ) = mid
293  ELSE
294  work( k ) = mid
295  END IF
296  i = next
297  100 CONTINUE
298  iter = iter + 1
299 * do another loop if there are still unconverged intervals
300 * However, in the last iteration, all intervals are accepted
301 * since this is the best we can do.
302  IF( ( nint.GT.0 ).AND.(iter.LE.maxitr) ) GO TO 80
303 *
304 *
305 * At this point, all the intervals have converged
306 *
307 * save this gap to restore it after the loop
308  savgap = wgap( ilast-offset )
309 *
310  left = work( 2*ifirst-1 )
311  DO 110 i = ifirst, ilast
312  k = 2*i
313  ii = i - offset
314 * RIGHT is the right boundary of this current interval
315  right = work( k )
316 * All intervals marked by '0' have been refined.
317  IF( iwork( k-1 ).EQ.0 ) THEN
318  w( ii ) = half*( left+right )
319  werr( ii ) = right - w( ii )
320  END IF
321 * Left is the boundary of the next interval
322  left = work( k +1 )
323  wgap( ii ) = max( zero, left - right )
324  110 CONTINUE
325 * restore the last gap which was overwritten by garbage
326  wgap( ilast-offset ) = savgap
327 
328  RETURN
329 *
330 * End of SLARRB2
331 *
332  END
333 *
334 *
335 *
336  FUNCTION slaneg2( N, D, LLD, SIGMA, PIVMIN, R )
337 *
338  IMPLICIT NONE
339 *
340  INTEGER slaneg2
341 *
342 * .. Scalar Arguments ..
343  INTEGER n, r
344  REAL pivmin, sigma
345 * ..
346 * .. Array Arguments ..
347  REAL d( * ), lld( * )
348 *
349  REAL zero
350  PARAMETER ( zero = 0.0e0 )
351 
352  INTEGER blklen
353  PARAMETER ( blklen = 2048 )
354 * ..
355 * .. Local Scalars ..
356  INTEGER bj, j, neg1, neg2, negcnt, to
357  REAL dminus, dplus, gamma, p, s, t, tmp, xsav
358  LOGICAL sawnan
359 * ..
360 * .. External Functions ..
361  LOGICAL sisnan
362  EXTERNAL sisnan
363 
364  negcnt = 0
365 *
366 * I) upper part: L D L^T - SIGMA I = L+ D+ L+^T
367 * run dstqds block-wise to avoid excessive work when NaNs occur
368 *
369  s = zero
370  DO 210 bj = 1, r-1, blklen
371  neg1 = 0
372  xsav = s
373  to = bj+blklen-1
374  IF ( to.LE.r-1 ) THEN
375  DO 21 j = bj, to
376  t = s - sigma
377  dplus = d( j ) + t
378  IF( dplus.LT.zero ) neg1=neg1 + 1
379  s = t*lld( j ) / dplus
380  21 CONTINUE
381  ELSE
382  DO 22 j = bj, r-1
383  t = s - sigma
384  dplus = d( j ) + t
385  IF( dplus.LT.zero ) neg1=neg1 + 1
386  s = t*lld( j ) / dplus
387  22 CONTINUE
388  ENDIF
389  sawnan = sisnan( s )
390 *
391  IF( sawnan ) THEN
392  neg1 = 0
393  s = xsav
394  to = bj+blklen-1
395  IF ( to.LE.r-1 ) THEN
396  DO 23 j = bj, to
397  t = s - sigma
398  dplus = d( j ) + t
399  IF(abs(dplus).LT.pivmin)
400  $ dplus = -pivmin
401  tmp = lld( j ) / dplus
402  IF( dplus.LT.zero )
403  $ neg1 = neg1 + 1
404  s = t*tmp
405  IF( tmp.EQ.zero ) s = lld( j )
406  23 CONTINUE
407  ELSE
408  DO 24 j = bj, r-1
409  t = s - sigma
410  dplus = d( j ) + t
411  IF(abs(dplus).LT.pivmin)
412  $ dplus = -pivmin
413  tmp = lld( j ) / dplus
414  IF( dplus.LT.zero ) neg1=neg1+1
415  s = t*tmp
416  IF( tmp.EQ.zero ) s = lld( j )
417  24 CONTINUE
418  ENDIF
419  END IF
420  negcnt = negcnt + neg1
421  210 CONTINUE
422 *
423 * II) lower part: L D L^T - SIGMA I = U- D- U-^T
424 *
425  p = d( n ) - sigma
426  DO 230 bj = n-1, r, -blklen
427  neg2 = 0
428  xsav = p
429  to = bj-blklen+1
430  IF ( to.GE.r ) THEN
431  DO 25 j = bj, to, -1
432  dminus = lld( j ) + p
433  IF( dminus.LT.zero ) neg2=neg2+1
434  tmp = p / dminus
435  p = tmp * d( j ) - sigma
436  25 CONTINUE
437  ELSE
438  DO 26 j = bj, r, -1
439  dminus = lld( j ) + p
440  IF( dminus.LT.zero ) neg2=neg2+1
441  tmp = p / dminus
442  p = tmp * d( j ) - sigma
443  26 CONTINUE
444  ENDIF
445  sawnan = sisnan( p )
446 *
447  IF( sawnan ) THEN
448  neg2 = 0
449  p = xsav
450  to = bj-blklen+1
451  IF ( to.GE.r ) THEN
452  DO 27 j = bj, to, -1
453  dminus = lld( j ) + p
454  IF(abs(dminus).LT.pivmin)
455  $ dminus = -pivmin
456  tmp = d( j ) / dminus
457  IF( dminus.LT.zero )
458  $ neg2 = neg2 + 1
459  p = p*tmp - sigma
460  IF( tmp.EQ.zero )
461  $ p = d( j ) - sigma
462  27 CONTINUE
463  ELSE
464  DO 28 j = bj, r, -1
465  dminus = lld( j ) + p
466  IF(abs(dminus).LT.pivmin)
467  $ dminus = -pivmin
468  tmp = d( j ) / dminus
469  IF( dminus.LT.zero )
470  $ neg2 = neg2 + 1
471  p = p*tmp - sigma
472  IF( tmp.EQ.zero )
473  $ p = d( j ) - sigma
474  28 CONTINUE
475  ENDIF
476  END IF
477  negcnt = negcnt + neg2
478  230 CONTINUE
479 *
480 * III) Twist index
481 *
482  gamma = s + p
483  IF( gamma.LT.zero ) negcnt = negcnt+1
484 
485  slaneg2 = negcnt
486  END
487 *
488 *
489 *
490  FUNCTION slaneg2a( N, DLLD, SIGMA, PIVMIN, R )
491 *
492  IMPLICIT NONE
493 *
494  INTEGER slaneg2a
495 *
496 * .. Scalar Arguments ..
497  INTEGER n, r
498  REAL pivmin, sigma
499 * ..
500 * .. Array Arguments ..
501  REAL dlld( * )
502 *
503  REAL zero
504  PARAMETER ( zero = 0.0e0 )
505 
506  INTEGER blklen
507  PARAMETER ( blklen = 512 )
508 *
509 * ..
510 * .. Intrinsic Functions ..
511  INTRINSIC int
512 * ..
513 * .. Local Scalars ..
514  INTEGER bj, i, j, nb, neg1, neg2, negcnt, nx
515  REAL dminus, dplus, gamma, p, s, t, tmp, xsav
516  LOGICAL sawnan
517 * ..
518 * .. External Functions ..
519  LOGICAL sisnan
520  EXTERNAL sisnan
521 
522  negcnt = 0
523 *
524 * I) upper part: L D L^T - SIGMA I = L+ D+ L+^T
525 * run dstqds block-wise to avoid excessive work when NaNs occur,
526 * first in chunks of size BLKLEN and then the remainder
527 *
528  nb = int((r-1)/blklen)
529  nx = nb*blklen
530  s = zero
531  DO 210 bj = 1, nx, blklen
532  neg1 = 0
533  xsav = s
534  DO 21 j = bj, bj+blklen-1
535  i = 2*j
536  t = s - sigma
537  dplus = dlld( i-1 ) + t
538  IF( dplus.LT.zero ) neg1=neg1 + 1
539  s = t*dlld( i ) / dplus
540  21 CONTINUE
541  sawnan = sisnan( s )
542 *
543  IF( sawnan ) THEN
544  neg1 = 0
545  s = xsav
546  DO 23 j = bj, bj+blklen-1
547  i = 2*j
548  t = s - sigma
549  dplus = dlld( i-1 ) + t
550  IF(abs(dplus).LT.pivmin)
551  $ dplus = -pivmin
552  tmp = dlld( i ) / dplus
553  IF( dplus.LT.zero )
554  $ neg1 = neg1 + 1
555  s = t*tmp
556  IF( tmp.EQ.zero ) s = dlld( i )
557  23 CONTINUE
558  END IF
559  negcnt = negcnt + neg1
560  210 CONTINUE
561 *
562  neg1 = 0
563  xsav = s
564  DO 22 j = nx+1, r-1
565  i = 2*j
566  t = s - sigma
567  dplus = dlld( i-1 ) + t
568  IF( dplus.LT.zero ) neg1=neg1 + 1
569  s = t*dlld( i ) / dplus
570  22 CONTINUE
571  sawnan = sisnan( s )
572 *
573  IF( sawnan ) THEN
574  neg1 = 0
575  s = xsav
576  DO 24 j = nx+1, r-1
577  i = 2*j
578  t = s - sigma
579  dplus = dlld( i-1 ) + t
580  IF(abs(dplus).LT.pivmin)
581  $ dplus = -pivmin
582  tmp = dlld( i ) / dplus
583  IF( dplus.LT.zero ) neg1=neg1+1
584  s = t*tmp
585  IF( tmp.EQ.zero ) s = dlld( i )
586  24 CONTINUE
587  ENDIF
588  negcnt = negcnt + neg1
589 *
590 * II) lower part: L D L^T - SIGMA I = U- D- U-^T
591 *
592  nb = int((n-r)/blklen)
593  nx = n-nb*blklen
594  p = dlld( 2*n-1 ) - sigma
595  DO 230 bj = n-1, nx, -blklen
596  neg2 = 0
597  xsav = p
598  DO 25 j = bj, bj-blklen+1, -1
599  i = 2*j
600  dminus = dlld( i ) + p
601  IF( dminus.LT.zero ) neg2=neg2+1
602  tmp = p / dminus
603  p = tmp * dlld( i-1 ) - sigma
604  25 CONTINUE
605  sawnan = sisnan( p )
606 *
607  IF( sawnan ) THEN
608  neg2 = 0
609  p = xsav
610  DO 27 j = bj, bj-blklen+1, -1
611  i = 2*j
612  dminus = dlld( i ) + p
613  IF(abs(dminus).LT.pivmin)
614  $ dminus = -pivmin
615  tmp = dlld( i-1 ) / dminus
616  IF( dminus.LT.zero )
617  $ neg2 = neg2 + 1
618  p = p*tmp - sigma
619  IF( tmp.EQ.zero )
620  $ p = dlld( i-1 ) - sigma
621  27 CONTINUE
622  END IF
623  negcnt = negcnt + neg2
624  230 CONTINUE
625 
626  neg2 = 0
627  xsav = p
628  DO 26 j = nx-1, r, -1
629  i = 2*j
630  dminus = dlld( i ) + p
631  IF( dminus.LT.zero ) neg2=neg2+1
632  tmp = p / dminus
633  p = tmp * dlld( i-1 ) - sigma
634  26 CONTINUE
635  sawnan = sisnan( p )
636 *
637  IF( sawnan ) THEN
638  neg2 = 0
639  p = xsav
640  DO 28 j = nx-1, r, -1
641  i = 2*j
642  dminus = dlld( i ) + p
643  IF(abs(dminus).LT.pivmin)
644  $ dminus = -pivmin
645  tmp = dlld( i-1 ) / dminus
646  IF( dminus.LT.zero )
647  $ neg2 = neg2 + 1
648  p = p*tmp - sigma
649  IF( tmp.EQ.zero )
650  $ p = dlld( i-1 ) - sigma
651  28 CONTINUE
652  END IF
653  negcnt = negcnt + neg2
654 *
655 * III) Twist index
656 *
657  gamma = s + p
658  IF( gamma.LT.zero ) negcnt = negcnt+1
659 
660  slaneg2a = negcnt
661  END
662 
max
#define max(A, B)
Definition: pcgemr.c:180
slaneg2
integer function slaneg2(N, D, LLD, SIGMA, PIVMIN, R)
Definition: slarrb2.f:337
slarrb2
subroutine slarrb2(N, D, LLD, IFIRST, ILAST, RTOL1, RTOL2, OFFSET, W, WGAP, WERR, WORK, IWORK, PIVMIN, LGPVMN, LGSPDM, TWIST, INFO)
Definition: slarrb2.f:4
slaneg2a
integer function slaneg2a(N, DLLD, SIGMA, PIVMIN, R)
Definition: slarrb2.f:491
min
#define min(A, B)
Definition: pcgemr.c:181