SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ dlarrv2()

subroutine dlarrv2 ( integer  n,
double precision  vl,
double precision  vu,
double precision, dimension( * )  d,
double precision, dimension( * )  l,
double precision  pivmin,
integer, dimension( * )  isplit,
integer  m,
integer  dol,
integer  dou,
integer  needil,
integer  neediu,
double precision  minrgp,
double precision  rtol1,
double precision  rtol2,
double precision, dimension( * )  w,
double precision, dimension( * )  werr,
double precision, dimension( * )  wgap,
integer, dimension( * )  iblock,
integer, dimension( * )  indexw,
double precision, dimension( * )  gers,
double precision, dimension( * )  sdiam,
double precision, dimension( ldz, * )  z,
integer  ldz,
integer, dimension( * )  isuppz,
double precision, dimension( * )  work,
integer, dimension( * )  iwork,
logical  vstart,
logical  finish,
integer  maxcls,
integer  ndepth,
integer  parity,
integer  zoffset,
integer  info 
)

Definition at line 1 of file dlarrv2.f.

8
9* -- ScaLAPACK auxiliary routine (version 2.0) --
10* Univ. of Tennessee, Univ. of California Berkeley, Univ of Colorado Denver
11* July 4, 2010
12*
13 IMPLICIT NONE
14*
15* .. Scalar Arguments ..
16 INTEGER DOL, DOU, INFO, LDZ, M, N, MAXCLS,
17 $ NDEPTH, NEEDIL, NEEDIU, PARITY, ZOFFSET
18 DOUBLE PRECISION MINRGP, PIVMIN, RTOL1, RTOL2, VL, VU
19 LOGICAL VSTART, FINISH
20* ..
21* .. Array Arguments ..
22 INTEGER IBLOCK( * ), INDEXW( * ), ISPLIT( * ),
23 $ ISUPPZ( * ), IWORK( * )
24 DOUBLE PRECISION D( * ), GERS( * ), L( * ), SDIAM( * ),
25 $ W( * ), WERR( * ),
26 $ WGAP( * ), WORK( * )
27 DOUBLE PRECISION Z( LDZ, * )
28*
29* Purpose
30* =======
31*
32* DLARRV2 computes the eigenvectors of the tridiagonal matrix
33* T = L D L^T given L, D and APPROXIMATIONS to the eigenvalues of L D L^T.
34* The input eigenvalues should have been computed by DLARRE2A
35* or by precious calls to DLARRV2.
36*
37* The major difference between the parallel and the sequential construction
38* of the representation tree is that in the parallel case, not all eigenvalues
39* of a given cluster might be computed locally. Other processors might "own"
40* and refine part of an eigenvalue cluster. This is crucial for scalability.
41* Thus there might be communication necessary before the current level of the
42* representation tree can be parsed.
43*
44* Please note:
45* 1. The calling sequence has two additional INTEGER parameters,
46* DOL and DOU, that should satisfy M>=DOU>=DOL>=1.
47* These parameters are only relevant for the case JOBZ = 'V'.
48* DLARRV2 ONLY computes the eigenVECTORS
49* corresponding to eigenvalues DOL through DOU in W. (That is,
50* instead of computing the eigenvectors belonging to W(1)
51* through W(M), only the eigenvectors belonging to eigenvalues
52* W(DOL) through W(DOU) are computed. In this case, only the
53* eigenvalues DOL:DOU are guaranteed to be accurately refined
54* to all figures by Rayleigh-Quotient iteration.
55*
56* 2. The additional arguments VSTART, FINISH, NDEPTH, PARITY, ZOFFSET
57* are included as a thread-safe implementation equivalent to SAVE variables.
58* These variables store details about the local representation tree which is
59* computed layerwise. For scalability reasons, eigenvalues belonging to the
60* locally relevant representation tree might be computed on other processors.
61* These need to be communicated before the inspection of the RRRs can proceed
62* on any given layer.
63* Note that only when the variable FINISH is true, the computation has ended
64* All eigenpairs between DOL and DOU have been computed. M is set = DOU - DOL + 1.
65*
66* 3. DLARRV2 needs more workspace in Z than the sequential DLARRV.
67* It is used to store the conformal embedding of the local representation tree.
68*
69* Arguments
70* =========
71*
72* N (input) INTEGER
73* The order of the matrix. N >= 0.
74*
75* VL (input) DOUBLE PRECISION
76* VU (input) DOUBLE PRECISION
77* Lower and upper bounds of the interval that contains the desired
78* eigenvalues. VL < VU. Needed to compute gaps on the left or right
79* end of the extremal eigenvalues in the desired RANGE.
80* VU is currently not used but kept as parameter in case needed.
81*
82* D (input/output) DOUBLE PRECISION array, dimension (N)
83* On entry, the N diagonal elements of the diagonal matrix D.
84* On exit, D is overwritten.
85*
86* L (input/output) DOUBLE PRECISION array, dimension (N)
87* On entry, the (N-1) subdiagonal elements of the unit
88* bidiagonal matrix L are in elements 1 to N-1 of L
89* (if the matrix is not splitted.) At the end of each block
90* is stored the corresponding shift as given by DLARRE.
91* On exit, L is overwritten.
92*
93* PIVMIN (in) DOUBLE PRECISION
94* The minimum pivot allowed in the sturm sequence.
95*
96* ISPLIT (input) INTEGER array, dimension (N)
97* The splitting points, at which T breaks up into blocks.
98* The first block consists of rows/columns 1 to
99* ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1
100* through ISPLIT( 2 ), etc.
101*
102* M (input) INTEGER
103* The total number of input eigenvalues. 0 <= M <= N.
104*
105* DOL (input) INTEGER
106* DOU (input) INTEGER
107* If the user wants to compute only selected eigenvectors from all
108* the eigenvalues supplied, he can specify an index range DOL:DOU.
109* Or else the setting DOL=1, DOU=M should be applied.
110* Note that DOL and DOU refer to the order in which the eigenvalues
111* are stored in W.
112* If the user wants to compute only selected eigenpairs, then
113* the columns DOL-1 to DOU+1 of the eigenvector space Z contain the
114* computed eigenvectors. All other columns of Z are set to zero.
115* If DOL > 1, then Z(:,DOL-1-ZOFFSET) is used.
116* If DOU < M, then Z(:,DOU+1-ZOFFSET) is used.
117*
118*
119* NEEDIL (input/output) INTEGER
120* NEEDIU (input/output) INTEGER
121* Describe which are the left and right outermost eigenvalues
122* that still need to be included in the computation. These indices
123* indicate whether eigenvalues from other processors are needed to
124* correctly compute the conformally embedded representation tree.
125* When DOL<=NEEDIL<=NEEDIU<=DOU, all required eigenvalues are local
126* to the processor and no communication is required to compute its
127* part of the representation tree.
128*
129* MINRGP (input) DOUBLE PRECISION
130* The minimum relativ gap threshold to decide whether an eigenvalue
131* or a cluster boundary is reached.
132*
133* RTOL1 (input) DOUBLE PRECISION
134* RTOL2 (input) DOUBLE PRECISION
135* Parameters for bisection.
136* An interval [LEFT,RIGHT] has converged if
137* RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) )
138*
139* W (input/output) DOUBLE PRECISION array, dimension (N)
140* The first M elements of W contain the APPROXIMATE eigenvalues for
141* which eigenvectors are to be computed. The eigenvalues
142* should be grouped by split-off block and ordered from
143* smallest to largest within the block. (The output array
144* W from DSTEGR2A is expected here.) Furthermore, they are with
145* respect to the shift of the corresponding root representation
146* for their block. On exit,
147* W holds those UNshifted eigenvalues
148* for which eigenvectors have already been computed.
149*
150* WERR (input/output) DOUBLE PRECISION array, dimension (N)
151* The first M elements contain the semiwidth of the uncertainty
152* interval of the corresponding eigenvalue in W
153*
154* WGAP (input/output) DOUBLE PRECISION array, dimension (N)
155* The separation from the right neighbor eigenvalue in W.
156*
157* IBLOCK (input) INTEGER array, dimension (N)
158* The indices of the blocks (submatrices) associated with the
159* corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue
160* W(i) belongs to the first block from the top, =2 if W(i)
161* belongs to the second block, etc.
162*
163* INDEXW (input) INTEGER array, dimension (N)
164* The indices of the eigenvalues within each block (submatrix);
165* for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the
166* i-th eigenvalue W(i) is the 10-th eigenvalue in the second block.
167*
168* GERS (input) DOUBLE PRECISION array, dimension (2*N)
169* The N Gerschgorin intervals (the i-th Gerschgorin interval
170* is (GERS(2*i-1), GERS(2*i)). The Gerschgorin intervals should
171* be computed from the original UNshifted matrix.
172* Currently NOT used but kept as parameter in case it becomes
173* needed in the future.
174*
175* SDIAM (input) DOUBLE PRECISION array, dimension (N)
176* The spectral diameters for all unreduced blocks.
177*
178* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) )
179* If INFO = 0, the first M columns of Z contain the
180* orthonormal eigenvectors of the matrix T
181* corresponding to the input eigenvalues, with the i-th
182* column of Z holding the eigenvector associated with W(i).
183* In the distributed version, only a subset of columns
184* is accessed, see DOL,DOU and ZOFFSET.
185*
186* LDZ (input) INTEGER
187* The leading dimension of the array Z. LDZ >= 1, and if
188* JOBZ = 'V', LDZ >= max(1,N).
189*
190* ISUPPZ (output) INTEGER array, dimension ( 2*max(1,M) )
191* The support of the eigenvectors in Z, i.e., the indices
192* indicating the nonzero elements in Z. The I-th eigenvector
193* is nonzero only in elements ISUPPZ( 2*I-1 ) through
194* ISUPPZ( 2*I ).
195*
196* WORK (workspace) DOUBLE PRECISION array, dimension (12*N)
197*
198* IWORK (workspace) INTEGER array, dimension (7*N)
199*
200* VSTART (input/output) LOGICAL
201* .TRUE. on initialization, set to .FALSE. afterwards.
202*
203* FINISH (input/output) LOGICAL
204* A flag that indicates whether all eigenpairs have been computed.
205*
206* MAXCLS (input/output) INTEGER
207* The largest cluster worked on by this processor in the
208* representation tree.
209*
210* NDEPTH (input/output) INTEGER
211* The current depth of the representation tree. Set to
212* zero on initial pass, changed when the deeper levels of
213* the representation tree are generated.
214*
215* PARITY (input/output) INTEGER
216* An internal parameter needed for the storage of the
217* clusters on the current level of the representation tree.
218*
219* ZOFFSET (input) INTEGER
220* Offset for storing the eigenpairs when Z is distributed
221* in 1D-cyclic fashion.
222*
223* INFO (output) INTEGER
224* = 0: successful exit
225*
226* > 0: A problem occured in DLARRV2.
227* < 0: One of the called subroutines signaled an internal problem.
228* Needs inspection of the corresponding parameter IINFO
229* for further information.
230*
231* =-1: Problem in DLARRB2 when refining a child's eigenvalues.
232* =-2: Problem in DLARRF2 when computing the RRR of a child.
233* When a child is inside a tight cluster, it can be difficult
234* to find an RRR. A partial remedy from the user's point of
235* view is to make the parameter MINRGP smaller and recompile.
236* However, as the orthogonality of the computed vectors is
237* proportional to 1/MINRGP, the user should be aware that
238* he might be trading in precision when he decreases MINRGP.
239* =-3: Problem in DLARRB2 when refining a single eigenvalue
240* after the Rayleigh correction was rejected.
241* = 5: The Rayleigh Quotient Iteration failed to converge to
242* full accuracy in MAXITR steps.
243*
244* =====================================================================
245*
246* .. Parameters ..
247 INTEGER MAXITR, USE30, USE31, USE32A, USE32B
248 parameter( maxitr = 10, use30=30, use31=31,
249 $ use32a=3210, use32b = 3211 )
250 DOUBLE PRECISION ZERO, ONE, TWO, THREE, FOUR, HALF
251 parameter( zero = 0.0d0, one = 1.0d0,
252 $ two = 2.0d0, three = 3.0d0,
253 $ four = 4.0d0, half = 0.5d0)
254* ..
255* .. Local Arrays ..
256 INTEGER SPLACE( 4 )
257* ..
258* .. Local Scalars ..
259 LOGICAL DELREF, ESKIP, NEEDBS, ONLYLC, STP2II, TRYMID,
260 $ TRYRQC, USEDBS, USEDRQ
261 INTEGER I, IBEGIN, IEND, II, IINCLS, IINDC1, IINDC2,
262 $ IINDWK, IINFO, IM, IN, INDEIG, INDLD, INDLLD,
263 $ INDWRK, ISUPMN, ISUPMX, ITER, ITMP1, ITWIST, J,
264 $ JBLK, K, KK, MINIWSIZE, MINWSIZE, MYWFST,
265 $ MYWLST, NCLUS, NEGCNT, NEWCLS, NEWFST, NEWFTT,
266 $ NEWLST, NEWSIZ, OFFSET, OLDCLS, OLDFST, OLDIEN,
267 $ OLDLST, OLDNCL, P, Q, VRTREE, WBEGIN, WEND,
268 $ WINDEX, WINDMN, WINDPL, ZFROM, ZINDEX, ZTO,
269 $ ZUSEDL, ZUSEDU, ZUSEDW
270 DOUBLE PRECISION AVGAP, BSTRES, BSTW, ENUFGP, EPS, FUDGE, GAP,
271 $ GAPTOL, LAMBDA, LEFT, LGAP, LGPVMN, LGSPDM,
272 $ LOG_IN, MGAP, MINGMA, MYERR, NRMINV, NXTERR,
273 $ ORTOL, RESID, RGAP, RIGHT, RLTL30, RQCORR,
274 $ RQTOL, SAVEGP, SGNDEF, SIGMA, SPDIAM, SSIGMA,
275 $ TAU, TMP, TOL, ZTZ
276* ..
277* .. External Functions ..
278 DOUBLE PRECISION DLAMCH
279 DOUBLE PRECISION DDOT, DNRM2
280 EXTERNAL ddot, dlamch, dnrm2
281* ..
282* .. External Subroutines ..
283 EXTERNAL daxpy, dcopy, dlar1va, dlarrb2,
284 $ dlarrf2, dlaset, dscal
285* ..
286* .. Intrinsic Functions ..
287 INTRINSIC abs, dble, max, min, sqrt
288* ..
289* .. Executable Statements ..
290* ..
291
292
293 info = 0
294* The first N entries of WORK are reserved for the eigenvalues
295 indld = n+1
296 indlld= 2*n+1
297 indwrk= 3*n+1
298 minwsize = 12 * n
299
300* IWORK(IINCLS+JBLK) holds the number of clusters on the current level
301* of the reptree for block JBLK
302 iincls = 0
303* IWORK(IINDC1+1:IINC2+N) are used to store the clusters of the current
304* layer and the one above.
305 iindc1 = n
306 iindc2 = 2*n
307 iindwk = 3*n + 1
308 miniwsize = 7 * n
309
310 eps = dlamch( 'Precision' )
311 rqtol = two * eps
312
313 tryrqc = .true.
314* Decide which representation tree criterion to use
315* USE30 = Lapack 3.0 criterion
316* USE31 = LAPACK 3.1 criterion
317* USE32A = two criteria, determines singletons with USE31, and groups with avgap.
318* USE32B = two criteria, determines singletons with USE31, and groups with USE30.
319 vrtree = use32a
320*
321 lgpvmn = log( pivmin )
322
323
324 IF(vstart) THEN
325*
326* PREPROCESSING, DONE ONLY IN THE FIRST CALL
327*
328 vstart = .false.
329*
330 maxcls = 1
331
332* Set delayed eigenvalue refinement
333* In order to enable more parallelism, refinement
334* must be done immediately and cannot be delayed until
335* the next representation tree level.
336 delref = .false.
337
338 DO 1 i= 1,minwsize
339 work( i ) = zero
340 1 CONTINUE
341
342 DO 2 i= 1,miniwsize
343 iwork( i ) = 0
344 2 CONTINUE
345
346 zusedl = 1
347 IF(dol.GT.1) THEN
348* Set lower bound for use of Z
349 zusedl = dol-1
350 ENDIF
351 zusedu = m
352 IF(dou.LT.m) THEN
353* Set lower bound for use of Z
354 zusedu = dou+1
355 ENDIF
356* The width of the part of Z that is used
357 zusedw = zusedu - zusedl + 1
358*
359 CALL dlaset( 'Full', n, zusedw, zero, zero,
360 $ z(1,(zusedl-zoffset)), ldz )
361
362* Initialize NDEPTH, the current depth of the representation tree
363 ndepth = 0
364* Initialize parity
365 parity = 1
366
367* Go through blocks, initialize data structures
368 ibegin = 1
369 wbegin = 1
370 DO 10 jblk = 1, iblock( m )
371 iend = isplit( jblk )
372 sigma = l( iend )
373 wend = wbegin - 1
374 3 CONTINUE
375 IF( wend.LT.m ) THEN
376 IF( iblock( wend+1 ).EQ.jblk ) THEN
377 wend = wend + 1
378 GO TO 3
379 END IF
380 END IF
381 IF( wend.LT.wbegin ) THEN
382 iwork( iincls + jblk ) = 0
383 ibegin = iend + 1
384 GO TO 10
385 ELSEIF( (wend.LT.dol).OR.(wbegin.GT.dou) ) THEN
386 iwork( iincls + jblk ) = 0
387 ibegin = iend + 1
388 wbegin = wend + 1
389 GO TO 10
390 END IF
391* The number of eigenvalues in the current block
392 im = wend - wbegin + 1
393* This is for a 1x1 block
394 IF( ibegin.EQ.iend ) THEN
395 iwork( iincls + jblk ) = 0
396 z( ibegin, (wbegin-zoffset) ) = one
397 isuppz( 2*wbegin-1 ) = ibegin
398 isuppz( 2*wbegin ) = ibegin
399 w( wbegin ) = w( wbegin ) + sigma
400 work( wbegin ) = w( wbegin )
401 ibegin = iend + 1
402 wbegin = wbegin + 1
403 GO TO 10
404 END IF
405 CALL dcopy( im, w( wbegin ), 1,
406 & work( wbegin ), 1 )
407* We store in W the eigenvalue approximations w.r.t. the original
408* matrix T.
409 DO 5 i=1,im
410 w(wbegin+i-1) = w(wbegin+i-1)+sigma
411 5 CONTINUE
412
413* Initialize cluster counter for this block
414 iwork( iincls + jblk ) = 1
415 iwork( iindc1+ibegin ) = 1
416 iwork( iindc1+ibegin+1 ) = im
417*
418 ibegin = iend + 1
419 wbegin = wend + 1
42010 CONTINUE
421*
422 ENDIF
423
424* Init NEEDIL and NEEDIU
425 needil = dou
426 neediu = dol
427
428* Here starts the main loop
429* Only one pass through the loop is done until no collaboration
430* with other processors is needed.
431 40 CONTINUE
432
433 parity = 1 - parity
434
435* For each block, build next level of representation tree
436* if there are still remaining clusters
437 ibegin = 1
438 wbegin = 1
439 DO 170 jblk = 1, iblock( m )
440 iend = isplit( jblk )
441 sigma = l( iend )
442* Find the eigenvectors of the submatrix indexed IBEGIN
443* through IEND.
444 IF(m.EQ.n) THEN
445* all eigenpairs are computed
446 wend = iend
447 ELSE
448* count how many wanted eigenpairs are in this block
449 wend = wbegin - 1
450 15 CONTINUE
451 IF( wend.LT.m ) THEN
452 IF( iblock( wend+1 ).EQ.jblk ) THEN
453 wend = wend + 1
454 GO TO 15
455 END IF
456 END IF
457 ENDIF
458
459 oldncl = iwork( iincls + jblk )
460 IF( oldncl.EQ.0 ) THEN
461 ibegin = iend + 1
462 wbegin = wend + 1
463 GO TO 170
464 END IF
465* OLDIEN is the last index of the previous block
466 oldien = ibegin - 1
467* Calculate the size of the current block
468 in = iend - ibegin + 1
469* The number of eigenvalues in the current block
470 im = wend - wbegin + 1
471
472* Find local spectral diameter of the block
473 spdiam = sdiam(jblk)
474 lgspdm = log( spdiam + pivmin )
475* Compute ORTOL parameter, similar to DSTEIN
476 ortol = spdiam*1.0d-3
477* Compute average gap
478 avgap = spdiam/dble(in-1)
479* Compute the minimum of average gap and ORTOL parameter
480* This can used as a lower bound for acceptable separation
481* between eigenvalues
482 enufgp = min(ortol,avgap)
483
484* Any 1x1 block has been treated before
485
486* loop while( OLDNCLS.GT.0 )
487* generate the next representation tree level for the current block
488 IF( oldncl.GT.0 ) THEN
489* This is a crude protection against infinitely deep trees
490 IF( ndepth.GT.m ) THEN
491 info = -2
492 RETURN
493 ENDIF
494* breadth first processing of the current level of the representation
495* tree: OLDNCL = number of clusters on current level
496* NCLUS is the number of clusters for the next level of the reptree
497* reset NCLUS to count the number of child clusters
498 nclus = 0
499*
500 log_in = log(dble(in))
501*
502 rltl30 = min( 1.0d-2, one / dble( in ) )
503*
504 IF( parity.EQ.0 ) THEN
505 oldcls = iindc1+ibegin-1
506 newcls = iindc2+ibegin-1
507 ELSE
508 oldcls = iindc2+ibegin-1
509 newcls = iindc1+ibegin-1
510 END IF
511* Process the clusters on the current level
512 DO 150 i = 1, oldncl
513 j = oldcls + 2*i
514* OLDFST, OLDLST = first, last index of current cluster.
515* cluster indices start with 1 and are relative
516* to WBEGIN when accessing W, WGAP, WERR, Z
517 oldfst = iwork( j-1 )
518 oldlst = iwork( j )
519 IF( ndepth.GT.0 ) THEN
520* Retrieve relatively robust representation (RRR) of cluster
521* that has been computed at the previous level
522* The RRR is stored in Z and overwritten once the eigenvectors
523* have been computed or when the cluster is refined
524
525 IF((dol.EQ.1).AND.(dou.EQ.m)) THEN
526* Get representation from location of the leftmost evalue
527* of the cluster
528 j = wbegin + oldfst - 1
529 ELSE
530 IF(wbegin+oldfst-1.LT.dol) THEN
531* Get representation from the left end of Z array
532 j = dol - 1
533 ELSEIF(wbegin+oldfst-1.GT.dou) THEN
534* Get representation from the right end of Z array
535 j = dou
536 ELSE
537 j = wbegin + oldfst - 1
538 ENDIF
539 ENDIF
540 CALL dcopy( in, z( ibegin, (j-zoffset) ),
541 $ 1, d( ibegin ), 1 )
542 CALL dcopy( in-1, z( ibegin, (j+1-zoffset) ),
543 $ 1, l( ibegin ),1 )
544 sigma = z( iend, (j+1-zoffset) )
545* Set the corresponding entries in Z to zero
546 CALL dlaset( 'Full', in, 2, zero, zero,
547 $ z( ibegin, (j-zoffset) ), ldz )
548 END IF
549
550* Compute DL and DLL of current RRR
551 DO 50 j = ibegin, iend-1
552 tmp = d( j )*l( j )
553 work( indld-1+j ) = tmp
554 work( indlld-1+j ) = tmp*l( j )
555 50 CONTINUE
556
557 IF( ndepth.GT.0 .AND. delref ) THEN
558* P and Q are index of the first and last eigenvalue to compute
559* within the current block
560 p = indexw( wbegin-1+oldfst )
561 q = indexw( wbegin-1+oldlst )
562* Offset for the arrays WORK, WGAP and WERR, i.e., th P-OFFSET
563* thru' Q-OFFSET elements of these arrays are to be used.
564C OFFSET = P-OLDFST
565 offset = indexw( wbegin ) - 1
566* perform limited bisection (if necessary) to get approximate
567* eigenvalues to the precision needed.
568 CALL dlarrb2( in, d( ibegin ),
569 $ work(indlld+ibegin-1),
570 $ p, q, rtol1, rtol2, offset,
571 $ work(wbegin),wgap(wbegin),werr(wbegin),
572 $ work( indwrk ), iwork( iindwk ),
573 $ pivmin, lgpvmn, lgspdm, in, iinfo )
574 IF( iinfo.NE.0 ) THEN
575 info = -1
576 RETURN
577 ENDIF
578* We also recompute the extremal gaps. W holds all eigenvalues
579* of the unshifted matrix and must be used for computation
580* of WGAP, the entries of WORK might stem from RRRs with
581* different shifts. The gaps from WBEGIN-1+OLDFST to
582* WBEGIN-1+OLDLST are correctly computed in DLARRB2.
583* However, we only allow the gaps to become greater since
584* this is what should happen when we decrease WERR
585 IF( oldfst.GT.1) THEN
586 wgap( wbegin+oldfst-2 ) =
587 $ max(wgap(wbegin+oldfst-2),
588 $ w(wbegin+oldfst-1)-werr(wbegin+oldfst-1)
589 $ - w(wbegin+oldfst-2)-werr(wbegin+oldfst-2) )
590 ENDIF
591 IF( wbegin + oldlst -1 .LT. wend ) THEN
592 wgap( wbegin+oldlst-1 ) =
593 $ max(wgap(wbegin+oldlst-1),
594 $ w(wbegin+oldlst)-werr(wbegin+oldlst)
595 $ - w(wbegin+oldlst-1)-werr(wbegin+oldlst-1) )
596 ENDIF
597* Each time the eigenvalues in WORK get refined, we store
598* the newly found approximation with all shifts applied in W
599 DO 53 j=oldfst,oldlst
600 w(wbegin+j-1) = work(wbegin+j-1)+sigma
601 53 CONTINUE
602 ELSEIF( (ndepth.EQ.0) .OR. (.NOT.delref) ) THEN
603* Some of the eigenvalues might have been computed on
604* other processors
605* Recompute gaps for this cluster
606* (all eigenvalues have the same
607* representation, i.e. the same shift, so this is easy)
608 DO 54 j = oldfst, oldlst-1
609 myerr = werr(wbegin + j - 1)
610 nxterr = werr(wbegin + j )
611 wgap(wbegin+j-1) = max(wgap(wbegin+j-1),
612 $ ( work(wbegin+j) - nxterr )
613 $ - ( work(wbegin+j-1) + myerr )
614 $ )
615 54 CONTINUE
616 END IF
617*
618* Process the current node.
619*
620 newfst = oldfst
621 DO 140 j = oldfst, oldlst
622 IF( j.EQ.oldlst ) THEN
623* we are at the right end of the cluster, this is also the
624* boundary of the child cluster
625 newlst = j
626 ELSE
627 IF (vrtree.EQ.use30) THEN
628 IF(wgap( wbegin + j -1).GE.
629 $ rltl30 * abs(work(wbegin + j -1)) ) THEN
630* the right relgap is big enough by the Lapack 3.0 criterion
631 newlst = j
632 ELSE
633* inside a child cluster, the relative gap is not
634* big enough.
635 GOTO 140
636 ENDIF
637 ELSE IF (vrtree.EQ.use31) THEN
638 IF ( wgap( wbegin + j -1).GE.
639 $ minrgp* abs( work(wbegin + j -1) ) ) THEN
640* the right relgap is big enough by the Lapack 3.1 criterion
641* (NEWFST,..,NEWLST) is well separated from the following
642 newlst = j
643 ELSE
644* inside a child cluster, the relative gap is not
645* big enough.
646 GOTO 140
647 ENDIF
648 ELSE IF (vrtree.EQ.use32a) THEN
649 IF( (j.EQ.oldfst).AND.( wgap(wbegin+j-1).GE.
650 $ minrgp* abs(work(wbegin+j-1)) ) ) THEN
651* the right relgap is big enough by the Lapack 3.1 criterion
652* Found a singleton
653 newlst = j
654 ELSE IF( (j.GT.oldfst).AND.(j.EQ.newfst).AND.
655 $ ( wgap(wbegin+j-2).GE.
656 $ minrgp* abs(work(wbegin+j-1)) ).AND.
657 $ ( wgap(wbegin+j-1).GE.
658 $ minrgp* abs(work(wbegin+j-1)) )
659 $ ) THEN
660* Found a singleton
661 newlst = j
662 ELSE IF( (j.GT.newfst).AND.wgap(wbegin+j-1).GE.
663 $ (minrgp*abs(work(wbegin+j-1)) ) )
664 $ THEN
665* the right relgap is big enough by the Lapack 3.1 criterion
666 newlst = j
667 ELSE IF((j.GT.newfst).AND.(j+1.LT.oldlst).AND.
668 $ (wgap(wbegin+j-1).GE.enufgp))
669 $ THEN
670* the right gap is bigger than ENUFGP
671* Care needs to be taken with this criterion to make
672* sure it does not create a remaining `false' singleton
673 newlst = j
674 ELSE
675* inside a child cluster, the relative gap is not
676* big enough.
677 GOTO 140
678 ENDIF
679 ELSE IF (vrtree.EQ.use32b) THEN
680 IF( (j.EQ.oldfst).AND.( wgap(wbegin+j-1).GE.
681 $ minrgp* abs(work(wbegin+j-1)) ) ) THEN
682* the right relgap is big enough by the Lapack 3.1 criterion
683* Found a singleton
684 newlst = j
685 ELSE IF( (j.GT.oldfst).AND.(j.EQ.newfst).AND.
686 $ ( wgap(wbegin+j-2).GE.
687 $ minrgp* abs(work(wbegin+j-1)) ).AND.
688 $ ( wgap(wbegin+j-1).GE.
689 $ minrgp* abs(work(wbegin+j-1)) )
690 $ ) THEN
691* Found a singleton
692 newlst = j
693 ELSE IF( (j.GT.newfst).AND.wgap(wbegin+j-1).GE.
694 $ (minrgp*abs(work(wbegin+j-1)) ) )
695 $ THEN
696* the right relgap is big enough by the Lapack 3.1 criterion
697 newlst = j
698 ELSE IF((j.GT.newfst).AND.(j+1.LT.oldlst).AND.
699 $ (wgap( wbegin + j -1).GE.
700 $ rltl30 * abs(work(wbegin + j -1)) ))
701 $ THEN
702* the right relgap is big enough by the Lapack 3.0 criterion
703* Care needs to be taken with this criterion to make
704* sure it does not create a remaining `false' singleton
705 newlst = j
706 ELSE
707* inside a child cluster, the relative gap is not
708* big enough.
709 GOTO 140
710 ENDIF
711 END IF
712 END IF
713
714* Compute size of child cluster found
715 newsiz = newlst - newfst + 1
716 maxcls = max( newsiz, maxcls )
717
718* NEWFTT is the place in Z where the new RRR or the computed
719* eigenvector is to be stored
720 IF((dol.EQ.1).AND.(dou.EQ.m)) THEN
721* Store representation at location of the leftmost evalue
722* of the cluster
723 newftt = wbegin + newfst - 1
724 ELSE
725 IF(wbegin+newfst-1.LT.dol) THEN
726* Store representation at the left end of Z array
727 newftt = dol - 1
728 ELSEIF(wbegin+newfst-1.GT.dou) THEN
729* Store representation at the right end of Z array
730 newftt = dou
731 ELSE
732 newftt = wbegin + newfst - 1
733 ENDIF
734 ENDIF
735* FOR 1D-DISTRIBUTED Z, COMPUTE NEWFTT SHIFTED BY ZOFFSET
736 newftt = newftt - zoffset
737
738 IF( newsiz.GT.1) THEN
739*
740* Current child is not a singleton but a cluster.
741*
742*
743 IF((wbegin+newlst-1.LT.dol).OR.
744 $ (wbegin+newfst-1.GT.dou)) THEN
745* if the cluster contains no desired eigenvalues
746* skip the computation of that branch of the rep. tree
747 GOTO 139
748 ENDIF
749
750* Compute left and right cluster gap.
751*
752 IF( newfst.EQ.1 ) THEN
753 lgap = max( zero,
754 $ w(wbegin)-werr(wbegin) - vl )
755 ELSE
756 lgap = wgap( wbegin+newfst-2 )
757 ENDIF
758 rgap = wgap( wbegin+newlst-1 )
759*
760* For larger clusters, record the largest gap observed
761* somewhere near the middle of the cluster as a possible
762* alternative position for a shift when TRYMID is TRUE
763*
764 mgap = zero
765 IF(newsiz.GE.50) THEN
766 kk = newfst
767 DO 545 k =newfst+newsiz/3,newlst-newsiz/3
768 IF(mgap.LT.wgap( wbegin+k-1 )) THEN
769 kk = k
770 mgap = wgap( wbegin+k-1 )
771 ENDIF
772 545 CONTINUE
773 ENDIF
774
775*
776* Record the left- and right-most eigenvalues needed
777* for the next level of the representation tree
778 needil = min(needil,wbegin+newfst-1)
779 neediu = max(neediu,wbegin+newlst-1)
780
781*
782* Check if middle gap is large enough to shift there
783*
784 gap = min(lgap,rgap)
785 trymid = (mgap.GT.gap)
786
787 splace(1) = newfst
788 splace(2) = newlst
789 IF(trymid) THEN
790 splace(3) = kk
791 splace(4) = kk+1
792 ELSE
793 splace(3) = newfst
794 splace(4) = newlst
795 ENDIF
796*
797* Compute left- and rightmost eigenvalue of child
798* to high precision in order to shift as close
799* as possible and obtain as large relative gaps
800* as possible
801*
802
803 DO 55 k =1,4
804 p = indexw( wbegin-1+splace(k) )
805 offset = indexw( wbegin ) - 1
806 CALL dlarrb2( in, d(ibegin),
807 $ work( indlld+ibegin-1 ),p,p,
808 $ rqtol, rqtol, offset,
809 $ work(wbegin),wgap(wbegin),
810 $ werr(wbegin),work( indwrk ),
811 $ iwork( iindwk ),
812 $ pivmin, lgpvmn, lgspdm, in, iinfo )
813 55 CONTINUE
814*
815* Compute RRR of child cluster.
816* Note that the new RRR is stored in Z
817*
818C DLARRF2 needs LWORK = 2*N
819 CALL dlarrf2( in, d( ibegin ), l( ibegin ),
820 $ work(indld+ibegin-1),
821 $ splace(1), splace(2),
822 $ splace(3), splace(4), work(wbegin),
823 $ wgap(wbegin), werr(wbegin), trymid,
824 $ spdiam, lgap, rgap, pivmin, tau,
825 $ z( ibegin, newftt ),
826 $ z( ibegin, newftt+1 ),
827 $ work( indwrk ), iinfo )
828 IF( iinfo.EQ.0 ) THEN
829* a new RRR for the cluster was found by DLARRF2
830* update shift and store it
831 ssigma = sigma + tau
832 z( iend, newftt+1 ) = ssigma
833* WORK() are the midpoints and WERR() the semi-width
834* Note that the entries in W are unchanged.
835 DO 116 k = newfst, newlst
836 fudge =
837 $ three*eps*abs(work(wbegin+k-1))
838 work( wbegin + k - 1 ) =
839 $ work( wbegin + k - 1) - tau
840 fudge = fudge +
841 $ four*eps*abs(work(wbegin+k-1))
842* Fudge errors
843 werr( wbegin + k - 1 ) =
844 $ werr( wbegin + k - 1 ) + fudge
845 116 CONTINUE
846
847 nclus = nclus + 1
848 k = newcls + 2*nclus
849 iwork( k-1 ) = newfst
850 iwork( k ) = newlst
851*
852 IF(.NOT.delref) THEN
853 onlylc = .true.
854*
855 IF(onlylc) THEN
856 mywfst = max(wbegin-1+newfst,dol-1)
857 mywlst = min(wbegin-1+newlst,dou+1)
858 ELSE
859 mywfst = wbegin-1+newfst
860 mywlst = wbegin-1+newlst
861 ENDIF
862
863* Compute LLD of new RRR
864 DO 5000 k = ibegin, iend-1
865 work( indwrk-1+k ) =
866 $ z(k,newftt)*
867 $ (z(k,newftt+1)**2)
868 5000 CONTINUE
869* P and Q are index of the first and last
870* eigenvalue to compute within the new cluster
871 p = indexw( mywfst )
872 q = indexw( mywlst )
873* Offset for the arrays WORK, WGAP and WERR
874 offset = indexw( wbegin ) - 1
875* perform limited bisection (if necessary) to get approximate
876* eigenvalues to the precision needed.
877 CALL dlarrb2( in,
878 $ z(ibegin, newftt ),
879 $ work(indwrk+ibegin-1),
880 $ p, q, rtol1, rtol2, offset,
881 $ work(wbegin),wgap(wbegin),werr(wbegin),
882 $ work( indwrk+n ), iwork( iindwk ),
883 $ pivmin, lgpvmn, lgspdm, in, iinfo )
884 IF( iinfo.NE.0 ) THEN
885 info = -1
886 RETURN
887 ENDIF
888* Each time the eigenvalues in WORK get refined, we store
889* the newly found approximation with all shifts applied in W
890 DO 5003 k=newfst,newlst
891 w(wbegin+k-1) = work(wbegin+k-1)+ssigma
892 5003 CONTINUE
893 ENDIF
894*
895 ELSE
896 info = -2
897 RETURN
898 ENDIF
899 ELSE
900*
901* Compute eigenvector of singleton
902*
903 iter = 0
904*
905 tol = four * log_in * eps
906*
907 k = newfst
908 windex = wbegin + k - 1
909 zindex = windex - zoffset
910 windmn = max(windex - 1,1)
911 windpl = min(windex + 1,m)
912 lambda = work( windex )
913* Check if eigenvector computation is to be skipped
914 IF((windex.LT.dol).OR.
915 $ (windex.GT.dou)) THEN
916 eskip = .true.
917 GOTO 125
918 ELSE
919 eskip = .false.
920 ENDIF
921 left = work( windex ) - werr( windex )
922 right = work( windex ) + werr( windex )
923 indeig = indexw( windex )
924 IF( k .EQ. 1) THEN
925 lgap = eps*max(abs(left),abs(right))
926 ELSE
927 lgap = wgap(windmn)
928 ENDIF
929 IF( k .EQ. im) THEN
930 rgap = eps*max(abs(left),abs(right))
931 ELSE
932 rgap = wgap(windex)
933 ENDIF
934 gap = min( lgap, rgap )
935 IF(( k .EQ. 1).OR.(k .EQ. im)) THEN
936 gaptol = zero
937 ELSE
938 gaptol = gap * eps
939 ENDIF
940 isupmn = in
941 isupmx = 1
942* Update WGAP so that it holds the minimum gap
943* to the left or the right. This is crucial in the
944* case where bisection is used to ensure that the
945* eigenvalue is refined up to the required precision.
946* The correct value is restored afterwards.
947 savegp = wgap(windex)
948 wgap(windex) = gap
949* We want to use the Rayleigh Quotient Correction
950* as often as possible since it converges quadratically
951* when we are close enough to the desired eigenvalue.
952* However, the Rayleigh Quotient can have the wrong sign
953* and lead us away from the desired eigenvalue. In this
954* case, the best we can do is to use bisection.
955 usedbs = .false.
956 usedrq = .false.
957* Bisection is initially turned off unless it is forced
958 needbs = .NOT.tryrqc
959* Reset ITWIST
960 itwist = 0
961 120 CONTINUE
962* Check if bisection should be used to refine eigenvalue
963 IF(needbs) THEN
964* Take the bisection as new iterate
965 usedbs = .true.
966* Temporary copy of twist index needed
967 itmp1 = itwist
968 offset = indexw( wbegin ) - 1
969 CALL dlarrb2( in, d(ibegin),
970 $ work(indlld+ibegin-1),indeig,indeig,
971 $ zero, two*eps, offset,
972 $ work(wbegin),wgap(wbegin),
973 $ werr(wbegin),work( indwrk ),
974 $ iwork( iindwk ),
975 $ pivmin, lgpvmn, lgspdm, itmp1, iinfo )
976 IF( iinfo.NE.0 ) THEN
977 info = -3
978 RETURN
979 ENDIF
980 lambda = work( windex )
981* Reset twist index from inaccurate LAMBDA to
982* force computation of true MINGMA
983 itwist = 0
984 ENDIF
985* Given LAMBDA, compute the eigenvector.
986 CALL dlar1va( in, 1, in, lambda, d(ibegin),
987 $ l( ibegin ), work(indld+ibegin-1),
988 $ work(indlld+ibegin-1),
989 $ pivmin, gaptol, z( ibegin, zindex),
990 $ .NOT.usedbs, negcnt, ztz, mingma,
991 $ itwist, isuppz( 2*windex-1 ),
992 $ nrminv, resid, rqcorr, work( indwrk ) )
993 IF(iter .EQ. 0) THEN
994 bstres = resid
995 bstw = lambda
996 ELSEIF(resid.LT.bstres) THEN
997 bstres = resid
998 bstw = lambda
999 ENDIF
1000 isupmn = min(isupmn,isuppz( 2*windex-1 ))
1001 isupmx = max(isupmx,isuppz( 2*windex ))
1002 iter = iter + 1
1003*
1004* Convergence test for Rayleigh-Quotient iteration
1005* (omitted when Bisection has been used)
1006*
1007 IF( resid.GT.tol*gap .AND. abs( rqcorr ).GT.
1008 $ rqtol*abs( lambda ) .AND. .NOT. usedbs)
1009 $ THEN
1010* We need to check that the RQCORR update doesn't
1011* move the eigenvalue away from the desired one and
1012* towards a neighbor. -> protection with bisection
1013 IF(indeig.LE.negcnt) THEN
1014* The wanted eigenvalue lies to the left
1015 sgndef = -one
1016 ELSE
1017* The wanted eigenvalue lies to the right
1018 sgndef = one
1019 ENDIF
1020* We only use the RQCORR if it improves the
1021* the iterate reasonably.
1022 IF( ( rqcorr*sgndef.GE.zero )
1023 $ .AND.( lambda + rqcorr.LE. right)
1024 $ .AND.( lambda + rqcorr.GE. left)
1025 $ ) THEN
1026 usedrq = .true.
1027* Store new midpoint of bisection interval in WORK
1028 IF(sgndef.EQ.one) THEN
1029* The current LAMBDA is on the left of the true
1030* eigenvalue
1031 left = lambda
1032 ELSE
1033* The current LAMBDA is on the right of the true
1034* eigenvalue
1035 right = lambda
1036 ENDIF
1037 work( windex ) =
1038 $ half * (right + left)
1039* Take RQCORR since it has the correct sign and
1040* improves the iterate reasonably
1041 lambda = lambda + rqcorr
1042* Update width of error interval
1043 werr( windex ) =
1044 $ half * (right-left)
1045 ELSE
1046 needbs = .true.
1047 ENDIF
1048 IF(right-left.LT.rqtol*abs(lambda)) THEN
1049* The eigenvalue is computed to bisection accuracy
1050* compute eigenvector and stop
1051 usedbs = .true.
1052 GOTO 120
1053 ELSEIF( iter.LT.maxitr ) THEN
1054 GOTO 120
1055 ELSEIF( iter.EQ.maxitr ) THEN
1056 needbs = .true.
1057 GOTO 120
1058 ELSE
1059 info = 5
1060 RETURN
1061 END IF
1062 ELSE
1063 stp2ii = .false.
1064 IF(usedrq .AND. usedbs .AND.
1065 $ bstres.LE.resid) THEN
1066 lambda = bstw
1067 stp2ii = .true.
1068 ENDIF
1069 IF (stp2ii) THEN
1070 CALL dlar1va( in, 1, in, lambda,
1071 $ d( ibegin ), l( ibegin ),
1072 $ work(indld+ibegin-1),
1073 $ work(indlld+ibegin-1),
1074 $ pivmin, gaptol,
1075 $ z( ibegin, zindex ),
1076 $ .NOT.usedbs, negcnt, ztz, mingma,
1077 $ itwist,
1078 $ isuppz( 2*windex-1 ),
1079 $ nrminv, resid, rqcorr, work( indwrk ) )
1080 ENDIF
1081 work( windex ) = lambda
1082 END IF
1083*
1084* Compute FP-vector support w.r.t. whole matrix
1085*
1086 isuppz( 2*windex-1 ) = isuppz( 2*windex-1 )+oldien
1087 isuppz( 2*windex ) = isuppz( 2*windex )+oldien
1088 zfrom = isuppz( 2*windex-1 )
1089 zto = isuppz( 2*windex )
1090 isupmn = isupmn + oldien
1091 isupmx = isupmx + oldien
1092* Ensure vector is ok if support in the RQI has changed
1093 IF(isupmn.LT.zfrom) THEN
1094 DO 122 ii = isupmn,zfrom-1
1095 z( ii, zindex ) = zero
1096 122 CONTINUE
1097 ENDIF
1098 IF(isupmx.GT.zto) THEN
1099 DO 123 ii = zto+1,isupmx
1100 z( ii, zindex ) = zero
1101 123 CONTINUE
1102 ENDIF
1103 CALL dscal( zto-zfrom+1, nrminv,
1104 $ z( zfrom, zindex ), 1 )
1105 125 CONTINUE
1106* Update W
1107 w( windex ) = lambda+sigma
1108* Recompute the gaps on the left and right
1109* But only allow them to become larger and not
1110* smaller (which can only happen through "bad"
1111* cancellation and doesn't reflect the theory
1112* where the initial gaps are underestimated due
1113* to WERR being too crude.)
1114 IF(.NOT.eskip) THEN
1115 IF( k.GT.1) THEN
1116 wgap( windmn ) = max( wgap(windmn),
1117 $ w(windex)-werr(windex)
1118 $ - w(windmn)-werr(windmn) )
1119 ENDIF
1120 IF( windex.LT.wend ) THEN
1121 wgap( windex ) = max( savegp,
1122 $ w( windpl )-werr( windpl )
1123 $ - w( windex )-werr( windex) )
1124 ENDIF
1125 ENDIF
1126 ENDIF
1127* here ends the code for the current child
1128*
1129 139 CONTINUE
1130* Proceed to any remaining child nodes
1131 newfst = j + 1
1132 140 CONTINUE
1133 150 CONTINUE
1134* Store number of clusters
1135 iwork( iincls + jblk ) = nclus
1136*
1137 END IF
1138 ibegin = iend + 1
1139 wbegin = wend + 1
1140 170 CONTINUE
1141*
1142* Check if everything is done: no clusters left for
1143* this processor in any block
1144*
1145 finish = .true.
1146 DO 180 jblk = 1, iblock( m )
1147 finish = finish .AND. (iwork(iincls + jblk).EQ.0)
1148 180 CONTINUE
1149
1150 IF(.NOT.finish) THEN
1151 ndepth = ndepth + 1
1152 IF((needil.GE.dol).AND.(neediu.LE.dou)) THEN
1153* Once this processor's part of the
1154* representation tree consists exclusively of eigenvalues
1155* between DOL and DOU, it can work independently from all
1156* others.
1157 GOTO 40
1158 ENDIF
1159 ENDIF
1160*
1161
1162 RETURN
1163*
1164* End of DLARRV2
1165*
subroutine dlar1va(n, b1, bn, lambda, d, l, ld, lld, pivmin, gaptol, z, wantnc, negcnt, ztz, mingma, r, isuppz, nrminv, resid, rqcorr, work)
Definition dlar1va.f:4
subroutine dlarrb2(n, d, lld, ifirst, ilast, rtol1, rtol2, offset, w, wgap, werr, work, iwork, pivmin, lgpvmn, lgspdm, twist, info)
Definition dlarrb2.f:4
subroutine dlarrf2(n, d, l, ld, clstrt, clend, clmid1, clmid2, w, wgap, werr, trymid, spdiam, clgapl, clgapr, pivmin, sigma, dplus, lplus, work, info)
Definition dlarrf2.f:5
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
double precision function dlamch(cmach)
Definition tools.f:10
Here is the call graph for this function:
Here is the caller graph for this function: