LAPACK  3.10.0
LAPACK: Linear Algebra PACKage
iparmq.f
Go to the documentation of this file.
1 *> \brief \b IPARMQ
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download IPARMQ + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/iparmq.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/iparmq.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/iparmq.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * INTEGER FUNCTION IPARMQ( ISPEC, NAME, OPTS, N, ILO, IHI, LWORK )
22 *
23 * .. Scalar Arguments ..
24 * INTEGER IHI, ILO, ISPEC, LWORK, N
25 * CHARACTER NAME*( * ), OPTS*( * )
26 *
27 *
28 *> \par Purpose:
29 * =============
30 *>
31 *> \verbatim
32 *>
33 *> This program sets problem and machine dependent parameters
34 *> useful for xHSEQR and related subroutines for eigenvalue
35 *> problems. It is called whenever
36 *> IPARMQ is called with 12 <= ISPEC <= 16
37 *> \endverbatim
38 *
39 * Arguments:
40 * ==========
41 *
42 *> \param[in] ISPEC
43 *> \verbatim
44 *> ISPEC is INTEGER
45 *> ISPEC specifies which tunable parameter IPARMQ should
46 *> return.
47 *>
48 *> ISPEC=12: (INMIN) Matrices of order nmin or less
49 *> are sent directly to xLAHQR, the implicit
50 *> double shift QR algorithm. NMIN must be
51 *> at least 11.
52 *>
53 *> ISPEC=13: (INWIN) Size of the deflation window.
54 *> This is best set greater than or equal to
55 *> the number of simultaneous shifts NS.
56 *> Larger matrices benefit from larger deflation
57 *> windows.
58 *>
59 *> ISPEC=14: (INIBL) Determines when to stop nibbling and
60 *> invest in an (expensive) multi-shift QR sweep.
61 *> If the aggressive early deflation subroutine
62 *> finds LD converged eigenvalues from an order
63 *> NW deflation window and LD > (NW*NIBBLE)/100,
64 *> then the next QR sweep is skipped and early
65 *> deflation is applied immediately to the
66 *> remaining active diagonal block. Setting
67 *> IPARMQ(ISPEC=14) = 0 causes TTQRE to skip a
68 *> multi-shift QR sweep whenever early deflation
69 *> finds a converged eigenvalue. Setting
70 *> IPARMQ(ISPEC=14) greater than or equal to 100
71 *> prevents TTQRE from skipping a multi-shift
72 *> QR sweep.
73 *>
74 *> ISPEC=15: (NSHFTS) The number of simultaneous shifts in
75 *> a multi-shift QR iteration.
76 *>
77 *> ISPEC=16: (IACC22) IPARMQ is set to 0, 1 or 2 with the
78 *> following meanings.
79 *> 0: During the multi-shift QR/QZ sweep,
80 *> blocked eigenvalue reordering, blocked
81 *> Hessenberg-triangular reduction,
82 *> reflections and/or rotations are not
83 *> accumulated when updating the
84 *> far-from-diagonal matrix entries.
85 *> 1: During the multi-shift QR/QZ sweep,
86 *> blocked eigenvalue reordering, blocked
87 *> Hessenberg-triangular reduction,
88 *> reflections and/or rotations are
89 *> accumulated, and matrix-matrix
90 *> multiplication is used to update the
91 *> far-from-diagonal matrix entries.
92 *> 2: During the multi-shift QR/QZ sweep,
93 *> blocked eigenvalue reordering, blocked
94 *> Hessenberg-triangular reduction,
95 *> reflections and/or rotations are
96 *> accumulated, and 2-by-2 block structure
97 *> is exploited during matrix-matrix
98 *> multiplies.
99 *> (If xTRMM is slower than xGEMM, then
100 *> IPARMQ(ISPEC=16)=1 may be more efficient than
101 *> IPARMQ(ISPEC=16)=2 despite the greater level of
102 *> arithmetic work implied by the latter choice.)
103 *>
104 *> ISPEC=17: (ICOST) An estimate of the relative cost of flops
105 *> within the near-the-diagonal shift chase compared
106 *> to flops within the BLAS calls of a QZ sweep.
107 *> \endverbatim
108 *>
109 *> \param[in] NAME
110 *> \verbatim
111 *> NAME is CHARACTER string
112 *> Name of the calling subroutine
113 *> \endverbatim
114 *>
115 *> \param[in] OPTS
116 *> \verbatim
117 *> OPTS is CHARACTER string
118 *> This is a concatenation of the string arguments to
119 *> TTQRE.
120 *> \endverbatim
121 *>
122 *> \param[in] N
123 *> \verbatim
124 *> N is INTEGER
125 *> N is the order of the Hessenberg matrix H.
126 *> \endverbatim
127 *>
128 *> \param[in] ILO
129 *> \verbatim
130 *> ILO is INTEGER
131 *> \endverbatim
132 *>
133 *> \param[in] IHI
134 *> \verbatim
135 *> IHI is INTEGER
136 *> It is assumed that H is already upper triangular
137 *> in rows and columns 1:ILO-1 and IHI+1:N.
138 *> \endverbatim
139 *>
140 *> \param[in] LWORK
141 *> \verbatim
142 *> LWORK is INTEGER
143 *> The amount of workspace available.
144 *> \endverbatim
145 *
146 * Authors:
147 * ========
148 *
149 *> \author Univ. of Tennessee
150 *> \author Univ. of California Berkeley
151 *> \author Univ. of Colorado Denver
152 *> \author NAG Ltd.
153 *
154 *> \ingroup OTHERauxiliary
155 *
156 *> \par Further Details:
157 * =====================
158 *>
159 *> \verbatim
160 *>
161 *> Little is known about how best to choose these parameters.
162 *> It is possible to use different values of the parameters
163 *> for each of CHSEQR, DHSEQR, SHSEQR and ZHSEQR.
164 *>
165 *> It is probably best to choose different parameters for
166 *> different matrices and different parameters at different
167 *> times during the iteration, but this has not been
168 *> implemented --- yet.
169 *>
170 *>
171 *> The best choices of most of the parameters depend
172 *> in an ill-understood way on the relative execution
173 *> rate of xLAQR3 and xLAQR5 and on the nature of each
174 *> particular eigenvalue problem. Experiment may be the
175 *> only practical way to determine which choices are most
176 *> effective.
177 *>
178 *> Following is a list of default values supplied by IPARMQ.
179 *> These defaults may be adjusted in order to attain better
180 *> performance in any particular computational environment.
181 *>
182 *> IPARMQ(ISPEC=12) The xLAHQR vs xLAQR0 crossover point.
183 *> Default: 75. (Must be at least 11.)
184 *>
185 *> IPARMQ(ISPEC=13) Recommended deflation window size.
186 *> This depends on ILO, IHI and NS, the
187 *> number of simultaneous shifts returned
188 *> by IPARMQ(ISPEC=15). The default for
189 *> (IHI-ILO+1) <= 500 is NS. The default
190 *> for (IHI-ILO+1) > 500 is 3*NS/2.
191 *>
192 *> IPARMQ(ISPEC=14) Nibble crossover point. Default: 14.
193 *>
194 *> IPARMQ(ISPEC=15) Number of simultaneous shifts, NS.
195 *> a multi-shift QR iteration.
196 *>
197 *> If IHI-ILO+1 is ...
198 *>
199 *> greater than ...but less ... the
200 *> or equal to ... than default is
201 *>
202 *> 0 30 NS = 2+
203 *> 30 60 NS = 4+
204 *> 60 150 NS = 10
205 *> 150 590 NS = **
206 *> 590 3000 NS = 64
207 *> 3000 6000 NS = 128
208 *> 6000 infinity NS = 256
209 *>
210 *> (+) By default matrices of this order are
211 *> passed to the implicit double shift routine
212 *> xLAHQR. See IPARMQ(ISPEC=12) above. These
213 *> values of NS are used only in case of a rare
214 *> xLAHQR failure.
215 *>
216 *> (**) The asterisks (**) indicate an ad-hoc
217 *> function increasing from 10 to 64.
218 *>
219 *> IPARMQ(ISPEC=16) Select structured matrix multiply.
220 *> (See ISPEC=16 above for details.)
221 *> Default: 3.
222 *>
223 *> IPARMQ(ISPEC=17) Relative cost heuristic for blocksize selection.
224 *> Expressed as a percentage.
225 *> Default: 10.
226 *> \endverbatim
227 *>
228 * =====================================================================
229  INTEGER FUNCTION iparmq( ISPEC, NAME, OPTS, N, ILO, IHI, LWORK )
230 *
231 * -- LAPACK auxiliary routine --
232 * -- LAPACK is a software package provided by Univ. of Tennessee, --
233 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
234 *
235 * .. Scalar Arguments ..
236  INTEGER ihi, ilo, ispec, lwork, n
237  CHARACTER name*( * ), opts*( * )
238 *
239 * ================================================================
240 * .. Parameters ..
241  INTEGER inmin, inwin, inibl, ishfts, iacc22, icost
242  parameter( inmin = 12, inwin = 13, inibl = 14,
243  $ ishfts = 15, iacc22 = 16, icost = 17 )
244  INTEGER nmin, k22min, kacmin, nibble, knwswp, rcost
245  parameter( nmin = 75, k22min = 14, kacmin = 14,
246  $ nibble = 14, knwswp = 500, rcost = 10 )
247  REAL two
248  parameter( two = 2.0 )
249 * ..
250 * .. Local Scalars ..
251  INTEGER nh, ns
252  INTEGER i, ic, iz
253  CHARACTER subnam*6
254 * ..
255 * .. Intrinsic Functions ..
256  INTRINSIC log, max, mod, nint, real
257 * ..
258 * .. Executable Statements ..
259  IF( ( ispec.EQ.ishfts ) .OR. ( ispec.EQ.inwin ) .OR.
260  $ ( ispec.EQ.iacc22 ) ) THEN
261 *
262 * ==== Set the number simultaneous shifts ====
263 *
264  nh = ihi - ilo + 1
265  ns = 2
266  IF( nh.GE.30 )
267  $ ns = 4
268  IF( nh.GE.60 )
269  $ ns = 10
270  IF( nh.GE.150 )
271  $ ns = max( 10, nh / nint( log( real( nh ) ) / log( two ) ) )
272  IF( nh.GE.590 )
273  $ ns = 64
274  IF( nh.GE.3000 )
275  $ ns = 128
276  IF( nh.GE.6000 )
277  $ ns = 256
278  ns = max( 2, ns-mod( ns, 2 ) )
279  END IF
280 *
281  IF( ispec.EQ.inmin ) THEN
282 *
283 *
284 * ===== Matrices of order smaller than NMIN get sent
285 * . to xLAHQR, the classic double shift algorithm.
286 * . This must be at least 11. ====
287 *
288  iparmq = nmin
289 *
290  ELSE IF( ispec.EQ.inibl ) THEN
291 *
292 * ==== INIBL: skip a multi-shift qr iteration and
293 * . whenever aggressive early deflation finds
294 * . at least (NIBBLE*(window size)/100) deflations. ====
295 *
296  iparmq = nibble
297 *
298  ELSE IF( ispec.EQ.ishfts ) THEN
299 *
300 * ==== NSHFTS: The number of simultaneous shifts =====
301 *
302  iparmq = ns
303 *
304  ELSE IF( ispec.EQ.inwin ) THEN
305 *
306 * ==== NW: deflation window size. ====
307 *
308  IF( nh.LE.knwswp ) THEN
309  iparmq = ns
310  ELSE
311  iparmq = 3*ns / 2
312  END IF
313 *
314  ELSE IF( ispec.EQ.iacc22 ) THEN
315 *
316 * ==== IACC22: Whether to accumulate reflections
317 * . before updating the far-from-diagonal elements
318 * . and whether to use 2-by-2 block structure while
319 * . doing it. A small amount of work could be saved
320 * . by making this choice dependent also upon the
321 * . NH=IHI-ILO+1.
322 *
323 *
324 * Convert NAME to upper case if the first character is lower case.
325 *
326  iparmq = 0
327  subnam = name
328  ic = ichar( subnam( 1: 1 ) )
329  iz = ichar( 'Z' )
330  IF( iz.EQ.90 .OR. iz.EQ.122 ) THEN
331 *
332 * ASCII character set
333 *
334  IF( ic.GE.97 .AND. ic.LE.122 ) THEN
335  subnam( 1: 1 ) = char( ic-32 )
336  DO i = 2, 6
337  ic = ichar( subnam( i: i ) )
338  IF( ic.GE.97 .AND. ic.LE.122 )
339  $ subnam( i: i ) = char( ic-32 )
340  END DO
341  END IF
342 *
343  ELSE IF( iz.EQ.233 .OR. iz.EQ.169 ) THEN
344 *
345 * EBCDIC character set
346 *
347  IF( ( ic.GE.129 .AND. ic.LE.137 ) .OR.
348  $ ( ic.GE.145 .AND. ic.LE.153 ) .OR.
349  $ ( ic.GE.162 .AND. ic.LE.169 ) ) THEN
350  subnam( 1: 1 ) = char( ic+64 )
351  DO i = 2, 6
352  ic = ichar( subnam( i: i ) )
353  IF( ( ic.GE.129 .AND. ic.LE.137 ) .OR.
354  $ ( ic.GE.145 .AND. ic.LE.153 ) .OR.
355  $ ( ic.GE.162 .AND. ic.LE.169 ) )subnam( i:
356  $ i ) = char( ic+64 )
357  END DO
358  END IF
359 *
360  ELSE IF( iz.EQ.218 .OR. iz.EQ.250 ) THEN
361 *
362 * Prime machines: ASCII+128
363 *
364  IF( ic.GE.225 .AND. ic.LE.250 ) THEN
365  subnam( 1: 1 ) = char( ic-32 )
366  DO i = 2, 6
367  ic = ichar( subnam( i: i ) )
368  IF( ic.GE.225 .AND. ic.LE.250 )
369  $ subnam( i: i ) = char( ic-32 )
370  END DO
371  END IF
372  END IF
373 *
374  IF( subnam( 2:6 ).EQ.'GGHRD' .OR.
375  $ subnam( 2:6 ).EQ.'GGHD3' ) THEN
376  iparmq = 1
377  IF( nh.GE.k22min )
378  $ iparmq = 2
379  ELSE IF ( subnam( 4:6 ).EQ.'EXC' ) THEN
380  IF( nh.GE.kacmin )
381  $ iparmq = 1
382  IF( nh.GE.k22min )
383  $ iparmq = 2
384  ELSE IF ( subnam( 2:6 ).EQ.'HSEQR' .OR.
385  $ subnam( 2:5 ).EQ.'LAQR' ) THEN
386  IF( ns.GE.kacmin )
387  $ iparmq = 1
388  IF( ns.GE.k22min )
389  $ iparmq = 2
390  END IF
391 *
392  ELSE IF( ispec.EQ.icost ) THEN
393 *
394 * === Relative cost of near-the-diagonal chase vs
395 * BLAS updates ===
396 *
397  iparmq = rcost
398  ELSE
399 * ===== invalid value of ispec =====
400  iparmq = -1
401 *
402  END IF
403 *
404 * ==== End of IPARMQ ====
405 *
406  END
integer function iparmq(ISPEC, NAME, OPTS, N, ILO, IHI, LWORK)
IPARMQ
Definition: iparmq.f:230