ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
piparmq.f
Go to the documentation of this file.
1  INTEGER FUNCTION piparmq( ICTXT, ISPEC, NAME, OPTS, N, ILO, IHI,
2  $ LWORKNB )
3 *
4 * Contribution from the Department of Computing Science and HPC2N,
5 * Umea University, Sweden
6 *
7 * -- ScaLAPACK auxiliary routine (version 2.0.1) --
8 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9 * Univ. of Colorado Denver and University of California, Berkeley.
10 * January, 2012
11 *
12  IMPLICIT NONE
13 *
14 * .. Scalar Arguments ..
15  INTEGER ictxt, ihi, ilo, ispec, lworknb, n
16  CHARACTER name*( * ), opts*( * )
17 *
18 * Purpose
19 * =======
20 *
21 * This program sets problem and machine dependent parameters
22 * useful for PxHSEQR and its subroutines. It is called whenever
23 * PILAENVX is called with 12 <= ISPEC <= 16
24 *
25 * Arguments
26 * =========
27 *
28 * ICTXT (local input) INTEGER
29 * On entry, ICTXT specifies the BLACS context handle,
30 * indicating the global context of the operation. The
31 * context itself is global, but the value of ICTXT is
32 * local.
33 *
34 * ISPEC (global input) INTEGER
35 * ISPEC specifies which tunable parameter PIPARMQ should
36 * return.
37 *
38 * ISPEC=12: (INMIN) Matrices of order nmin or less
39 * are sent directly to PxLAHQR, the implicit
40 * double shift QR algorithm. NMIN must be
41 * at least 11.
42 *
43 * ISPEC=13: (INWIN) Size of the deflation window.
44 * This is best set greater than or equal to
45 * the number of simultaneous shifts NS.
46 * Larger matrices benefit from larger deflation
47 * windows.
48 *
49 * ISPEC=14: (INIBL) Determines when to stop nibbling and
50 * invest in an (expensive) multi-shift QR sweep.
51 * If the aggressive early deflation subroutine
52 * finds LD converged eigenvalues from an order
53 * NW deflation window and LD.GT.(NW*NIBBLE)/100,
54 * then the next QR sweep is skipped and early
55 * deflation is applied immediately to the
56 * remaining active diagonal block. Setting
57 * PIPARMQ(ISPEC=14) = 0 causes PxLAQR0 to skip a
58 * multi-shift QR sweep whenever early deflation
59 * finds a converged eigenvalue. Setting
60 * PIPARMQ(ISPEC=14) greater than or equal to 100
61 * prevents PxLAQR0 from skipping a multi-shift
62 * QR sweep.
63 *
64 * ISPEC=15: (NSHFTS) The number of simultaneous shifts in
65 * a multi-shift QR iteration.
66 *
67 * ISPEC=16: (IACC22) PIPARMQ is set to 1 or 2 with the
68 * following meanings.
69 * 1: During the multi-shift QR sweep,
70 * PxLAQR5 and/or xLAQR6 accumulates reflections
71 * and uses matrix-matrix multiply to update
72 * the far-from-diagonal matrix entries.
73 * 2: During the multi-shift QR sweep.
74 * PxLAQR5 accumulates reflections and takes
75 * advantage of 2-by-2 block structure during
76 * matrix-matrix multiplies.
77 *
78 * ( IACC22=0 is valid in LAPACK but not here.
79 * Householder reflections are always accumulated
80 * for the performance consideration.
81 * If xTRMM is slower than xGEMM or NB is small,
82 * PIPARMQ(ISPEC=16)=1 may be more efficient than
83 * PIPARMQ(ISPEC=16)=2 despite the greater level of
84 * arithmetic work implied by the latter choice. )
85 *
86 * NAME (global input) character string
87 * Name of the calling subroutine
88 *
89 * OPTS (global input) character string
90 * This is a concatenation of the string arguments to
91 * TTQRE.
92 *
93 * N (global input) integer scalar
94 * N is the order of the Hessenberg matrix H.
95 *
96 * ILO (global input) INTEGER
97 * IHI (global input) INTEGER
98 * It is assumed that H is already upper triangular
99 * in rows and columns 1:ILO-1 and IHI+1:N.
100 *
101 * LWORKNB (global input) INTEGER
102 * The amount of workspace available or the blockfactor.
103 *
104 * Further Details
105 * ===============
106 *
107 * Little is known about how best to choose these parameters.
108 * It is possible to use different values of the parameters
109 * for each of PCHSEQR, PDHSEQR, PSHSEQR and PZHSEQR.
110 *
111 * It is probably best to choose different parameters for
112 * different matrices and different parameters at different
113 * times during the iteration, but this has not been fully
114 * implemented --- yet.
115 *
116 *
117 * The best choices of most of the parameters depend
118 * in an ill-understood way on the relative execution
119 * rate of PxLAQR3 and PxLAQR5 and on the nature of each
120 * particular eigenvalue problem. Experiment may be the
121 * only practical way to determine which choices are most
122 * effective.
123 *
124 * Following is a list of default values supplied by PIPARMQ.
125 * These defaults may be adjusted in order to attain better
126 * performance in any particular computational environment.
127 *
128 * PIPARMQ(ISPEC=12) The PxLAQR1 vs PxLAQR0 crossover point.
129 * Default: 220. (Must be at least 11.)
130 *
131 * PIPARMQ(ISPEC=13) Recommended deflation window size.
132 * This depends on ILO, IHI and NS, the
133 * number of simultaneous shifts returned
134 * by PIPARMQ(ISPEC=15). The default for
135 * (IHI-ILO+1).LE.500 is NS. The default
136 * for (IHI-ILO+1).GT.500 is 3*NS/2.
137 *
138 * PIPARMQ(ISPEC=14) Nibble crossover point.
139 * The default for the serial case is 14.
140 * The default for the parallel case is
141 * 335 * N**(-0.44) * NPROCS.
142 *
143 * PIPARMQ(ISPEC=15) Number of simultaneous shifts, NS.
144 * a multi-shift QR iteration.
145 *
146 * If IHI-ILO+1 is ...
147 *
148 * greater than ...but less ... the
149 * or equal to ... than default is
150 *
151 * 0 30 NS = 2+
152 * 30 60 NS = 4+
153 * 60 150 NS = 10
154 * 150 590 NS = **
155 * 590 3000 NS = 64
156 * 3000 6000 NS = 128
157 * 6000 12000 NS = 256
158 * 12000 24000 NS = 512
159 * 24000 48000 NS = 1024
160 * 48000 96000 NS = 2048
161 * 96000 INFINITY NS = 4096
162 *
163 * (+) By default matrices of this order are
164 * passed to the implicit double shift routine
165 * PxLAQR1. See PIPARMQ(ISPEC=12) above. These
166 * values of NS are used only in case of a rare
167 * PxLAQR1 failure.
168 *
169 * (**) The asterisks (**) indicate an ad-hoc
170 * function increasing from 10 to 64.
171 *
172 * PIPARMQ(ISPEC=16) Select structured matrix multiply.
173 * (See ISPEC=16 above for details.)
174 * Default: 3.
175 *
176 * ================================================================
177 * .. Parameters ..
178  INTEGER inmin, inwin, inibl, ishfts, iacc22
179  parameter( inmin = 12, inwin = 13, inibl = 14,
180  $ ishfts = 15, iacc22 = 16 )
181  INTEGER nmin, nmin2, k22min, kacmin, nibble, knwswp
182  parameter( nmin = 220, k22min = 14, kacmin = 14,
183  $ nibble = 14, knwswp = 500, nmin2 = 770 )
184  REAL two
185  parameter( two = 2.0 )
186 * ..
187 * .. Local Scalars ..
188  INTEGER nh, ns, myrow, mycol, nprow, npcol, np
189 * ..
190 * .. Intrinsic Functions ..
191  INTRINSIC log, max, mod, nint, real
192 * ..
193 * .. External functions ..
194  INTEGER iceil
195  EXTERNAL iceil
196 * ..
197 * .. External Subroutines ..
198  EXTERNAL blacs_gridinfo
199 * ..
200 * .. Executable Statements ..
201  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
202 *
203  IF( ( ispec.EQ.ishfts ) .OR. ( ispec.EQ.inwin ) .OR.
204  $ ( ispec.EQ.iacc22 ) ) THEN
205 *
206 * ==== Set the number simultaneous shifts ====
207 *
208  nh = ihi - ilo + 1
209  ns = 2
210  IF( nh.GE.30 )
211  $ ns = 4
212  IF( nh.GE.60 )
213  $ ns = 10
214  IF( nh.GE.150 )
215  $ ns = max( 10, nh / nint( log( real( nh ) ) / log( two ) ))
216  IF( nh.GE.590 )
217  $ ns = 64
218  IF( nh.GE.3000 )
219  $ ns = 128
220  IF( nh.GE.6000 )
221  $ ns = 256
222  IF( nh.GE.12000 )
223  $ ns = 512
224  IF( nh.GE.24000 )
225  $ ns = 1024
226  IF( nh.GE.48000 )
227  $ ns = 2048
228  IF( nh.GE.96000 )
229  $ ns = 4096
230  IF( nh.GE.192000 )
231  $ ns = 8192
232  IF( nh.GE.384000 )
233  $ ns = 16384
234  IF( nh.GE.768000 )
235  $ ns = 32768
236  IF( nh.GE.1000000 )
237  $ ns = iceil( nh, 25 )
238  ns = max( ns, 2*min(nprow,npcol) )
239  ns = max( 2, ns-mod( ns, 2 ) )
240  END IF
241 *
242  IF( ispec.EQ.inmin ) THEN
243 *
244 *
245 * ===== Submatrices of order smaller than NMIN*min(P_r,P_c)
246 * . get sent to PxLAHQR, the classic ScaLAPACK algorithm.
247 * . This must be at least 11. ====
248 *
249  piparmq = nmin * min( nprow, npcol )
250 *
251  ELSE IF( ispec.EQ.inibl ) THEN
252 *
253 * ==== INIBL: skip a multi-shift QR iteration and
254 * . whenever aggressive early deflation finds
255 * . at least (NIBBLE*(window size)/100) deflations. ====
256 *
257  np = min( nprow, npcol )
258  IF( np.EQ.1 ) THEN
259  piparmq = nibble
260  ELSE
261  nh = ihi - ilo + 1
262  piparmq = min( 100,
263  $ ceiling( 335.0d+0 * nh**(-0.44d+0) * np ) )
264  END IF
265 *
266  ELSE IF( ispec.EQ.ishfts ) THEN
267 *
268 * ==== NSHFTS: The number of simultaneous shifts =====
269 *
270  piparmq = ns
271 *
272  ELSE IF( ispec.EQ.inwin ) THEN
273 *
274 * ==== NW: deflation window size. ====
275 *
276  IF( nh.LE.knwswp ) THEN
277  piparmq = ns
278  ELSE
279  piparmq = 3*ns / 2
280  END IF
281 *
282  ELSE IF( ispec.EQ.iacc22 ) THEN
283 *
284 * ==== IACC22: Whether to use 2-by-2 block structure while
285 * . doing it. A small amount of work could be saved
286 * . by making this choice dependent also upon the
287 * . NH=IHI-ILO+1.
288 *
289  piparmq = 1
290 c PIPARMQ = 0
291 c IF( NS.GE.KACMIN )
292 c $ PIPARMQ = 1
293  IF( ns.GE.k22min )
294  $ piparmq = 2
295 *
296  ELSE
297 * ===== invalid value of ispec =====
298  piparmq = -1
299 *
300  END IF
301 *
302 * ==== End of PIPARMQ ====
303 *
304  END
max
#define max(A, B)
Definition: pcgemr.c:180
piparmq
integer function piparmq(ICTXT, ISPEC, NAME, OPTS, N, ILO, IHI, LWORKNB)
Definition: piparmq.f:3
min
#define min(A, B)
Definition: pcgemr.c:181
iceil
integer function iceil(INUM, IDENOM)
Definition: iceil.f:2