LAPACK  3.10.0
LAPACK: Linear Algebra PACKage
iparam2stage.F
Go to the documentation of this file.
1 *> \brief \b IPARAM2STAGE
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download IPARAM2STAGE + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/iparam2stage.F">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/iparam2stage.F">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/iparam2stage.F">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS,
22 * NI, NBI, IBI, NXI )
23 * #if defined(_OPENMP)
24 * use omp_lib
25 * #endif
26 * IMPLICIT NONE
27 *
28 * .. Scalar Arguments ..
29 * CHARACTER*( * ) NAME, OPTS
30 * INTEGER ISPEC, NI, NBI, IBI, NXI
31 *
32 *> \par Purpose:
33 * =============
34 *>
35 *> \verbatim
36 *>
37 *> This program sets problem and machine dependent parameters
38 *> useful for xHETRD_2STAGE, xHETRD_HE2HB, xHETRD_HB2ST,
39 *> xGEBRD_2STAGE, xGEBRD_GE2GB, xGEBRD_GB2BD
40 *> and related subroutines for eigenvalue problems.
41 *> It is called whenever ILAENV is called with 17 <= ISPEC <= 21.
42 *> It is called whenever ILAENV2STAGE is called with 1 <= ISPEC <= 5
43 *> with a direct conversion ISPEC + 16.
44 *> \endverbatim
45 *
46 * Arguments:
47 * ==========
48 *
49 *> \param[in] ISPEC
50 *> \verbatim
51 *> ISPEC is integer scalar
52 *> ISPEC specifies which tunable parameter IPARAM2STAGE should
53 *> return.
54 *>
55 *> ISPEC=17: the optimal blocksize nb for the reduction to
56 *> BAND
57 *>
58 *> ISPEC=18: the optimal blocksize ib for the eigenvectors /
59 *> singular vectors update routine
60 *>
61 *> ISPEC=19: The length of the array that stores the Householder
62 *> representation for the second stage
63 *> Band to Tridiagonal or Bidiagonal
64 *>
65 *> ISPEC=20: The workspace needed for the routine in input.
66 *>
67 *> ISPEC=21: For future release.
68 *> \endverbatim
69 *>
70 *> \param[in] NAME
71 *> \verbatim
72 *> NAME is character string
73 *> Name of the calling subroutine
74 *> \endverbatim
75 *>
76 *> \param[in] OPTS
77 *> \verbatim
78 *> OPTS is CHARACTER*(*)
79 *> The character options to the subroutine NAME, concatenated
80 *> into a single character string. For example, UPLO = 'U',
81 *> TRANS = 'T', and DIAG = 'N' for a triangular routine would
82 *> be specified as OPTS = 'UTN'.
83 *> \endverbatim
84 *>
85 *> \param[in] NI
86 *> \verbatim
87 *> NI is INTEGER which is the size of the matrix
88 *> \endverbatim
89 *>
90 *> \param[in] NBI
91 *> \verbatim
92 *> NBI is INTEGER which is the blocksize used in the reduction,
93 *> (e.g., the size of the band), needed to compute workspace
94 *> and LHOUS2.
95 *> \endverbatim
96 *>
97 *> \param[in] IBI
98 *> \verbatim
99 *> IBI is INTEGER which represents the IB of the reduction,
100 *> needed to compute workspace and LHOUS2.
101 *> \endverbatim
102 *>
103 *> \param[in] NXI
104 *> \verbatim
105 *> NXI is INTEGER needed in the future release.
106 *> \endverbatim
107 *
108 * Authors:
109 * ========
110 *
111 *> \author Univ. of Tennessee
112 *> \author Univ. of California Berkeley
113 *> \author Univ. of Colorado Denver
114 *> \author NAG Ltd.
115 *
116 *> \ingroup auxOTHERauxiliary
117 *
118 *> \par Further Details:
119 * =====================
120 *>
121 *> \verbatim
122 *>
123 *> Implemented by Azzam Haidar.
124 *>
125 *> All details are available in the technical report and the SC11, SC13 papers.
126 *>
127 *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
128 *> Parallel reduction to condensed forms for symmetric eigenvalue problems
129 *> using aggregated fine-grained and memory-aware kernels. In Proceedings
130 *> of 2011 International Conference for High Performance Computing,
131 *> Networking, Storage and Analysis (SC '11), New York, NY, USA,
132 *> Article 8 , 11 pages.
133 *> http://doi.acm.org/10.1145/2063384.2063394
134 *>
135 *> A. Haidar, J. Kurzak, P. Luszczek, 2013.
136 *> An improved parallel singular value algorithm and its implementation
137 *> for multicore hardware, In Proceedings of 2013 International Conference
138 *> for High Performance Computing, Networking, Storage and Analysis (SC '13).
139 *> Denver, Colorado, USA, 2013.
140 *> Article 90, 12 pages.
141 *> http://doi.acm.org/10.1145/2503210.2503292
142 *>
143 *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
144 *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
145 *> calculations based on fine-grained memory aware tasks.
146 *> International Journal of High Performance Computing Applications.
147 *> Volume 28 Issue 2, Pages 196-209, May 2014.
148 *> http://hpc.sagepub.com/content/28/2/196
149 *>
150 *> \endverbatim
151 *>
152 * =====================================================================
*  IPARAM2STAGE: tunable parameters for the two-stage reductions.
*
*  Value returned, according to ISPEC:
*     17  block size KD used for the reduction to band form
*     18  inner block size IB
*     19  LHOUS, length of the Householder (V,T) storage of stage 2
*     20  LWORK, workspace length for the routine designated by NAME
*     21  NXI, returned unchanged (reserved for future use)
*  -1 is returned when ISPEC is outside 17..21, when ISPEC is not 19
*  and the first character of NAME is not one of S, D, C, Z, and
*  when ISPEC = 20 but characters 4:6 and 8:12 of NAME do not
*  designate one of the algorithm/stage pairs handled below.
*
      INTEGER FUNCTION iparam2stage( ISPEC, NAME, OPTS,
     $                               NI, NBI, IBI, NXI )
#if defined(_OPENMP)
      use omp_lib
#endif
      IMPLICIT NONE
*
*  -- LAPACK auxiliary routine --
*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
*     .. Scalar Arguments ..
      CHARACTER*( * )    name, opts
      INTEGER            ispec, ni, nbi, ibi, nxi
*
*  ================================================================
*     ..
*     .. Local Scalars ..
      INTEGER            i, ic, iz, kd, ib, lhous, lwork, nthreads,
     $                   factoptnb, qroptnb, lqoptnb
      LOGICAL            rprec, cprec, known
      CHARACTER          prec*1, algo*3, stag*5, subnam*12, vect*1
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          char, ichar, len, max
*     ..
*     .. External Functions ..
      INTEGER            ilaenv
      EXTERNAL           ilaenv
*     ..
*     .. Executable Statements ..
*
*     Invalid value for ISPEC
*
      IF( (ispec.LT.17).OR.(ispec.GT.21) ) THEN
          iparam2stage = -1
          RETURN
      ENDIF
*
*     Get the number of threads.
*     Only one thread of the team stores the team size: letting every
*     thread write the shared variable NTHREADS at the same time is a
*     data race, even though they would all store the same value.
*
      nthreads = 1
#if defined(_OPENMP)
!$OMP PARALLEL
!$OMP SINGLE
      nthreads = omp_get_num_threads()
!$OMP END SINGLE
!$OMP END PARALLEL
#endif
*
*     NAME is not needed for ISPEC = 19, so PREC, ALGO, STAG, RPREC
*     and CPREC are only set (and only used) for the other values.
*
      IF( ispec .NE. 19 ) THEN
*
*        Convert NAME to upper case if the first character is lower
*        case.  The character set is detected from ICHAR( 'Z' ).
*
         iparam2stage = -1
         subnam = name
         ic = ichar( subnam( 1: 1 ) )
         iz = ichar( 'Z' )
         IF( iz.EQ.90 .OR. iz.EQ.122 ) THEN
*
*           ASCII character set
*
            IF( ic.GE.97 .AND. ic.LE.122 ) THEN
               subnam( 1: 1 ) = char( ic-32 )
               DO 100 i = 2, 12
                  ic = ichar( subnam( i: i ) )
                  IF( ic.GE.97 .AND. ic.LE.122 )
     $               subnam( i: i ) = char( ic-32 )
  100          CONTINUE
            END IF
*
         ELSE IF( iz.EQ.233 .OR. iz.EQ.169 ) THEN
*
*           EBCDIC character set
*
            IF( ( ic.GE.129 .AND. ic.LE.137 ) .OR.
     $          ( ic.GE.145 .AND. ic.LE.153 ) .OR.
     $          ( ic.GE.162 .AND. ic.LE.169 ) ) THEN
               subnam( 1: 1 ) = char( ic+64 )
               DO 110 i = 2, 12
                  ic = ichar( subnam( i: i ) )
                  IF( ( ic.GE.129 .AND. ic.LE.137 ) .OR.
     $                ( ic.GE.145 .AND. ic.LE.153 ) .OR.
     $                ( ic.GE.162 .AND. ic.LE.169 ) )subnam( i:
     $                i ) = char( ic+64 )
  110          CONTINUE
            END IF
*
         ELSE IF( iz.EQ.218 .OR. iz.EQ.250 ) THEN
*
*           Prime machines:  ASCII+128
*
            IF( ic.GE.225 .AND. ic.LE.250 ) THEN
               subnam( 1: 1 ) = char( ic-32 )
               DO 120 i = 2, 12
                  ic = ichar( subnam( i: i ) )
                  IF( ic.GE.225 .AND. ic.LE.250 )
     $               subnam( i: i ) = char( ic-32 )
  120          CONTINUE
            END IF
         END IF
*
*        Split the name: precision letter, algorithm (characters
*        4:6) and stage (characters 8:12).
*
         prec  = subnam( 1: 1 )
         algo  = subnam( 4: 6 )
         stag  = subnam( 8:12 )
         rprec = prec.EQ.'S' .OR. prec.EQ.'D'
         cprec = prec.EQ.'C' .OR. prec.EQ.'Z'
*
*        Invalid value for PRECISION
*
         IF( .NOT.( rprec .OR. cprec ) ) THEN
             iparam2stage = -1
             RETURN
         ENDIF
      ENDIF
*
*
      IF (( ispec .EQ. 17 ) .OR. ( ispec .EQ. 18 )) THEN
*
*     ISPEC = 17, 18:  block size KD, IB
*     Could be also dependent from N but for now it
*     depend only on sequential or parallel
*
         IF( nthreads.GT.4 ) THEN
            IF( cprec ) THEN
               kd = 128
               ib = 32
            ELSE
               kd = 160
               ib = 40
            ENDIF
         ELSE IF( nthreads.GT.1 ) THEN
            IF( cprec ) THEN
               kd = 64
               ib = 32
            ELSE
               kd = 64
               ib = 32
            ENDIF
         ELSE
            IF( cprec ) THEN
               kd = 16
               ib = 16
            ELSE
               kd = 32
               ib = 16
            ENDIF
         ENDIF
         IF( ispec.EQ.17 ) iparam2stage = kd
         IF( ispec.EQ.18 ) iparam2stage = ib
*
      ELSE IF ( ispec .EQ. 19 ) THEN
*
*     ISPEC = 19:
*     LHOUS length of the Householder representation
*     matrix (V,T) of the second stage. should be >= 1.
*
*     Will add the VECT OPTION HERE next release
*
*     A zero-length OPTS has no first character to read; it is
*     treated like any value other than 'N'.
*
         IF( len( opts ).GE.1 ) THEN
            vect = opts( 1: 1 )
         ELSE
            vect = ' '
         ENDIF
         IF( vect.EQ.'N' ) THEN
            lhous = max( 1, 4*ni )
         ELSE
*           This is not correct, it need to call the ALGO and the stage2
            lhous = max( 1, 4*ni ) + ibi
         ENDIF
         IF( lhous.GE.0 ) THEN
            iparam2stage = lhous
         ELSE
            iparam2stage = -1
         ENDIF
*
      ELSE IF ( ispec .EQ. 20 ) THEN
*
*     ISPEC = 20: (21 for future use)
*     LWORK length of the workspace for
*     either or both stages for TRD and BRD. should be >= 1.
*     TRD:
*     TRD_stage 1: = LT + LW + LS1 + LS2
*                  = LDT*KD + N*KD + N*MAX(KD,FACTOPTNB) + LDS2*KD
*                    where LDT=LDS2=KD
*                  = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
*     TRD_stage 2: = (2NB+1)*N + KD*NTHREADS
*     TRD_both   : = max(stage1,stage2) + AB ( AB=(KD+1)*N )
*                  = N*KD + N*max(KD+1,FACTOPTNB)
*                    + max(2*KD*KD, KD*NTHREADS)
*                    + (KD+1)*N
*
*     Block sizes of the QR and LQ factorizations, as reported by
*     ILAENV for xGEQRF and xGELQF.
*
         subnam(1:1) = prec
         subnam(2:6) = 'GEQRF'
         qroptnb     = ilaenv( 1, subnam, ' ', ni, nbi, -1, -1 )
         subnam(2:6) = 'GELQF'
         lqoptnb     = ilaenv( 1, subnam, ' ', nbi, ni, -1, -1 )
*        Could be QR or LQ for TRD and the max for BRD
         factoptnb   = max(qroptnb, lqoptnb)
*
*        KNOWN records whether ALGO/STAG selected one of the
*        formulas below.  It is cleared in every ELSE branch so
*        that an unrecognised name is reported with -1 instead of
*        being turned into a workspace size of 1 by the MAX below.
*
         known = .TRUE.
         IF( algo.EQ.'TRD' ) THEN
            IF( stag.EQ.'2STAG' ) THEN
               lwork = ni*nbi + ni*max(nbi+1,factoptnb)
     $              + max(2*nbi*nbi, nbi*nthreads)
     $              + (nbi+1)*ni
            ELSE IF( (stag.EQ.'HE2HB').OR.(stag.EQ.'SY2SB') ) THEN
               lwork = ni*nbi + ni*max(nbi,factoptnb) + 2*nbi*nbi
            ELSE IF( (stag.EQ.'HB2ST').OR.(stag.EQ.'SB2ST') ) THEN
               lwork = (2*nbi+1)*ni + nbi*nthreads
            ELSE
               known = .FALSE.
            ENDIF
         ELSE IF( algo.EQ.'BRD' ) THEN
            IF( stag.EQ.'2STAG' ) THEN
               lwork = 2*ni*nbi + ni*max(nbi+1,factoptnb)
     $              + max(2*nbi*nbi, nbi*nthreads)
     $              + (nbi+1)*ni
            ELSE IF( stag.EQ.'GE2GB' ) THEN
               lwork = ni*nbi + ni*max(nbi,factoptnb) + 2*nbi*nbi
            ELSE IF( stag.EQ.'GB2BD' ) THEN
               lwork = (3*nbi+1)*ni + nbi*nthreads
            ELSE
               known = .FALSE.
            ENDIF
         ELSE
            known = .FALSE.
         ENDIF
*
*        A recognised routine always gets a workspace of at least 1.
*
         IF( known ) THEN
            iparam2stage = max( 1, lwork )
         ELSE
            iparam2stage = -1
         ENDIF
*
      ELSE IF ( ispec .EQ. 21 ) THEN
*
*     ISPEC = 21 for future use
         iparam2stage = nxi
      ENDIF
*
*     ==== End of IPARAM2STAGE ====
*
      END
integer function ilaenv(ISPEC, NAME, OPTS, N1, N2, N3, N4)
ILAENV
Definition: ilaenv.f:162
integer function iparam2stage(ISPEC, NAME, OPTS, NI, NBI, IBI, NXI)
IPARAM2STAGE
Definition: iparam2stage.F:155