LAPACK 3.12.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ iparam2stage()

integer function iparam2stage ( integer  ispec,
character*( * )  name,
character*( * )  opts,
integer  ni,
integer  nbi,
integer  ibi,
integer  nxi 
)

IPARAM2STAGE

Download IPARAM2STAGE + dependencies [TGZ] [ZIP] [TXT]

Purpose:
      This program sets problem and machine dependent parameters
      useful for xHETRD_2STAGE, xHETRD_HE2HB, xHETRD_HB2ST,
      xGEBRD_2STAGE, xGEBRD_GE2GB, xGEBRD_GB2BD
      and related subroutines for eigenvalue problems.
      It is called whenever ILAENV is called with 17 <= ISPEC <= 21.
      It is called whenever ILAENV2STAGE is called with 1 <= ISPEC <= 5
      with a direct conversion ISPEC + 16.
Parameters
[in]ISPEC
          ISPEC is integer scalar
              ISPEC specifies which tunable parameter IPARAM2STAGE should
              return.

              ISPEC=17: the optimal blocksize nb for the reduction to
                        BAND

              ISPEC=18: the optimal blocksize ib for the eigenvectors
                        singular vectors update routine

              ISPEC=19: The length of the array that store the Housholder
                        representation for the second stage
                        Band to Tridiagonal or Bidiagonal

              ISPEC=20: The workspace needed for the routine in input.

              ISPEC=21: For future release.
[in]NAME
          NAME is character string
               Name of the calling subroutine
[in]OPTS
          OPTS is CHARACTER*(*)
          The character options to the subroutine NAME, concatenated
          into a single character string.  For example, UPLO = 'U',
          TRANS = 'T', and DIAG = 'N' for a triangular routine would
          be specified as OPTS = 'UTN'.
[in]NI
          NI is INTEGER which is the size of the matrix
[in]NBI
          NBI is INTEGER which is the used in the reduction,
          (e.g., the size of the band), needed to compute workspace
          and LHOUS2.
[in]IBI
          IBI is INTEGER which represent the IB of the reduction,
          needed to compute workspace and LHOUS2.
[in]NXI
          NXI is INTEGER needed in the future release.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Further Details:
  Implemented by Azzam Haidar.

  All detail are available on technical report, SC11, SC13 papers.

  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  Parallel reduction to condensed forms for symmetric eigenvalue problems
  using aggregated fine-grained and memory-aware kernels. In Proceedings
  of 2011 International Conference for High Performance Computing,
  Networking, Storage and Analysis (SC '11), New York, NY, USA,
  Article 8 , 11 pages.
  http://doi.acm.org/10.1145/2063384.2063394

  A. Haidar, J. Kurzak, P. Luszczek, 2013.
  An improved parallel singular value algorithm and its implementation
  for multicore hardware, In Proceedings of 2013 International Conference
  for High Performance Computing, Networking, Storage and Analysis (SC '13).
  Denver, Colorado, USA, 2013.
  Article 90, 12 pages.
  http://doi.acm.org/10.1145/2503210.2503292

  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  A novel hybrid CPU-GPU generalized eigensolver for electronic structure
  calculations based on fine-grained memory aware tasks.
  International Journal of High Performance Computing Applications.
  Volume 28 Issue 2, Pages 196-209, May 2014.
  http://hpc.sagepub.com/content/28/2/196

Definition at line 153 of file iparam2stage.F.

155#if defined(_OPENMP)
156 use omp_lib
157#endif
158 IMPLICIT NONE
159*
160* -- LAPACK auxiliary routine --
161* -- LAPACK is a software package provided by Univ. of Tennessee, --
162* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
163*
164* .. Scalar Arguments ..
165 CHARACTER*( * ) NAME, OPTS
166 INTEGER ISPEC, NI, NBI, IBI, NXI
167*
168* ================================================================
169* ..
170* .. Local Scalars ..
171 INTEGER I, IC, IZ, KD, IB, LHOUS, LWORK, NTHREADS,
172 $ FACTOPTNB, QROPTNB, LQOPTNB
173 LOGICAL RPREC, CPREC
174 CHARACTER PREC*1, ALGO*3, STAG*5, SUBNAM*12, VECT*1
175* ..
176* .. Intrinsic Functions ..
177 INTRINSIC char, ichar, max
178* ..
179* .. External Functions ..
180 INTEGER ILAENV
181 LOGICAL LSAME
182 EXTERNAL ilaenv, lsame
183* ..
184* .. Executable Statements ..
185*
186* Invalid value for ISPEC
187*
188 IF( (ispec.LT.17).OR.(ispec.GT.21) ) THEN
189 iparam2stage = -1
190 RETURN
191 ENDIF
192*
193* Get the number of threads
194*
195 nthreads = 1
196#if defined(_OPENMP)
197!$OMP PARALLEL
198 nthreads = omp_get_num_threads()
199!$OMP END PARALLEL
200#endif
201* WRITE(*,*) 'IPARAM VOICI NTHREADS ISPEC ',NTHREADS, ISPEC
202*
203 IF( ispec .NE. 19 ) THEN
204*
205* Convert NAME to upper case if the first character is lower case.
206*
207 iparam2stage = -1
208 subnam = name
209 ic = ichar( subnam( 1: 1 ) )
210 iz = ichar( 'Z' )
211 IF( iz.EQ.90 .OR. iz.EQ.122 ) THEN
212*
213* ASCII character set
214*
215 IF( ic.GE.97 .AND. ic.LE.122 ) THEN
216 subnam( 1: 1 ) = char( ic-32 )
217 DO 100 i = 2, 12
218 ic = ichar( subnam( i: i ) )
219 IF( ic.GE.97 .AND. ic.LE.122 )
220 $ subnam( i: i ) = char( ic-32 )
221 100 CONTINUE
222 END IF
223*
224 ELSE IF( iz.EQ.233 .OR. iz.EQ.169 ) THEN
225*
226* EBCDIC character set
227*
228 IF( ( ic.GE.129 .AND. ic.LE.137 ) .OR.
229 $ ( ic.GE.145 .AND. ic.LE.153 ) .OR.
230 $ ( ic.GE.162 .AND. ic.LE.169 ) ) THEN
231 subnam( 1: 1 ) = char( ic+64 )
232 DO 110 i = 2, 12
233 ic = ichar( subnam( i: i ) )
234 IF( ( ic.GE.129 .AND. ic.LE.137 ) .OR.
235 $ ( ic.GE.145 .AND. ic.LE.153 ) .OR.
236 $ ( ic.GE.162 .AND. ic.LE.169 ) )subnam( i:
237 $ i ) = char( ic+64 )
238 110 CONTINUE
239 END IF
240*
241 ELSE IF( iz.EQ.218 .OR. iz.EQ.250 ) THEN
242*
243* Prime machines: ASCII+128
244*
245 IF( ic.GE.225 .AND. ic.LE.250 ) THEN
246 subnam( 1: 1 ) = char( ic-32 )
247 DO 120 i = 2, 12
248 ic = ichar( subnam( i: i ) )
249 IF( ic.GE.225 .AND. ic.LE.250 )
250 $ subnam( i: i ) = char( ic-32 )
251 120 CONTINUE
252 END IF
253 END IF
254*
255 prec = subnam( 1: 1 )
256 algo = subnam( 4: 6 )
257 stag = subnam( 8:12 )
258 rprec = prec.EQ.'S' .OR. prec.EQ.'D'
259 cprec = prec.EQ.'C' .OR. prec.EQ.'Z'
260*
261* Invalid value for PRECISION
262*
263 IF( .NOT.( rprec .OR. cprec ) ) THEN
264 iparam2stage = -1
265 RETURN
266 ENDIF
267 ENDIF
268* WRITE(*,*),'RPREC,CPREC ',RPREC,CPREC,
269* $ ' ALGO ',ALGO,' STAGE ',STAG
270*
271*
272 IF (( ispec .EQ. 17 ) .OR. ( ispec .EQ. 18 )) THEN
273*
274* ISPEC = 17, 18: block size KD, IB
275* Could be also dependent from N but for now it
276* depend only on sequential or parallel
277*
278 IF( nthreads.GT.4 ) THEN
279 IF( cprec ) THEN
280 kd = 128
281 ib = 32
282 ELSE
283 kd = 160
284 ib = 40
285 ENDIF
286 ELSE IF( nthreads.GT.1 ) THEN
287 IF( cprec ) THEN
288 kd = 64
289 ib = 32
290 ELSE
291 kd = 64
292 ib = 32
293 ENDIF
294 ELSE
295 IF( cprec ) THEN
296 kd = 16
297 ib = 16
298 ELSE
299 kd = 32
300 ib = 16
301 ENDIF
302 ENDIF
303 IF( ispec.EQ.17 ) iparam2stage = kd
304 IF( ispec.EQ.18 ) iparam2stage = ib
305*
306 ELSE IF ( ispec .EQ. 19 ) THEN
307*
308* ISPEC = 19:
309* LHOUS length of the Houselholder representation
310* matrix (V,T) of the second stage. should be >= 1.
311*
312* Will add the VECT OPTION HERE next release
313 vect = opts(1:1)
314 IF( lsame( vect, 'N' ) ) THEN
315 lhous = max( 1, 4*ni )
316 ELSE
317* This is not correct, it need to call the ALGO and the stage2
318 lhous = max( 1, 4*ni ) + ibi
319 ENDIF
320 IF( lhous.GE.0 ) THEN
321 iparam2stage = lhous
322 ELSE
323 iparam2stage = -1
324 ENDIF
325*
326 ELSE IF ( ispec .EQ. 20 ) THEN
327*
328* ISPEC = 20: (21 for future use)
329* LWORK length of the workspace for
330* either or both stages for TRD and BRD. should be >= 1.
331* TRD:
332* TRD_stage 1: = LT + LW + LS1 + LS2
333* = LDT*KD + N*KD + N*MAX(KD,FACTOPTNB) + LDS2*KD
334* where LDT=LDS2=KD
335* = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
336* TRD_stage 2: = (2NB+1)*N + KD*NTHREADS
337* TRD_both : = max(stage1,stage2) + AB ( AB=(KD+1)*N )
338* = N*KD + N*max(KD+1,FACTOPTNB)
339* + max(2*KD*KD, KD*NTHREADS)
340* + (KD+1)*N
341 lwork = -1
342 subnam(1:1) = prec
343 subnam(2:6) = 'GEQRF'
344 qroptnb = ilaenv( 1, subnam, ' ', ni, nbi, -1, -1 )
345 subnam(2:6) = 'GELQF'
346 lqoptnb = ilaenv( 1, subnam, ' ', nbi, ni, -1, -1 )
347* Could be QR or LQ for TRD and the max for BRD
348 factoptnb = max(qroptnb, lqoptnb)
349 IF( algo.EQ.'TRD' ) THEN
350 IF( stag.EQ.'2STAG' ) THEN
351 lwork = ni*nbi + ni*max(nbi+1,factoptnb)
352 $ + max(2*nbi*nbi, nbi*nthreads)
353 $ + (nbi+1)*ni
354 ELSE IF( (stag.EQ.'HE2HB').OR.(stag.EQ.'SY2SB') ) THEN
355 lwork = ni*nbi + ni*max(nbi,factoptnb) + 2*nbi*nbi
356 ELSE IF( (stag.EQ.'HB2ST').OR.(stag.EQ.'SB2ST') ) THEN
357 lwork = (2*nbi+1)*ni + nbi*nthreads
358 ENDIF
359 ELSE IF( algo.EQ.'BRD' ) THEN
360 IF( stag.EQ.'2STAG' ) THEN
361 lwork = 2*ni*nbi + ni*max(nbi+1,factoptnb)
362 $ + max(2*nbi*nbi, nbi*nthreads)
363 $ + (nbi+1)*ni
364 ELSE IF( stag.EQ.'GE2GB' ) THEN
365 lwork = ni*nbi + ni*max(nbi,factoptnb) + 2*nbi*nbi
366 ELSE IF( stag.EQ.'GB2BD' ) THEN
367 lwork = (3*nbi+1)*ni + nbi*nthreads
368 ENDIF
369 ENDIF
370 lwork = max( 1, lwork )
371
372 IF( lwork.GT.0 ) THEN
373 iparam2stage = lwork
374 ELSE
375 iparam2stage = -1
376 ENDIF
377*
378 ELSE IF ( ispec .EQ. 21 ) THEN
379*
380* ISPEC = 21 for future use
381 iparam2stage = nxi
382 ENDIF
383*
384* ==== End of IPARAM2STAGE ====
385*
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
Definition ilaenv.f:162
integer function iparam2stage(ispec, name, opts, ni, nbi, ibi, nxi)
IPARAM2STAGE
logical function lsame(ca, cb)
LSAME
Definition lsame.f:48
Here is the call graph for this function:
Here is the caller graph for this function: