LAPACK  3.10.0
LAPACK: Linear Algebra PACKage

◆ iparam2stage()

integer function iparam2stage ( integer  ISPEC,
character*( * )  NAME,
character*( * )  OPTS,
integer  NI,
integer  NBI,
integer  IBI,
integer  NXI 
)

IPARAM2STAGE

Download IPARAM2STAGE + dependencies [TGZ] [ZIP] [TXT]

Purpose:
      This program sets problem and machine dependent parameters
      useful for xHETRD_2STAGE, xHETRD_HE2HB, xHETRD_HB2ST,
      xGEBRD_2STAGE, xGEBRD_GE2GB, xGEBRD_GB2BD 
      and related subroutines for eigenvalue problems. 
      It is called whenever ILAENV is called with 17 <= ISPEC <= 21.
      It is called whenever ILAENV2STAGE is called with 1 <= ISPEC <= 5
      with a direct conversion ISPEC + 16.
Parameters
[in]ISPEC
          ISPEC is integer scalar
              ISPEC specifies which tunable parameter IPARAM2STAGE should
              return.

              ISPEC=17: the optimal blocksize nb for the reduction to
                        BAND

              ISPEC=18: the optimal blocksize ib for the eigenvectors
                        singular vectors update routine

              ISPEC=19: The length of the array that store the Housholder 
                        representation for the second stage 
                        Band to Tridiagonal or Bidiagonal

              ISPEC=20: The workspace needed for the routine in input.

              ISPEC=21: For future release.
[in]NAME
          NAME is character string
               Name of the calling subroutine
[in]OPTS
          OPTS is CHARACTER*(*)
          The character options to the subroutine NAME, concatenated
          into a single character string.  For example, UPLO = 'U',
          TRANS = 'T', and DIAG = 'N' for a triangular routine would
          be specified as OPTS = 'UTN'.
[in]NI
          NI is INTEGER which is the size of the matrix
[in]NBI
          NBI is INTEGER which is the used in the reduciton, 
          (e.g., the size of the band), needed to compute workspace
          and LHOUS2.
[in]IBI
          IBI is INTEGER which represent the IB of the reduciton,
          needed to compute workspace and LHOUS2.
[in]NXI
          NXI is INTEGER needed in the future release.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Further Details:
  Implemented by Azzam Haidar.

  All detail are available on technical report, SC11, SC13 papers.

  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  Parallel reduction to condensed forms for symmetric eigenvalue problems
  using aggregated fine-grained and memory-aware kernels. In Proceedings
  of 2011 International Conference for High Performance Computing,
  Networking, Storage and Analysis (SC '11), New York, NY, USA,
  Article 8 , 11 pages.
  http://doi.acm.org/10.1145/2063384.2063394

  A. Haidar, J. Kurzak, P. Luszczek, 2013.
  An improved parallel singular value algorithm and its implementation 
  for multicore hardware, In Proceedings of 2013 International Conference
  for High Performance Computing, Networking, Storage and Analysis (SC '13).
  Denver, Colorado, USA, 2013.
  Article 90, 12 pages.
  http://doi.acm.org/10.1145/2503210.2503292

  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  A novel hybrid CPU-GPU generalized eigensolver for electronic structure 
  calculations based on fine-grained memory aware tasks.
  International Journal of High Performance Computing Applications.
  Volume 28 Issue 2, Pages 196-209, May 2014.
  http://hpc.sagepub.com/content/28/2/196 

Definition at line 153 of file iparam2stage.F.

155 #if defined(_OPENMP)
156  use omp_lib
157 #endif
158  IMPLICIT NONE
159 *
160 * -- LAPACK auxiliary routine --
161 * -- LAPACK is a software package provided by Univ. of Tennessee, --
162 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
163 *
164 * .. Scalar Arguments ..
165  CHARACTER*( * ) NAME, OPTS
166  INTEGER ISPEC, NI, NBI, IBI, NXI
167 *
168 * ================================================================
169 * ..
170 * .. Local Scalars ..
171  INTEGER I, IC, IZ, KD, IB, LHOUS, LWORK, NTHREADS,
172  $ FACTOPTNB, QROPTNB, LQOPTNB
173  LOGICAL RPREC, CPREC
174  CHARACTER PREC*1, ALGO*3, STAG*5, SUBNAM*12, VECT*1
175 * ..
176 * .. Intrinsic Functions ..
177  INTRINSIC char, ichar, max
178 * ..
179 * .. External Functions ..
180  INTEGER ILAENV
181  EXTERNAL ilaenv
182 * ..
183 * .. Executable Statements ..
184 *
185 * Invalid value for ISPEC
186 *
187  IF( (ispec.LT.17).OR.(ispec.GT.21) ) THEN
188  iparam2stage = -1
189  RETURN
190  ENDIF
191 *
192 * Get the number of threads
193 *
194  nthreads = 1
195 #if defined(_OPENMP)
196 !$OMP PARALLEL
197  nthreads = omp_get_num_threads()
198 !$OMP END PARALLEL
199 #endif
200 * WRITE(*,*) 'IPARAM VOICI NTHREADS ISPEC ',NTHREADS, ISPEC
201 *
202  IF( ispec .NE. 19 ) THEN
203 *
204 * Convert NAME to upper case if the first character is lower case.
205 *
206  iparam2stage = -1
207  subnam = name
208  ic = ichar( subnam( 1: 1 ) )
209  iz = ichar( 'Z' )
210  IF( iz.EQ.90 .OR. iz.EQ.122 ) THEN
211 *
212 * ASCII character set
213 *
214  IF( ic.GE.97 .AND. ic.LE.122 ) THEN
215  subnam( 1: 1 ) = char( ic-32 )
216  DO 100 i = 2, 12
217  ic = ichar( subnam( i: i ) )
218  IF( ic.GE.97 .AND. ic.LE.122 )
219  $ subnam( i: i ) = char( ic-32 )
220  100 CONTINUE
221  END IF
222 *
223  ELSE IF( iz.EQ.233 .OR. iz.EQ.169 ) THEN
224 *
225 * EBCDIC character set
226 *
227  IF( ( ic.GE.129 .AND. ic.LE.137 ) .OR.
228  $ ( ic.GE.145 .AND. ic.LE.153 ) .OR.
229  $ ( ic.GE.162 .AND. ic.LE.169 ) ) THEN
230  subnam( 1: 1 ) = char( ic+64 )
231  DO 110 i = 2, 12
232  ic = ichar( subnam( i: i ) )
233  IF( ( ic.GE.129 .AND. ic.LE.137 ) .OR.
234  $ ( ic.GE.145 .AND. ic.LE.153 ) .OR.
235  $ ( ic.GE.162 .AND. ic.LE.169 ) )subnam( i:
236  $ i ) = char( ic+64 )
237  110 CONTINUE
238  END IF
239 *
240  ELSE IF( iz.EQ.218 .OR. iz.EQ.250 ) THEN
241 *
242 * Prime machines: ASCII+128
243 *
244  IF( ic.GE.225 .AND. ic.LE.250 ) THEN
245  subnam( 1: 1 ) = char( ic-32 )
246  DO 120 i = 2, 12
247  ic = ichar( subnam( i: i ) )
248  IF( ic.GE.225 .AND. ic.LE.250 )
249  $ subnam( i: i ) = char( ic-32 )
250  120 CONTINUE
251  END IF
252  END IF
253 *
254  prec = subnam( 1: 1 )
255  algo = subnam( 4: 6 )
256  stag = subnam( 8:12 )
257  rprec = prec.EQ.'S' .OR. prec.EQ.'D'
258  cprec = prec.EQ.'C' .OR. prec.EQ.'Z'
259 *
260 * Invalid value for PRECISION
261 *
262  IF( .NOT.( rprec .OR. cprec ) ) THEN
263  iparam2stage = -1
264  RETURN
265  ENDIF
266  ENDIF
267 * WRITE(*,*),'RPREC,CPREC ',RPREC,CPREC,
268 * $ ' ALGO ',ALGO,' STAGE ',STAG
269 *
270 *
271  IF (( ispec .EQ. 17 ) .OR. ( ispec .EQ. 18 )) THEN
272 *
273 * ISPEC = 17, 18: block size KD, IB
274 * Could be also dependent from N but for now it
275 * depend only on sequential or parallel
276 *
277  IF( nthreads.GT.4 ) THEN
278  IF( cprec ) THEN
279  kd = 128
280  ib = 32
281  ELSE
282  kd = 160
283  ib = 40
284  ENDIF
285  ELSE IF( nthreads.GT.1 ) THEN
286  IF( cprec ) THEN
287  kd = 64
288  ib = 32
289  ELSE
290  kd = 64
291  ib = 32
292  ENDIF
293  ELSE
294  IF( cprec ) THEN
295  kd = 16
296  ib = 16
297  ELSE
298  kd = 32
299  ib = 16
300  ENDIF
301  ENDIF
302  IF( ispec.EQ.17 ) iparam2stage = kd
303  IF( ispec.EQ.18 ) iparam2stage = ib
304 *
305  ELSE IF ( ispec .EQ. 19 ) THEN
306 *
307 * ISPEC = 19:
308 * LHOUS length of the Houselholder representation
309 * matrix (V,T) of the second stage. should be >= 1.
310 *
311 * Will add the VECT OPTION HERE next release
312  vect = opts(1:1)
313  IF( vect.EQ.'N' ) THEN
314  lhous = max( 1, 4*ni )
315  ELSE
316 * This is not correct, it need to call the ALGO and the stage2
317  lhous = max( 1, 4*ni ) + ibi
318  ENDIF
319  IF( lhous.GE.0 ) THEN
320  iparam2stage = lhous
321  ELSE
322  iparam2stage = -1
323  ENDIF
324 *
325  ELSE IF ( ispec .EQ. 20 ) THEN
326 *
327 * ISPEC = 20: (21 for future use)
328 * LWORK length of the workspace for
329 * either or both stages for TRD and BRD. should be >= 1.
330 * TRD:
331 * TRD_stage 1: = LT + LW + LS1 + LS2
332 * = LDT*KD + N*KD + N*MAX(KD,FACTOPTNB) + LDS2*KD
333 * where LDT=LDS2=KD
334 * = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
335 * TRD_stage 2: = (2NB+1)*N + KD*NTHREADS
336 * TRD_both : = max(stage1,stage2) + AB ( AB=(KD+1)*N )
337 * = N*KD + N*max(KD+1,FACTOPTNB)
338 * + max(2*KD*KD, KD*NTHREADS)
339 * + (KD+1)*N
340  lwork = -1
341  subnam(1:1) = prec
342  subnam(2:6) = 'GEQRF'
343  qroptnb = ilaenv( 1, subnam, ' ', ni, nbi, -1, -1 )
344  subnam(2:6) = 'GELQF'
345  lqoptnb = ilaenv( 1, subnam, ' ', nbi, ni, -1, -1 )
346 * Could be QR or LQ for TRD and the max for BRD
347  factoptnb = max(qroptnb, lqoptnb)
348  IF( algo.EQ.'TRD' ) THEN
349  IF( stag.EQ.'2STAG' ) THEN
350  lwork = ni*nbi + ni*max(nbi+1,factoptnb)
351  $ + max(2*nbi*nbi, nbi*nthreads)
352  $ + (nbi+1)*ni
353  ELSE IF( (stag.EQ.'HE2HB').OR.(stag.EQ.'SY2SB') ) THEN
354  lwork = ni*nbi + ni*max(nbi,factoptnb) + 2*nbi*nbi
355  ELSE IF( (stag.EQ.'HB2ST').OR.(stag.EQ.'SB2ST') ) THEN
356  lwork = (2*nbi+1)*ni + nbi*nthreads
357  ENDIF
358  ELSE IF( algo.EQ.'BRD' ) THEN
359  IF( stag.EQ.'2STAG' ) THEN
360  lwork = 2*ni*nbi + ni*max(nbi+1,factoptnb)
361  $ + max(2*nbi*nbi, nbi*nthreads)
362  $ + (nbi+1)*ni
363  ELSE IF( stag.EQ.'GE2GB' ) THEN
364  lwork = ni*nbi + ni*max(nbi,factoptnb) + 2*nbi*nbi
365  ELSE IF( stag.EQ.'GB2BD' ) THEN
366  lwork = (3*nbi+1)*ni + nbi*nthreads
367  ENDIF
368  ENDIF
369  lwork = max( 1, lwork )
370 
371  IF( lwork.GT.0 ) THEN
372  iparam2stage = lwork
373  ELSE
374  iparam2stage = -1
375  ENDIF
376 *
377  ELSE IF ( ispec .EQ. 21 ) THEN
378 *
379 * ISPEC = 21 for future use
380  iparam2stage = nxi
381  ENDIF
382 *
383 * ==== End of IPARAM2STAGE ====
384 *
integer function ilaenv(ISPEC, NAME, OPTS, N1, N2, N3, N4)
ILAENV
Definition: ilaenv.f:162
integer function iparam2stage(ISPEC, NAME, OPTS, NI, NBI, IBI, NXI)
IPARAM2STAGE
Definition: iparam2stage.F:155
Here is the call graph for this function:
Here is the caller graph for this function: