*> \brief \b IPARAM2STAGE * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * *> \htmlonly *> Download IPARAM2STAGE + dependencies *> *> [TGZ] *> *> [ZIP] *> *> [TXT] *> \endhtmlonly * * Definition: * =========== * * INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS, * NI, NBI, IBI, NXI ) * #if defined(_OPENMP) * use omp_lib * #endif * IMPLICIT NONE * * .. Scalar Arguments .. * CHARACTER*( * ) NAME, OPTS * INTEGER ISPEC, NI, NBI, IBI, NXI * *> \par Purpose: * ============= *> *> \verbatim *> *> This program sets problem and machine dependent parameters *> useful for xHETRD_2STAGE, xHETRD_HE2HB, xHETRD_HB2ST, *> xGEBRD_2STAGE, xGEBRD_GE2GB, xGEBRD_GB2BD *> and related subroutines for eigenvalue problems. *> It is called whenever ILAENV is called with 17 <= ISPEC <= 21. *> It is called whenever ILAENV2STAGE is called with 1 <= ISPEC <= 5 *> with a direct conversion ISPEC + 16. *> \endverbatim * * Arguments: * ========== * *> \param[in] ISPEC *> \verbatim *> ISPEC is integer scalar *> ISPEC specifies which tunable parameter IPARAM2STAGE should *> return. *> *> ISPEC=17: the optimal blocksize nb for the reduction to *> BAND *> *> ISPEC=18: the optimal blocksize ib for the eigenvectors *> singular vectors update routine *> *> ISPEC=19: The length of the array that store the Housholder *> representation for the second stage *> Band to Tridiagonal or Bidiagonal *> *> ISPEC=20: The workspace needed for the routine in input. *> *> ISPEC=21: For future release. *> \endverbatim *> *> \param[in] NAME *> \verbatim *> NAME is character string *> Name of the calling subroutine *> \endverbatim *> *> \param[in] OPTS *> \verbatim *> OPTS is CHARACTER*(*) *> The character options to the subroutine NAME, concatenated *> into a single character string. For example, UPLO = 'U', *> TRANS = 'T', and DIAG = 'N' for a triangular routine would *> be specified as OPTS = 'UTN'. *> \endverbatim *> *> \param[in] NI *> \verbatim *> NI is INTEGER which is the size of the matrix *> \endverbatim *> *> \param[in] NBI *> \verbatim *> NBI is INTEGER which is the used in the reduciton, *> (e.g., the size of the band), needed to compute workspace *> and LHOUS2. *> \endverbatim *> *> \param[in] IBI *> \verbatim *> IBI is INTEGER which represent the IB of the reduciton, *> needed to compute workspace and LHOUS2. *> \endverbatim *> *> \param[in] NXI *> \verbatim *> NXI is INTEGER needed in the future release. *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date June 2016 * *> \ingroup auxOTHERauxiliary * *> \par Further Details: * ===================== *> *> \verbatim *> *> Implemented by Azzam Haidar. *> *> All detail are available on technical report, SC11, SC13 papers. *> *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. *> Parallel reduction to condensed forms for symmetric eigenvalue problems *> using aggregated fine-grained and memory-aware kernels. In Proceedings *> of 2011 International Conference for High Performance Computing, *> Networking, Storage and Analysis (SC '11), New York, NY, USA, *> Article 8 , 11 pages. *> http://doi.acm.org/10.1145/2063384.2063394 *> *> A. Haidar, J. Kurzak, P. Luszczek, 2013. *> An improved parallel singular value algorithm and its implementation *> for multicore hardware, In Proceedings of 2013 International Conference *> for High Performance Computing, Networking, Storage and Analysis (SC '13). *> Denver, Colorado, USA, 2013. *> Article 90, 12 pages. *> http://doi.acm.org/10.1145/2503210.2503292 *> *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure *> calculations based on fine-grained memory aware tasks. *> International Journal of High Performance Computing Applications. *> Volume 28 Issue 2, Pages 196-209, May 2014. *> http://hpc.sagepub.com/content/28/2/196 *> *> \endverbatim *> * ===================================================================== INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS, $ NI, NBI, IBI, NXI ) #if defined(_OPENMP) use omp_lib #endif IMPLICIT NONE * * -- LAPACK auxiliary routine (version 3.8.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * June 2016 * * .. Scalar Arguments .. CHARACTER*( * ) NAME, OPTS INTEGER ISPEC, NI, NBI, IBI, NXI * * ================================================================ * .. * .. Local Scalars .. INTEGER I, IC, IZ, KD, IB, LHOUS, LWORK, NTHREADS, $ FACTOPTNB, QROPTNB, LQOPTNB LOGICAL RPREC, CPREC CHARACTER PREC*1, ALGO*3, STAG*5, SUBNAM*12, VECT*1 * .. * .. Intrinsic Functions .. INTRINSIC CHAR, ICHAR, MAX * .. * .. External Functions .. INTEGER ILAENV EXTERNAL ILAENV * .. * .. Executable Statements .. * * Invalid value for ISPEC * IF( (ISPEC.LT.17).OR.(ISPEC.GT.21) ) THEN IPARAM2STAGE = -1 RETURN ENDIF * * Get the number of threads * NTHREADS = 1 #if defined(_OPENMP) !$OMP PARALLEL NTHREADS = OMP_GET_NUM_THREADS() !$OMP END PARALLEL #endif * WRITE(*,*) 'IPARAM VOICI NTHREADS ISPEC ',NTHREADS, ISPEC * IF( ISPEC .NE. 19 ) THEN * * Convert NAME to upper case if the first character is lower case. * IPARAM2STAGE = -1 SUBNAM = NAME IC = ICHAR( SUBNAM( 1: 1 ) ) IZ = ICHAR( 'Z' ) IF( IZ.EQ.90 .OR. IZ.EQ.122 ) THEN * * ASCII character set * IF( IC.GE.97 .AND. IC.LE.122 ) THEN SUBNAM( 1: 1 ) = CHAR( IC-32 ) DO 100 I = 2, 12 IC = ICHAR( SUBNAM( I: I ) ) IF( IC.GE.97 .AND. IC.LE.122 ) $ SUBNAM( I: I ) = CHAR( IC-32 ) 100 CONTINUE END IF * ELSE IF( IZ.EQ.233 .OR. IZ.EQ.169 ) THEN * * EBCDIC character set * IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR. $ ( IC.GE.145 .AND. IC.LE.153 ) .OR. $ ( IC.GE.162 .AND. IC.LE.169 ) ) THEN SUBNAM( 1: 1 ) = CHAR( IC+64 ) DO 110 I = 2, 12 IC = ICHAR( SUBNAM( I: I ) ) IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR. $ ( IC.GE.145 .AND. IC.LE.153 ) .OR. $ ( IC.GE.162 .AND. IC.LE.169 ) )SUBNAM( I: $ I ) = CHAR( IC+64 ) 110 CONTINUE END IF * ELSE IF( IZ.EQ.218 .OR. IZ.EQ.250 ) THEN * * Prime machines: ASCII+128 * IF( IC.GE.225 .AND. IC.LE.250 ) THEN SUBNAM( 1: 1 ) = CHAR( IC-32 ) DO 120 I = 2, 12 IC = ICHAR( SUBNAM( I: I ) ) IF( IC.GE.225 .AND. IC.LE.250 ) $ SUBNAM( I: I ) = CHAR( IC-32 ) 120 CONTINUE END IF END IF * PREC = SUBNAM( 1: 1 ) ALGO = SUBNAM( 4: 6 ) STAG = SUBNAM( 8:12 ) RPREC = PREC.EQ.'S' .OR. PREC.EQ.'D' CPREC = PREC.EQ.'C' .OR. PREC.EQ.'Z' * * Invalid value for PRECISION * IF( .NOT.( RPREC .OR. CPREC ) ) THEN IPARAM2STAGE = -1 RETURN ENDIF ENDIF * WRITE(*,*),'RPREC,CPREC ',RPREC,CPREC, * $ ' ALGO ',ALGO,' STAGE ',STAG * * IF (( ISPEC .EQ. 17 ) .OR. ( ISPEC .EQ. 18 )) THEN * * ISPEC = 17, 18: block size KD, IB * Could be also dependent from N but for now it * depend only on sequential or parallel * IF( NTHREADS.GT.4 ) THEN IF( CPREC ) THEN KD = 128 IB = 32 ELSE KD = 160 IB = 40 ENDIF ELSE IF( NTHREADS.GT.1 ) THEN IF( CPREC ) THEN KD = 64 IB = 32 ELSE KD = 64 IB = 32 ENDIF ELSE IF( CPREC ) THEN KD = 16 IB = 16 ELSE KD = 32 IB = 16 ENDIF ENDIF IF( ISPEC.EQ.17 ) IPARAM2STAGE = KD IF( ISPEC.EQ.18 ) IPARAM2STAGE = IB * ELSE IF ( ISPEC .EQ. 19 ) THEN * * ISPEC = 19: * LHOUS length of the Houselholder representation * matrix (V,T) of the second stage. should be >= 1. * * Will add the VECT OPTION HERE next release VECT = OPTS(1:1) IF( VECT.EQ.'N' ) THEN LHOUS = MAX( 1, 4*NI ) ELSE * This is not correct, it need to call the ALGO and the stage2 LHOUS = MAX( 1, 4*NI ) + IBI ENDIF IF( LHOUS.GE.0 ) THEN IPARAM2STAGE = LHOUS ELSE IPARAM2STAGE = -1 ENDIF * ELSE IF ( ISPEC .EQ. 20 ) THEN * * ISPEC = 20: (21 for future use) * LWORK length of the workspace for * either or both stages for TRD and BRD. should be >= 1. * TRD: * TRD_stage 1: = LT + LW + LS1 + LS2 * = LDT*KD + N*KD + N*MAX(KD,FACTOPTNB) + LDS2*KD * where LDT=LDS2=KD * = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD * TRD_stage 2: = (2NB+1)*N + KD*NTHREADS * TRD_both : = max(stage1,stage2) + AB ( AB=(KD+1)*N ) * = N*KD + N*max(KD+1,FACTOPTNB) * + max(2*KD*KD, KD*NTHREADS) * + (KD+1)*N LWORK = -1 SUBNAM(1:1) = PREC SUBNAM(2:6) = 'GEQRF' QROPTNB = ILAENV( 1, SUBNAM, ' ', NI, NBI, -1, -1 ) SUBNAM(2:6) = 'GELQF' LQOPTNB = ILAENV( 1, SUBNAM, ' ', NBI, NI, -1, -1 ) * Could be QR or LQ for TRD and the max for BRD FACTOPTNB = MAX(QROPTNB, LQOPTNB) IF( ALGO.EQ.'TRD' ) THEN IF( STAG.EQ.'2STAG' ) THEN LWORK = NI*NBI + NI*MAX(NBI+1,FACTOPTNB) $ + MAX(2*NBI*NBI, NBI*NTHREADS) $ + (NBI+1)*NI ELSE IF( (STAG.EQ.'HE2HB').OR.(STAG.EQ.'SY2SB') ) THEN LWORK = NI*NBI + NI*MAX(NBI,FACTOPTNB) + 2*NBI*NBI ELSE IF( (STAG.EQ.'HB2ST').OR.(STAG.EQ.'SB2ST') ) THEN LWORK = (2*NBI+1)*NI + NBI*NTHREADS ENDIF ELSE IF( ALGO.EQ.'BRD' ) THEN IF( STAG.EQ.'2STAG' ) THEN LWORK = 2*NI*NBI + NI*MAX(NBI+1,FACTOPTNB) $ + MAX(2*NBI*NBI, NBI*NTHREADS) $ + (NBI+1)*NI ELSE IF( STAG.EQ.'GE2GB' ) THEN LWORK = NI*NBI + NI*MAX(NBI,FACTOPTNB) + 2*NBI*NBI ELSE IF( STAG.EQ.'GB2BD' ) THEN LWORK = (3*NBI+1)*NI + NBI*NTHREADS ENDIF ENDIF LWORK = MAX ( 1, LWORK ) IF( LWORK.GT.0 ) THEN IPARAM2STAGE = LWORK ELSE IPARAM2STAGE = -1 ENDIF * ELSE IF ( ISPEC .EQ. 21 ) THEN * * ISPEC = 21 for future use IPARAM2STAGE = NXI ENDIF * * ==== End of IPARAM2STAGE ==== * END