*     d0/d32/pssyngst_8f_source.html

      SUBROUTINE PSSYNGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB,
     $                     DESCB, SCALE, WORK, LWORK, INFO )
*
*  -- ScaLAPACK routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     October 15, 1999
*
*     .. Scalar Arguments ..
      CHARACTER          UPLO
      INTEGER            IA, IB, IBTYPE, INFO, JA, JB, LWORK, N
      REAL               SCALE
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * ), DESCB( * )
      REAL               A( * ), B( * ), WORK( * )
*     ..
*
*  Purpose
*  =======
*
*  PSSYNGST reduces a real symmetric-definite generalized
*  eigenproblem to standard form.
*
*  PSSYNGST performs the same function as PSSYGST, but is based on
*  rank 2K updates, which are faster and more scalable than
*  triangular solves (the basis of PSSYGST).
*
*  PSSYNGST calls PSSYGST when UPLO='U' or IBTYPE is not 1, hence
*  PSSYNGST provides improved performance only when UPLO='L',
*  IBTYPE=1.
*
*  PSSYNGST also calls PSSYGST when insufficient workspace is
*  provided,  hence PSSYNGST provides improved
*  performance only when LWORK >= 2 * NP0 * NB + NQ0 * NB + NB * NB
*
*  In the following sub( A ) denotes A( IA:IA+N-1, JA:JA+N-1 ) and
*  sub( B ) denotes B( IB:IB+N-1, JB:JB+N-1 ).
*
*  If IBTYPE = 1, the problem is sub( A )*x = lambda*sub( B )*x,
*  and sub( A ) is overwritten by inv(U**T)*sub( A )*inv(U) or
*  inv(L)*sub( A )*inv(L**T)
*
*  If IBTYPE = 2 or 3, the problem is sub( A )*sub( B )*x = lambda*x or
*  sub( B )*sub( A )*x = lambda*x, and sub( A ) is overwritten by
*  U*sub( A )*U**T or L**T*sub( A )*L.
*
*  sub( B ) must have been previously factorized as U**T*U or L*L**T by
*  PSPOTRF.
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclicly distributed array.
*  Such a global array has an associated description vector DESCA.
*  In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
*                                 DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) DESCA( M_ )    The number of rows in the global
*                                 array A.
*  N_A    (global) DESCA( N_ )    The number of columns in the global
*                                 array A.
*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
*                                 the rows of the array.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
*                                 the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Arguments
*  =========
*
*  IBTYPE   (global input) INTEGER
*          = 1: compute inv(U**T)*sub( A )*inv(U) or
*               inv(L)*sub( A )*inv(L**T);
*          = 2 or 3: compute U*sub( A )*U**T or L**T*sub( A )*L.
*
*  UPLO    (global input) CHARACTER
*          = 'U':  Upper triangle of sub( A ) is stored and sub( B ) is
*                  factored as U**T*U;
*          = 'L':  Lower triangle of sub( A ) is stored and sub( B ) is
*                  factored as L*L**T.
*
*  N       (global input) INTEGER
*          The order of the matrices sub( A ) and sub( B ).  N >= 0.
*
*  A       (local input/local output) REAL pointer into the
*          local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
*          On entry, this array contains the local pieces of the
*          N-by-N symmetric distributed matrix sub( A ). If UPLO = 'U',
*          the leading N-by-N upper triangular part of sub( A ) contains
*          the upper triangular part of the matrix, and its strictly
*          lower triangular part is not referenced.  If UPLO = 'L', the
*          leading N-by-N lower triangular part of sub( A ) contains
*          the lower triangular part of the matrix, and its strictly
*          upper triangular part is not referenced.
*
*          On exit, if INFO = 0, the transformed matrix, stored in the
*          same format as sub( A ).
*
*  IA      (global input) INTEGER
*          A's global row index, which points to the beginning of the
*          submatrix which is to be operated on.
*
*  JA      (global input) INTEGER
*          A's global column index, which points to the beginning of
*          the submatrix which is to be operated on.
*
*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix A.
*
*  B       (local input) REAL pointer into the local memory
*          to an array of dimension (LLD_B, LOCc(JB+N-1)). On entry,
*          this array contains the local pieces of the triangular factor
*          from the Cholesky factorization of sub( B ), as returned by
*          PSPOTRF.
*
*  IB      (global input) INTEGER
*          B's global row index, which points to the beginning of the
*          submatrix which is to be operated on.
*
*  JB      (global input) INTEGER
*          B's global column index, which points to the beginning of
*          the submatrix which is to be operated on.
*
*  DESCB   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix B.
*
*  SCALE   (global output) REAL
*          Amount by which the eigenvalues should be scaled to
*          compensate for the scaling performed in this routine.
*          At present, SCALE is always returned as 1.0, it is
*          returned here to allow for future enhancement.
*
*  WORK    (local workspace/local output) REAL array,
*                                                  dimension (LWORK)
*          On exit, WORK( 1 ) returns the optimal LWORK.
*
*  LWORK   (local or global input) INTEGER
*          The dimension of the array WORK.
*          LWORK is local input and must be at least
*          LWORK >= MAX( NB * ( NP0 +1 ), 3 * NB )
*
*          When IBTYPE = 1 and UPLO = 'L', PSSYNGST provides improved
*          performance when LWORK >= 2 * NP0 * NB + NQ0 * NB + NB * NB
*
*          where NB = MB_A = NB_A,
*          NP0 = NUMROC( N, NB, 0, 0, NPROW ),
*          NQ0 = NUMROC( N, NB, 0, 0, NPCOL ),
*
*          NUMROC is a ScaLAPACK tool function.
*          MYROW, MYCOL, NPROW and NPCOL can be determined by calling
*          the subroutine BLACS_GRIDINFO.
*
*          If LWORK = -1, then LWORK is global input and a workspace
*          query is assumed; the routine only calculates the
*          optimal size for all work arrays. Each of these
*          values is returned in the first entry of the corresponding
*          work array, and no error message is issued by PXERBLA.
*
*  INFO    (global output) INTEGER
*          = 0:  successful exit
*          < 0:  If the i-th argument is an array and the j-entry had
*                an illegal value, then INFO = -(i*100+j), if the i-th
*                argument is a scalar and had an illegal value, then
*                INFO = -i.
*
*  =====================================================================
*
*     .. Parameters ..
      REAL               ONEHALF, ONE, MONE
      PARAMETER          ( ONEHALF = 0.5E0, ONE = 1.0E0,
     $                   MONE = -1.0E0 )
      INTEGER            DLEN_, CTXT_, MB_, NB_, RSRC_, CSRC_, LLD_
      PARAMETER          ( DLEN_ = 9, CTXT_ = 2, MB_ = 5, NB_ = 6,
     $                   RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
*     ..
*     .. Local Scalars ..
      LOGICAL            LQUERY, UPPER
      INTEGER            I, IACOL, IAROW, IBCOL, IBROW, ICOFFA, ICOFFB,
     $                   ICTXT, INDAA, INDG, INDR, INDRT, IROFFA,
     $                   IROFFB, J, K, KB, LWMIN, LWOPT, MYCOL, MYROW,
     $                   NB, NP0, NPCOL, NPK, NPROW, NQ0, POSTK
*     ..
*     .. Local Arrays ..
      INTEGER            DESCAA( DLEN_ ), DESCG( DLEN_ ),
     $                   DESCR( DLEN_ ), DESCRT( DLEN_ ), IDUM1( 2 ),
     $                   IDUM2( 2 )
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            INDXG2P, NUMROC
      EXTERNAL           LSAME, INDXG2P, NUMROC
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, CHK1MAT, DESCSET, PCHK2MAT,
     $                   PSGEMM, PSLACPY, PSSYGST, PSSYMM, PSSYR2K,
     $                   PSTRSM, PXERBLA
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          ICHAR, MAX, MIN, MOD, REAL
*     ..
*     .. Executable Statements ..
      ICTXT = DESCA( CTXT_ )
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
      SCALE = 1.0E0
*
      NB = DESCA( MB_ )
*
*     Decide up front whether this is a workspace query so that LQUERY
*     is defined on every path through the argument checks below.
*
      LQUERY = ( LWORK.EQ.-1 )
*
*     Test the input parameters
*
      INFO = 0
      IF( NPROW.EQ.-1 ) THEN
         INFO = -( 700+CTXT_ )
      ELSE
         UPPER = LSAME( UPLO, 'U' )
         CALL CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO )
         CALL CHK1MAT( N, 3, N, 3, IB, JB, DESCB, 11, INFO )
         IF( INFO.EQ.0 ) THEN
            IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ),
     $              NPROW )
            IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ),
     $              NPROW )
            IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ),
     $              NPCOL )
            IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ),
     $              NPCOL )
            IROFFA = MOD( IA-1, DESCA( MB_ ) )
            ICOFFA = MOD( JA-1, DESCA( NB_ ) )
            IROFFB = MOD( IB-1, DESCB( MB_ ) )
            ICOFFB = MOD( JB-1, DESCB( NB_ ) )
            NP0 = NUMROC( N, NB, 0, 0, NPROW )
            NQ0 = NUMROC( N, NB, 0, 0, NPCOL )
*
*           LWMIN is enough for the PSSYGST fallback; LWOPT is what the
*           rank-2K path ( IBTYPE = 1, UPLO = 'L' ) needs.
*
            LWMIN = MAX( NB*( NP0+1 ), 3*NB )
            IF( IBTYPE.EQ.1 .AND. .NOT.UPPER ) THEN
               LWOPT = 2*NP0*NB + NQ0*NB + NB*NB
            ELSE
               LWOPT = LWMIN
            END IF
            WORK( 1 ) = REAL( LWOPT )
            IF( IBTYPE.LT.1 .OR. IBTYPE.GT.3 ) THEN
               INFO = -1
            ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
               INFO = -2
            ELSE IF( N.LT.0 ) THEN
               INFO = -3
            ELSE IF( IROFFA.NE.0 ) THEN
               INFO = -5
            ELSE IF( ICOFFA.NE.0 ) THEN
               INFO = -6
            ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN
               INFO = -( 700+NB_ )
            ELSE IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN
               INFO = -9
            ELSE IF( ICOFFB.NE.0 .OR. IBCOL.NE.IACOL ) THEN
               INFO = -10
            ELSE IF( DESCB( MB_ ).NE.DESCA( MB_ ) ) THEN
               INFO = -( 1100+MB_ )
            ELSE IF( DESCB( NB_ ).NE.DESCA( NB_ ) ) THEN
               INFO = -( 1100+NB_ )
            ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN
               INFO = -( 1100+CTXT_ )
            ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
*
*              LWORK is the 14th dummy argument (WORK is the 13th), so
*              report position 14 to match the INFO = -i convention.
*
               INFO = -14
            END IF
         END IF
         IDUM1( 1 ) = IBTYPE
         IDUM2( 1 ) = 1
         IF( UPPER ) THEN
            IDUM1( 2 ) = ICHAR( 'U' )
         ELSE
            IDUM1( 2 ) = ICHAR( 'L' )
         END IF
         IDUM2( 2 ) = 2
         CALL PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 7, N, 3, N, 3, IB,
     $                  JB, DESCB, 11, 2, IDUM1, IDUM2, INFO )
      END IF
*
      IF( INFO.NE.0 ) THEN
         CALL PXERBLA( ICTXT, 'PSSYNGST', -INFO )
         RETURN
      ELSE IF( LQUERY ) THEN
         RETURN
      END IF
*
*     Quick return if possible
*
      IF( N.EQ.0 )
     $   RETURN
*
*     Fall back to the triangular-solve based routine unless this is
*     the IBTYPE = 1, UPLO = 'L' case with the larger workspace.
*
      IF( IBTYPE.NE.1 .OR. UPPER .OR. LWORK.LT.LWOPT ) THEN
         CALL PSSYGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB,
     $                 DESCB, SCALE, INFO )
         RETURN
      END IF
*
*     Descriptors for the work panels held in WORK:
*       G  ( N-by-NB )  scaled copy of a block column of B
*       R  ( N-by-NB )  copy / update of a block column of A
*       RT ( NB-by-N )  copy of a block row of A
*       AA ( NB-by-NB ) full (symmetrized) copy of a diagonal block
*
      CALL DESCSET( DESCG, N, NB, NB, NB, IAROW, IACOL, ICTXT, NP0 )
      CALL DESCSET( DESCR, N, NB, NB, NB, IAROW, IACOL, ICTXT, NP0 )
      CALL DESCSET( DESCRT, NB, N, NB, NB, IAROW, IACOL, ICTXT, NB )
      CALL DESCSET( DESCAA, NB, NB, NB, NB, IAROW, IACOL, ICTXT, NB )
*
*     Offsets of the four panels inside WORK
*
      INDG = 1
      INDR = INDG + DESCG( LLD_ )*NB
      INDAA = INDR + DESCR( LLD_ )*NB
      INDRT = INDAA + DESCAA( LLD_ )*NB
*
      DO 30 K = 1, N, NB
*
*        KB    = width of the current block column,
*        POSTK = first row/column after the diagonal block,
*        NPK   = number of rows below the diagonal block.
*
         KB = MIN( N-K+1, NB )
         POSTK = K + KB
         NPK = N - POSTK + 1
*
*        Copy the sub-diagonal panels of B and A (block column K) into
*        G and R, and the block row of A left of the diagonal into RT.
*
         CALL PSLACPY( 'A', NPK, KB, B, POSTK+IB-1, K+JB-1, DESCB,
     $                 WORK( INDG ), POSTK, 1, DESCG )
         CALL PSLACPY( 'A', NPK, KB, A, POSTK+IA-1, K+JA-1, DESCA,
     $                 WORK( INDR ), POSTK, 1, DESCR )
         CALL PSLACPY( 'A', KB, K-1, A, K+IA-1, JA, DESCA,
     $                 WORK( INDRT ), 1, 1, DESCRT )
*
*        Keep the lower triangle of the diagonal block of A in R( K, 1 )
*
         CALL PSLACPY( 'L', KB, KB, A, K+IA-1, K+JA-1, DESCA,
     $                 WORK( INDR ), K, 1, DESCR )
*
*        G := - G * inv( B( K, K ) )
*
         CALL PSTRSM( 'Right', 'L', 'N', 'N', NPK, KB, MONE, B, K+IB-1,
     $                K+JB-1, DESCB, WORK( INDG ), POSTK, 1, DESCG )
*
*        R := R + 0.5 * G * A( K, K )
*
         CALL PSSYMM( 'Right', 'L', NPK, KB, ONEHALF, A, K+IA-1,
     $                K+JA-1, DESCA, WORK( INDG ), POSTK, 1, DESCG,
     $                ONE, WORK( INDR ), POSTK, 1, DESCR )
*
*        Rank-2K update of the trailing submatrix of A with G and R
*
         CALL PSSYR2K( 'Lower', 'No T', NPK, KB, ONE, WORK( INDG ),
     $                 POSTK, 1, DESCG, WORK( INDR ), POSTK, 1, DESCR,
     $                 ONE, A, POSTK+IA-1, POSTK+JA-1, DESCA )
*
*        Update the rows below the diagonal block in the columns to
*        the left of block column K:  A := A + G * RT
*
         CALL PSGEMM( 'No T', 'No Conj', NPK, K-1, KB, ONE,
     $                WORK( INDG ), POSTK, 1, DESCG, WORK( INDRT ), 1,
     $                1, DESCRT, ONE, A, POSTK+IA-1, JA, DESCA )
*
*        Update the sub-diagonal panel of block column K using the
*        saved diagonal block:  A := A + G * R( K, 1 )
*
         CALL PSSYMM( 'Right', 'L', NPK, KB, ONE, WORK( INDR ), K, 1,
     $                DESCR, WORK( INDG ), POSTK, 1, DESCG, ONE, A,
     $                POSTK+IA-1, K+JA-1, DESCA )
*
*        Block row left of the diagonal:  A := inv( B( K, K ) ) * A
*
         CALL PSTRSM( 'Left', 'Lower', 'No Conj', 'Non-unit', KB, K-1,
     $                ONE, B, K+IB-1, K+JB-1, DESCB, A, K+IA-1, JA,
     $                DESCA )
*
*        Copy the lower triangle of the diagonal block into AA and
*        mirror it into the upper triangle on the owning process, so
*        that the two-sided solve below works on a full matrix.
*
         CALL PSLACPY( 'L', KB, KB, A, K+IA-1, K+JA-1, DESCA,
     $                 WORK( INDAA ), 1, 1, DESCAA )
*
         IF( MYROW.EQ.DESCAA( RSRC_ ) .AND. MYCOL.EQ.DESCAA( CSRC_ ) )
     $        THEN
            DO 20 I = 1, KB
               DO 10 J = 1, I
                  WORK( INDAA+J-1+( I-1 )*DESCAA( LLD_ ) )
     $               = WORK( INDAA+I-1+( J-1 )*DESCAA( LLD_ ) )
   10          CONTINUE
   20       CONTINUE
         END IF
*
*        AA := inv( B( K, K ) ) * AA * inv( B( K, K )**T )
*
         CALL PSTRSM( 'Left', 'Lower', 'No Conj', 'Non-unit', KB, KB,
     $                ONE, B, K+IB-1, K+JB-1, DESCB, WORK( INDAA ), 1,
     $                1, DESCAA )
*
         CALL PSTRSM( 'Right', 'Lower', 'Conj', 'Non-unit', KB, KB,
     $                ONE, B, K+IB-1, K+JB-1, DESCB, WORK( INDAA ), 1,
     $                1, DESCAA )
*
*        Store the transformed diagonal block back into A
*
         CALL PSLACPY( 'L', KB, KB, WORK( INDAA ), 1, 1, DESCAA, A,
     $                 K+IA-1, K+JA-1, DESCA )
*
*        Sub-diagonal panel:  A := A * inv( B( K, K )**T )
*
         CALL PSTRSM( 'Right', 'Lower', 'Conj', 'Non-unit', NPK, KB,
     $                ONE, B, K+IB-1, K+JB-1, DESCB, A, POSTK+IA-1,
     $                K+JA-1, DESCA )
*
*        Advance the source process of each work panel so that the
*        panels line up with the next block row/column of A.
*
         DESCR( CSRC_ ) = MOD( DESCR( CSRC_ )+1, NPCOL )
         DESCG( CSRC_ ) = MOD( DESCG( CSRC_ )+1, NPCOL )
         DESCRT( RSRC_ ) = MOD( DESCRT( RSRC_ )+1, NPROW )
         DESCAA( RSRC_ ) = MOD( DESCAA( RSRC_ )+1, NPROW )
         DESCAA( CSRC_ ) = MOD( DESCAA( CSRC_ )+1, NPCOL )
   30 CONTINUE
*
      WORK( 1 ) = REAL( LWOPT )
*
      RETURN
      END