*  Source listing extracted from: de/d10/pslaevswp_8f_source.html

*

*

      SUBROUTINE PSLAEVSWP( N, ZIN, LDZI, Z, IZ, JZ, DESCZ, NVS, KEY,
     $                      WORK, LWORK )
*
*  -- ScaLAPACK routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     April 15, 1997
*
*     .. Scalar Arguments ..
      INTEGER            IZ, JZ, LDZI, LWORK, N
*     ..
*     .. Array Arguments ..
      INTEGER            DESCZ( * ), KEY( * ), NVS( * )
      REAL               WORK( * ), Z( * ), ZIN( LDZI, * )
*     ..
*
*  Purpose
*  =======
*
*  PSLAEVSWP moves the eigenvectors (potentially unsorted) from
*  where they are computed, to a ScaLAPACK standard block cyclic
*  array, sorted so that the corresponding eigenvalues are sorted.
*
*  Every process loops over all NPROCS "distances".  At distance DIST
*  it packs into WORK the pieces of its own eigenvectors that belong
*  to process MOD( IAM+DIST, NPROCS ), sends them (unless the target
*  is itself), receives the matching pieces from process
*  MOD( NPROCS+IAM-DIST, NPROCS ) and unpacks them into its local
*  part of Z.
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclicly distributed array.
*  Such a global array has an associated description vector DESCA.
*  In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
*                                 DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) DESCA( M_ )    The number of rows in the global
*                                 array A.
*  N_A    (global) DESCA( N_ )    The number of columns in the global
*                                 array A.
*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
*                                 the rows of the array.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
*                                 the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Arguments
*  =========
*
*     NP = the number of rows local to a given process.
*     NQ = the number of columns local to a given process.
*     iam = MYROW*NPCOL + MYCOL, the row-major rank of the calling
*           process in the grid returned by BLACS_GRIDINFO.
*
*  N       (global input) INTEGER
*          The number of rows of each eigenvector, i.e. the order of
*          the submatrix of Z that is operated on: global rows
*          IZ .. IZ+N-1 are transferred.  N >= 0.
*
*  ZIN     (local input) REAL array,
*          dimension ( LDZI, NVS(iam+2)-NVS(iam+1) )
*          The eigenvectors on input.  Each eigenvector resides entirely
*          in one process.  Each process holds a contiguous set of
*          eigenvectors; local column k of ZIN is the eigenvector with
*          (unsorted) index NVS(iam+1)+k.  Rows 1 .. N are read.
*
*  LDZI    (local input) INTEGER
*          Leading dimension of the ZIN array.
*
*  Z       (local output) REAL array
*          global dimension (N, N), local dimension (DESCZ(LLD_), NQ)
*          The eigenvectors on output.  The eigenvectors are distributed
*          in a block cyclic manner in both dimensions, with a
*          block size of NB.
*
*  IZ      (global input) INTEGER
*          Z's global row index, which points to the beginning of the
*          submatrix which is to be operated on.
*
*  JZ      (global input) INTEGER
*          Z's global column index, which points to the beginning of
*          the submatrix which is to be operated on.
*
*  DESCZ   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix Z.
*
*  NVS     (global input) INTEGER array, dimension( nprocs+1 )
*          nvs(i) = number of eigenvectors held by processes [0,i-1)
*          nvs(1) = number of eigenvectors held by [0,1-1) == 0
*          nvs(nprocs+1) = number of eigenvectors held by [0,nprocs) ==
*            total number of eigenvectors
*
*  KEY     (global input/output) INTEGER array,
*          dimension at least DESCZ( N_ )
*          On entry, KEY(k), k = 1 .. N, is the index (after sorting,
*          relative to the submatrix) of the k-th eigenvector.
*          On exit, the entries have been shifted in place so that
*          KEY(JZ+k-1) = (entry value of KEY(k)) + JZ-1, i.e. they
*          refer to global columns of Z.  Entries KEY(1:JZ-1) are left
*          unchanged.  When JZ = 1 the array is unchanged.
*
*  WORK    (local workspace) REAL array, dimension (LWORK)
*          Pack/unpack buffer for one message.
*          NOTE(review): must be large enough for the biggest message
*          exchanged between two processes; this routine does not
*          check that.
*
*  LWORK   (local input) INTEGER dimension of WORK
*          Not referenced by this routine.
*
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
     $                   MB_, NB_, RSRC_, CSRC_, LLD_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
     $                   CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                   RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
*     ..
*     .. Local Scalars ..
      INTEGER            CYCLIC_I, CYCLIC_J, DIST, I, IAM, II, INCII, J,
     $                   MAXI, MAXII, MINI, MINII, MYCOL, MYROW, NB,
     $                   NBUFSIZE, NPCOL, NPROCS, NPROW, PCOL, RECVCOL,
     $                   RECVFROM, RECVROW, SENDCOL, SENDROW, SENDTO
*     ..
*     .. External Functions ..
      INTEGER            INDXG2L, INDXG2P
      EXTERNAL           INDXG2L, INDXG2P
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, SGERV2D, SGESD2D
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN, MOD
*     ..
*     .. Executable Statements ..
*       This is just to keep ftnchek happy
      IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_*
     $    RSRC_.LT.0 )RETURN
      CALL BLACS_GRIDINFO( DESCZ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL )
*
*     Rank of this process, numbering the grid in row-major order.
*     The same numbering is used below to turn SENDTO/RECVFROM back
*     into grid coordinates.
*
      IAM = MYROW*NPCOL + MYCOL
*
      NB = DESCZ( MB_ )
*
      NPROCS = NPROW*NPCOL
*
*     If PxSTEIN operates on a sub-matrix of a global matrix, the
*     key [] that contains the indices of the eigenvectors is refe-
*     renced to the dimensions of the sub-matrix and not the global
*     distributed matrix. Because of this, PxLAEVSWP will incorrectly
*     map the eigenvectors to the global eigenvector matrix, Z, unless
*     the key[] elements are shifted as below.
*
*     The loop runs downwards so that each source entry KEY( J-JZ+1 )
*     is read before it can be overwritten.  It stops at J = JZ:
*     for J < JZ the source index J-JZ+1 would be less than 1, and
*     only entries KEY( J ) with J >= JZ are used below.
*
      DO 10 J = DESCZ( N_ ), JZ, -1
         KEY( J ) = KEY( J-JZ+1 ) + ( JZ-1 )
   10 CONTINUE
*
      DO 110 DIST = 0, NPROCS - 1
*
         SENDTO = MOD( IAM+DIST, NPROCS )
         RECVFROM = MOD( NPROCS+IAM-DIST, NPROCS )
*
*        Grid coordinates of the two partners (row-major numbering,
*        consistent with IAM above).
*
         SENDROW = SENDTO / NPCOL
         SENDCOL = MOD( SENDTO, NPCOL )
         RECVROW = RECVFROM / NPCOL
         RECVCOL = MOD( RECVFROM, NPCOL )
*
*        Figure out what I have that process "sendto" wants
*
         NBUFSIZE = 0
*
*        We are looping through the eigenvectors that I presently own.
*
         DO 40 J = NVS( 1+IAM ) + JZ, NVS( 1+IAM+1 ) + JZ - 1
*
*           Process column that owns sorted global column KEY( J ).
*
            PCOL = INDXG2P( KEY( J ), DESCZ( NB_ ), -1, DESCZ( CSRC_ ),
     $             NPCOL )
            IF( SENDCOL.EQ.PCOL ) THEN
*
*              Walk over the row blocks assigned to process row
*              SENDROW: first global row MINII, stride MB_*NPROW.
*              NOTE(review): RSRC_ is added here, not subtracted;
*              this agrees with the usual block-cyclic ownership
*              rule when RSRC_ = 0 - confirm for other values.
*
               MINII = MOD( SENDROW+DESCZ( RSRC_ ), NPROW )*
     $                 DESCZ( MB_ ) + 1
               MAXII = DESCZ( M_ )
               INCII = DESCZ( MB_ )*NPROW
               DO 30 II = MINII, MAXII, INCII
*
*                 Clip the block to the submatrix rows IZ .. N+IZ-1.
*
                  MINI = MAX( II, IZ )
                  MAXI = MIN( II+DESCZ( MB_ )-1, N+IZ-1 )
                  DO 20 I = MINI, MAXI, 1
                     NBUFSIZE = NBUFSIZE + 1
                     WORK( NBUFSIZE ) = ZIN( I+1-IZ,
     $                                  J-NVS( 1+IAM )+1-JZ )
   20             CONTINUE
   30          CONTINUE
            END IF
   40    CONTINUE
*
*        When the target is this process itself the packed data
*        simply stays in WORK and is unpacked below.
*
         IF( MYROW.NE.SENDROW .OR. MYCOL.NE.SENDCOL )
     $      CALL SGESD2D( DESCZ( CTXT_ ), NBUFSIZE, 1, WORK, NBUFSIZE,
     $                    SENDROW, SENDCOL )
*
*        Figure out what process "recvfrom" has that I want
*        (this pass only counts the elements to be received).
*
         NBUFSIZE = 0
         DO 70 J = NVS( 1+RECVFROM ) + JZ,
     $           NVS( 1+RECVFROM+1 ) + JZ - 1, 1
            PCOL = INDXG2P( KEY( J ), DESCZ( NB_ ), -1, DESCZ( CSRC_ ),
     $             NPCOL )
            IF( MYCOL.EQ.PCOL ) THEN
               MINII = MOD( MYROW+DESCZ( RSRC_ ), NPROW )*DESCZ( MB_ ) +
     $                 1
               MAXII = DESCZ( M_ )
               INCII = DESCZ( MB_ )*NPROW
               DO 60 II = MINII, MAXII, INCII
                  MINI = MAX( II, IZ )
                  MAXI = MIN( II+NB-1, N+IZ-1 )
                  DO 50 I = MINI, MAXI, 1
                     NBUFSIZE = NBUFSIZE + 1
   50             CONTINUE
   60          CONTINUE
            END IF
   70    CONTINUE
*
         IF( MYROW.NE.RECVROW .OR. MYCOL.NE.RECVCOL )
     $      CALL SGERV2D( DESCZ( CTXT_ ), 1, NBUFSIZE, WORK, 1, RECVROW,
     $                    RECVCOL )
*
*        Unpack WORK into the local part of Z, visiting the elements
*        in the same order in which the sender packed them.
*
         NBUFSIZE = 0
         DO 100 J = NVS( 1+RECVFROM ) + JZ,
     $           NVS( 1+RECVFROM+1 ) + JZ - 1, 1
            PCOL = INDXG2P( KEY( J ), DESCZ( NB_ ), -1, DESCZ( CSRC_ ),
     $             NPCOL )
            IF( MYCOL.EQ.PCOL ) THEN
*
*              Local column index of global column KEY( J ).
*              NOTE(review): the row block size MB_ is used here for
*              a column index; presumably MB_ = NB_ is assumed.
*
               CYCLIC_J = INDXG2L( KEY( J ), DESCZ( MB_ ), -1, -1,
     $                    NPCOL )
               CYCLIC_I = 1
               MINII = MOD( MYROW+DESCZ( RSRC_ ), NPROW )*DESCZ( MB_ ) +
     $                 1
               MAXII = DESCZ( M_ )
               INCII = DESCZ( MB_ )*NPROW
               DO 90 II = MINII, MAXII, INCII
                  MINI = MAX( II, IZ )
*
*                 Local row index of the first row of this block;
*                 the following rows of the block are consecutive.
*
                  CYCLIC_I = INDXG2L( MINI, DESCZ( MB_ ), -1, -1,
     $                       NPROW )
                  MAXI = MIN( II+NB-1, N+IZ-1 )
                  DO 80 I = MINI, MAXI, 1
                     NBUFSIZE = NBUFSIZE + 1
                     Z( CYCLIC_I+( CYCLIC_J-1 )*DESCZ( LLD_ ) )
     $                  = WORK( NBUFSIZE )
                     CYCLIC_I = CYCLIC_I + 1
   80             CONTINUE
   90          CONTINUE
            END IF
  100    CONTINUE
*
  110 CONTINUE
      RETURN
*
*     End of PSLAEVSWP
*
      END