/* ---------------------------------------------------------------------
*
*  -- PBLAS routine (version 1.0) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     March 17, 1995
*
*  ---------------------------------------------------------------------
*/
/*
*  Include files
*/
#include "pblas.h"

void pdswap_( n, X, ix, jx, desc_X, incx, Y, iy, jy, desc_Y, incy )
/*
*  .. Scalar Arguments ..
*/
   int         * incx, * incy, * ix, * iy, * jx, * jy, * n;
/* ..
*  .. Array Arguments ..
*/
   int         desc_X[], desc_Y[];
   double      X[], Y[];
{
/*
*  Purpose
*  =======
*
*  PDSWAP swaps two distributed vectors,
*
*     sub( Y ) := sub( X ) and sub( X ) := sub( Y )
*
*  where sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X,
*                         X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X,
*
*        sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y,
*                         Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y.
*
*  Notes
*  =====
*
*  A description vector is associated with each 2D block-cyclicly dis-
*  tributed matrix.  This vector stores the information required to
*  establish the mapping between a matrix entry and its corresponding
*  process and memory location.
*
*  In the following comments, the character _ should be read as
*  "of the distributed matrix".  Let A be a generic term for any 2D
*  block cyclicly distributed matrix.  Its description vector is DESC_A:
*
*  NOTATION        STORED IN     EXPLANATION
*  --------------- ------------- ---------------------------------------
*  M_A    (global) desc_A[M_]    The number of rows in the distributed
*                                matrix.
*  N_A    (global) desc_A[N_]    The number of columns in the distribu-
*                                ted matrix.
*  MB_A   (global) desc_A[MB_]   The blocking factor used to distribute
*                                the rows of the matrix.
*  NB_A   (global) desc_A[NB_]   The blocking factor used to distribute
*                                the columns of the matrix.
*  RSRC_A (global) desc_A[RSRC_] The process row over which the first
*                                row of the matrix is distributed.
*  CSRC_A (global) desc_A[CSRC_] The process column over which the first
*                                column of the matrix is distributed.
*  CTXT_A (global) desc_A[CTXT_] The BLACS context handle, indicating
*                                the global context of the operation on
*                                the matrix.
*                                The context itself is global.
*  LLD_A  (local)  desc_A[LLD_]  The leading dimension of the local
*                                array storing the local blocks of the
*                                distributed matrix A.
*                                LLD_A >= MAX(1,LOCp(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCp( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCq( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCp() and LOCq() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC.
*          LOCp( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCq( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*
*  Because vectors may be seen as particular matrices, a distributed
*  vector is considered to be a distributed matrix.
*
*  If INCX = M_X and INCY = M_Y, NB_X must be equal to NB_Y, and the
*  process column having the first entries of sub( Y ) must also contain
*  the first entries of sub( X ). Moreover, the quantity
*  MOD( JX-1, NB_X ) must be equal to MOD( JY-1, NB_Y ).
*
*  If INCX = M_X, INCY = 1 and INCY <> M_Y, NB_X must be equal to MB_Y.
*  Moreover, the quantity MOD( JX-1, NB_X ) must be equal to
*  MOD( IY-1, MB_Y ).
*
*  If INCX = 1, INCX <> M_X and INCY = M_Y, MB_X must be equal to NB_Y.
*  Moreover, the quantity MOD( IX-1, MB_X ) must be equal to
*  MOD( JY-1, NB_Y ).
*
*  If INCX = 1, INCX <> M_X, INCY = 1 and INCY <> M_Y, MB_X must be
*  equal to MB_Y, and the process row having the first entries of
*  sub( Y ) must also contain the first entries of sub( X ). Moreover,
*  the quantity MOD( IX-1, MB_X ) must be equal to MOD( IY-1, MB_Y ).
*
*  Parameters
*  ==========
*
*  N       (global input) pointer to INTEGER
*          The length of the distributed vectors to be swapped. N >= 0.
*
*  X       (local input/local output) DOUBLE PRECISION array
*          containing the local pieces of a distributed matrix of
*          dimension of at least
*              ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) )
*          This array contains the entries of the distributed vector
*          sub( X ). On exit sub( X ) is overwritten by sub( Y ).
*
*  IX      (global input) pointer to INTEGER
*          The global row index of the submatrix of the distributed
*          matrix X to operate on.
*
*  JX      (global input) pointer to INTEGER
*          The global column index of the submatrix of the distributed
*          matrix X to operate on.
*
*  DESCX   (global and local input) INTEGER array of dimension 8.
*          The array descriptor of the distributed matrix X.
*
*  INCX    (global input) pointer to INTEGER
*          The global increment for the elements of X. Only two values
*          of INCX are supported in this version, namely 1 and M_X.
*
*  Y       (local input/local output) DOUBLE PRECISION array
*          containing the local pieces of a distributed matrix of
*          dimension of at least
*              ( (JY-1)*M_Y + IY + ( N - 1 )*abs( INCY ) )
*          This array contains the entries of the distributed vector
*          sub( Y ). On exit sub( Y ) is overwritten by sub( X ).
*
*  IY      (global input) pointer to INTEGER
*          The global row index of the submatrix of the distributed
*          matrix Y to operate on.
*
*  JY      (global input) pointer to INTEGER
*          The global column index of the submatrix of the distributed
*          matrix Y to operate on.
*
*  DESCY   (global and local input) INTEGER array of dimension 8.
*          The array descriptor of the distributed matrix Y.
*
*  INCY    (global input) pointer to INTEGER
*          The global increment for the elements of Y. Only two values
*          of INCY are supported in this version, namely 1 and M_Y.
*
*  =====================================================================
*
*  .. Local Scalars ..
*/
   int         ictxt, iix, iiy, info, ixcol, ixrow, iycol, iyrow, jjx,
               jjy, lcm, lcmp, lcmq, mycol, myrow, nn, np, np0, nprow,
               npcol, nq0, nq, nz, nzx, nzy, one=1, tmp1, wksz;
   double      zero;
/* ..
*  .. PBLAS Buffer ..
*/
   double      * buff;
/* ..
*  .. External Functions ..
*/
   void        blacs_gridinfo_();
   void        dgesd2d_();
   void        dgerv2d_();
   void        pbchkvect();
   void        pberror_();
   char        * getpbbuf();
   F_VOID_FCT  dcopy_();
   F_VOID_FCT  dswap_();
   F_VOID_FCT  pbdtrnv_();
   F_INTG_FCT  ilcm_();
   F_INTG_FCT  numroc_();
/* ..
*  .. Executable Statements ..
*
*  Get grid parameters
*/
   ictxt = desc_X[CTXT_];
   blacs_gridinfo_( &ictxt, &nprow, &npcol, &myrow, &mycol );
/*
*  Test the input parameters
*/
   info = 0;
   if( nprow == -1 )
      info = -507;
   else
   {
      pbchkvect( *n, 1, *ix, *jx, desc_X, *incx, 5, &iix, &jjx,
                 &ixrow, &ixcol, nprow, npcol, myrow, mycol, &info );
      pbchkvect( *n, 1, *iy, *jy, desc_Y, *incy, 10, &iiy, &jjy,
                 &iyrow, &iycol, nprow, npcol, myrow, mycol, &info );

      if( info == 0 )
      {
         if( *n != 1 )
         {
            if( *incx == desc_X[M_] )
            {                 /* X is distributed along a process row */
               if( *incy == desc_Y[M_] )
               {               /* Y is distributed over a process row */
                  if( ( ixcol != iycol ) ||
                      ( ( (*jx-1) % desc_X[NB_] ) !=
                        ( (*jy-1) % desc_Y[NB_] ) ) )
                     info = -9;
                  else if( desc_Y[NB_] != desc_X[NB_] )
                     info = -1004;
               }
               else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
               {            /* Y is distributed over a process column */
                  if( ( (*jx-1) % desc_X[NB_] ) != ( (*iy-1) % desc_Y[MB_] ) )
                     info = -8;
                  else if( desc_Y[MB_] != desc_X[NB_] )
                     info = -1003;
               }
               else
               {
                  info = -11;
               }
            }
            else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) )
            {              /* X is distributed along a process column */
               if( *incy == desc_Y[M_] )
               {               /* Y is distributed over a process row */
                  if( ( (*ix-1) % desc_X[MB_] ) != ( (*jy-1) % desc_Y[NB_] ) )
                     info = -9;
                  else if( desc_Y[NB_] != desc_X[MB_] )
                     info = -1004;
               }
               else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
               {            /* Y is distributed over a process column */
                  if( ( ixrow != iyrow ) ||
                      ( ( (*ix-1) % desc_X[MB_] ) !=
                        ( (*iy-1) % desc_Y[MB_] ) ) )
                     info = -8;
                  else if( desc_Y[MB_] != desc_X[MB_] )
                     info = -1003;
               }
               else
               {
                  info = -11;
               }
            }
            else
            {
               info = -6;
            }
         }
         if( ictxt != desc_Y[CTXT_] )
            info = -1007;
      }
   }
   if( info )
   {
      pberror_( &ictxt, "PDSWAP", &info );
      return;
   }
/*
*  Quick return if possible.
*/
   if( *n == 0 )
      return;
/*
*  Swap the vectors
*/
   if( *n == 1 )
   {
      if( ( myrow == ixrow ) && ( mycol==ixcol ) )
      {
         buff = &X[iix-1+(jjx-1)*desc_X[LLD_]];
         if( (myrow != iyrow) || (mycol != iycol) )
         {
            dgesd2d_( &ictxt, &one, &one, buff, &one, &iyrow, &iycol);
            dgerv2d_( &ictxt, &one, &one, buff, &one, &iyrow, &iycol);
         }
         else
            dswap_( n, buff, n, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], n );
      }
      else if( ( myrow == iyrow ) && ( mycol==iycol ) )
      {
         buff = &Y[iiy-1+(jjy-1)*desc_Y[LLD_]];
         dgesd2d_( &ictxt, &one, &one, buff, &one, &ixrow, &ixcol );
         dgerv2d_( &ictxt, &one, &one, buff, &one, &ixrow, &ixcol );
      }
      return;
   }

   zero = ZERO;
   if( ( *incx == desc_X[M_] ) && ( *incy == desc_Y[M_] ) )
   {               /* X and Y are both distributed over a process row */
      nz = (*jx-1) % desc_Y[NB_];
      nn = *n + nz;
      nq = numroc_( &nn, &desc_X[NB_], &mycol, &ixcol, &npcol );
      if( mycol == ixcol )
         nq -= nz;
      if( ixrow == iyrow )
      {
         if( myrow == ixrow )
            dswap_( &nq, &X[iix-1+(jjx-1)*desc_X[LLD_]],
                    &desc_X[LLD_], &Y[iiy-1+(jjy-1)*desc_Y[LLD_]],
                    &desc_Y[LLD_] );
      }
      else
      {
         if( myrow == ixrow )
         {
            dgesd2d_( &ictxt, &one, &nq,
                      &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
                      &iyrow, &mycol );
            dgerv2d_( &ictxt, &one, &nq,
                      &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
                      &iyrow, &mycol );
         }
         else if( myrow == iyrow )
         {
            dgesd2d_( &ictxt, &one, &nq,
                      &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_],
                      &ixrow, &mycol );
            dgerv2d_( &ictxt, &one, &nq,
                      &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_],
                      &ixrow, &mycol );
         }
      }
   }
   else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) &&
            ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
   {            /* X and Y are both distributed over a process column */
      nz = (*ix-1) % desc_X[MB_];
      nn = *n + nz;
      np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow );
      if( myrow == ixrow )
         np -= nz;
      if( ixcol == iycol )
      {
         if( mycol == ixcol )
            dswap_( &np, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx,
                    &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy );
      }
      else
      {
         if( mycol == ixcol )
         {
            dgesd2d_( &ictxt, &np, &one,
                      &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
                      &myrow, &iycol );
            dgerv2d_( &desc_X[CTXT_], &np, &one,
                      &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
                      &myrow, &iycol );
         }
         else if( mycol == iycol )
         {
            dgesd2d_( &ictxt, &np, &one,
                      &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_],
                      &myrow, &ixcol );
            dgerv2d_( &ictxt, &np, &one,
                      &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_],
                      &myrow, &ixcol );
         }
      }
   }
   else       /* X and Y are not distributed along the same direction */
   {
      lcm = ilcm_( &nprow, &npcol );
      if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) )
      {                     /* X is distributed over a process column */
         lcmp = lcm / nprow;
         lcmq = lcm / npcol;
         nzx = (*ix-1) % desc_X[MB_];
         nn = *n + nzx;
         tmp1 = nn / desc_X[MB_];
         np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow );
         np0 = MYROC0( tmp1, nn, desc_X[MB_], nprow );
         tmp1 = np0 / desc_X[MB_];
         wksz = np + MYROC0( tmp1, np0, desc_X[MB_], lcmp );
         nzy = (*jy-1) % desc_Y[NB_];
         nn = *n + nzy; tmp1 = nn / desc_Y[NB_];
         nq0 = MYROC0( tmp1, nn, desc_Y[NB_], npcol );
         tmp1 = nq0 / desc_Y[NB_];
         wksz += MYROC0( tmp1, nq0, desc_Y[NB_], lcmq );

         buff = (double *)getpbbuf( "PDSWAP", wksz*sizeof(double) );

         if( myrow == ixrow )
             iix -= nzx;
         if( mycol == iycol )
            iiy -= nzy;
         if( mycol == ixcol && np > 0 )
            dcopy_( &np, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx,
                    buff, incx );

         pbdtrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n,
                   &desc_Y[NB_], &nzy, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]],
                   &desc_Y[LLD_], &zero, &X[iix-1+(jjx-1)*desc_X[LLD_]],
                   incx, &iyrow, &iycol, &ixrow, &ixcol, buff+np );
         pbdtrnv_( &ictxt, C2F_CHAR( "C" ), C2F_CHAR( "T" ), n,
                   &desc_X[MB_], &nzx, buff, incx, &zero,
                   &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_],
                   &ixrow, &ixcol, &iyrow, &iycol, buff+np );
      }
      else                  /* Y is distributed over a process column */
      {
         lcmp = lcm / nprow;
         lcmq = lcm / npcol;
         nzx = (*jx-1) % desc_X[NB_];
         nn = *n + nzx;
         tmp1 = nn / desc_X[NB_];
         nq0 = MYROC0( tmp1, nn, desc_X[NB_], npcol );
         tmp1 = nq0 / desc_X[NB_];
         wksz = MYROC0( tmp1, nq0, desc_X[NB_], lcmq );
         nzy = (*iy-1) % desc_Y[MB_];
         nn = *n + nzy;
         tmp1 = nn / desc_Y[MB_];
         np = numroc_( &nn, &desc_Y[MB_], &myrow, &iyrow, &nprow );
         np0 = MYROC0( tmp1, nn, desc_Y[MB_], nprow );
         tmp1 = np0 / desc_Y[MB_];
         wksz += (np + MYROC0( tmp1, np0, desc_Y[MB_], lcmp ));

         buff = (double *)getpbbuf( "PDSWAP", wksz*sizeof(double) );

         if( myrow == iyrow )
            iiy -= nzy;
         if( mycol == ixcol )
            iix -= nzx;
         if( mycol == iycol && np > 0 )
            dcopy_( &np, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy,
                    buff, incy );

         pbdtrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n,
                   &desc_X[NB_], &nzx, &X[iix-1+(jjx-1)*desc_X[LLD_]],
                   &desc_X[LLD_], &zero, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]],
                   incy, &ixrow, &ixcol, &iyrow, &iycol, buff+np );
         pbdtrnv_( &ictxt, C2F_CHAR( "C" ), C2F_CHAR( "T" ), n,
                   &desc_Y[MB_], &nzy, buff, incy, &zero,
                   &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
                   &iyrow, &iycol, &ixrow, &ixcol, buff+np );
      }
   }
}
