#include "Bdef.h"


#if (INTFACE == C_CALL)
void Cdgamx2d(int ConTxt, char *scope, char *top, int m, int n, double *A,
              int lda, int *rA, int *cA, int ldia, int rdest, int cdest)
#else
F_VOID_FUNC dgamx2d_(int *ConTxt, F_CHAR scope, F_CHAR top, int *m, int *n,
                     double *A, int *lda, int *rA, int *cA, int *ldia,
                     int *rdest, int *cdest)
#endif
/*
 *  -- V1.1ALPHA (test version) BLACS routine --
 *  University of Tennessee, October 1, 1995
 *  Written by Clint Whaley.
 *
 *  Purpose
 *  =======
 *  Combine amx operation for double precision rectangular matrices.
 *
 *  Arguments
 *  =========
 *
 *  ConTxt  (input) Ptr to int
 *          Index into MyConTxts00 (my contexts array).
 *
 *  SCOPE   (input) Ptr to char
 *          Limit the scope of the operation.
 *          = 'R' :   Operation is performed by a process row.
 *          = 'C' :   Operation is performed by a process column.
 *          = 'A' :   Operation is performed by all processes in grid.
 *
 *  TOP     (input) Ptr to char
 *          Controls fashion in which messages flow within the operation.
 *
 *  M       (input) Ptr to int
 *          The number of rows of the matrix A.  M >= 0.
 *
 *  N       (input) Ptr to int
 *          The number of columns of the matrix A.  N >= 0.
 *
 *  A       (output) Ptr to double precision two dimensional array
 *          The m by n matrix A.  Fortran77 (column-major) storage
 *          assumed.
 *
 *  LDA     (input) Ptr to int
 *          The leading dimension of the array A.  LDA >= M.
 *
 *  RA      (output) Integer Array, dimension (LDIA, N)
 *          Contains process row that the amx of each element
 *          of A was found on: i.e., rA(1,2) contains the process
 *          row that the amx of A(1,2) was found on.
 *          Values are left on process {rdest, cdest} only, others
 *          may be modified, but not left with interesting data.
 *          If rdest == -1, then result is left on all processes in scope.
 *          If LDIA == -1, this array is not accessed, and need not exist.
 *
 *  CA      (output) Integer Array, dimension (LDIA, N)
 *          Contains process column that the amx of each element
 *          of A was found on: i.e., cA(1,2) contains the process
 *          column that the max/min of A(1,2) was found on.
 *          Values are left on process {rdest, cdest} only, others
 *          may be modified, but not left with interesting data.
 *          If rdest == -1, then result is left on all processes in scope.
 *          If LDIA == -1, this array is not accessed, and need not exist.
 *
 *  LDIA    (input) Ptr to int
 *          If (LDIA == -1), then the arrays RA and CA are not accessed.
 *          ELSE leading dimension of the arrays RA and CA.  LDIA >= M.
 *
 *  RDEST   (input) Ptr to int
 *          The process row of the destination of the amx.
 *          If rdest == -1, then result is left on all processes in scope.
 *
 *  CDEST   (input) Ptr to int
 *          The process column of the destination of the amx.
 *          If rdest == -1, then CDEST ignored.
 *
 * ------------------------------------------------------------------------
 */
{
   void BI_ArgCheck(int, int, char *, char, char, char, int, int, int, int,
                    int *, int *);
   void BI_UpdateBuffs(BLACBUFF *);
   BLACBUFF *BI_GetBuff(int);
   int BI_BuffIsFree(BLACBUFF *, int);
   BI_MPI_Datatype BI_GetMpiGeType(BLACSCONTEXT *, int, int, int,
                                   BI_MPI_Datatype, int *);
   BLACBUFF *BI_Pack(BLACSCONTEXT *, BVOID *, BLACBUFF *, BI_MPI_Datatype);
   BI_MPI_Datatype BI_GetMpiGeType(BLACSCONTEXT *, int, int, int,
                                   BI_MPI_Datatype, int *);
   void BI_Unpack(BLACSCONTEXT *, BVOID *, BLACBUFF *, BI_MPI_Datatype);
   void BI_MringComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR,
                     int, int);
   void BI_TreeComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR,
                    int, int);
   void BI_BeComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR);
   void BI_dvvamx(int, char *, char *);
   void BI_dvvamx2(int, char *, char *);
   void BI_dMPI_amx(void *, void *, int *, BI_MPI_Datatype *);
   void BI_dMPI_amx2(void *, void *, int *, BI_MPI_Datatype *);
/*
 *  Variable Declarations
 */
   VVFUNPTR vvop;
   BLACBUFF *bp, *bp2;
   BLACSCONTEXT *ctxt;
   char ttop, tscope;
   int i, j, N, dest, idist, length, tlda, tldia, trdest, ierr;
   int len[2];
   BI_MPI_Aint disp[2];
   BI_MPI_Datatype dtypes[2];
   BI_MPI_Op BlacComb;
   BI_MPI_Datatype MyType;
   BI_DistType *dist, mydist;
   extern BLACBUFF *BI_ActiveQ;
   extern BLACBUFF BI_AuxBuff;

   MGetConTxt(Mpval(ConTxt), ctxt);
   ttop = F2C_CharTrans(top);
   ttop = Mlowcase(ttop);
   tscope = F2C_CharTrans(scope);
   tscope = Mlowcase(tscope);
/*
 *  If the user has set the default combine topology, use it instead of
 *  BLACS default
 */
#ifdef DefCombTop
   if (ttop == ' ') ttop = DefCombTop;
#endif
   if (Mpval(cdest) == -1) trdest = -1;
   else trdest = Mpval(rdest);
#if (BlacsDebugLvl > 0)
   BI_ArgCheck(Mpval(ConTxt), RT_COMB, __FILE__, tscope, 'u', 'u', Mpval(m),
               Mpval(n), Mpval(lda), 1, &trdest, Mpaddress(cdest));
   if (Mpval(ldia) < Mpval(m))
   {
      if (Mpval(ldia) != -1)
         BI_BlacsWarn(Mpval(ConTxt), __LINE__, __FILE__,
                      "LDIA too small (LDIA=%d, but M=%d)", Mpval(ldia),
                      Mpval(m));
   }
#endif
   if (Mpval(lda) >= Mpval(m)) tlda = Mpval(lda);
   else tlda = Mpval(m);
   if (Mpval(ldia) < Mpval(m)) tldia = Mpval(m);
   else tldia = Mpval(ldia);
   switch(tscope)
   {
   case 'r':
      ctxt->scp = &ctxt->rscp;
      if (trdest == -1) dest = -1;
      else dest = Mpval(cdest);
      break;
   case 'c':
      ctxt->scp = &ctxt->cscp;
      dest = trdest;
      break;
   case 'a':
      ctxt->scp = &ctxt->ascp;
      if (trdest == -1) dest = -1;
      else dest = Mvkpnum(ctxt, trdest, Mpval(cdest));
      break;
   default:
      BI_BlacsErr(Mpval(ConTxt), __LINE__, __FILE__, "Unknown scope '%c'",
                  tscope);
   }


/*
 * It's not defined how MPI reacts to 0 element reductions, so use BLACS 1-tree
 * topology if we've got one
 */
   if (ttop == ' ') if ( (Mpval(m) < 1) || (Mpval(n) < 1) ) ttop = '1';
   N = Mpval(m) * Mpval(n);
/*
 * If process who has amx is to be communicated, must set up distance
 * vector after value vector
 */
   if (Mpval(ldia) != -1)
   {
      vvop = BI_dvvamx;
      length = N * sizeof(double);
      i = length % sizeof(BI_DistType);  /* ensure dist vec aligned correctly */
      if (i) length += sizeof(BI_DistType) - i;
      idist = length;
      length += N * sizeof(BI_DistType);
/*
 *    For performance, insist second buffer is at least 8-byte aligned
 */
      j = 8;
      if (sizeof(double) > j) j = sizeof(double);
      i = length % j;
      if (i) length += j - i;
      i = 2 * length;

      bp = BI_GetBuff(i);
      bp2 = &BI_AuxBuff;
      bp2->Buff = &bp->Buff[length];
      BI_dmvcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff);
/*
 *    Fill in distance vector
 */
      if (dest == -1) mydist = ctxt->scp->Iam;
      else mydist = (ctxt->scp->Np + ctxt->scp->Iam - dest) % ctxt->scp->Np;
      dist = (BI_DistType *) &bp->Buff[idist];
      for (i=0; i < N; i++) dist[i] = mydist;

/*
 *    Create the MPI datatype holding both user's buffer and distance vector
 */
      len[0] = len[1] = N;
      disp[0] = 0;
      disp[1] = idist;
      dtypes[0] = BI_MPI_DOUBLE;
      dtypes[1] = BI_MpiDistType;
#ifdef ZeroByteTypeBug
      if (N > 0)
      {
#endif
      i = 2;
      BI_MPI_Type_struct(i, len, disp, dtypes, &MyType, ierr);
      BI_MPI_Type_commit(&MyType, ierr);
      bp->N = bp2->N = 1;
      bp->dtype = bp2->dtype = MyType;
#ifdef ZeroByteTypeBug
      }
      else
      {
         bp->N = bp2->N = 0;
         bp->dtype = bp2->dtype = BI_MPI_INT;
      }
#endif
   }
   else
   {
      vvop = BI_dvvamx2;
      length = N * sizeof(double);
/*
 *    If A is contiguous, we can use it as one of our buffers
 */
      if ( (Mpval(m) == tlda) || (Mpval(n) == 1) )
      {
         bp = &BI_AuxBuff;
         bp->Buff = (char *) A;
         bp2 = BI_GetBuff(length);
      }
      else
      {
         bp = BI_GetBuff(length*2);
         bp2 = &BI_AuxBuff;
         bp2->Buff = &bp->Buff[length];
         BI_dmvcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff);
      }
      bp->N = bp2->N = N;
      bp->dtype = bp2->dtype = BI_MPI_DOUBLE;
   }

   switch(ttop)
   {
   case ' ':         /* use MPI's reduction by default */
      i = 1;
      if (Mpval(ldia) == -1)
      {
         BI_MPI_Op_create(BI_dMPI_amx2, i, &BlacComb, ierr);
      }
      else
      {
         BI_MPI_Op_create(BI_dMPI_amx, i, &BlacComb, ierr);
         BI_AuxBuff.Len = N;  /* set this up for the MPI OP wrappers */
      }

      if (trdest != -1)
      {
         BI_MPI_Reduce(bp->Buff, bp2->Buff, bp->N, bp->dtype, BlacComb, dest,
	 	       ctxt->scp->comm, ierr);
         if (ctxt->scp->Iam == dest)
	 {
	    BI_dvmcopy(Mpval(m), Mpval(n), A, tlda, bp2->Buff);
	    if (Mpval(ldia) != -1)
               BI_TransDist(ctxt, tscope, Mpval(m), Mpval(n), rA, cA, tldia,
                            (BI_DistType *) &bp2->Buff[idist],
			    trdest, Mpval(cdest));
	 }
      }
      else
      {
         BI_MPI_Allreduce(bp->Buff, bp2->Buff, bp->N, bp->dtype, BlacComb,
		          ctxt->scp->comm, ierr);
	 BI_dvmcopy(Mpval(m), Mpval(n), A, tlda, bp2->Buff);
         if (Mpval(ldia) != -1)
            BI_TransDist(ctxt, tscope, Mpval(m), Mpval(n), rA, cA, tldia,
                         (BI_DistType *) &bp2->Buff[idist],
                         trdest, Mpval(cdest));
      }
      BI_MPI_Op_free(&BlacComb, ierr);
      if (Mpval(ldia) != -1)
#ifdef ZeroByteTypeBug
         if (N > 0)
#endif
         BI_MPI_Type_free(&MyType, ierr);
      if (BI_ActiveQ) BI_UpdateBuffs(NULL);
      return;
      break;
   case 'i':
      BI_MringComb(ctxt, bp, bp2, N, vvop, dest, 1);
      break;
   case 'd':
      BI_MringComb(ctxt, bp, bp2, N, vvop, dest, -1);
      break;
   case 's':
      BI_MringComb(ctxt, bp, bp2, N, vvop, dest, 2);
      break;
   case 'm':
      BI_MringComb(ctxt, bp, bp2, N, vvop, dest, ctxt->nrings);
      break;
   case '1':
   case '2':
   case '3':
   case '4':
   case '5':
   case '6':
   case '7':
   case '8':
   case '9':
      BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, ttop-47);
      break;
   case 'f':
      BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, FULLCON);
      break;
   case 't':
      BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, ctxt->nbranches);
      break;
   case 'h':
/*
 *    Use bidirectional exchange if everyone wants answer
 */
      if (trdest == -1)
         BI_BeComb(ctxt, bp, bp2, N, vvop);
      else
         BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, 2);
      break;
   default :
      BI_BlacsErr(Mpval(ConTxt), __LINE__, __FILE__, "Unknown topology '%c'",
                  ttop);
   }

   if (Mpval(ldia) != -1)
#ifdef ZeroByteTypeBug
      if (N > 0)
#endif
      BI_MPI_Type_free(&MyType, ierr);
/*
 * If I am selected to receive answer
 */
   if ( (ctxt->scp->Iam == dest) || (dest == -1) )
   {
/*
 *    Translate the distances stored in the latter part of bp->Buff into
 *    process grid coordinates, and output these coordinates in the
 *    arrays rA and cA.
 */
      if (Mpval(ldia) != -1)
         BI_TransDist(ctxt, tscope, Mpval(m), Mpval(n), rA, cA, tldia,
                      dist, trdest, Mpval(cdest));
/*
 *    Unpack the amx array
 */
      if (bp != &BI_AuxBuff) BI_dvmcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff);
   }
}
