ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
cgamn2d_.c
Go to the documentation of this file.
00001 #include "Bdef.h"
00002 
00003 
00004 
00005 #if (INTFACE == C_CALL)
00006 void Ccgamn2d(int ConTxt, char *scope, char *top, int m, int n, float *A,
00007               int lda, int *rA, int *cA, int ldia, int rdest, int cdest)
00008 #else
00009 F_VOID_FUNC cgamn2d_(int *ConTxt, F_CHAR scope, F_CHAR top, int *m, int *n,
00010                      float *A, int *lda, int *rA, int *cA, int *ldia,
00011                      int *rdest, int *cdest)
00012 #endif
00013 /*
00014  *  -- V1.1 BLACS routine --
00015  *  University of Tennessee, May 1, 1996
00016  *  Written by Clint Whaley.
00017  *
00018  *  Purpose
00019  *  =======
00020  *  Combine amn operation for complex rectangular matrices.
00021  *
00022  *  Arguments
00023  *  =========
00024  *
00025  *  ConTxt  (input) Ptr to int
00026  *          Index into MyConTxts00 (my contexts array).
00027  *
00028  *  SCOPE   (input) Ptr to char
00029  *          Limit the scope of the operation.
00030  *          = 'R' :   Operation is performed by a process row.
00031  *          = 'C' :   Operation is performed by a process column.
00032  *          = 'A' :   Operation is performed by all processes in grid.
00033  *
00034  *  TOP     (input) Ptr to char
00035  *          Controls fashion in which messages flow within the operation.
00036  *
00037  *  M       (input) Ptr to int
00038  *          The number of rows of the matrix A.  M >= 0.
00039  *
00040  *  N       (input) Ptr to int
00041  *          The number of columns of the matrix A.  N >= 0.
00042  *
00043  *  A       (output) Ptr to complex two dimensional array
00044  *          The m by n matrix A.  Fortran77 (column-major) storage
00045  *          assumed.
00046  *
00047  *  LDA     (input) Ptr to int
00048  *          The leading dimension of the array A.  LDA >= M.
00049  *
00050  *  RA      (output) Integer Array, dimension (LDIA, N)
00051  *          Contains process row that the amn of each element
00052  *          of A was found on: i.e., rA(1,2) contains the process
00053  *          row that the amn of A(1,2) was found on.
00054  *          Values are left on process {rdest, cdest} only, others
00055  *          may be modified, but not left with interesting data.
00056  *          If rdest == -1, then result is left on all processes in scope.
00057  *          If LDIA == -1, this array is not accessed, and need not exist.
00058  *
00059  *  CA      (output) Integer Array, dimension (LDIA, N)
00060  *          Contains process column that the amn of each element
00061  *          of A was found on: i.e., cA(1,2) contains the process
00062  *          column that the max/min of A(1,2) was found on.
00063  *          Values are left on process {rdest, cdest} only, others
00064  *          may be modified, but not left with interesting data.
00065  *          If rdest == -1, then result is left on all processes in scope.
00066  *          If LDIA == -1, this array is not accessed, and need not exist.
00067  *
00068  *  LDIA    (input) Ptr to int
00069  *          If (LDIA == -1), then the arrays RA and CA are not accessed.
00070  *          ELSE leading dimension of the arrays RA and CA.  LDIA >= M.
00071  *
00072  *  RDEST   (input) Ptr to int
00073  *          The process row of the destination of the amn.
00074  *          If rdest == -1, then result is left on all processes in scope.
00075  *
00076  *  CDEST   (input) Ptr to int
00077  *          The process column of the destination of the amn.
00078  *          If rdest == -1, then CDEST ignored.
00079  *
00080  * ------------------------------------------------------------------------
00081  */
00082 {
00083    void BI_ArgCheck(int, int, char *, char, char, char, int, int, int, int,
00084                     int *, int *);
00085    void BI_UpdateBuffs(BLACBUFF *);
00086    BLACBUFF *BI_GetBuff(int);
00087    int BI_BuffIsFree(BLACBUFF *, int);
00088    MPI_Datatype BI_GetMpiGeType(BLACSCONTEXT *, int, int, int,
00089                                    MPI_Datatype, int *);
00090    BLACBUFF *BI_Pack(BLACSCONTEXT *, BVOID *, BLACBUFF *, MPI_Datatype);
00091    MPI_Datatype BI_GetMpiGeType(BLACSCONTEXT *, int, int, int,
00092                                    MPI_Datatype, int *);
00093    void BI_Unpack(BLACSCONTEXT *, BVOID *, BLACBUFF *, MPI_Datatype);
00094    void BI_MringComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR,
00095                      int, int);
00096    void BI_TreeComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR,
00097                     int, int);
00098    void BI_BeComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR);
00099    void BI_cvvamn(int, char *, char *);
00100    void BI_cvvamn2(int, char *, char *);
00101    void BI_cMPI_amn(void *, void *, int *, MPI_Datatype *);
00102    void BI_cMPI_amn2(void *, void *, int *, MPI_Datatype *);
00103 /*
00104  *  Variable Declarations
00105  */
00106    VVFUNPTR vvop;
00107    BLACBUFF *bp, *bp2;
00108    BLACSCONTEXT *ctxt;
00109    char ttop, tscope;
00110    int i, j, N, dest, idist, length, tlda, tldia, trdest, ierr;
00111    int len[2];
00112    MPI_Aint disp[2];
00113    MPI_Datatype dtypes[2];
00114    MPI_Op BlacComb;
00115    MPI_Datatype MyType;
00116    BI_DistType *dist, mydist;
00117    extern BLACBUFF *BI_ActiveQ;
00118    extern BLACBUFF BI_AuxBuff;
00119 
00120    MGetConTxt(Mpval(ConTxt), ctxt);
00121    ttop = F2C_CharTrans(top);
00122    ttop = Mlowcase(ttop);
00123    tscope = F2C_CharTrans(scope);
00124    tscope = Mlowcase(tscope);
00125 /*
00126  *  If the user has set the default combine topology, use it instead of
00127  *  BLACS default
00128  */
00129 #ifdef DefCombTop
00130    if (ttop == ' ') ttop = DefCombTop;
00131 #endif
00132    if (Mpval(cdest) == -1) trdest = -1;
00133    else trdest = Mpval(rdest);
00134 #if (BlacsDebugLvl > 0)
00135    BI_ArgCheck(Mpval(ConTxt), RT_COMB, __FILE__, tscope, 'u', 'u', Mpval(m),
00136                Mpval(n), Mpval(lda), 1, &trdest, Mpaddress(cdest));
00137    if (Mpval(ldia) < Mpval(m))
00138    {
00139       if (Mpval(ldia) != -1)
00140          BI_BlacsWarn(Mpval(ConTxt), __LINE__, __FILE__,
00141                       "LDIA too small (LDIA=%d, but M=%d)", Mpval(ldia),
00142                       Mpval(m));
00143    }
00144 #endif
00145    if (Mpval(lda) >= Mpval(m)) tlda = Mpval(lda);
00146    else tlda = Mpval(m);
00147    if (Mpval(ldia) < Mpval(m)) tldia = Mpval(m);
00148    else tldia = Mpval(ldia);
00149    switch(tscope)
00150    {
00151    case 'r':
00152       ctxt->scp = &ctxt->rscp;
00153       if (trdest == -1) dest = -1;
00154       else dest = Mpval(cdest);
00155       break;
00156    case 'c':
00157       ctxt->scp = &ctxt->cscp;
00158       dest = trdest;
00159       break;
00160    case 'a':
00161       ctxt->scp = &ctxt->ascp;
00162       if (trdest == -1) dest = -1;
00163       else dest = Mvkpnum(ctxt, trdest, Mpval(cdest));
00164       break;
00165    default:
00166       BI_BlacsErr(Mpval(ConTxt), __LINE__, __FILE__, "Unknown scope '%c'",
00167                   tscope);
00168    }
00169 
00170 
00171 /*
00172  * It's not defined how MPI reacts to 0 element reductions, so use BLACS 1-tree
00173  * topology if we've got one
00174  */
00175    if (ttop == ' ')
00176       if ( (Mpval(m) < 1) || (Mpval(n) < 1) || (ctxt->TopsRepeat)  ) ttop = '1';
00177    N = Mpval(m) * Mpval(n);
00178 /*
00179  * If process who has amn is to be communicated, must set up distance
00180  * vector after value vector
00181  */
00182    if (Mpval(ldia) != -1)
00183    {
00184       vvop = BI_cvvamn;
00185       length = N * sizeof(SCOMPLEX);
00186       i = length % sizeof(BI_DistType);  /* ensure dist vec aligned correctly */
00187       if (i) length += sizeof(BI_DistType) - i;
00188       idist = length;
00189       length += N * sizeof(BI_DistType);
00190 /*
00191  *    For performance, insist second buffer is at least 8-byte aligned
00192  */
00193       j = 8;
00194       if (sizeof(SCOMPLEX) > j) j = sizeof(SCOMPLEX);
00195       i = length % j;
00196       if (i) length += j - i;
00197       i = 2 * length;
00198 
00199       bp = BI_GetBuff(i);
00200       bp2 = &BI_AuxBuff;
00201       bp2->Buff = &bp->Buff[length];
00202       BI_cmvcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff);
00203 /*
00204  *    Fill in distance vector
00205  */
00206       if (dest == -1) mydist = ctxt->scp->Iam;
00207       else mydist = (ctxt->scp->Np + ctxt->scp->Iam - dest) % ctxt->scp->Np;
00208       dist = (BI_DistType *) &bp->Buff[idist];
00209       for (i=0; i < N; i++) dist[i] = mydist;
00210 
00211 /*
00212  *    Create the MPI datatype holding both user's buffer and distance vector
00213  */
00214       len[0] = len[1] = N;
00215       disp[0] = 0;
00216       disp[1] = idist;
00217       dtypes[0] = MPI_COMPLEX;
00218       dtypes[1] = BI_MpiDistType;
00219 #ifdef ZeroByteTypeBug
00220       if (N > 0)
00221       {
00222 #endif
00223       i = 2;
00224       ierr=MPI_Type_struct(i, len, disp, dtypes, &MyType);
00225       ierr=MPI_Type_commit(&MyType);
00226       bp->N = bp2->N = 1;
00227       bp->dtype = bp2->dtype = MyType;
00228 #ifdef ZeroByteTypeBug
00229       }
00230       else
00231       {
00232          bp->N = bp2->N = 0;
00233          bp->dtype = bp2->dtype = MPI_INT;
00234       }
00235 #endif
00236    }
00237    else
00238    {
00239       vvop = BI_cvvamn2;
00240       length = N * sizeof(SCOMPLEX);
00241 /*
00242  *    If A is contiguous, we can use it as one of our buffers
00243  */
00244       if ( (Mpval(m) == tlda) || (Mpval(n) == 1) )
00245       {
00246          bp = &BI_AuxBuff;
00247          bp->Buff = (char *) A;
00248          bp2 = BI_GetBuff(length);
00249       }
00250       else
00251       {
00252          bp = BI_GetBuff(length*2);
00253          bp2 = &BI_AuxBuff;
00254          bp2->Buff = &bp->Buff[length];
00255          BI_cmvcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff);
00256       }
00257       bp->N = bp2->N = N;
00258       bp->dtype = bp2->dtype = MPI_COMPLEX;
00259    }
00260 
00261    switch(ttop)
00262    {
00263    case ' ':         /* use MPI's reduction by default */
00264       i = 1;
00265       if (Mpval(ldia) == -1)
00266       {
00267          ierr=MPI_Op_create(BI_cMPI_amn2, i, &BlacComb);
00268       }
00269       else
00270       {
00271          ierr=MPI_Op_create(BI_cMPI_amn, i, &BlacComb);
00272          BI_AuxBuff.Len = N;  /* set this up for the MPI OP wrappers */
00273       }
00274 
00275       if (trdest != -1)
00276       {
00277          ierr=MPI_Reduce(bp->Buff, bp2->Buff, bp->N, bp->dtype, BlacComb, dest,
00278                        ctxt->scp->comm);
00279          if (ctxt->scp->Iam == dest)
00280          {
00281             BI_cvmcopy(Mpval(m), Mpval(n), A, tlda, bp2->Buff);
00282             if (Mpval(ldia) != -1)
00283                BI_TransDist(ctxt, tscope, Mpval(m), Mpval(n), rA, cA, tldia,
00284                             (BI_DistType *) &bp2->Buff[idist],
00285                             trdest, Mpval(cdest));
00286          }
00287       }
00288       else
00289       {
00290          ierr=MPI_Allreduce(bp->Buff, bp2->Buff, bp->N, bp->dtype, BlacComb,
00291                           ctxt->scp->comm);
00292          BI_cvmcopy(Mpval(m), Mpval(n), A, tlda, bp2->Buff);
00293          if (Mpval(ldia) != -1)
00294             BI_TransDist(ctxt, tscope, Mpval(m), Mpval(n), rA, cA, tldia,
00295                          (BI_DistType *) &bp2->Buff[idist],
00296                          trdest, Mpval(cdest));
00297       }
00298       ierr=MPI_Op_free(&BlacComb);
00299       if (Mpval(ldia) != -1)
00300 #ifdef ZeroByteTypeBug
00301          if (N > 0)
00302 #endif
00303          ierr=BI_MPI_TYPE_FREE(&MyType);
00304       if (BI_ActiveQ) BI_UpdateBuffs(NULL);
00305       return;
00306       break;
00307    case 'i':
00308       BI_MringComb(ctxt, bp, bp2, N, vvop, dest, 1);
00309       break;
00310    case 'd':
00311       BI_MringComb(ctxt, bp, bp2, N, vvop, dest, -1);
00312       break;
00313    case 's':
00314       BI_MringComb(ctxt, bp, bp2, N, vvop, dest, 2);
00315       break;
00316    case 'm':
00317       BI_MringComb(ctxt, bp, bp2, N, vvop, dest, ctxt->Nr_co);
00318       break;
00319    case '1':
00320    case '2':
00321    case '3':
00322    case '4':
00323    case '5':
00324    case '6':
00325    case '7':
00326    case '8':
00327    case '9':
00328       BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, ttop-47);
00329       break;
00330    case 'f':
00331       BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, FULLCON);
00332       break;
00333    case 't':
00334       BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, ctxt->Nb_co);
00335       break;
00336    case 'h':
00337 /*
00338  *    Use bidirectional exchange if everyone wants answer
00339  */
00340       if ( (trdest == -1) && !(ctxt->TopsCohrnt) )
00341          BI_BeComb(ctxt, bp, bp2, N, vvop);
00342       else
00343          BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, 2);
00344       break;
00345    default :
00346       BI_BlacsErr(Mpval(ConTxt), __LINE__, __FILE__, "Unknown topology '%c'",
00347                   ttop);
00348    }
00349 
00350    if (Mpval(ldia) != -1)
00351 #ifdef ZeroByteTypeBug
00352       if (N > 0)
00353 #endif
00354       ierr=BI_MPI_TYPE_FREE(&MyType);
00355 /*
00356  * If I am selected to receive answer
00357  */
00358    if ( (ctxt->scp->Iam == dest) || (dest == -1) )
00359    {
00360 /*
00361  *    Translate the distances stored in the latter part of bp->Buff into
00362  *    process grid coordinates, and output these coordinates in the
00363  *    arrays rA and cA.
00364  */
00365       if (Mpval(ldia) != -1)
00366          BI_TransDist(ctxt, tscope, Mpval(m), Mpval(n), rA, cA, tldia,
00367                       dist, trdest, Mpval(cdest));
00368 /*
00369  *    Unpack the amn array
00370  */
00371       if (bp != &BI_AuxBuff) BI_cvmcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff);
00372    }
00373 }