ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pdzsum1.f
Go to the documentation of this file.
00001       SUBROUTINE PDZSUM1( N, ASUM, X, IX, JX, DESCX, INCX )
00002 *
00003 *  -- ScaLAPACK auxiliary routine (version 1.7) --
00004 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
00005 *     and University of California, Berkeley.
00006 *     May 1, 1997
00007 *
00008 *     .. Scalar Arguments ..
00009       INTEGER        IX, INCX, JX, N
00010       DOUBLE PRECISION   ASUM
00011 *     ..
00012 *     .. Array Arguments ..
00013       INTEGER       DESCX( * )
00014       COMPLEX*16         X( * )
00015 *     ..
00016 *
00017 *  Purpose
00018 *  =======
00019 *
00020 *  PDZSUM1 returns the sum of absolute values of a complex
00021 *  distributed vector sub( X ) in ASUM,
00022 *
00023 *  where sub( X ) denotes X(IX:IX+N-1,JX:JX), if INCX = 1,
00024 *                         X(IX:IX,JX:JX+N-1), if INCX = M_X.
00025 *
00026 *  Based on PDZASUM from the Level 1 PBLAS. The change is
00027 *  to use the 'genuine' absolute value.
00028 *
00029 *  The serial version of this routine was originally contributed by
00030 *  Nick Higham for use with ZLACON.
00031 *
00032 *  Notes
00033 *  =====
00034 *
00035 *  Each global data object is described by an associated description
00036 *  vector.  This vector stores the information required to establish
00037 *  the mapping between an object element and its corresponding process
00038 *  and memory location.
00039 *
00040 *  Let A be a generic term for any 2D block cyclicly distributed array.
00041 *  Such a global array has an associated description vector DESCA.
00042 *  In the following comments, the character _ should be read as
00043 *  "of the global array".
00044 *
00045 *  NOTATION        STORED IN      EXPLANATION
00046 *  --------------- -------------- --------------------------------------
00047 *  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
00048 *                                 DTYPE_A = 1.
00049 *  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
00050 *                                 the BLACS process grid A is distribu-
00051 *                                 ted over. The context itself is glo-
00052 *                                 bal, but the handle (the integer
00053 *                                 value) may vary.
00054 *  M_A    (global) DESCA( M_ )    The number of rows in the global
00055 *                                 array A.
00056 *  N_A    (global) DESCA( N_ )    The number of columns in the global
00057 *                                 array A.
00058 *  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
00059 *                                 the rows of the array.
00060 *  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
00061 *                                 the columns of the array.
00062 *  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
00063 *                                 row of the array A is distributed.
00064 *  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
00065 *                                 first column of the array A is
00066 *                                 distributed.
00067 *  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
00068 *                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
00069 *
00070 *  Let K be the number of rows or columns of a distributed matrix,
00071 *  and assume that its process grid has dimension p x q.
00072 *  LOCr( K ) denotes the number of elements of K that a process
00073 *  would receive if K were distributed over the p processes of its
00074 *  process column.
00075 *  Similarly, LOCc( K ) denotes the number of elements of K that a
00076 *  process would receive if K were distributed over the q processes of
00077 *  its process row.
00078 *  The values of LOCr() and LOCc() may be determined via a call to the
00079 *  ScaLAPACK tool function, NUMROC:
00080 *          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
00081 *          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
00082 *  An upper bound for these quantities may be computed by:
00083 *          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
00084 *          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
00085 *
00086 *  Because vectors may be viewed as a subclass of matrices, a
00087 *  distributed vector is considered to be a distributed matrix.
00088 *
00089 *  When the result of a vector-oriented PBLAS call is a scalar, it will
00090 *  be made available only within the scope which owns the vector(s)
00091 *  being operated on.  Let X be a generic term for the input vector(s).
00092 *  Then, the processes which receive the answer will be (note that if
00093 *  an operation involves more than one vector, the processes which re-
00094 *  ceive the result will be the union of the following calculation for
00095 *  each vector):
00096 *
00097 *  If N = 1, M_X = 1 and INCX = 1, then one can't determine if a process
00098 *  row or process column owns the vector operand, therefore only the
00099 *  process of coordinate {RSRC_X, CSRC_X} receives the result;
00100 *
00101 *  If INCX = M_X, then sub( X ) is a vector distributed over a process
00102 *  row. Each process part of this row receives the result;
00103 *
00104 *  If INCX = 1, then sub( X ) is a vector distributed over a process
00105 *  column. Each process part of this column receives the result;
00106 *
00107 *  Parameters
00108 *  ==========
00109 *
00110 *  N       (global input) pointer to INTEGER
00111 *          The number of components of the distributed vector sub( X ).
00112 *          N >= 0.
00113 *
00114 *  ASUM    (local output) pointer to DOUBLE PRECISION
00115 *          The sum of absolute values of the distributed vector sub( X )
00116 *          only in its scope.
00117 *
00118 *  X       (local input) COMPLEX*16 array containing the local
00119 *          pieces of a distributed matrix of dimension of at least
00120 *              ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) )
00121 *          This array contains the entries of the distributed vector
00122 *          sub( X ).
00123 *
00124 *  IX      (global input) pointer to INTEGER
00125 *          The global row index of the submatrix of the distributed
00126 *          matrix X to operate on.
00127 *
00128 *  JX      (global input) pointer to INTEGER
00129 *          The global column index of the submatrix of the distributed
00130 *          matrix X to operate on.
00131 *
00132 *  DESCX   (global and local input) INTEGER array of dimension 8.
00133 *          The array descriptor of the distributed matrix X.
00134 *
00135 *  INCX    (global input) pointer to INTEGER
00136 *          The global increment for the elements of X. Only two values
00137 *          of INCX are supported in this version, namely 1 and M_X.
00138 *
00139 *  =====================================================================
00140 *
00141 *     .. Parameters ..
00142       INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
00143      $                   LLD_, MB_, M_, NB_, N_, RSRC_
00144       PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
00145      $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
00146      $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
00147       DOUBLE PRECISION   ZERO
00148       PARAMETER          ( ZERO = 0.0D+0 )
00149 *     ..
00150 *     .. Local Scalars ..
00151       CHARACTER          CCTOP, RCTOP
00152       INTEGER            ICOFF, ICTXT, IIX, IROFF, IXCOL, IXROW, JJX,
00153      $                   LDX, MYCOL, MYROW, NP, NPCOL, NPROW, NQ
00154 *     ..
00155 *     .. External Subroutines ..
00156       EXTERNAL           BLACS_GRIDINFO, DGSUM2D, INFOG2L, PB_TOPGET
00157 *     ..
00158 *     .. External Functions ..
00159       INTEGER            NUMROC
00160       DOUBLE PRECISION   DZSUM1
00161       EXTERNAL           DZSUM1, NUMROC
00162 *     ..
00163 *     .. Intrinsic Functions ..
00164       INTRINSIC          ABS, MOD
00165 *     ..
00166 *     .. Executable Statements ..
00167 *
00168       ICTXT = DESCX( CTXT_ )
00169       CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
00170 *
00171 *     Quick return if possible
00172 *
00173       ASUM = ZERO
00174       IF( N.LE.0 )
00175      $   RETURN
00176 *
00177       LDX = DESCX( LLD_ )
00178       CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX,
00179      $              IXROW, IXCOL )
00180 *
00181       IF( INCX.EQ.1 .AND. DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN
00182          IF( MYROW.EQ.IXROW .AND. MYCOL.EQ.IXCOL ) THEN
00183             ASUM = ABS( X( IIX+(JJX-1)*LDX ) )
00184          END IF
00185          RETURN
00186       END IF
00187 *
00188       IF( INCX.EQ.DESCX( M_ ) ) THEN
00189 *
00190 *        X is distributed over a process row
00191 *
00192          IF( MYROW.EQ.IXROW ) THEN
00193             CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', RCTOP )
00194             ICOFF = MOD( JX-1, DESCX( NB_ ) )
00195             NQ = NUMROC( N+ICOFF, DESCX( NB_ ), MYCOL, IXCOL, NPCOL )
00196             IF( MYCOL.EQ.IXCOL )
00197      $         NQ = NQ-ICOFF
00198             ASUM = DZSUM1( NQ, X( IIX+(JJX-1)*LDX ), LDX )
00199             CALL DGSUM2D( ICTXT, 'Rowwise', RCTOP, 1, 1, ASUM, 1,
00200      $                    -1, MYCOL )
00201          END IF
00202 *
00203       ELSE
00204 *
00205 *        X is distributed over a process column
00206 *
00207          IF( MYCOL.EQ.IXCOL ) THEN
00208             CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', CCTOP )
00209             IROFF = MOD( IX-1, DESCX( MB_ ) )
00210             NP = NUMROC( N+IROFF, DESCX( MB_ ), MYROW, IXROW, NPROW )
00211             IF( MYROW.EQ.IXROW )
00212      $         NP = NP-IROFF
00213             ASUM = DZSUM1( NP, X( IIX+(JJX-1)*LDX ), 1 )
00214             CALL DGSUM2D( ICTXT, 'Columnwise', CCTOP, 1, 1, ASUM, 1,
00215      $                    -1, MYCOL )
00216          END IF
00217 *
00218       END IF
00219 *
00220       RETURN
00221 *
00222 *     End of PDZSUM1
00223 *
00224       END