pdzsum1.f
      SUBROUTINE PDZSUM1( N, ASUM, X, IX, JX, DESCX, INCX )
*
*  -- ScaLAPACK auxiliary routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     May 1, 1997
*
*     .. Scalar Arguments ..
      INTEGER            IX, INCX, JX, N
      DOUBLE PRECISION   ASUM
*     ..
*     .. Array Arguments ..
      INTEGER            DESCX( * )
      COMPLEX*16         X( * )
*     ..
*
*  Purpose
*  =======
*
*  PDZSUM1 returns in ASUM the sum of absolute values of a complex
*  distributed vector sub( X ),
*
*  where sub( X ) denotes X(IX:IX+N-1,JX:JX), if INCX = 1,
*                         X(IX:IX,JX:JX+N-1), if INCX = M_X.
*
*  Based on PDZASUM from the Level 1 PBLAS.  The change is
*  to use the 'genuine' absolute value.
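*  Here 'genuine' means that each entry contributes its true modulus
*  SQRT( DBLE( X(i) )**2 + DIMAG( X(i) )**2 ), whereas DZASUM-style
*  routines sum ABS( DBLE( X(i) ) ) + ABS( DIMAG( X(i) ) ) instead.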
*
*  The serial version of this routine was originally contributed by
*  Nick Higham for use with ZLACON.
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclically distributed
*  array.  Such a global array has an associated description vector
*  DESCA.  In the following comments, the character _ should be read
*  as "of the global array".
*
*  NOTATION         STORED IN       EXPLANATION
*  ---------------- --------------- ----------------------------------
*  DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type.  In this
*                                   case, DTYPE_A = 1.
*  CTXT_A  (global) DESCA( CTXT_ )  The BLACS context handle,
*                                   indicating the BLACS process grid
*                                   A is distributed over.  The
*                                   context itself is global, but the
*                                   handle (the integer value) may
*                                   vary.
*  M_A     (global) DESCA( M_ )     The number of rows in the global
*                                   array A.
*  N_A     (global) DESCA( N_ )     The number of columns in the
*                                   global array A.
*  MB_A    (global) DESCA( MB_ )    The blocking factor used to
*                                   distribute the rows of the array.
*  NB_A    (global) DESCA( NB_ )    The blocking factor used to
*                                   distribute the columns of the
*                                   array.
*  RSRC_A  (global) DESCA( RSRC_ )  The process row over which the
*                                   first row of the array A is
*                                   distributed.
*  CSRC_A  (global) DESCA( CSRC_ )  The process column over which the
*                                   first column of the array A is
*                                   distributed.
*  LLD_A   (local)  DESCA( LLD_ )   The leading dimension of the
*                                   local array.
*                                   LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
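*
*  For example, with M = 10, MB_A = 3, RSRC_A = 0 and NPROW = 2, the
*  four row blocks of sizes ( 3, 3, 3, 1 ) alternate between the two
*  process rows, so that
*          LOCr( 10 ) = NUMROC( 10, 3, 0, 0, 2 ) = 3 + 3 = 6,
*          LOCr( 10 ) = NUMROC( 10, 3, 1, 0, 2 ) = 3 + 1 = 4,
*  while the upper bound gives ceil( ceil(10/3)/2 )*3 = 6 for both.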
*
*  Because vectors may be viewed as a subclass of matrices, a
*  distributed vector is considered to be a distributed matrix.
*
*  When the result of a vector-oriented PBLAS call is a scalar, it will
*  be made available only within the scope which owns the vector(s)
*  being operated on.  Let X be a generic term for the input vector(s).
*  Then, the processes which receive the answer will be (note that if
*  an operation involves more than one vector, the processes which
*  receive the result will be the union of the following calculation
*  for each vector):
*
*  If N = 1, M_X = 1 and INCX = 1, then one cannot determine whether a
*  process row or a process column owns the vector operand, so only
*  the process of coordinates {RSRC_X, CSRC_X} receives the result;
*
*  If INCX = M_X, then sub( X ) is a vector distributed over a process
*  row.  Each process in this row receives the result;
*
*  If INCX = 1, then sub( X ) is a vector distributed over a process
*  column.  Each process in this column receives the result;
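*
*  For example, on a 2 x 3 process grid with INCX = M_X and sub( X )
*  residing in process row 0, the processes {0,0}, {0,1} and {0,2} all
*  hold ASUM on return, while the processes of row 1 return ASUM = 0.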
*
*  Parameters
*  ==========
*
*  N       (global input) pointer to INTEGER
*          The number of components of the distributed vector sub( X ).
*          N >= 0.
*
*  ASUM    (local output) pointer to DOUBLE PRECISION
*          The sum of absolute values of the distributed vector sub( X )
*          only in its scope.
*
*  X       (local input) COMPLEX*16 array containing the local
*          pieces of a distributed matrix of dimension of at least
*          ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) ).
*          This array contains the entries of the distributed vector
*          sub( X ).
*
*  IX      (global input) pointer to INTEGER
*          The global row index of the submatrix of the distributed
*          matrix X to operate on.
*
*  JX      (global input) pointer to INTEGER
*          The global column index of the submatrix of the distributed
*          matrix X to operate on.
*
*  DESCX   (global and local input) INTEGER array of dimension DLEN_
*          ( = 9 ).  The array descriptor of the distributed matrix X.
*
*  INCX    (global input) pointer to INTEGER
*          The global increment for the elements of X.  Only two values
*          of INCX are supported in this version, namely 1 and M_X.
*
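*  Example
*  =======
*
*  A minimal calling sketch (hypothetical values; ICTXT is assumed to
*  be a valid BLACS context obtained from BLACS_GRIDINIT):
*
*     INTEGER            DESCX( 9 ), ICTXT, INFO
*     DOUBLE PRECISION   ASUM
*     COMPLEX*16         X( 100 )
*     ...
*     CALL DESCINIT( DESCX, 100, 1, 4, 1, 0, 0, ICTXT, 100, INFO )
*     CALL PDZSUM1( 100, ASUM, X, 1, 1, DESCX, 1 )
*
*  Since INCX = 1, sub( X ) is distributed over a process column, and
*  every process in the column owning X(1:100,1) returns the sum of
*  the moduli of those 100 entries in ASUM.
*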
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
     $                   LLD_, MB_, M_, NB_, N_, RSRC_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
      DOUBLE PRECISION   ZERO
      PARAMETER          ( ZERO = 0.0D+0 )
*     ..
*     .. Local Scalars ..
      CHARACTER          CCTOP, RCTOP
      INTEGER            ICOFF, ICTXT, IIX, IROFF, IXCOL, IXROW, JJX,
     $                   LDX, MYCOL, MYROW, NP, NPCOL, NPROW, NQ
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, DGSUM2D, INFOG2L, PB_TOPGET
*     ..
*     .. External Functions ..
      INTEGER            NUMROC
      DOUBLE PRECISION   DZSUM1
      EXTERNAL           DZSUM1, NUMROC
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          ABS, MOD
*     ..
*     .. Executable Statements ..
*
      ICTXT = DESCX( CTXT_ )
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*
*     Quick return if possible
*
      ASUM = ZERO
      IF( N.LE.0 )
     $   RETURN
*
      LDX = DESCX( LLD_ )
      CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX,
     $              IXROW, IXCOL )
*
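*     Degenerate case: a 1 x 1 sub( X ) with M_X = 1 belongs to neither
*     a process row scope nor a process column scope, so only the
*     process owning the single entry computes ASUM.
*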
      IF( INCX.EQ.1 .AND. DESCX( M_ ).EQ.1 .AND. N.EQ.1 ) THEN
         IF( MYROW.EQ.IXROW .AND. MYCOL.EQ.IXCOL ) THEN
            ASUM = ABS( X( IIX+(JJX-1)*LDX ) )
         END IF
         RETURN
      END IF
*
      IF( INCX.EQ.DESCX( M_ ) ) THEN
*
*        X is distributed over a process row
*
         IF( MYROW.EQ.IXROW ) THEN
            CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', RCTOP )
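*
*           ICOFF is the offset of column JX within its NB_-sized
*           block: NUMROC is applied to the padded length N+ICOFF and
*           the padding is then subtracted off again on the process
*           column owning JX.  (The columnwise branch below mirrors
*           this with IROFF and MB_.)
*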
            ICOFF = MOD( JX-1, DESCX( NB_ ) )
            NQ = NUMROC( N+ICOFF, DESCX( NB_ ), MYCOL, IXCOL, NPCOL )
            IF( MYCOL.EQ.IXCOL )
     $         NQ = NQ-ICOFF
            ASUM = DZSUM1( NQ, X( IIX+(JJX-1)*LDX ), LDX )
            CALL DGSUM2D( ICTXT, 'Rowwise', RCTOP, 1, 1, ASUM, 1,
     $                    -1, MYCOL )
         END IF
*
      ELSE
*
*        X is distributed over a process column
*
         IF( MYCOL.EQ.IXCOL ) THEN
            CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', CCTOP )
            IROFF = MOD( IX-1, DESCX( MB_ ) )
            NP = NUMROC( N+IROFF, DESCX( MB_ ), MYROW, IXROW, NPROW )
            IF( MYROW.EQ.IXROW )
     $         NP = NP-IROFF
            ASUM = DZSUM1( NP, X( IIX+(JJX-1)*LDX ), 1 )
            CALL DGSUM2D( ICTXT, 'Columnwise', CCTOP, 1, 1, ASUM, 1,
     $                    -1, MYCOL )
         END IF
*
      END IF
*
      RETURN
*
*     End of PDZSUM1
*
      END