ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pzmatadd.f
Go to the documentation of this file.
1  SUBROUTINE pzmatadd( M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC,
2  $ JC, DESCC )
3 *
4 * -- ScaLAPACK tools routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 1, 1997
8 *
9 * .. Scalar Arguments ..
10  INTEGER IA, IC, JA, JC, M, N
11  COMPLEX*16 ALPHA, BETA
12 * ..
13 * .. Array Arguments ..
14  INTEGER DESCA( * ), DESCC( * )
15  COMPLEX*16 A( * ), C( * )
16 * ..
17 *
18 * Purpose
19 * =======
20 *
21 * PZMATADD performs a distributed matrix-matrix addition
22 *
23 * sub( C ) := alpha * sub( A ) + beta * sub( C ),
24 *
25 * where sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1) and sub( A ) denotes
26 * A(IA:IA+M-1,JA:JA+N-1). No communications are performed in this
27 * routine, the arrays are supposed to be aligned.
28 *
29 * Notes
30 * =====
31 *
32 * Each global data object is described by an associated description
33 * vector. This vector stores the information required to establish
34 * the mapping between an object element and its corresponding process
35 * and memory location.
36 *
37 * Let A be a generic term for any 2D block cyclicly distributed array.
38 * Such a global array has an associated description vector DESCA.
39 * In the following comments, the character _ should be read as
40 * "of the global array".
41 *
42 * NOTATION STORED IN EXPLANATION
43 * --------------- -------------- --------------------------------------
44 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
45 * DTYPE_A = 1.
46 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
47 * the BLACS process grid A is distribu-
48 * ted over. The context itself is glo-
49 * bal, but the handle (the integer
50 * value) may vary.
51 * M_A (global) DESCA( M_ ) The number of rows in the global
52 * array A.
53 * N_A (global) DESCA( N_ ) The number of columns in the global
54 * array A.
55 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
56 * the rows of the array.
57 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
58 * the columns of the array.
59 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
60 * row of the array A is distributed.
61 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
62 * first column of the array A is
63 * distributed.
64 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
65 * array. LLD_A >= MAX(1,LOCr(M_A)).
66 *
67 * Let K be the number of rows or columns of a distributed matrix,
68 * and assume that its process grid has dimension p x q.
69 * LOCr( K ) denotes the number of elements of K that a process
70 * would receive if K were distributed over the p processes of its
71 * process column.
72 * Similarly, LOCc( K ) denotes the number of elements of K that a
73 * process would receive if K were distributed over the q processes of
74 * its process row.
75 * The values of LOCr() and LOCc() may be determined via a call to the
76 * ScaLAPACK tool function, NUMROC:
77 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
78 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
79 * An upper bound for these quantities may be computed by:
80 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
81 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
82 *
83 * Arguments
84 * =========
85 *
86 * M (global input) INTEGER
87 * The number of rows to be operated on i.e the number of rows
88 * of the distributed submatrices sub( A ) and sub( C ). M >= 0.
89 *
90 * N (global input) INTEGER
91 * The number of columns to be operated on i.e the number of
92 * columns of the distributed submatrices sub( A ) and
93 * sub( C ). N >= 0.
94 *
95 * ALPHA (global input) COMPLEX*16
96 * The scalar ALPHA.
97 *
98 * A (local input) COMPLEX*16 pointer into the local memory
99 * to a local array of dimension (LLD_A, LOCc(JA+N-1) ). This
100 * array contains the local pieces of the distributed matrix
101 * sub( A ).
102 *
103 * IA (global input) INTEGER
104 * The row index in the global array A indicating the first
105 * row of sub( A ).
106 *
107 * JA (global input) INTEGER
108 * The column index in the global array A indicating the
109 * first column of sub( A ).
110 *
111 * DESCA (global and local input) INTEGER array of dimension DLEN_.
112 * The array descriptor for the distributed matrix A.
113 *
114 * BETA (global input) COMPLEX*16
115 * The scalar BETA.
116 *
117 * C (local input/local output) COMPLEX*16 pointer into the
118 * local memory to an array of dimension (LLD_C,LOCc(JC+N-1)).
119 * This array contains the local pieces of the distributed
120 * matrix sub( C ). On exit, this array contains the local
121 * pieces of the resulting distributed matrix.
122 *
123 * IC (global input) INTEGER
124 * The row index in the global array C indicating the first
125 * row of sub( C ).
126 *
127 * JC (global input) INTEGER
128 * The column index in the global array C indicating the
129 * first column of sub( C ).
130 *
131 * DESCC (global and local input) INTEGER array of dimension DLEN_.
132 * The array descriptor for the distributed matrix C.
133 *
134 * =====================================================================
135 *
136 * .. Parameters ..
137  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
138  $ lld_, mb_, m_, nb_, n_, rsrc_
139  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
140  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
141  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
142  COMPLEX*16 ZERO, ONE
143  parameter( zero = ( 0.0d+0, 0.0d+0 ),
144  $ one = ( 1.0d+0, 0.0d+0 ) )
145 * ..
146 * .. Local Scalars ..
147  INTEGER I, IACOL, IAROW, ICCOL, ICOFF, ICROW, IIA,
148  $ iic, ioffa, ioffc, iroff, j, jja, jjc, lda,
149  $ ldc, mp, mycol, myrow, npcol, nprow, nq
150 * ..
151 * .. External Subroutines ..
152  EXTERNAL blacs_gridinfo, infog2l
153 * ..
154 * .. External Functions ..
155  INTEGER NUMROC
156  EXTERNAL numroc
157 * ..
158 * .. Executable Statements ..
159 *
160 * Get grid parameters.
161 *
162  CALL blacs_gridinfo( desca(ctxt_), nprow, npcol, myrow, mycol )
163 *
164 * Quick return if possible.
165 *
166  IF( (m.EQ.0).OR.(n.EQ.0).OR.
167  $ ((alpha.EQ.zero).AND.(beta.EQ.one)) )
168  $ RETURN
169 *
170  CALL infog2l( ia, ja, desca, nprow, npcol, myrow, mycol,
171  $ iia, jja, iarow, iacol )
172  CALL infog2l( ic, jc, descc, nprow, npcol, myrow, mycol,
173  $ iic, jjc, icrow, iccol )
174 *
175  iroff = mod( ia-1, desca(mb_) )
176  icoff = mod( ja-1, desca(nb_) )
177  mp = numroc( m+iroff, desca(mb_), myrow, iarow, nprow )
178  nq = numroc( n+icoff, desca(nb_), mycol, iacol, npcol )
179  IF( myrow.EQ.iarow )
180  $ mp = mp-iroff
181  IF( mycol.EQ.iacol )
182  $ nq = nq-icoff
183  lda = desca(lld_)
184  ldc = descc(lld_)
185 *
186  IF( nq.EQ.1 ) THEN
187  IF( beta.EQ.zero ) THEN
188  IF( alpha.EQ.zero ) THEN
189  ioffc = iic + (jjc-1)*ldc
190  DO 10 i = ioffc, ioffc+mp-1
191  c( i ) = zero
192  10 CONTINUE
193  ELSE
194  ioffa = iia + (jja-1)*lda
195  ioffc = iic + (jjc-1)*ldc
196  DO 20 i = ioffc, ioffc+mp-1
197  c( i ) = alpha * a( ioffa )
198  ioffa = ioffa + 1
199  20 CONTINUE
200  END IF
201  ELSE
202  IF( alpha.EQ.one ) THEN
203  IF( beta.EQ.one ) THEN
204  ioffa = iia + (jja-1)*lda
205  ioffc = iic + (jjc-1)*ldc
206  DO 30 i = ioffc, ioffc+mp-1
207  c( i ) = c( i ) + a( ioffa )
208  ioffa = ioffa + 1
209  30 CONTINUE
210  ELSE
211  ioffa = iia + (jja-1)*lda
212  ioffc = iic + (jjc-1)*ldc
213  DO 40 i = ioffc, ioffc+mp-1
214  c( i ) = beta * c( i ) + a( ioffa )
215  ioffa = ioffa + 1
216  40 CONTINUE
217  END IF
218  ELSE IF( beta.EQ.one ) THEN
219  ioffa = iia + (jja-1)*lda
220  ioffc = iic + (jjc-1)*ldc
221  DO 50 i = ioffc, ioffc+mp-1
222  c( i ) = c( i ) + alpha * a( ioffa )
223  ioffa = ioffa + 1
224  50 CONTINUE
225  ELSE
226  ioffa = iia + (jja-1)*lda
227  ioffc = iic + (jjc-1)*ldc
228  DO 60 i = ioffc, ioffc+mp-1
229  c( i ) = beta * c( i ) + alpha * a( ioffa )
230  ioffa = ioffa + 1
231  60 CONTINUE
232  END IF
233  END IF
234  ELSE
235  IF( beta.EQ.zero ) THEN
236  IF( alpha.EQ.zero ) THEN
237  ioffc = iic+(jjc-1)*ldc
238  DO 80 j = 1, nq
239  DO 70 i = ioffc, ioffc+mp-1
240  c( i ) = zero
241  70 CONTINUE
242  ioffc = ioffc + ldc
243  80 CONTINUE
244  ELSE
245  ioffa = iia+(jja-1)*lda
246  ioffc = iic+(jjc-1)*ldc
247  DO 100 j = 1, nq
248  DO 90 i = ioffc, ioffc+mp-1
249  c( i ) = alpha * a( ioffa )
250  ioffa = ioffa + 1
251  90 CONTINUE
252  ioffa = ioffa + lda - mp
253  ioffc = ioffc + ldc
254  100 CONTINUE
255  END IF
256  ELSE
257  IF( alpha.EQ.one ) THEN
258  IF( beta.EQ.one ) THEN
259  ioffa = iia+(jja-1)*lda
260  ioffc = iic+(jjc-1)*ldc
261  DO 120 j = 1, nq
262  DO 110 i = ioffc, ioffc+mp-1
263  c( i ) = c( i ) + a( ioffa )
264  ioffa = ioffa + 1
265  110 CONTINUE
266  ioffa = ioffa + lda - mp
267  ioffc = ioffc + ldc
268  120 CONTINUE
269  ELSE
270  ioffa = iia+(jja-1)*lda
271  ioffc = iic+(jjc-1)*ldc
272  DO 140 j = 1, nq
273  DO 130 i = ioffc, ioffc+mp-1
274  c( i ) = beta * c( i ) + a( ioffa )
275  ioffa = ioffa + 1
276  130 CONTINUE
277  ioffa = ioffa + lda - mp
278  ioffc = ioffc + ldc
279  140 CONTINUE
280  END IF
281  ELSE IF( beta.EQ.one ) THEN
282  ioffa = iia+(jja-1)*lda
283  ioffc = iic+(jjc-1)*ldc
284  DO 160 j = 1, nq
285  DO 150 i = ioffc, ioffc+mp-1
286  c( i ) = c( i ) + alpha * a( ioffa )
287  ioffa = ioffa + 1
288  150 CONTINUE
289  ioffa = ioffa + lda - mp
290  ioffc = ioffc + ldc
291  160 CONTINUE
292  ELSE
293  ioffa = iia+(jja-1)*lda
294  ioffc = iic+(jjc-1)*ldc
295  DO 180 j = 1, nq
296  DO 170 i = ioffc, ioffc+mp-1
297  c( i ) = beta * c( i ) + alpha * a( ioffa )
298  ioffa = ioffa + 1
299  170 CONTINUE
300  ioffa = ioffa + lda - mp
301  ioffc = ioffc + ldc
302  180 CONTINUE
303  END IF
304  END IF
305  END IF
306 *
307  RETURN
308 *
309 * End of PZMATADD
310 *
311  END
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pzmatadd
subroutine pzmatadd(M, N, ALPHA, A, IA, JA, DESCA, BETA, C, IC, JC, DESCC)
Definition: pzmatadd.f:3