SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
PB_Cinfog2l.c
Go to the documentation of this file.
/* ---------------------------------------------------------------------
*
*  -- PBLAS auxiliary routine (version 2.0) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     April 1, 1998
*
*  ---------------------------------------------------------------------
*/
/*
*  Include files
*/
13#include "../pblas.h"
14#include "../PBpblas.h"
15#include "../PBtools.h"
16#include "../PBblacs.h"
17#include "../PBblas.h"
18
19#ifdef __STDC__
20void PB_Cinfog2l( Int I, Int J, Int * DESC, Int NPROW, Int NPCOL,
21 Int MYROW, Int MYCOL, Int * II, Int * JJ,
22 Int * PROW, Int * PCOL )
23#else
24void PB_Cinfog2l( I, J, DESC, NPROW, NPCOL, MYROW, MYCOL, II, JJ,
25 PROW, PCOL )
26 Int I, * II, J, * JJ, MYCOL, MYROW, NPCOL, NPROW, * PCOL,
27 * PROW;
28/*
29* .. Scalar Arguments ..
30*/
31/*
32* .. Array Arguments ..
33*/
34 Int * DESC;
35#endif
36{
37/*
38* Purpose
39* =======
40*
41* PB_Cinfog2l computes the starting local index II, JJ corresponding to
42* the submatrix starting globally at the entry pointed by I, J. This
43* routine returns the coordinates in the grid of the process owning the
44* matrix entry of global indexes I, J, namely PROW and PCOL.
45*
46* Notes
47* =====
48*
49* A description vector is associated with each 2D block-cyclicly dis-
50* tributed matrix. This vector stores the information required to
51* establish the mapping between a matrix entry and its corresponding
52* process and memory location.
53*
54* In the following comments, the character _ should be read as
55* "of the distributed matrix". Let A be a generic term for any 2D
56* block cyclicly distributed matrix. Its description vector is DESC_A:
57*
58* NOTATION STORED IN EXPLANATION
59* ---------------- --------------- ------------------------------------
60* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
61* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
62* the NPROW x NPCOL BLACS process grid
63* A is distributed over. The context
64* itself is global, but the handle
65* (the integer value) may vary.
66* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
67* ted matrix A, M_A >= 0.
68* N_A (global) DESCA[ N_ ] The number of columns in the distri-
69* buted matrix A, N_A >= 0.
70* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
71* block of the matrix A, IMB_A > 0.
72* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
73* left block of the matrix A,
74* INB_A > 0.
75* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
76* bute the last M_A-IMB_A rows of A,
77* MB_A > 0.
78* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
79* bute the last N_A-INB_A columns of
80* A, NB_A > 0.
81* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
82* row of the matrix A is distributed,
83* NPROW > RSRC_A >= 0.
84* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
85* first column of A is distributed.
86* NPCOL > CSRC_A >= 0.
87* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
88* array storing the local blocks of
89* the distributed matrix A,
90* IF( Lc( 1, N_A ) > 0 )
91* LLD_A >= MAX( 1, Lr( 1, M_A ) )
92* ELSE
93* LLD_A >= 1.
94*
95* Let K be the number of rows of a matrix A starting at the global in-
96* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
97* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
98* receive if these K rows were distributed over NPROW processes. If K
99* is the number of columns of a matrix A starting at the global index
100* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
101* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
102* these K columns were distributed over NPCOL processes.
103*
104* The values of Lr() and Lc() may be determined via a call to the func-
105* tion PB_Cnumroc:
106* Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
107* Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
108*
109* Arguments
110* =========
111*
112* I (global input) INTEGER
113* On entry, I specifies the global starting row index of the
114* submatrix. I must at least zero.
115*
116* J (global input) INTEGER
117* On entry, J specifies the global starting column index of
118* the submatrix. J must at least zero.
119*
120* DESC (global and local input) INTEGER array
121* On entry, DESC is an integer array of dimension DLEN_. This
122* is the array descriptor of the underlying matrix.
123*
124* NPROW (global input) INTEGER
125* On entry, NPROW specifies the total number of process rows
126* over which the matrix is distributed. NPROW must be at least
127* one.
128*
129* NPCOL (global input) INTEGER
130* On entry, NPCOL specifies the total number of process columns
131* over which the matrix is distributed. NPCOL must be at least
132* one.
133*
134* MYROW (local input) INTEGER
135* On entry, MYROW specifies the row coordinate of the process
136* whose local index II is determined. MYROW must be at least
137* zero and strictly less than NPROW.
138*
139* MYCOL (local input) INTEGER
140* On entry, MYCOL specifies the column coordinate of the pro-
141* cess whose local index JJ is determined. MYCOL must be at
142* least zero and strictly less than NPCOL.
143*
144* II (local output) INTEGER
145* On exit, II specifies the local starting row index of the
146* submatrix. On exit, II is at least zero.
147*
148* JJ (local output) INTEGER
149* On exit, JJ specifies the local starting column index of the
150* submatrix. On exit, JJ is at least zero.
151*
152* PROW (global output) INTEGER
153* On exit, PROW specifies the row coordinate of the process
154* that possesses the first row of the submatrix. On exit, PROW
155* is -1 if DESC( RSRC_ ) is -1 on input, and, at least zero
156* and strictly less than NPROW otherwise.
157*
158* PCOL (global output) INTEGER
159* On exit, PCOL specifies the column coordinate of the process
160* that possesses the first column of the submatrix. On exit,
161* PCOL is -1 if DESC( CSRC_ ) is -1 on input, and, at least
162* zero and strictly less than NPCOL otherwise.
163*
164* -- Written on April 1, 1998 by
165* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
166*
167* ---------------------------------------------------------------------
168*/
169/*
170* .. Local Scalars ..
171*/
172 Int ilocblk, imb, inb, mb, mydist, nb, nblocks, csrc, rsrc;
173/* ..
174* .. Executable Statements ..
175*
176*/
177/*
178* Retrieve the row distribution parameters
179*/
180 imb = DESC[IMB_ ];
181 *PROW = DESC[RSRC_];
182
183 if( ( *PROW == -1 ) || ( NPROW == 1 ) )
184 {
185/*
186* The data is not distributed, or there is just one process row in the grid.
187*/
188 *II = I;
189 }
190 else if( I < imb )
191 {
192/*
193* I refers to an entry in the first block of rows
194*/
195 *II = ( MYROW == *PROW ? I : 0 );
196 }
197 else
198 {
199 mb = DESC[MB_];
200 rsrc = *PROW;
201/*
202* The discussion goes as follows: compute my distance from the source process
203* so that within this process coordinate system, the source process is the
204* process such that mydist = 0, or equivalently MYROW == rsrc.
205*
206* Find out the global coordinate of the block I belongs to (nblocks), as well
207* as the minimum local number of blocks that every process has.
208*
209* when mydist < nblocks - ilocblk * NPROCS, I own ilocblk + 1 full blocks,
210* when mydist > nblocks - ilocblk * NPROCS, I own ilocblk full blocks,
211* when mydist = nblocks - ilocblk * NPROCS, I own ilocblk full blocks
212* but not I, or I own ilocblk + 1 blocks and the entry I refers to.
213*/
214 if( MYROW == rsrc )
215 {
216/*
217* I refers to an entry that is not in the first block, find out which process
218* has it.
219*/
220 nblocks = ( I - imb ) / mb + 1;
221 *PROW += nblocks;
222 *PROW -= ( *PROW / NPROW ) * NPROW;
223/*
224* Since mydist = 0 and nblocks - ilocblk * NPROW >= 0, there are only three
225* possible cases:
226*
227* 1) When 0 = mydist = nblocks - ilocblk * NPROW = 0 and I don't own I, in
228* which case II = IMB + ( ilocblk - 1 ) * MB. Note that this case cannot
229* happen when ilocblk is zero, since nblocks is at least one.
230*
231* 2) When 0 = mydist = nblocks - ilocblk * NPROW = 0 and I own I, in which
232* case I and II can respectively be written as IMB + (nblocks-1)*NB + IL
233* and IMB + (ilocblk-1) * MB + IL. That is II = I + (ilocblk-nblocks)*MB.
234* Note that this case cannot happen when ilocblk is zero, since nblocks
235* is at least one.
236*
237* 3) mydist = 0 < nblocks - ilocblk * NPROW, the source process owns
238* ilocblk+1 full blocks, and therefore II = IMB + ilocblk * MB. Note
239* that when ilocblk is zero, II is just IMB.
240*/
241 if( nblocks < NPROW )
242 {
243 *II = imb;
244 }
245 else
246 {
247 ilocblk = nblocks / NPROW;
248 if( ilocblk * NPROW >= nblocks )
249 {
250 *II = ( ( MYROW == *PROW ) ? I + ( ilocblk - nblocks ) * mb :
251 imb + ( ilocblk - 1 ) * mb );
252 }
253 else
254 {
255 *II = imb + ilocblk * mb;
256 }
257 }
258 }
259 else
260 {
261/*
262* I refers to an entry that is not in the first block, find out which process
263* has it.
264*/
265 nblocks = ( I -= imb ) / mb + 1;
266 *PROW += nblocks;
267 *PROW -= ( *PROW / NPROW ) * NPROW;
268/*
269* Compute my distance from the source process so that within this process
270* coordinate system, the source process is the process such that mydist=0.
271*/
272 if( ( mydist = MYROW - rsrc ) < 0 ) mydist += NPROW;
273/*
274* When mydist < nblocks - ilocblk * NPROW, I own ilocblk + 1 full blocks of
275* size MB since I am not the source process, i.e. II = ( ilocblk + 1 ) * MB.
276* When mydist >= nblocks - ilocblk * NPROW and I don't own I, I own ilocblk
277* full blocks of size MB, i.e. II = ilocblk * MB, otherwise I own ilocblk
278* blocks and I, in which case I can be written as IMB + (nblocks-1)*MB + IL
279* and II = ilocblk*MB + IL = I - IMB + ( ilocblk - nblocks + 1 )*MB.
280*/
281 if( nblocks < NPROW )
282 {
283 mydist -= nblocks;
284 *II = ( ( mydist < 0 ) ? mb :
285 ( ( MYROW == *PROW ) ? I + ( 1 - nblocks ) * mb : 0 ) );
286 }
287 else
288 {
289 ilocblk = nblocks / NPROW;
290 mydist -= nblocks - ilocblk * NPROW;
291 *II = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * mb :
292 ( ( MYROW == *PROW ) ?
293 ( ilocblk - nblocks + 1 ) * mb + I : ilocblk * mb ) );
294 }
295 }
296 }
297/*
298* Idem for the columns
299*/
300 inb = DESC[INB_ ];
301 *PCOL = DESC[CSRC_];
302
303 if( ( *PCOL == -1 ) || ( NPCOL == 1 ) )
304 {
305 *JJ = J;
306 }
307 else if( J < inb )
308 {
309 *JJ = ( MYCOL == *PCOL ? J : 0 );
310 }
311 else
312 {
313 nb = DESC[NB_];
314 csrc = *PCOL;
315
316 if( MYCOL == csrc )
317 {
318 nblocks = ( J - inb ) / nb + 1;
319 *PCOL += nblocks;
320 *PCOL -= ( *PCOL / NPCOL ) * NPCOL;
321
322 if( nblocks < NPCOL )
323 {
324 *JJ = inb;
325 }
326 else
327 {
328 ilocblk = nblocks / NPCOL;
329 if( ilocblk * NPCOL >= nblocks )
330 {
331 *JJ = ( ( MYCOL == *PCOL ) ? J + ( ilocblk - nblocks ) * nb :
332 inb + ( ilocblk - 1 ) * nb );
333 }
334 else
335 {
336 *JJ = inb + ilocblk * nb;
337 }
338 }
339 }
340 else
341 {
342 nblocks = ( J -= inb ) / nb + 1;
343 *PCOL += nblocks;
344 *PCOL -= ( *PCOL / NPCOL ) * NPCOL;
345
346 if( ( mydist = MYCOL - csrc ) < 0 ) mydist += NPCOL;
347
348 if( nblocks < NPCOL )
349 {
350 mydist -= nblocks;
351 *JJ = ( ( mydist < 0 ) ? nb : ( ( MYCOL == *PCOL ) ?
352 J + ( 1 - nblocks )*nb : 0 ) );
353 }
354 else
355 {
356 ilocblk = nblocks / NPCOL;
357 mydist -= nblocks - ilocblk * NPCOL;
358 *JJ = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * nb :
359 ( ( MYCOL == *PCOL ) ?
360 ( ilocblk - nblocks + 1 ) * nb + J : ilocblk * nb ) );
361 }
362 }
363 }
364/*
365* End of PB_Cinfog2l
366*/
367}
#define Int
Definition Bconfig.h:22
#define MB_
Definition PBtools.h:43
void PB_Cinfog2l()
#define RSRC_
Definition PBtools.h:45
#define INB_
Definition PBtools.h:42
#define CSRC_
Definition PBtools.h:46
#define IMB_
Definition PBtools.h:41
#define NB_
Definition PBtools.h:44