SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
PB_Cainfog2l.c
Go to the documentation of this file.
1/* ---------------------------------------------------------------------
2*
3* -- PBLAS auxiliary routine (version 2.0) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* April 1, 1998
7*
8* ---------------------------------------------------------------------
9*/
10/*
11* Include files
12*/
13#include "../pblas.h"
14#include "../PBpblas.h"
15#include "../PBtools.h"
16#include "../PBblacs.h"
17#include "../PBblas.h"
18
19#ifdef __STDC__
20void PB_Cainfog2l( Int M, Int N, Int I, Int J, Int * DESC, Int NPROW,
21 Int NPCOL, Int MYROW, Int MYCOL, Int * IMB1,
22 Int * INB1, Int * MP, Int * NQ, Int * II, Int * JJ,
23 Int * PROW, Int * PCOL, Int * RPROW, Int * RPCOL )
24#else
25void PB_Cainfog2l( M, N, I, J, DESC, NPROW, NPCOL, MYROW, MYCOL, IMB1,
26 INB1, MP, NQ, II, JJ, PROW, PCOL, RPROW, RPCOL )
27/*
28* .. Scalar Arguments ..
29*/
30 Int I, * II, * IMB1, * INB1, J, * JJ, M, * MP, MYCOL,
31 MYROW, N, NPCOL, NPROW, * NQ, * PCOL, * PROW, * RPCOL,
32 * RPROW;
33/*
34* .. Array Arguments ..
35*/
36 Int * DESC;
37#endif
38{
39/*
40* Purpose
41* =======
42*
43* PB_Cainfog2l computes the starting local row and column indexes II,
44* JJ corresponding to the submatrix starting globally at the entry
45* pointed by I, J. This routine returns the coordinates in the grid of
46* the process owning the matrix entry of global indexes I, J, namely
47* PROW and PCOL. In addition, this routine computes the quantities MP
48* and NQ, which are respectively the local number of rows and columns
49* owned by the process of coordinate MYROW, MYCOL corresponding to the
50* global submatrix A(I:I+M-1,J:J+N-1). Finally, the size of the first
51* partial block and the relative process coordinates are also returned
52* respectively in IMB, INB and RPROW, RPCOL.
53*
54* Notes
55* =====
56*
57* A description vector is associated with each 2D block-cyclicly dis-
58* tributed matrix. This vector stores the information required to
59* establish the mapping between a matrix entry and its corresponding
60* process and memory location.
61*
62* In the following comments, the character _ should be read as
63* "of the distributed matrix". Let A be a generic term for any 2D
64* block cyclicly distributed matrix. Its description vector is DESC_A:
65*
66* NOTATION STORED IN EXPLANATION
67* ---------------- --------------- ------------------------------------
68* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
69* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
70* the NPROW x NPCOL BLACS process grid
71* A is distributed over. The context
72* itself is global, but the handle
73* (the integer value) may vary.
74* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
75* ted matrix A, M_A >= 0.
76* N_A (global) DESCA[ N_ ] The number of columns in the distri-
77* buted matrix A, N_A >= 0.
78* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
79* block of the matrix A, IMB_A > 0.
80* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
81* left block of the matrix A,
82* INB_A > 0.
83* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
84* bute the last M_A-IMB_A rows of A,
85* MB_A > 0.
86* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
87* bute the last N_A-INB_A columns of
88* A, NB_A > 0.
89* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
90* row of the matrix A is distributed,
91* NPROW > RSRC_A >= 0.
92* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
93* first column of A is distributed.
94* NPCOL > CSRC_A >= 0.
95* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
96* array storing the local blocks of
97* the distributed matrix A,
98* IF( Lc( 1, N_A ) > 0 )
99* LLD_A >= MAX( 1, Lr( 1, M_A ) )
100* ELSE
101* LLD_A >= 1.
102*
103* Let K be the number of rows of a matrix A starting at the global in-
104* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
105* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
106* receive if these K rows were distributed over NPROW processes. If K
107* is the number of columns of a matrix A starting at the global index
108* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
109* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
110* these K columns were distributed over NPCOL processes.
111*
112* The values of Lr() and Lc() may be determined via a call to the func-
113* tion PB_Cnumroc:
114* Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
115* Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
116*
117* Arguments
118* =========
119*
120* M (global input) INTEGER
121* On entry, M specifies the global number of rows of the subma-
122* trix. M must be at least zero.
123*
124* N (global input) INTEGER
125* On entry, N specifies the global number of columns of the
126* submatrix. N must be at least zero.
127*
128* I (global input) INTEGER
129* On entry, I specifies the global starting row index of the
130* submatrix. I must at least zero.
131*
132* J (global input) INTEGER
133* On entry, J specifies the global starting column index of
134* the submatrix. J must at least zero.
135*
136* DESC (global and local input) INTEGER array
137* On entry, DESC is an integer array of dimension DLEN_. This
138* is the array descriptor of the underlying matrix.
139*
140* NPROW (global input) INTEGER
141* On entry, NPROW specifies the total number of process rows
142* over which the matrix is distributed. NPROW must be at least
143* one.
144*
145* NPCOL (global input) INTEGER
146* On entry, NPCOL specifies the total number of process columns
147* over which the matrix is distributed. NPCOL must be at least
148* one.
149*
150* MYROW (local input) INTEGER
151* On entry, MYROW specifies the row coordinate of the process
152* whose local index II is determined. MYROW must be at least
153* zero and strictly less than NPROW.
154*
155* MYCOL (local input) INTEGER
156* On entry, MYCOL specifies the column coordinate of the pro-
157* cess whose local index JJ is determined. MYCOL must be at
158* least zero and strictly less than NPCOL.
159*
160* IMB1 (global output) INTEGER
161* On exit, IMB1 specifies the number of rows of the upper left
162* block of the submatrix. On exit, IMB1 is less or equal than
163* M and greater or equal than MIN( 1, M ).
164*
165* INB1 (global output) INTEGER
166* On exit, INB1 specifies the number of columns of the upper
167* left block of the submatrix. On exit, INB1 is less or equal
168* than N and greater or equal than MIN( 1, N ).
169*
170* MP (local output) INTEGER
171* On exit, MP specifies the local number of rows of the subma-
172* trix, that the processes of row coordinate MYROW own. MP is
173* at least zero.
174*
175* NQ (local output) INTEGER
176* On exit, NQ specifies the local number of columns of the
177* submatrix, that the processes of column coordinate MYCOL
178* own. NQ is at least zero.
179*
180* II (local output) INTEGER
181* On exit, II specifies the local starting row index of the
182* submatrix. On exit, II is at least zero.
183*
184* JJ (local output) INTEGER
185* On exit, JJ specifies the local starting column index of
186* the submatrix. On exit, II is at least zero.
187*
188* PROW (global output) INTEGER
189* On exit, PROW specifies the row coordinate of the process
190* that possesses the first row of the submatrix. On exit, PROW
191* is -1 if DESC(RSRC_) is -1 on input, and, at least zero and
192* strictly less than NPROW otherwise.
193*
194* PCOL (global output) INTEGER
195* On exit, PCOL specifies the column coordinate of the process
196* that possesses the first column of the submatrix. On exit,
197* PCOL is -1 if DESC(CSRC_) is -1 on input, and, at least zero
198* and strictly less than NPCOL otherwise.
199*
200* RPROW (global output) INTEGER
201* On exit, RPROW specifies the relative row coordinate of the
202* process that possesses the first row I of the submatrix. On
203* exit, RPROW is -1 if DESC(RSRC_) is -1 on input, and, at
204* least zero and strictly less than NPROW otherwise.
205*
206* RPCOL (global output) INTEGER
207* On exit, RPCOL specifies the relative column coordinate of
208* the process that possesses the first column J of the subma-
209* trix. On exit, RPCOL is -1 if DESC(CSRC_) is -1 on input,
210* and, at least zero and strictly less than NPCOL otherwise.
211*
212* -- Written on April 1, 1998 by
213* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
214*
215* ---------------------------------------------------------------------
216*/
217/*
218* .. Local Scalars ..
219*/
220 Int i1, ilocblk, j1, mb, mydist, nb, nblocks, csrc, rsrc;
221/* ..
222* .. Executable Statements ..
223*
224*/
225/*
226* Retrieve the row distribution parameters
227*/
228 mb = DESC[ MB_ ];
229 rsrc = DESC[ RSRC_ ];
230
231 if( ( rsrc == -1 ) || ( NPROW == 1 ) )
232 {
233/*
234* The rows are not distributed, or there is just one process row in the grid.
235* Therefore, the local and global indexes are the same, as well as the local
236* and global number of rows. Finally, the relative row process coordinate is
237* zero, since every process owns all rows. Note that the size of the first
238* row block can be zero only if M is zero.
239*/
240 *II = I;
241 if( ( *IMB1 = DESC[IMB_] - I ) <= 0 )
242 *IMB1 += ( ( -(*IMB1) ) / mb + 1 ) * mb;
243 *IMB1 = MIN( *IMB1, M );
244 *MP = M;
245 *PROW = rsrc;
246 *RPROW = 0;
247 }
248 else
249 {
250/*
251* Figure out PROW, II and IMB1 first.
252*/
253 *IMB1 = DESC[IMB_];
254 if( I < *IMB1 ) /* Is I in first block range ? */
255 {
256/*
257* If I is in the first block of rows, then PROW is simply rsrc, II is I in
258* this process and zero elsewhere, and the size of the first block is the
259* IMB complement.
260*/
261 *PROW = rsrc;
262 *II = ( ( MYROW == *PROW ) ? I : 0 );
263 *IMB1 -= I;
264 }
265 else
266 {
267/*
268* The discussion goes as follows: compute my distance from the source process
269* so that within this process coordinate system, the source row process is the
270* process such that mydist=0, or equivalently MYROW == rsrc.
271*
272* Find out the global coordinate of the block of rows I belongs to (nblocks),
273* as well as the minimum local number of row blocks that every process has.
274*
275* when mydist < nblocks - ilocblk * NPROW, I own ilocblk + 1 full blocks,
276* when mydist > nblocks - ilocblk * NPROW, I own ilocblk full blocks,
277* when mydist = nblocks - ilocblk * NPROW, I own ilocblk full blocks
278* but not I, or I own ilocblk + 1 blocks and the entry I refers to.
279*/
280 i1 = I - *IMB1;
281 if( MYROW == rsrc )
282 {
283/*
284* I refers to an entry that is not in the first block, find out which process
285* has it.
286*/
287 nblocks = i1 / mb + 1;
288 *PROW = rsrc + nblocks;
289 *PROW -= ( *PROW / NPROW ) * NPROW;
290/*
291* Since mydist = 0 and nblocks - ilocblk * NPROW >= 0, there are only three
292* possible cases:
293*
294* 1) When 0 = mydist = nblocks - ilocblk * NPROW = 0 and I don't own I, in
295* which case II = IMB + ( ilocblk - 1 ) * MB. Note that this case cannot
296* happen when ilocblk is zero, since nblocks is at least one.
297*
298* 2) When 0 = mydist = nblocks - ilocblk * NPROW = 0 and I own I, in which
299* case I and II can respectively be written as IMB + (nblocks-1)*MB + IL
300* and IMB+(ilocblk-1) * MB + IL. That is II = I + (ilocblk - nblocks)*MB.
301* Note that this case cannot happen when ilocblk is zero, since nblocks
302* is at least one.
303*
304* 3) mydist = 0 < nblocks - ilocblk * NPROW, the source process owns
305* ilocblk+1 full blocks, and therefore II = IMB + ilocblk * MB. Note
306* that when ilocblk is zero, II is just IMB.
307*/
308 if( nblocks < NPROW )
309 {
310 *II = *IMB1;
311 }
312 else
313 {
314 ilocblk = nblocks / NPROW;
315 if( ilocblk * NPROW >= nblocks )
316 {
317 *II = ( ( MYROW == *PROW ) ? I + ( ilocblk - nblocks ) * mb :
318 *IMB1 + ( ilocblk - 1 ) * mb );
319 }
320 else
321 {
322 *II = *IMB1 + ilocblk * mb;
323 }
324 }
325 }
326 else
327 {
328/*
329* I is not in the first block, find out which process has it.
330*/
331 nblocks = i1 / mb + 1;
332 *PROW = rsrc + nblocks;
333 *PROW -= ( *PROW / NPROW ) * NPROW;
334/*
335* Compute my distance from the source process so that within this process
336* coordinate system, the source process is the process such that mydist=0.
337*/
338 if( ( mydist = MYROW - rsrc ) < 0 ) mydist += NPROW;
339/*
340* When mydist < nblocks - ilocblk * NPROW, I own ilocblk + 1 full blocks of
341* size MB since I am not the source process, i.e. II = ( ilocblk + 1 ) * MB.
342* When mydist >= nblocks - ilocblk * NPROW and I don't own I, I own ilocblk
343* full blocks of size MB, i.e. II = ilocblk * MB, otherwise I own ilocblk
344* blocks and I, in which case I can be written as IMB + (nblocks-1)*MB + IL
345* and II = ilocblk*MB + IL = I - IMB + ( ilocblk - nblocks + 1 )*MB.
346*/
347 if( nblocks < NPROW )
348 {
349 mydist -= nblocks;
350 *II = ( ( mydist < 0 ) ? mb : ( ( MYROW == *PROW ) ?
351 i1 + ( 1 - nblocks ) * mb : 0 ) );
352 }
353 else
354 {
355 ilocblk = nblocks / NPROW;
356 mydist -= nblocks - ilocblk * NPROW;
357 *II = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * mb :
358 ( ( MYROW == *PROW ) ?
359 ( ilocblk - nblocks + 1 ) * mb + i1 :
360 ilocblk * mb ) );
361 }
362 }
363/*
364* Update the size of first block
365*/
366 *IMB1 = nblocks * mb - i1;
367 }
368/*
369* Now everything is just like M, I=0, IMB1, MB, PROW, NPROW. The discussion
370* goes as follows: compute my distance from the source process PROW so that
371* within this process coordinate system, the source process is the process
372* such that mydist = 0. Figure out MP.
373*/
374 if( M <= *IMB1 )
375 {
376/*
377* M <= IMB1: if I am the source process, i.e. I own I (mydist = 0), MP is M
378* and 0 otherwise.
379*/
380 *MP = ( ( MYROW == *PROW ) ? M : 0 );
381 }
382 else
383 {
384/*
385* Find out how many full blocks are globally (nblocks) and locally (ilocblk)
386* in those M entries
387*/
388 nblocks = ( M - *IMB1 ) / mb + 1;
389
390 if( MYROW == *PROW )
391 {
392/*
393* Since mydist = 0 and nblocks - ilocblk * NPROW >= 0, there are only two
394* possible cases:
395*
396* 1) When mydist = nblocks - ilocblk * NPROW = 0, that is NPROW divides
397* the global number of full blocks, then the source process PROW owns
398* one more block than the other processes; and M can be rewritten as
399* M = IMB1 + (nblocks-1) * NB + LNB with LNB >= 0 size of the last block.
400* Similarly, the local value MP corresponding to M can be written as
401* MP = IMB1 + (ilocblk-1) * MB + LMB = M + ( ilocblk-1 - (nblocks-1) )*MB.
402* Note that this case cannot happen when ilocblk is zero, since nblocks
403* is at least one.
404*
405* 2) mydist = 0 < nblocks - ilocblk * NPROW, the source process only owns
406* full blocks, and therefore MP = IMB1 + ilocblk * MB. Note that when
407* ilocblk is zero, MP is just IMB1.
408*/
409 if( nblocks < NPROW )
410 {
411 *MP = *IMB1;
412 }
413 else
414 {
415 ilocblk = nblocks / NPROW;
416 *MP = ( ( nblocks - ilocblk * NPROW ) ?
417 *IMB1 + ilocblk * mb :
418 M + ( ilocblk - nblocks ) * mb );
419 }
420 }
421 else
422 {
423/*
424* Compute my distance from the source process so that within this process
425* coordinate system, the source process is the process such that mydist=0.
426*/
427 if( ( mydist = MYROW - *PROW ) < 0 ) mydist += NPROW;
428/*
429* When mydist < nblocks - ilocblk * NPROW, I own ilocblk + 1 full blocks of
430* size MB since I am not the source process,
431*
432* when mydist > nblocks - ilocblk * NPROW, I own ilocblk full blocks of
433* size MB since I am not the source process,
434*
435* when mydist = nblocks - ilocblk * NPROW,
436* either the last block is not full and I own it, in which case
437* M = IMB1 + (nblocks - 1)*MB + LMB with LNB the size of the last block
438* such that MB > LMB > 0; the local value MP corresponding to M is given
439* by MP = ilocblk * MB + LMB = M - IMB1 + ( ilocblk - nblocks + 1 ) * MB;
440* or the last block is full and I am the first process owning only ilocblk
441* full blocks of size MB, that is M = IMB + ( nblocks - 1 ) * MB and
442* MP = ilocblk * MB = M - IMB + ( ilocblk - nblocks + 1 ) * MB.
443*/
444 if( nblocks < NPROW )
445 {
446 mydist -= nblocks;
447 *MP = ( ( mydist < 0 ) ? mb : ( ( mydist > 0 ) ? 0 :
448 M - *IMB1 + mb * ( 1 - nblocks ) ) );
449 }
450 else
451 {
452 ilocblk = nblocks / NPROW;
453 mydist -= nblocks - ilocblk * NPROW;
454 *MP = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * mb :
455 ( ( mydist > 0 ) ? ilocblk * mb :
456 M - *IMB1 + mb * ( ilocblk - nblocks + 1 ) ) );
457 }
458 }
459 }
460/*
461* Finally figure out IMB1 and RPROW. Note that IMB1 can be zero when M = 0.
462*/
463 *IMB1 = MIN( *IMB1, M );
464 if( ( *RPROW = MYROW - *PROW ) < 0 ) *RPROW += NPROW;
465 }
466/*
467* Idem for the columns
468*/
469 nb = DESC[ NB_ ];
470 csrc = DESC[ CSRC_ ];
471
472 if( ( csrc == -1 ) || ( NPCOL == 1 ) )
473 {
474 *JJ = J;
475 if( ( *INB1 = DESC[INB_] - J ) <= 0 )
476 *INB1 += ( ( -(*INB1) ) / nb + 1 ) * nb;
477 *INB1 = MIN( *INB1, N );
478 *NQ = N;
479 *PCOL = csrc;
480 *RPCOL = 0;
481 }
482 else
483 {
484 *INB1 = DESC[INB_];
485 if( J < *INB1 )
486 {
487 *PCOL = csrc;
488 *JJ = ( ( MYCOL == *PCOL ) ? J : 0 );
489 *INB1 -= J;
490 }
491 else
492 {
493 j1 = J - *INB1;
494 if( MYCOL == csrc )
495 {
496 nblocks = j1 / nb + 1;
497 *PCOL = csrc + nblocks;
498 *PCOL -= ( *PCOL / NPCOL ) * NPCOL;
499
500 if( nblocks < NPCOL )
501 {
502 *JJ = *INB1;
503 }
504 else
505 {
506 ilocblk = nblocks / NPCOL;
507 if( ilocblk * NPCOL >= nblocks )
508 {
509 *JJ = ( ( MYCOL == *PCOL ) ? J + ( ilocblk - nblocks ) * nb :
510 *INB1 + ( ilocblk - 1 ) * nb );
511 }
512 else
513 {
514 *JJ = *INB1 + ilocblk * nb;
515 }
516 }
517 }
518 else
519 {
520 nblocks = j1 / nb + 1;
521 *PCOL = csrc + nblocks;
522 *PCOL -= ( *PCOL / NPCOL ) * NPCOL;
523
524 if( ( mydist = MYCOL - csrc ) < 0 ) mydist += NPCOL;
525
526 if( nblocks < NPCOL )
527 {
528 mydist -= nblocks;
529 *JJ = ( ( mydist < 0 ) ? nb : ( ( MYCOL == *PCOL ) ?
530 j1 + ( 1 - nblocks ) * nb : 0 ) );
531 }
532 else
533 {
534 ilocblk = nblocks / NPCOL;
535 mydist -= nblocks - ilocblk * NPCOL;
536 *JJ = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * nb :
537 ( ( MYCOL == *PCOL ) ?
538 ( ilocblk - nblocks + 1 ) * nb + j1 :
539 ilocblk * nb ) );
540 }
541 }
542 *INB1 = nblocks * nb - j1;
543 }
544
545 if( N <= *INB1 )
546 {
547 *NQ = ( ( MYCOL == *PCOL ) ? N : 0 );
548 }
549 else
550 {
551 nblocks = ( N - *INB1 ) / nb + 1;
552
553 if( MYCOL == *PCOL )
554 {
555 if( nblocks < NPCOL )
556 {
557 *NQ = *INB1;
558 }
559 else
560 {
561 ilocblk = nblocks / NPCOL;
562 *NQ = ( ( nblocks - ilocblk * NPCOL ) ?
563 *INB1 + ilocblk * nb :
564 N + ( ilocblk - nblocks ) * nb );
565 }
566 }
567 else
568 {
569 if( ( mydist = MYCOL - *PCOL ) < 0 ) mydist += NPCOL;
570
571 if( nblocks < NPCOL )
572 {
573 mydist -= nblocks;
574 *NQ = ( ( mydist < 0 ) ? nb : ( ( mydist > 0 ) ? 0 :
575 N - *INB1 + nb * ( 1 - nblocks ) ) );
576 }
577 else
578 {
579 ilocblk = nblocks / NPCOL;
580 mydist -= nblocks - ilocblk * NPCOL;
581 *NQ = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * nb :
582 ( ( mydist > 0 ) ? ilocblk * nb :
583 N - *INB1 + nb * ( ilocblk - nblocks + 1 ) ) );
584 }
585 }
586 }
587 *INB1 = MIN( *INB1, N );
588 if( ( *RPCOL = MYCOL - *PCOL ) < 0 ) *RPCOL += NPCOL;
589 }
590/*
591* End of PB_Cainfog2l
592*/
593}
#define Int
Definition Bconfig.h:22
#define MB_
Definition PBtools.h:43
#define MIN(a_, b_)
Definition PBtools.h:76
#define RSRC_
Definition PBtools.h:45
#define INB_
Definition PBtools.h:42
#define CSRC_
Definition PBtools.h:46
#define IMB_
Definition PBtools.h:41
void PB_Cainfog2l()
#define NB_
Definition PBtools.h:44