ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
PB_COutV.c
Go to the documentation of this file.
1 /* ---------------------------------------------------------------------
2 *
3 * -- PBLAS auxiliary routine (version 2.0) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * April 1, 1998
7 *
8 * ---------------------------------------------------------------------
9 */
10 /*
11 * Include files
12 */
13 #include "../pblas.h"
14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
18 
19 #ifdef __STDC__
20 void PB_COutV( PBTYP_T * TYPE, char * ROWCOL, char * ZEROIT, int M,
21  int N, int * DESCA, int K, char * * YAPTR, int * DYA,
22  int * YAFREE, int * YASUM )
23 #else
24 void PB_COutV( TYPE, ROWCOL, ZEROIT, M, N, DESCA, K, YAPTR, DYA,
25  YAFREE, YASUM )
26 /*
27 * .. Scalar Arguments ..
28 */
29  char * ROWCOL, * ZEROIT;
30  int * YAFREE, K, M, N, * YASUM;
31  PBTYP_T * TYPE;
32 /*
33 * .. Array Arguments ..
34 */
35  int * DESCA, * DYA;
36  char * * YAPTR;
37 #endif
38 {
39 /*
40 * Purpose
41 * =======
42 *
43 * PB_COutV returns a pointer to an array that contains a one-dimensio-
44 * nal ouput zero subvector which is replicated over the rows or columns
45 * of a submatrix described by DESCA. On return, the subvector is speci-
46 * fied by a pointer to some data, a descriptor array describing its
47 * layout, a logical value indicating if this local piece of data has
48 * been dynamically allocated by this function, a logical value speci-
49 * fying if sum reduction should occur. This routine is specifically
50 * designed for traditional Level 2 and 3 PBLAS operations using an out-
51 * put only vector such as PxTRMV, or PxTRMM.
52 *
53 * Notes
54 * =====
55 *
56 * A description vector is associated with each 2D block-cyclicly dis-
57 * tributed matrix. This vector stores the information required to
58 * establish the mapping between a matrix entry and its corresponding
59 * process and memory location.
60 *
61 * In the following comments, the character _ should be read as
62 * "of the distributed matrix". Let A be a generic term for any 2D
63 * block cyclicly distributed matrix. Its description vector is DESC_A:
64 *
65 * NOTATION STORED IN EXPLANATION
66 * ---------------- --------------- ------------------------------------
67 * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
68 * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
69 * the NPROW x NPCOL BLACS process grid
70 * A is distributed over. The context
71 * itself is global, but the handle
72 * (the integer value) may vary.
73 * M_A (global) DESCA[ M_ ] The number of rows in the distribu-
74 * ted matrix A, M_A >= 0.
75 * N_A (global) DESCA[ N_ ] The number of columns in the distri-
76 * buted matrix A, N_A >= 0.
77 * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
78 * block of the matrix A, IMB_A > 0.
79 * INB_A (global) DESCA[ INB_ ] The number of columns of the upper
80 * left block of the matrix A,
81 * INB_A > 0.
82 * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
83 * bute the last M_A-IMB_A rows of A,
84 * MB_A > 0.
85 * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
86 * bute the last N_A-INB_A columns of
87 * A, NB_A > 0.
88 * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
89 * row of the matrix A is distributed,
90 * NPROW > RSRC_A >= 0.
91 * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
92 * first column of A is distributed.
93 * NPCOL > CSRC_A >= 0.
94 * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
95 * array storing the local blocks of
96 * the distributed matrix A,
97 * IF( Lc( 1, N_A ) > 0 )
98 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
99 * ELSE
100 * LLD_A >= 1.
101 *
102 * Let K be the number of rows of a matrix A starting at the global in-
103 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
104 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
105 * receive if these K rows were distributed over NPROW processes. If K
106 * is the number of columns of a matrix A starting at the global index
107 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
108 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
109 * these K columns were distributed over NPCOL processes.
110 *
111 * The values of Lr() and Lc() may be determined via a call to the func-
112 * tion PB_Cnumroc:
113 * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
114 * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
115 *
116 * Arguments
117 * =========
118 *
119 * TYPE (local input) pointer to a PBTYP_T structure
120 * On entry, TYPE is a pointer to a structure of type PBTYP_T,
121 * that contains type information (See pblas.h).
122 *
123 * ROWCOL (global input) pointer to CHAR
124 * On entry, ROWCOL specifies if this routine should return a
125 * row or column subvector replicated over the underlying subma-
126 * trix as follows:
127 * = 'R' or 'r': A row subvector is returned,
128 * = 'C' or 'c': A column subvector is returned.
129 *
130 * M (global input) INTEGER
131 * On entry, M specifies the number of rows of the underlying
132 * submatrix described by DESCA. M must be at least zero.
133 *
134 * N (global input) INTEGER
135 * On entry, N specifies the number of columns of the underlying
136 * submatrix described by DESCA. N must be at least zero.
137 *
138 * DESCA (global and local input) INTEGER array
139 * On entry, DESCA is an integer array of dimension DLEN_. This
140 * is the array descriptor for the matrix A.
141 *
142 * K (global input) INTEGER
143 * On entry, K specifies the length of the non-distributed di-
144 * mension of the subvector sub( Y ). K must be at least zero.
145 *
146 * YAPTR (local output) pointer to pointer to CHAR
147 * On exit, * YAPTR is an array containing the same data as the
148 * subvector sub( Y ) which is replicated over the rows or co-
149 * lumns of the underlying matrix as specified by ROWCOL and
150 * DESCA.
151 *
152 * DYA (global and local output) INTEGER array
153 * On exit, DYA is a descriptor array of dimension DLEN_ descri-
154 * bing the data layout of the data pointed to by * YAPTR.
155 *
156 * YAFREE (local output) INTEGER
157 * On exit, YAFREE specifies if it was possible to reuse the
158 * subvector sub( Y ), i.e., if some dynamic memory was alloca-
159 * ted for the data pointed to by * YAPTR or not. When YAFREE is
160 * zero, no dynamic memory was allocated. Otherwise, some dyna-
161 * mic memory was allocated by this function that one MUST re-
162 * lease as soon as possible.
163 *
164 * YASUM (global output) INTEGER
165 * On exit, YASUM specifies if a global sum reduction should be
166 * performed to obtain the correct sub( Y ). When YASUM is zero,
167 * no reduction is to be performed, otherwise reduction should
168 * occur.
169 *
170 * -- Written on April 1, 1998 by
171 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
172 *
173 * ---------------------------------------------------------------------
174 */
175 /*
176 * .. Local Scalars ..
177 */
178  int Acol, Aimb, Ainb, Amb, Amp, Anb, Anq, Arow, Yld, ctxt,
179  izero=0, nprow, myrow, npcol, mycol;
180  char * zero;
181 /* ..
182 * .. Executable Statements ..
183 *
184 */
185 /*
186 * Initialize the output parameters to a default value
187 */
188  *YAFREE = 0;
189  *YASUM = 0;
190  *YAPTR = NULL;
191 /*
192 * Quick return if possible
193 */
194  if( ( M <= 0 ) || ( N <= 0 ) || ( K <= 0 ) )
195  {
196  if( Mupcase( ROWCOL[0] ) == CROW )
197  {
198  PB_Cdescset( DYA, K, N, 1, DESCA[INB_], 1, DESCA[NB_], DESCA[RSRC_],
199  DESCA[CSRC_], DESCA[CTXT_], 1 );
200  }
201  else
202  {
203  PB_Cdescset( DYA, M, K, DESCA[IMB_], 1, DESCA[MB_], 1, DESCA[RSRC_],
204  DESCA[CSRC_], DESCA[CTXT_], DESCA[LLD_] );
205  }
206  return;
207  }
208 /*
209 * Retrieve process grid information
210 */
211  Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
212 
213  Arow = DESCA[RSRC_]; Acol = DESCA[CSRC_];
214 
215  if( Mupcase( ROWCOL[0] ) == CROW )
216  {
217 /*
218 * Want a row vector
219 */
220  Ainb = DESCA[INB_]; Anb = DESCA[NB_];
221  Anq = PB_Cnumroc( N, 0, Ainb, Anb, mycol, Acol, npcol );
222  Yld = MAX( 1, K );
223 
224  if( ( Arow < 0 ) || ( nprow == 1 ) ||
225  ( PB_Cspan( M, 0, DESCA[IMB_], DESCA[MB_], Arow, nprow ) ) )
226  {
227 /*
228 * A spans all process rows. Y should be reduced iff A is not replicated and
229 * there is more than just one process row in the process grid.
230 */
231  *YASUM = ( ( Arow >= 0 ) && ( nprow > 1 ) );
232 /*
233 * Allocate the space for Y in the processes owning at least one column of A,
234 * and initialize it to zero if requested.
235 */
236  if( Anq > 0 )
237  {
238  *YAPTR = PB_Cmalloc( K * Anq * TYPE->size );
239  *YAFREE = 1;
240  if( Mupcase( ZEROIT[0] ) == CINIT )
241  {
242  zero = TYPE->zero;
243  TYPE->Ftzpad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &K, &Anq,
244  &izero, zero, zero, *YAPTR, &Yld );
245  }
246  }
247 /*
248 * Describe the newly created operand
249 */
250  PB_Cdescset( DYA, K, N, K, Ainb, 1, Anb, -1, Acol, ctxt, Yld );
251  }
252  else
253  {
254 /*
255 * A spans only one process row. There is no need to reduce Y or even to
256 * allocate some space for it outside this process row.
257 */
258  *YASUM = 0;
259  if( ( myrow == Arow ) && ( Anq > 0 ) )
260  {
261  *YAPTR = PB_Cmalloc( K * Anq * TYPE->size );
262  *YAFREE = 1;
263  if( Mupcase( ZEROIT[0] ) == CINIT )
264  {
265  zero = TYPE->zero;
266  TYPE->Ftzpad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &K, &Anq,
267  &izero, zero, zero, *YAPTR, &Yld );
268  }
269  }
270 /*
271 * Describe the newly created operand
272 */
273  PB_Cdescset( DYA, K, N, K, Ainb, 1, Anb, Arow, Acol, ctxt, Yld );
274  }
275  }
276  else
277  {
278 /*
279 * Want a column vector
280 */
281  Aimb = DESCA[ IMB_ ]; Amb = DESCA[ MB_ ];
282  Amp = PB_Cnumroc( M, 0, Aimb, Amb, myrow, Arow, nprow );
283  Yld = MAX( 1, Amp );
284 
285  if( ( Acol < 0 ) || ( npcol == 1 ) ||
286  ( PB_Cspan( N, 0, DESCA[INB_], DESCA[NB_], Acol, npcol ) ) )
287  {
288 /*
289 * A spans all process columns. Y should be reduced iff A is not replicated and
290 * there is more than just one process column in the process grid.
291 */
292  *YASUM = ( ( Acol >= 0 ) && ( npcol > 1 ) );
293 /*
294 * Allocate the space for Y in the processes owning at least one row of A, and
295 * initialize it to zero if requested.
296 */
297  if( Amp > 0 )
298  {
299  *YAPTR = PB_Cmalloc( Amp * K * TYPE->size );
300  *YAFREE = 1;
301  if( Mupcase( ZEROIT[0] ) == CINIT )
302  {
303  zero = TYPE->zero;
304  TYPE->Ftzpad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Amp, &K,
305  &izero, zero, zero, *YAPTR, &Yld );
306  }
307  }
308 /*
309 * Describe the newly created operand
310 */
311  PB_Cdescset( DYA, M, K, Aimb, K, Amb, 1, Arow, -1, ctxt, Yld );
312  }
313  else
314  {
315 /*
316 * A spans only one process column. There is no need to reduce Y or even to
317 * allocate some space for it outside this process column.
318 */
319  *YASUM = 0;
320  if( ( mycol == Acol ) && ( Amp > 0 ) )
321  {
322  *YAPTR = PB_Cmalloc( Amp * K * TYPE->size );
323  *YAFREE = 1;
324  if( Mupcase( ZEROIT[0] ) == CINIT )
325  {
326  zero = TYPE->zero;
327  TYPE->Ftzpad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Amp, &K,
328  &izero, zero, zero, *YAPTR, &Yld );
329  }
330  }
331 /*
332 * Describe the newly created operand
333 */
334  PB_Cdescset( DYA, M, K, Aimb, K, Amb, 1, Arow, Acol, ctxt, Yld );
335  }
336  }
337 /*
338 * End of PB_COutV
339 */
340 }
TYPE
#define TYPE
Definition: clamov.c:7
MB_
#define MB_
Definition: PBtools.h:43
NB_
#define NB_
Definition: PBtools.h:44
CSRC_
#define CSRC_
Definition: PBtools.h:46
PB_COutV
void PB_COutV(PBTYP_T *TYPE, char *ROWCOL, char *ZEROIT, int M, int N, int *DESCA, int K, char **YAPTR, int *DYA, int *YAFREE, int *YASUM)
Definition: PB_COutV.c:24
NOCONJG
#define NOCONJG
Definition: PBblas.h:45
LLD_
#define LLD_
Definition: PBtools.h:47
CROW
#define CROW
Definition: PBblacs.h:21
IMB_
#define IMB_
Definition: PBtools.h:41
PB_Cdescset
void PB_Cdescset()
RSRC_
#define RSRC_
Definition: PBtools.h:45
PB_Cnumroc
int PB_Cnumroc()
PB_Cmalloc
char * PB_Cmalloc()
ALL
#define ALL
Definition: PBblas.h:50
INB_
#define INB_
Definition: PBtools.h:42
C2F_CHAR
#define C2F_CHAR(a)
Definition: pblas.h:121
PB_Cspan
int PB_Cspan()
MAX
#define MAX(a_, b_)
Definition: PBtools.h:77
Cblacs_gridinfo
void Cblacs_gridinfo()
PBTYP_T
Definition: pblas.h:325
Mupcase
#define Mupcase(C)
Definition: PBtools.h:83
CTXT_
#define CTXT_
Definition: PBtools.h:38
CINIT
#define CINIT
Definition: PBblas.h:35