ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
PB_Cplascal.c
Go to the documentation of this file.
1 /* ---------------------------------------------------------------------
2 *
3 * -- PBLAS auxiliary routine (version 2.0) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * April 1, 1998
7 *
8 * ---------------------------------------------------------------------
9 */
10 /*
11 * Include files
12 */
13 #include "../pblas.h"
14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
18 
19 #ifdef __STDC__
20 void PB_Cplascal( PBTYP_T * TYPE, char * UPLO, char * CONJUG, int M,
21  int N, char * ALPHA, char * A, int IA, int JA,
22  int * DESCA )
23 #else
24 void PB_Cplascal( TYPE, UPLO, CONJUG, M, N, ALPHA, A, IA, JA, DESCA )
25 /*
26 * .. Scalar Arguments ..
27 */
28  char * CONJUG, * UPLO;
29  int IA, JA, M, N;
30  char * ALPHA;
31  PBTYP_T * TYPE;
32 /*
33 * .. Array Arguments ..
34 */
35  int * DESCA;
36  char * A;
37 #endif
38 {
39 /*
40 * Purpose
41 * =======
42 *
43 * PB_Cplascal scales by alpha an m by n submatrix sub( A ) denoting
44 * A(IA:IA+M-1,JA:JA+N-1).
45 *
46 * Notes
47 * =====
48 *
49 * A description vector is associated with each 2D block-cyclicly dis-
50 * tributed matrix. This vector stores the information required to
51 * establish the mapping between a matrix entry and its corresponding
52 * process and memory location.
53 *
54 * In the following comments, the character _ should be read as
55 * "of the distributed matrix". Let A be a generic term for any 2D
56 * block cyclicly distributed matrix. Its description vector is DESC_A:
57 *
58 * NOTATION STORED IN EXPLANATION
59 * ---------------- --------------- ------------------------------------
60 * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
61 * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
62 * the NPROW x NPCOL BLACS process grid
63 * A is distributed over. The context
64 * itself is global, but the handle
65 * (the integer value) may vary.
66 * M_A (global) DESCA[ M_ ] The number of rows in the distribu-
67 * ted matrix A, M_A >= 0.
68 * N_A (global) DESCA[ N_ ] The number of columns in the distri-
69 * buted matrix A, N_A >= 0.
70 * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
71 * block of the matrix A, IMB_A > 0.
72 * INB_A (global) DESCA[ INB_ ] The number of columns of the upper
73 * left block of the matrix A,
74 * INB_A > 0.
75 * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
76 * bute the last M_A-IMB_A rows of A,
77 * MB_A > 0.
78 * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
79 * bute the last N_A-INB_A columns of
80 * A, NB_A > 0.
81 * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
82 * row of the matrix A is distributed,
83 * NPROW > RSRC_A >= 0.
84 * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
85 * first column of A is distributed.
86 * NPCOL > CSRC_A >= 0.
87 * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
88 * array storing the local blocks of
89 * the distributed matrix A,
90 * IF( Lc( 1, N_A ) > 0 )
91 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
92 * ELSE
93 * LLD_A >= 1.
94 *
95 * Let K be the number of rows of a matrix A starting at the global in-
96 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
97 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
98 * receive if these K rows were distributed over NPROW processes. If K
99 * is the number of columns of a matrix A starting at the global index
100 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
101 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
102 * these K columns were distributed over NPCOL processes.
103 *
104 * The values of Lr() and Lc() may be determined via a call to the func-
105 * tion PB_Cnumroc:
106 * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
107 * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
108 *
109 * Arguments
110 * =========
111 *
112 * TYPE (local input) pointer to a PBTYP_T structure
113 * On entry, TYPE is a pointer to a structure of type PBTYP_T,
114 * that contains type information (See pblas.h).
115 *
116 * UPLO (global input) pointer to CHAR
117 * On entry, UPLO specifies the part of the submatrix sub( A )
118 * to be scaled as follows:
119 * = 'L' or 'l': Lower triangular part is scaled; the
120 * strictly upper triangular part of sub( A ) is not changed;
121 * = 'U' or 'u': Upper triangular part is scaled; the
122 * strictly lower triangular part of sub( A ) is not changed;
123 * Otherwise: All of the submatrix sub( A ) is scaled.
124 *
125 * CONJUG (global input) pointer to CHAR
126 * On entry, CONJUG specifies what kind of scaling should be
127 * done as follows: when UPLO is 'L', 'l', 'U' or 'u' and CONJUG
128 * is 'Z' or 'z', alpha is assumed to be real and the imaginary
129 * part of the diagonals are set to zero. Otherwise, alpha is of
130 * the same type as the entries of sub( A ) and nothing particu-
131 * lar is done to the diagonals of sub( A ).
132 *
133 * M (global input) INTEGER
134 * On entry, M specifies the number of rows of the submatrix
135 * sub( A ). M must be at least zero.
136 *
137 * N (global input) INTEGER
138 * On entry, N specifies the number of columns of the submatrix
139 * sub( A ). N must be at least zero.
140 *
141 * ALPHA (global input) pointer to CHAR
142 * On entry, ALPHA specifies the scalar alpha, i.e., the cons-
143 * tant with which the matrix elements are to be scaled.
144 *
145 * A (local input/local output) pointer to CHAR
146 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is
147 * at least Lc( 1, JA+N-1 ). Before entry, this array contains
148 * the local entries of the matrix A to be scaled. On exit, the
149 * local entries of this array corresponding to the to the en-
150 * tries of the submatrix sub( A ) are overwritten by the local
151 * entries of the m by n scaled submatrix.
152 *
153 * IA (global input) INTEGER
154 * On entry, IA specifies A's global row index, which points to
155 * the beginning of the submatrix sub( A ).
156 *
157 * JA (global input) INTEGER
158 * On entry, JA specifies A's global column index, which points
159 * to the beginning of the submatrix sub( A ).
160 *
161 * DESCA (global and local input) INTEGER array
162 * On entry, DESCA is an integer array of dimension DLEN_. This
163 * is the array descriptor for the matrix A.
164 *
165 * -- Written on April 1, 1998 by
166 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
167 *
168 * ---------------------------------------------------------------------
169 */
170 /*
171 * .. Local Scalars ..
172 */
173  char UploA, herm, type;
174  int Acol, Arow, Aii, Aimb1, Ainb1, Ajj, Akp, Akq, Ald, Amb, Amp,
175  Amp0, Anb, Anq, Anq0, ctxt, izero=0, k, kb, ktmp, mn, mycol,
176  myrow, nb, npcol, nprow, size;
177  TZSCAL_T scal;
178 /*
179 * .. Local Arrays ..
180 */
181  int Ad0[DLEN_];
182  char * Aptr = NULL;
183 /* ..
184 * .. Executable Statements ..
185 *
186 */
187 /*
188 * Quick return if possible
189 */
190  if( ( M <= 0 ) || ( N <= 0 ) ) return;
191 /*
192 * If alpha is zero, then call PB_Cplapad instead.
193 */
194  type = TYPE->type;
195  UploA = Mupcase( UPLO[0] );
196  herm = ( UploA == CALL ? CNOCONJG : Mupcase( CONJUG[0] ) );
197 
198  if( type == SREAL )
199  {
200  if( ((float*)(ALPHA))[REAL_PART] == ZERO )
201  {
202  PB_Cplapad( TYPE, UPLO, NOCONJG, M, N, TYPE->zero, TYPE->zero, A, IA,
203  JA, DESCA );
204  return;
205  }
206  else if( ((float*)(ALPHA))[REAL_PART] == ONE ) return;
207  }
208  else if( type == DREAL )
209  {
210  if( ((double*)(ALPHA))[REAL_PART] == ZERO )
211  {
212  PB_Cplapad( TYPE, UPLO, NOCONJG, M, N, TYPE->zero, TYPE->zero, A, IA,
213  JA, DESCA );
214  return;
215  }
216  else if( ((double*)(ALPHA))[REAL_PART] == ONE ) return;
217  }
218  else if( type == SCPLX )
219  {
220  if( herm == CCONJG )
221  {
222  if( ((float*)(ALPHA))[REAL_PART] == ZERO )
223  {
224  PB_Cplapad( TYPE, UPLO, NOCONJG, M, N, TYPE->zero, TYPE->zero, A,
225  IA, JA, DESCA );
226  return;
227  }
228  }
229  else
230  {
231  if( ((float*)(ALPHA))[IMAG_PART] == ZERO )
232  {
233  if( ((float*)(ALPHA))[REAL_PART] == ZERO )
234  {
235  PB_Cplapad( TYPE, UPLO, NOCONJG, M, N, TYPE->zero, TYPE->zero, A,
236  IA, JA, DESCA );
237  return;
238  }
239  else if( ((float*)(ALPHA))[REAL_PART] == ONE ) return;
240  }
241  }
242  }
243  else if( type == DCPLX )
244  {
245  if( herm == CCONJG )
246  {
247  if( ((double*)(ALPHA))[REAL_PART] == ZERO )
248  {
249  PB_Cplapad( TYPE, UPLO, NOCONJG, M, N, TYPE->zero, TYPE->zero, A,
250  IA, JA, DESCA );
251  return;
252  }
253  }
254  else
255  {
256  if( ((double*)(ALPHA))[IMAG_PART] == ZERO )
257  {
258  if( ((double*)(ALPHA))[REAL_PART] == ZERO )
259  {
260  PB_Cplapad( TYPE, UPLO, NOCONJG, M, N, TYPE->zero, TYPE->zero, A,
261  IA, JA, DESCA );
262  return;
263  }
264  else if( ((double*)(ALPHA))[REAL_PART] == ONE ) return;
265  }
266  }
267  }
268 /*
269 * Retrieve process grid information
270 */
271  Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
272 /*
273 * Compute descriptor Ad0 for sub( A )
274 */
275  PB_Cdescribe( M, N, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj,
276  &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 );
277 /*
278 * Quick return if I don't own any of sub( A ).
279 */
280  Amp = PB_Cnumroc( M, 0, Aimb1, Amb, myrow, Arow, nprow );
281  Anq = PB_Cnumroc( N, 0, Ainb1, Anb, mycol, Acol, npcol );
282  if( ( Amp <= 0 ) || ( Anq <= 0 ) ) return;
283 
284  size = TYPE->size;
285  scal = ( herm == CCONJG ? TYPE->Fhescal : TYPE->Ftzscal );
286  Aptr = Mptr( A, Aii, Ajj, Ald, size );
287 /*
288 * When the entire sub( A ) needs to be scaled or when sub( A ) is replicated in
289 * all processes, just call the local routine.
290 */
291  if( ( Mupcase( UPLO[0] ) == CALL ) ||
292  ( ( ( Arow < 0 ) || ( nprow == 1 ) ) &&
293  ( ( Acol < 0 ) || ( npcol == 1 ) ) ) )
294  {
295  scal( C2F_CHAR( UPLO ), &Amp, &Anq, &izero, ALPHA, Aptr, &Ald );
296  return;
297  }
298 /*
299 * Computational partitioning size is computed as the product of the logical
300 * value returned by pilaenv_ and two times the least common multiple of nprow
301 * and npcol.
302 */
303  nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type ) ) *
304  PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) );
305 
306  mn = MIN( M, N );
307 
308  if( Mupcase( UPLO[0] ) == CLOWER )
309  {
310 /*
311 * Lower triangle of sub( A ): proceed by block of columns. For each block of
312 * columns, operate on the logical diagonal block first and then the remaining
313 * rows of that block of columns.
314 */
315  for( k = 0; k < mn; k += nb )
316  {
317  kb = mn - k; ktmp = k + ( kb = MIN( kb, nb ) );
318  PB_Cplasca2( TYPE, UPLO, CONJUG, kb, kb, ALPHA, Aptr, k, k, Ad0 );
319  Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow );
320  Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
321  Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
322  if( ( Amp0 = Amp - Akp ) > 0 )
323  scal( C2F_CHAR( ALL ), &Amp0, &Anq0, &izero, ALPHA, Mptr( Aptr,
324  Akp, Akq, Ald, size ), &Ald );
325  }
326  }
327  else if( Mupcase( UPLO[0] ) == CUPPER )
328  {
329 /*
330 * Upper triangle of sub( A ): proceed by block of columns. For each block of
331 * columns, operate on the trailing rows and then the logical diagonal block
332 * of that block of columns. When M < N, the last columns of sub( A ) are
333 * handled together.
334 */
335  for( k = 0; k < mn; k += nb )
336  {
337  kb = mn - k; kb = MIN( kb, nb );
338  Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
339  Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
340  Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
341  if( Akp > 0 )
342  scal( C2F_CHAR( ALL ), &Akp, &Anq0, &izero, ALPHA, Mptr( Aptr,
343  0, Akq, Ald, size ), &Ald );
344  PB_Cplasca2( TYPE, UPLO, CONJUG, kb, kb, ALPHA, Aptr, k, k, Ad0 );
345  }
346  if( ( Anq -= ( Akq += Anq0 ) ) > 0 )
347  scal( C2F_CHAR( ALL ), &Amp, &Anq, &izero, ALPHA, Mptr( Aptr, 0,
348  Akq, Ald, size ), &Ald );
349  }
350  else
351  {
352 /*
353 * All of sub( A ): proceed by block of columns. For each block of columns,
354 * operate on the trailing rows, then the logical diagonal block, and finally
355 * the remaining rows of that block of columns. When M < N, the last columns
356 * of sub( A ) are handled together.
357 */
358  for( k = 0; k < mn; k += nb )
359  {
360  kb = mn - k; kb = MIN( kb, nb );
361  Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
362  Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
363  Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
364  if( Akp > 0 )
365  scal( C2F_CHAR( ALL ), &Akp, &Anq0, &izero, ALPHA, Mptr( Aptr,
366  0, Akq, Ald, size ), &Ald );
367  PB_Cplasca2( TYPE, UPLO, NOCONJG, kb, kb, ALPHA, Aptr, k, k, Ad0 );
368  Akp = PB_Cnumroc( k+kb, 0, Aimb1, Amb, myrow, Arow, nprow );
369  if( ( Amp0 = Amp - Akp ) > 0 )
370  scal( C2F_CHAR( ALL ), &Amp0, &Anq0, &izero, ALPHA, Mptr( Aptr,
371  Akp, Akq, Ald, size ), &Ald );
372  }
373  if( ( Anq -= ( Akq += Anq0 ) ) > 0 )
374  scal( C2F_CHAR( ALL ), &Amp, &Anq, &izero, ALPHA, Mptr( Aptr, 0,
375  Akq, Ald, size ), &Ald );
376  }
377 /*
378 * End of PB_Cplascal
379 */
380 }
CCONJG
#define CCONJG
Definition: PBblas.h:21
TYPE
#define TYPE
Definition: clamov.c:7
NOCONJG
#define NOCONJG
Definition: PBblas.h:45
REAL_PART
#define REAL_PART
Definition: pblas.h:135
DLEN_
#define DLEN_
Definition: PBtools.h:48
DCPLX
#define DCPLX
Definition: pblas.h:472
PB_Cdescribe
void PB_Cdescribe()
CNOCONJG
#define CNOCONJG
Definition: PBblas.h:19
ZERO
#define ZERO
Definition: PBtools.h:66
pilaenv_
int pilaenv_()
CALL
#define CALL
Definition: PBblas.h:24
CLOWER
#define CLOWER
Definition: PBblas.h:25
PB_Cplapad
void PB_Cplapad()
PB_Cplascal
void PB_Cplascal(PBTYP_T *TYPE, char *UPLO, char *CONJUG, int M, int N, char *ALPHA, char *A, int IA, int JA, int *DESCA)
Definition: PB_Cplascal.c:24
ONE
#define ONE
Definition: PBtools.h:64
PB_Cplasca2
void PB_Cplasca2()
SREAL
#define SREAL
Definition: pblas.h:469
PB_Cnumroc
int PB_Cnumroc()
ALL
#define ALL
Definition: PBblas.h:50
MIN
#define MIN(a_, b_)
Definition: PBtools.h:76
DREAL
#define DREAL
Definition: pblas.h:470
C2F_CHAR
#define C2F_CHAR(a)
Definition: pblas.h:121
TZSCAL_T
F_VOID_FCT(* TZSCAL_T)()
Definition: pblas.h:291
Cblacs_gridinfo
void Cblacs_gridinfo()
PBTYP_T
Definition: pblas.h:325
Mupcase
#define Mupcase(C)
Definition: PBtools.h:83
CUPPER
#define CUPPER
Definition: PBblas.h:26
Mptr
#define Mptr(a_, i_, j_, lda_, siz_)
Definition: PBtools.h:132
CTXT_
#define CTXT_
Definition: PBtools.h:38
IMAG_PART
#define IMAG_PART
Definition: pblas.h:136
SCPLX
#define SCPLX
Definition: pblas.h:471
PB_Clcm
int PB_Clcm()