ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pzher2_.c
Go to the documentation of this file.
1 /* ---------------------------------------------------------------------
2 *
3 * -- PBLAS routine (version 2.0) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * April 1, 1998
7 *
8 * ---------------------------------------------------------------------
9 */
10 /*
11 * Include files
12 */
13 #include "pblas.h"
14 #include "PBpblas.h"
15 #include "PBtools.h"
16 #include "PBblacs.h"
17 #include "PBblas.h"
18 
19 #ifdef __STDC__
20 void pzher2_( F_CHAR_T UPLO, int * N, double * ALPHA,
21  double * X, int * IX, int * JX, int * DESCX, int * INCX,
22  double * Y, int * IY, int * JY, int * DESCY, int * INCY,
23  double * A, int * IA, int * JA, int * DESCA )
24 #else
25 void pzher2_( UPLO, N, ALPHA, X, IX, JX, DESCX, INCX, Y, IY, JY,
26  DESCY, INCY, A, IA, JA, DESCA )
27 /*
28 * .. Scalar Arguments ..
29 */
30  F_CHAR_T UPLO;
31  int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY,
32  * N;
33  double * ALPHA;
34 /*
35 * .. Array Arguments ..
36 */
37  int * DESCA, * DESCX, * DESCY;
38  double * A, * X, * Y;
39 #endif
40 {
41 /*
42 * Purpose
43 * =======
44 *
45 * PZHER2 performs the Hermitian rank 2 operation
46 *
47 * sub( A ) := alpha*sub( X )*conjg( sub( Y )' ) +
48 * conjg( alpha )*sub( Y )*conjg( sub( X )' ) + sub( A ),
49 *
50 * where
51 *
52 * sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1),
53 *
54 * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X,
55 * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and,
56 *
57 * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y,
58 * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y.
59 *
60 * Alpha is a scalar, sub( X ) and sub( Y ) are n element subvectors and
61 * sub( A ) is an n by n Hermitian submatrix.
62 *
63 * Notes
64 * =====
65 *
66 * A description vector is associated with each 2D block-cyclicly dis-
67 * tributed matrix. This vector stores the information required to
68 * establish the mapping between a matrix entry and its corresponding
69 * process and memory location.
70 *
71 * In the following comments, the character _ should be read as
72 * "of the distributed matrix". Let A be a generic term for any 2D
73 * block cyclicly distributed matrix. Its description vector is DESC_A:
74 *
75 * NOTATION STORED IN EXPLANATION
76 * ---------------- --------------- ------------------------------------
77 * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
78 * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
79 * the NPROW x NPCOL BLACS process grid
80 * A is distributed over. The context
81 * itself is global, but the handle
82 * (the integer value) may vary.
83 * M_A (global) DESCA[ M_ ] The number of rows in the distribu-
84 * ted matrix A, M_A >= 0.
85 * N_A (global) DESCA[ N_ ] The number of columns in the distri-
86 * buted matrix A, N_A >= 0.
87 * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
88 * block of the matrix A, IMB_A > 0.
89 * INB_A (global) DESCA[ INB_ ] The number of columns of the upper
90 * left block of the matrix A,
91 * INB_A > 0.
92 * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
93 * bute the last M_A-IMB_A rows of A,
94 * MB_A > 0.
95 * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
96 * bute the last N_A-INB_A columns of
97 * A, NB_A > 0.
98 * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
99 * row of the matrix A is distributed,
100 * NPROW > RSRC_A >= 0.
101 * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
102 * first column of A is distributed.
103 * NPCOL > CSRC_A >= 0.
104 * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
105 * array storing the local blocks of
106 * the distributed matrix A,
107 * IF( Lc( 1, N_A ) > 0 )
108 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
109 * ELSE
110 * LLD_A >= 1.
111 *
112 * Let K be the number of rows of a matrix A starting at the global in-
113 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
114 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
115 * receive if these K rows were distributed over NPROW processes. If K
116 * is the number of columns of a matrix A starting at the global index
117 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
118 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
119 * these K columns were distributed over NPCOL processes.
120 *
121 * The values of Lr() and Lc() may be determined via a call to the func-
122 * tion PB_Cnumroc:
123 * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
124 * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
125 *
126 * Arguments
127 * =========
128 *
129 * UPLO (global input) CHARACTER*1
130 * On entry, UPLO specifies whether the local pieces of
131 * the array A containing the upper or lower triangular part
132 * of the Hermitian submatrix sub( A ) are to be referenced as
133 * follows:
134 *
135 * UPLO = 'U' or 'u' Only the local pieces corresponding to
136 * the upper triangular part of the
137 * Hermitian submatrix sub( A ) are to be
138 * referenced,
139 *
140 * UPLO = 'L' or 'l' Only the local pieces corresponding to
141 * the lower triangular part of the
142 * Hermitian submatrix sub( A ) are to be
143 * referenced.
144 *
145 * N (global input) INTEGER
146 * On entry, N specifies the order of the submatrix sub( A ).
147 * N must be at least zero.
148 *
149 * ALPHA (global input) COMPLEX*16
150 * On entry, ALPHA specifies the scalar alpha. When ALPHA is
151 * supplied as zero then the local entries of the arrays X
152 * and Y corresponding to the entries of the subvectors sub( X )
153 * and sub( Y ) respectively need not be set on input.
154 *
155 * X (local input) COMPLEX*16 array
156 * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X
157 * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and
158 * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least
159 * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise.
160 * Before entry, this array contains the local entries of the
161 * matrix X.
162 *
163 * IX (global input) INTEGER
164 * On entry, IX specifies X's global row index, which points to
165 * the beginning of the submatrix sub( X ).
166 *
167 * JX (global input) INTEGER
168 * On entry, JX specifies X's global column index, which points
169 * to the beginning of the submatrix sub( X ).
170 *
171 * DESCX (global and local input) INTEGER array
172 * On entry, DESCX is an integer array of dimension DLEN_. This
173 * is the array descriptor for the matrix X.
174 *
175 * INCX (global input) INTEGER
176 * On entry, INCX specifies the global increment for the
177 * elements of X. Only two values of INCX are supported in
178 * this version, namely 1 and M_X. INCX must not be zero.
179 *
180 * Y (local input) COMPLEX*16 array
181 * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y
182 * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and
183 * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least
184 * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise.
185 * Before entry, this array contains the local entries of the
186 * matrix Y.
187 *
188 * IY (global input) INTEGER
189 * On entry, IY specifies Y's global row index, which points to
190 * the beginning of the submatrix sub( Y ).
191 *
192 * JY (global input) INTEGER
193 * On entry, JY specifies Y's global column index, which points
194 * to the beginning of the submatrix sub( Y ).
195 *
196 * DESCY (global and local input) INTEGER array
197 * On entry, DESCY is an integer array of dimension DLEN_. This
198 * is the array descriptor for the matrix Y.
199 *
200 * INCY (global input) INTEGER
201 * On entry, INCY specifies the global increment for the
202 * elements of Y. Only two values of INCY are supported in
203 * this version, namely 1 and M_Y. INCY must not be zero.
204 *
205 * A (local input/local output) COMPLEX*16 array
206 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is
207 * at least Lc( 1, JA+N-1 ). Before entry, this array contains
208 * the local entries of the matrix A.
209 * Before entry with UPLO = 'U' or 'u', this array contains
210 * the local entries corresponding to the upper triangular part
211 * of the Hermitian submatrix sub( A ), and the local entries
212 * corresponding to the strictly lower triangular of sub( A )
213 * are not referenced. On exit, the upper triangular part of
214 * sub( A ) is overwritten by the upper triangular part of the
215 * updated submatrix.
216 * Before entry with UPLO = 'L' or 'l', this array contains
217 * the local entries corresponding to the lower triangular part
218 * of the Hermitian submatrix sub( A ), and the local entries
219 * corresponding to the strictly upper triangular of sub( A )
220 * are not referenced. On exit, the lower triangular part of
221 * sub( A ) is overwritten by the lower triangular part of the
222 * updated submatrix.
223 * Note that the imaginary parts of the local entries corres-
224 * ponding to the diagonal elements of sub( A ) need not be
225 * set, they are assumed to be zero, and on exit they are set
226 * to zero.
227 *
228 * IA (global input) INTEGER
229 * On entry, IA specifies A's global row index, which points to
230 * the beginning of the submatrix sub( A ).
231 *
232 * JA (global input) INTEGER
233 * On entry, JA specifies A's global column index, which points
234 * to the beginning of the submatrix sub( A ).
235 *
236 * DESCA (global and local input) INTEGER array
237 * On entry, DESCA is an integer array of dimension DLEN_. This
238 * is the array descriptor for the matrix A.
239 *
240 * -- Written on April 1, 1998 by
241 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
242 *
243 * ---------------------------------------------------------------------
244 */
245 /*
246 * .. Local Scalars ..
247 */
248  char UploA;
249  int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb,
250  Amp, Amp0, Anb, Anq, Anq0, Arow, XCfr, XCld, XRfr, XRld,
251  Xi, Xj, YCfr, YCld, YRfr, YRld, Yi, Yj, ctxt, info, ione=1,
252  k, kb, ktmp, mycol, myrow, nb, npcol, nprow, size, upper;
253  cmplx16 Calpha;
254  PBTYP_T * type;
255 /*
256 * .. Local Arrays ..
257 */
258  int Ad [DLEN_], Ad0 [DLEN_], XCd0[DLEN_], XRd0[DLEN_], Xd[DLEN_],
259  YCd0[DLEN_], YRd0[DLEN_], Yd [DLEN_];
260  char * Aptr = NULL, * XC = NULL, * XR = NULL, * YC = NULL,
261  * YR = NULL;
262 /* ..
263 * .. Executable Statements ..
264 *
265 */
266  upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER );
267  PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad );
268  PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd );
269  PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd );
270 #ifndef NO_ARGCHK
271 /*
272 * Test the input parameters
273 */
274  Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
275  if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) )
276  {
277  if( ( !upper ) && ( UploA != CLOWER ) )
278  {
279  PB_Cwarn( ctxt, __LINE__, "PZHER2", "Illegal UPLO = %c\n", UploA );
280  info = -1;
281  }
282  PB_Cchkvec( ctxt, "PZHER2", "X", *N, 2, Xi, Xj, Xd, *INCX, 7, &info );
283  PB_Cchkvec( ctxt, "PZHER2", "Y", *N, 2, Yi, Yj, Yd, *INCY, 12, &info );
284  PB_Cchkmat( ctxt, "PZHER2", "A", *N, 2, *N, 2, Ai, Aj, Ad, 17, &info );
285  }
286  if( info ) { PB_Cabort( ctxt, "PZHER2", info ); return; }
287 #endif
288 /*
289 * Quick return if possible
290 */
291  if( ( *N == 0 ) ||
292  ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) )
293  return;
294 /*
295 * Retrieve process grid information
296 */
297 #ifdef NO_ARGCHK
298  Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
299 #endif
300 /*
301 * Get type structure
302 */
303  type = PB_Cztypeset();
304 /*
305 * Compute descriptor Ad0 for sub( A )
306 */
307  PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj,
308  &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 );
309 /*
310 * Replicate sub( X ) in process rows (XR) and process columns (XC) spanned by
311 * sub( A )
312 */
313  if( *INCX == Xd[M_] )
314  {
315  PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj,
316  Xd, ROW, &XR, XRd0, &XRfr );
317  PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, XR, 0, 0,
318  XRd0, ROW, &XC, XCd0, &XCfr );
319  }
320  else
321  {
322  PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj,
323  Xd, COLUMN, &XC, XCd0, &XCfr );
324  PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, XC, 0, 0,
325  XCd0, COLUMN, &XR, XRd0, &XRfr );
326  }
327 /*
328 * Replicate sub( Y ) in process rows (YR) and process columns (YC) spanned by
329 * sub( A )
330 */
331  if( *INCY == Yd[M_] )
332  {
333  PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) Y), Yi, Yj,
334  Yd, ROW, &YR, YRd0, &YRfr );
335  PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, YR, 0, 0,
336  YRd0, ROW, &YC, YCd0, &YCfr );
337  }
338  else
339  {
340  PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) Y), Yi, Yj,
341  Yd, COLUMN, &YC, YCd0, &YCfr );
342  PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, YC, 0, 0,
343  YCd0, COLUMN, &YR, YRd0, &YRfr );
344  }
345 /*
346 * Local rank-2 update if I own some data
347 */
348  Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow );
349  Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol );
350 
351  if( ( Amp > 0 ) && ( Anq > 0 ) )
352  {
353  size = type->size;
354  Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size );
355 
356  XCld = XCd0[LLD_]; YCld = YCd0[LLD_];
357  XRld = XRd0[LLD_]; YRld = YRd0[LLD_];
358  Calpha[REAL_PART] = ALPHA[REAL_PART];
359  Calpha[IMAG_PART] = -ALPHA[IMAG_PART];
360 /*
361 * Computational partitioning size is computed as the product of the logical
362 * value returned by pilaenv_ and 2 * lcm( nprow, npcol ).
363 */
364  nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type->type ) ) *
365  PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) );
366  if( upper )
367  {
368  for( k = 0; k < *N; k += nb )
369  {
370  kb = *N - k; kb = MIN( kb, nb );
371  Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
372  Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
373  Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
374  if( Akp > 0 && Anq0 > 0 )
375  {
376  zgerc_( &Akp, &Anq0, ((char *) ALPHA), XC, &ione,
377  Mptr( YR, 0, Akq, YRld, size ), &YRld,
378  Mptr( Aptr, 0, Akq, Ald, size ), &Ald );
379  zgerc_( &Akp, &Anq0, ((char *) Calpha), YC, &ione,
380  Mptr( XR, 0, Akq, XRld, size ), &XRld,
381  Mptr( Aptr, 0, Akq, Ald, size ), &Ald );
382  }
383  PB_Cpsyr2( type, UPPER, kb, 1, ((char *) ALPHA),
384  Mptr( XC, Akp, 0, XCld, size ), XCld,
385  Mptr( XR, 0, Akq, XRld, size ), XRld,
386  Mptr( YC, Akp, 0, YCld, size ), YCld,
387  Mptr( YR, 0, Akq, YRld, size ), YRld,
388  Aptr, k, k, Ad0, PB_Ctzher2 );
389  }
390  }
391  else
392  {
393  for( k = 0; k < *N; k += nb )
394  {
395  kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) );
396  Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
397  Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
398  PB_Cpsyr2( type, LOWER, kb, 1, ((char *) ALPHA),
399  Mptr( XC, Akp, 0, XCld, size ), XCld,
400  Mptr( XR, 0, Akq, XRld, size ), XRld,
401  Mptr( YC, Akp, 0, YCld, size ), YCld,
402  Mptr( YR, 0, Akq, YRld, size ), YRld,
403  Aptr, k, k, Ad0, PB_Ctzher2 );
404  Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow );
405  Amp0 = Amp - Akp;
406  Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
407  if( Amp0 > 0 && Anq0 > 0 )
408  {
409  zgerc_( &Amp0, &Anq0, ((char *) ALPHA),
410  Mptr( XC, Akp, 0, XCld, size ), &ione,
411  Mptr( YR, 0, Akq, YRld, size ), &YRld,
412  Mptr( Aptr, Akp, Akq, Ald, size ), &Ald );
413  zgerc_( &Amp0, &Anq0, ((char *) Calpha),
414  Mptr( YC, Akp, 0, YCld, size ), &ione,
415  Mptr( XR, 0, Akq, XRld, size ), &XRld,
416  Mptr( Aptr, Akp, Akq, Ald, size ), &Ald );
417  }
418  }
419  }
420  }
421  if( XRfr ) free( XR );
422  if( XCfr ) free( XC );
423  if( YRfr ) free( YR );
424  if( YCfr ) free( YC );
425 /*
426 * End of PZHER2
427 */
428 }
M_
#define M_
Definition: PBtools.h:39
ROW
#define ROW
Definition: PBblacs.h:46
PB_Cwarn
void PB_Cwarn()
pzher2_
void pzher2_(F_CHAR_T UPLO, int *N, double *ALPHA, double *X, int *IX, int *JX, int *DESCX, int *INCX, double *Y, int *IY, int *JY, int *DESCY, int *INCY, double *A, int *IA, int *JA, int *DESCA)
Definition: pzher2_.c:25
COLUMN
#define COLUMN
Definition: PBblacs.h:45
PBblacs.h
PBtools.h
PBblas.h
NOCONJG
#define NOCONJG
Definition: PBblas.h:45
REAL_PART
#define REAL_PART
Definition: pblas.h:135
PB_Ctzher2
void PB_Ctzher2()
PBTYP_T::type
char type
Definition: pblas.h:327
PBpblas.h
PB_Cztypeset
PBTYP_T * PB_Cztypeset()
Definition: PB_Cztypeset.c:19
DLEN_
#define DLEN_
Definition: PBtools.h:48
LLD_
#define LLD_
Definition: PBtools.h:47
PB_Cpsyr2
void PB_Cpsyr2()
PB_Cdescribe
void PB_Cdescribe()
F_CHAR_T
char * F_CHAR_T
Definition: pblas.h:118
ZERO
#define ZERO
Definition: PBtools.h:66
PB_Cchkvec
void PB_Cchkvec()
UPPER
#define UPPER
Definition: PBblas.h:52
pilaenv_
int pilaenv_()
zgerc_
F_VOID_FCT zgerc_()
PB_Cabort
void PB_Cabort()
CLOWER
#define CLOWER
Definition: PBblas.h:25
F2C_CHAR
#define F2C_CHAR(a)
Definition: pblas.h:120
cmplx16
double cmplx16[2]
Definition: pblas.h:133
PB_CargFtoC
void PB_CargFtoC()
PBTYP_T::size
int size
Definition: pblas.h:329
PB_Cchkmat
void PB_Cchkmat()
PB_Cnumroc
int PB_Cnumroc()
PB_CInV
void PB_CInV()
MIN
#define MIN(a_, b_)
Definition: PBtools.h:76
LOWER
#define LOWER
Definition: PBblas.h:51
C2F_CHAR
#define C2F_CHAR(a)
Definition: pblas.h:121
Cblacs_gridinfo
void Cblacs_gridinfo()
PBTYP_T
Definition: pblas.h:325
Mupcase
#define Mupcase(C)
Definition: PBtools.h:83
pblas.h
CUPPER
#define CUPPER
Definition: PBblas.h:26
Mptr
#define Mptr(a_, i_, j_, lda_, siz_)
Definition: PBtools.h:132
CTXT_
#define CTXT_
Definition: PBtools.h:38
IMAG_PART
#define IMAG_PART
Definition: pblas.h:136
PB_Clcm
int PB_Clcm()