ScaLAPACK 2.1  2.1 ScaLAPACK: Scalable Linear Algebra PACKage
pcrot.c
Go to the documentation of this file.
1 /* ---------------------------------------------------------------------
2 *
3 * -- Mark R. Fahey
4 * June 28, 2000
5 *
6 * ---------------------------------------------------------------------
7 */
8 /*
9 * Include files
10 */
11 #include "pblas.h"
12
13 void pcrot_( n, X, ix, jx, desc_X, incx, Y, iy, jy, desc_Y, incy, c, s )
14 /*
15 * Mark Fahey
16 * June 22, 2000
17 */
18 /*
19 * .. Scalar Arguments ..
20 */
21  int * incx, * incy, * ix, * iy, * jx, * jy, * n;
22  float * c;
23  complex * s;
24 /*
25 * ..
26 * .. Array Arguments ..
27 */
28  int desc_X[], desc_Y[];
29  complex X[], Y[];
30 {
31 /*
32 * Purpose
33 * =======
34 *
35 * PCROT applies a plane rotation, where the cos (C) is real and the
36 * sin (S) is complex, and the vectors CX and CY are complex, i.e.,
37 *
38 * [ sub( X ) ] := [ C S ] [ sub( X ) ]
39 * [ sub( Y ) ] := [ -conjg(S) C ] [ sub( Y ) ]
40 *
41 * where sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X,
42 * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X,
43 *
44 * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y,
45 * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y,
46 *
47 * and where C*C + S*CONJG(S) = 1.0.
48 *
49 * Notes
50 * =====
51 *
52 * Each global data object is described by an associated description
53 * vector. This vector stores the information required to establish
54 * the mapping between an object element and its corresponding process
55 * and memory location.
56 *
57 * Let A be a generic term for any 2D block cyclicly distributed array.
58 * Such a global array has an associated description vector DESCA.
59 * In the following comments, the character _ should be read as
60 * "of the global array".
61 *
62 * NOTATION STORED IN EXPLANATION
63 * --------------- -------------- --------------------------------------
64 * DT_A (global) descA[ DT_ ] The descriptor type. In this case,
65 * DT_A = 1.
66 * CTXT_A (global) descA[ CTXT_ ] The BLACS context handle, indicating
67 * the BLACS process grid A is distribu-
68 * ted over. The context itself is glo-
69 * bal, but the handle (the integer
70 * value) may vary.
71 * M_A (global) descA[ M_ ] The number of rows in the global
72 * array A.
73 * N_A (global) descA[ N_ ] The number of columns in the global
74 * array A.
75 * MB_A (global) descA[ MB_ ] The blocking factor used to distribu-
76 * te the rows of the array.
77 * NB_A (global) descA[ NB_ ] The blocking factor used to distribu-
78 * te the columns of the array.
79 * RSRC_A (global) descA[ RSRC_ ] The process row over which the first
80 * row of the array A is distributed.
81 * CSRC_A (global) descA[ CSRC_ ] The process column over which the
82 * first column of the array A is
83 * distributed.
84 * LLD_A (local) descA[ LLD_ ] The leading dimension of the local
85 * array. LLD_A >= MAX(1,LOCr(M_A)).
86 *
87 * Let K be the number of rows or columns of a distributed matrix,
88 * and assume that its process grid has dimension p x q.
89 * LOCr( K ) denotes the number of elements of K that a process
90 * would receive if K were distributed over the p processes of its
91 * process column.
92 * Similarly, LOCc( K ) denotes the number of elements of K that a
93 * process would receive if K were distributed over the q processes of
94 * its process row.
95 * The values of LOCr() and LOCc() may be determined via a call to the
96 * ScaLAPACK tool function, NUMROC:
97 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
98 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
99 * An upper bound for these quantities may be computed by:
100 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
101 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
102 *
103 * Because vectors may be seen as particular matrices, a distributed
104 * vector is considered to be a distributed matrix.
105 *
106 * If INCX = M_X and INCY = M_Y, NB_X must be equal to NB_Y, and the
107 * process column having the first entries of sub( Y ) must also contain
108 * the first entries of sub( X ). Moreover, the quantity
109 * MOD( JX-1, NB_X ) must be equal to MOD( JY-1, NB_Y ).
110 *
111 * If INCX = M_X, INCY = 1 and INCY <> M_Y, NB_X must be equal to MB_Y.
112 * Moreover, the quantity MOD( JX-1, NB_X ) must be equal to
113 * MOD( IY-1, MB_Y ).
114 *
115 * If INCX = 1, INCX <> M_X and INCY = M_Y, MB_X must be equal to NB_Y.
116 * Moreover, the quantity MOD( IX-1, MB_X ) must be equal to
117 * MOD( JY-1, NB_Y ).
118 *
119 * If INCX = 1, INCX <> M_X, INCY = 1 and INCY <> M_Y, MB_X must be
120 * equal to MB_Y, and the process row having the first entries of
121 * sub( Y ) must also contain the first entries of sub( X ). Moreover,
122 * the quantity MOD( IX-1, MB_X ) must be equal to MOD( IY-1, MB_Y ).
123 *
124 * Arguments
125 * =========
126 *
127 * N (input) INTEGER
128 * The number of elements in the vectors CX and CY.
129 *
130 * X (local input) COMPLEX array containing the local
131 * pieces of a distributed matrix of dimension of at least
132 * ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) )
133 * This array contains the entries of the distributed vector
134 * sub( X ).
135 * On output, CX is overwritten with C*X + S*Y.
136 *
137 * IX (global input) pointer to INTEGER
138 * The global row index of the submatrix of the distributed
139 * matrix X to operate on.
140 *
141 * JX (global input) pointer to INTEGER
142 * The global column index of the submatrix of the distributed
143 * matrix X to operate on.
144 *
145 * DESCX (global and local input) INTEGER array of dimension 8.
146 * The array descriptor of the distributed matrix X.
147 *
148 * INCX (global input) pointer to INTEGER
149 * The global increment for the elements of X. Only two values
150 * of INCX are supported in this version, namely 1 and M_X.
151 *
152 * Y (local input) COMPLEX array containing the local
153 * pieces of a distributed matrix of dimension of at least
154 * ( (JY-1)*M_Y + IY + ( N - 1 )*abs( INCY ) )
155 * This array contains the entries of the distributed vector
156 * sub( Y ).
157 * On output, CY is overwritten with -CONJG(S)*X + C*Y.
158 *
159 * IY (global input) pointer to INTEGER
160 * The global row index of the submatrix of the distributed
161 * matrix Y to operate on.
162 *
163 * JY (global input) pointer to INTEGER
164 * The global column index of the submatrix of the distributed
165 * matrix Y to operate on.
166 *
167 * DESCY (global and local input) INTEGER array of dimension 8.
168 * The array descriptor of the distributed matrix Y.
169 *
170 * INCY (global input) pointer to INTEGER
171 * The global increment for the elements of Y. Only two values
172 * of INCY are supported in this version, namely 1 and M_Y.
173 *
174 * C (input) pointer to FLOAT
175 * S (input) pointer COMPLEX
176 * C and S define a rotation
177 * [ C S ]
178 * [ -conjg(S) C ]
179 * where C*C + S*CONJG(S) = 1.0.
180 *
181 * =====================================================================
182 *
183 * .. Local Scalars ..
184 */
185  int ictxt, iix, iiy, info, ixcol, ixrow, iycol, iyrow, jjx,
186  jjy, lcm, lcmp, mycol, myrow, nn, np, np0,
187  nprow, npcol, nq, nz, ione=1, tmp1, wksz;
188  complex xwork[1], ywork[1], zero;
189 /* ..
190 * .. PBLAS Buffer ..
191 */
192  complex * buff;
193 /* ..
194 * .. External Functions ..
195 */
196  void blacs_gridinfo_();
197  void cgerv2d_();
198  void cgesd2d_();
199  void pbchkvect();
200  void PB_Cabort();
201  char * getpbbuf();
202  F_INTG_FCT pbctrnv_();
203  F_INTG_FCT crot_();
204  F_INTG_FCT ilcm_();
205 /* ..
206 * .. Executable Statements ..
207 *
208 * Get grid parameters
209 */
210  ictxt = desc_X[CTXT_];
211  blacs_gridinfo_( &ictxt, &nprow, &npcol, &myrow, &mycol );
212 /*
213 * Test the input parameters
214 */
215  info = 0;
216  if( nprow == -1 )
217  info = -(500+CTXT_+1);
218  else
219  {
220  pbchkvect( *n, 1, *ix, *jx, desc_X, *incx, 5, &iix, &jjx,
221  &ixrow, &ixcol, nprow, npcol, myrow, mycol, &info );
222  pbchkvect( *n, 1, *iy, *jy, desc_Y, *incy, 10, &iiy, &jjy,
223  &iyrow, &iycol, nprow, npcol, myrow, mycol, &info );
224
225  if( info == 0 )
226  {
227  if( *n != 1 )
228  {
229  if( *incx == desc_X[M_] )
230  { /* X is distributed along a process row */
231  if( *incy == desc_Y[M_] )
232  { /* Y is distributed over a process row */
233  if( ( ixcol != iycol ) ||
234  ( ( (*jx-1) % desc_X[NB_] ) !=
235  ( (*jy-1) % desc_Y[NB_] ) ) )
236  info = -9;
237  else if( desc_Y[NB_] != desc_X[NB_] )
238  info = -(1000+NB_+1);
239  }
240  else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
241  { /* Y is distributed over a process column */
242  if( ( (*jx-1) % desc_X[NB_] ) != ( (*iy-1) % desc_Y[MB_] ) )
243  info = -8;
244  else if( desc_Y[MB_] != desc_X[NB_] )
245  info = -(1000+MB_+1);
246  }
247  else
248  {
249  info = -11;
250  }
251  }
252  else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) )
253  { /* X is distributed along a process column */
254  if( *incy == desc_Y[M_] )
255  { /* Y is distributed over a process row */
256  if( ( (*ix-1) % desc_X[MB_] ) != ( (*jy-1) % desc_Y[NB_] ) )
257  info = -9;
258  else if( desc_Y[NB_] != desc_X[MB_] )
259  info = -(1000+NB_+1);
260  }
261  else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
262  { /* Y is distributed over a process column */
263  if( ( ixrow != iyrow ) ||
264  ( ( (*ix-1) % desc_X[MB_] ) !=
265  ( (*iy-1) % desc_Y[MB_] ) ) )
266  info = -8;
267  else if( desc_Y[MB_] != desc_X[MB_] )
268  info = -(1000+MB_+1);
269  }
270  else
271  {
272  info = -11;
273  }
274  }
275  else
276  {
277  info = -6;
278  }
279  }
280  if( ictxt != desc_Y[CTXT_] )
281  info = -(1000+CTXT_+1);
282  }
283  }
284  if( info ) { PB_Cabort( ictxt, "PCROT", info ); return; }
285 /*
286  if( info )
287  {
288  pberror_( &ictxt, "PCROT", &info );
289  return;
290  }
291 */
292 /*
293 * Quick return if possible.
294 */
295  zero.re = ZERO;
296  zero.im = ZERO;
297  if( *n == 0 ) return;
298 /*
299 * rotation
300 */
301  if( *n == 1 )
302  {
303  if( ( myrow == ixrow ) && ( mycol == ixcol ) )
304  {
305  buff = &X[iix-1+(jjx-1)*desc_X[LLD_]];
306  if( ( myrow != iyrow ) || ( mycol != iycol ) )
307  {
308  cgesd2d_( &ictxt, n, n, buff, n, &iyrow, &iycol );
309  cgerv2d_( &ictxt, n, n, ywork, n, &iyrow, &iycol );
310  }
311  else
312  *ywork = Y[iiy-1+(jjy-1)*desc_Y[LLD_]];
313  crot_( n, buff, n, ywork, n, c, s );
314  X[iix-1+(jjx-1)*desc_X[LLD_]] = *buff;
315  if( ( myrow == iyrow ) && ( mycol == iycol ) )
316  Y[iiy-1+(jjy-1)*desc_Y[LLD_]] = *ywork;
317  }
318  else if( ( myrow == iyrow ) && ( mycol == iycol ) )
319  {
320  cgesd2d_( &ictxt, n, n, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], n,
321  &ixrow, &ixcol );
322  cgerv2d_( &ictxt, n, n, xwork, n, &ixrow, &ixcol );
323  crot_( n, xwork, n, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], n, c, s );
324  }
325  return;
326  }
327
328  if( ( *incx == desc_X[M_] ) && ( *incy == desc_Y[M_] ) )
329  { /* X and Y are both distributed over a process row */
330  nz = (*jx-1) % desc_Y[NB_];
331  nn = *n + nz;
332  nq = numroc_( &nn, &desc_X[NB_], &mycol, &ixcol, &npcol );
333  if( mycol == ixcol )
334  nq -= nz;
335  if( ixrow == iyrow )
336  {
337  if( myrow == ixrow )
338  {
339  crot_( &nq, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
340  &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], c, s );
341  }
342  }
343  else
344  {
345  if( myrow == ixrow )
346  {
347  cgesd2d_( &ictxt, &ione, &nq,
348  &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
349  &iyrow, &mycol );
350  buff = (complex *)getpbbuf( "PCROT", nq*sizeof(complex) );
351  cgerv2d_( &ictxt, &nq, &ione, buff, &nq, &iyrow, &mycol );
352  crot_( &nq, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
353  buff, &ione, c, s );
354  }
355  else if( myrow == iyrow )
356  {
357  cgesd2d_( &ictxt, &ione, &nq,
358  &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_],
359  &ixrow, &mycol );
360  buff = (complex *)getpbbuf( "PCROT", nq*sizeof(complex) );
361  cgerv2d_( &ictxt, &nq, &ione, buff, &nq, &ixrow, &mycol );
362  crot_( &nq, buff, &ione,
363  &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], c, s );
364  }
365  }
366  }
367  else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) &&
368  ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
369  { /* X and Y are both distributed over a process column */
370  nz = (*ix-1) % desc_X[MB_];
371  nn = *n + nz;
372  np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow );
373  if( myrow == ixrow )
374  np -= nz;
375  if( ixcol == iycol )
376  {
377  if( mycol == ixcol )
378  {
379  crot_( &np, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx,
380  &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy, c, s );
381  }
382  }
383  else
384  {
385  if( mycol == ixcol )
386  {
387  cgesd2d_( &ictxt, &np, &ione,
388  &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
389  &myrow, &iycol );
390  buff = (complex *)getpbbuf( "PCROT", np*sizeof(complex) );
391  cgerv2d_( &ictxt, &np, &ione, buff, &np, &myrow, &iycol );
392  crot_( &np, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx,
393  buff, &ione, c, s );
394  }
395  else if( mycol == iycol )
396  {
397  cgesd2d_( &ictxt, &np, &ione,
398  &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_],
399  &myrow, &ixcol );
400  buff = (complex *)getpbbuf( "PCROT", np*sizeof(complex) );
401  cgerv2d_( &ictxt, &np, &ione, buff, &np, &myrow, &ixcol );
402  crot_( &np, buff, &ione,
403  &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy, c, s );
404  }
405  }
406  }
407  else /* X and Y are not distributed along the same direction */
408  {
409  lcm = ilcm_( &nprow, &npcol );
410  if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) )
411  { /* X is distributed over a process column */
412  lcmp = lcm / nprow;
413  nz = (*jy-1) % desc_Y[NB_];
414  nn = *n + nz;
415  tmp1 = nn / desc_Y[MB_];
416  np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow );
417  np0 = MYROC0( tmp1, nn, desc_X[MB_], nprow );
418  tmp1 = np0 / desc_X[MB_];
419  wksz = MYROC0( tmp1, np0, desc_X[MB_], lcmp );
420  wksz = np + wksz;
421
422  buff = (complex *)getpbbuf( "PCROT", wksz*sizeof(complex) );
423
424  if( mycol == iycol )
425  jjy -= nz;
426  if( myrow == ixrow )
427  np -= nz;
428  pbctrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n,
429  &desc_Y[NB_], &nz, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]],
430  &desc_Y[LLD_], &zero, buff, &ione, &iyrow, &iycol,
431  &ixrow, &ixcol, buff+np );
432  if( mycol == ixcol )
433  {
434  crot_( &np, &X[iix-1+(jjx-1)*desc_X[LLD_]],
435  incx, buff, &ione, c, s );
436  }
437  pbctrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n,
438  &desc_Y[NB_], &nz, buff, &ione, &zero,
439  &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_],
440  &ixrow, &ixcol, &iyrow, &iycol, buff+np );
441  }
442  else /* Y is distributed over a process column */
443  {
444  lcmp = lcm / nprow;
445  nz = (*jx-1) % desc_X[NB_];
446  nn = *n + nz;
447  tmp1 = nn / desc_X[MB_];
448  np = numroc_( &nn, desc_Y+MB_, &myrow, &iyrow, &nprow );
449  np0 = MYROC0( tmp1, nn, desc_Y[MB_], nprow );
450  tmp1 = np0 / desc_Y[MB_];
451  wksz = MYROC0( tmp1, np0, desc_Y[MB_], lcmp );
452  wksz = np + wksz;
453
454  buff = (complex *)getpbbuf( "PCROT", wksz*sizeof(complex) );
455
456  if( myrow == iyrow )
457  np -= nz;
458  pbctrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n,
459  &desc_X[NB_], &nz, &X[iix-1+(jjx-1)*desc_X[LLD_]],
460  &desc_X[LLD_], &zero, buff, &ione, &ixrow, &ixcol,
461  &iyrow, &iycol, buff+np );
462  if( mycol == iycol )
463  {
464  crot_( &np, buff, &ione,
465  &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy, c, s );
466  }
467  pbctrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n,
468  &desc_X[NB_], &nz, buff, &ione, &zero,
469  &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
470  &iyrow, &iycol, &ixrow, &ixcol, buff+np );
471  }
472  }
473 }
M_
#define M_
Definition: PBtools.h:39
complex::im
float im
Definition: pblas.h:92
getpbbuf
char * getpbbuf(char *mess, int length)
Definition: getpbbuf.c:3
MB_
#define MB_
Definition: PBtools.h:43
NB_
#define NB_
Definition: PBtools.h:44
crot_
#define crot_
Definition: pblas.h:474
cgesd2d_
F_VOID_FUNC cgesd2d_(int *ConTxt, int *m, int *n, float *A, int *lda, int *rdest, int *cdest)
Definition: cgesd2d_.c:7
LLD_
#define LLD_
Definition: PBtools.h:47
ZERO
#define ZERO
Definition: PBtools.h:66
blacs_gridinfo_
F_VOID_FUNC blacs_gridinfo_(int *ConTxt, int *nprow, int *npcol, int *myrow, int *mycol)
Definition: blacs_info_.c:6
pbchkvect
void pbchkvect(int n, int npos0, int ix, int jx, desc_X, int incx, int dpos0, int *iix, int *jjx, int *ixrow, int *ixcol, int nprow, int npcol, int myrow, int mycol, int *info)
Definition: pbchkvect.c:15
PB_Cabort
void PB_Cabort()
pcrot_
void pcrot_(int *n, X, int *ix, int *jx, desc_X, int *incx, Y, int *iy, int *jy, desc_Y, int *incy, float *c, complex *s)
Definition: pcrot.c:13
pblas.h
F_INTG_FCT
#define F_INTG_FCT
Definition: pblas.h:124
C2F_CHAR
#define C2F_CHAR(a)
Definition: pblas.h:121
MYROC0
#define MYROC0(nblocks, n, nb, nprocs)
Definition: pblas.h:191
complex::re
float re
Definition: pblas.h:92
CTXT_
#define CTXT_
Definition: PBtools.h:38
cgerv2d_
F_VOID_FUNC cgerv2d_(int *ConTxt, int *m, int *n, float *A, int *lda, int *rsrc, int *csrc)
Definition: cgerv2d_.c:6
complex
Definition: pcgemr.c:160