ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pclarfg.f
Go to the documentation of this file.
1  SUBROUTINE pclarfg( N, ALPHA, IAX, JAX, X, IX, JX, DESCX, INCX,
2  $ TAU )
3 *
4 * -- ScaLAPACK auxiliary routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 1, 1997
8 *
9 * .. Scalar Arguments ..
10  INTEGER IAX, INCX, IX, JAX, JX, N
11  COMPLEX ALPHA
12 * ..
13 * .. Array Arguments ..
14  INTEGER DESCX( * )
15  COMPLEX TAU( * ), X( * )
16 * ..
17 *
18 * Purpose
19 * =======
20 *
21 * PCLARFG generates a complex elementary reflector H of order n, such
22 * that
23 *
24 * H * sub( X ) = H * ( x(iax,jax) ) = ( alpha ), H' * H = I.
25 * ( x ) ( 0 )
26 *
27 * where alpha is a real scalar, and sub( X ) is an (N-1)-element
28 * complex distributed vector X(IX:IX+N-2,JX) if INCX = 1 and
29 * X(IX,JX:JX+N-2) if INCX = DESCX(M_). H is represented in the form
30 *
31 * H = I - tau * ( 1 ) * ( 1 v' ) ,
32 * ( v )
33 *
34 * where tau is a complex scalar and v is a complex (N-1)-element
35 * vector. Note that H is not Hermitian.
36 *
37 * If the elements of sub( X ) are all zero and X(IAX,JAX) is real,
38 * then tau = 0 and H is taken to be the unit matrix.
39 *
40 * Otherwise 1 <= real(tau) <= 2 and abs(tau-1) <= 1.
41 *
42 * Notes
43 * =====
44 *
45 * Each global data object is described by an associated description
46 * vector. This vector stores the information required to establish
47 * the mapping between an object element and its corresponding process
48 * and memory location.
49 *
50 * Let A be a generic term for any 2D block cyclicly distributed array.
51 * Such a global array has an associated description vector DESCA.
52 * In the following comments, the character _ should be read as
53 * "of the global array".
54 *
55 * NOTATION STORED IN EXPLANATION
56 * --------------- -------------- --------------------------------------
57 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
58 * DTYPE_A = 1.
59 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
60 * the BLACS process grid A is distribu-
61 * ted over. The context itself is glo-
62 * bal, but the handle (the integer
63 * value) may vary.
64 * M_A (global) DESCA( M_ ) The number of rows in the global
65 * array A.
66 * N_A (global) DESCA( N_ ) The number of columns in the global
67 * array A.
68 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
69 * the rows of the array.
70 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
71 * the columns of the array.
72 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
73 * row of the array A is distributed.
74 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
75 * first column of the array A is
76 * distributed.
77 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
78 * array. LLD_A >= MAX(1,LOCr(M_A)).
79 *
80 * Let K be the number of rows or columns of a distributed matrix,
81 * and assume that its process grid has dimension p x q.
82 * LOCr( K ) denotes the number of elements of K that a process
83 * would receive if K were distributed over the p processes of its
84 * process column.
85 * Similarly, LOCc( K ) denotes the number of elements of K that a
86 * process would receive if K were distributed over the q processes of
87 * its process row.
88 * The values of LOCr() and LOCc() may be determined via a call to the
89 * ScaLAPACK tool function, NUMROC:
90 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
91 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
92 * An upper bound for these quantities may be computed by:
93 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
94 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
95 *
96 * Because vectors may be viewed as a subclass of matrices, a
97 * distributed vector is considered to be a distributed matrix.
98 *
99 * Arguments
100 * =========
101 *
102 * N (global input) INTEGER
103 * The global order of the elementary reflector. N >= 0.
104 *
105 * ALPHA (local output) COMPLEX
106 * On exit, alpha is computed in the process scope having the
107 * vector sub( X ).
108 *
109 * IAX (global input) INTEGER
110 * The global row index in X of X(IAX,JAX).
111 *
112 * JAX (global input) INTEGER
113 * The global column index in X of X(IAX,JAX).
114 *
115 * X (local input/local output) COMPLEX, pointer into the
116 * local memory to an array of dimension (LLD_X,*). This array
117 * contains the local pieces of the distributed vector sub( X ).
118 * Before entry, the incremented array sub( X ) must contain
119 * the vector x. On exit, it is overwritten with the vector v.
120 *
121 * IX (global input) INTEGER
122 * The row index in the global array X indicating the first
123 * row of sub( X ).
124 *
125 * JX (global input) INTEGER
126 * The column index in the global array X indicating the
127 * first column of sub( X ).
128 *
129 * DESCX (global and local input) INTEGER array of dimension DLEN_.
130 * The array descriptor for the distributed matrix X.
131 *
132 * INCX (global input) INTEGER
133 * The global increment for the elements of X. Only two values
134 * of INCX are supported in this version, namely 1 and M_X.
135 * INCX must not be zero.
136 *
137 * TAU (local output) COMPLEX, array, dimension LOCc(JX)
138 * if INCX = 1, and LOCr(IX) otherwise. This array contains the
139 * Householder scalars related to the Householder vectors.
140 * TAU is tied to the distributed matrix X.
141 *
142 * =====================================================================
143 *
144 * .. Parameters ..
145  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
146  $ lld_, mb_, m_, nb_, n_, rsrc_
147  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
148  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
149  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
150  REAL ONE, ZERO
151  parameter( one = 1.0e+0, zero = 0.0e+0 )
152 * ..
153 * .. Local Scalars ..
154  INTEGER ICTXT, IIAX, INDXTAU, IXCOL, IXROW, J, JJAX,
155  $ knt, mycol, myrow, npcol, nprow
156  REAL ALPHI, ALPHR, BETA, RSAFMN, SAFMIN, XNORM
157 * ..
158 * .. External Subroutines ..
159  EXTERNAL blacs_gridinfo, cgebr2d, cgebs2d, pcscal,
160  $ pcsscal, infog2l, pscnrm2
161 * ..
162 * .. External Functions ..
163  REAL SLAMCH, SLAPY3
164  COMPLEX CLADIV
165  EXTERNAL cladiv, slapy3, slamch
166 * ..
167 * .. Intrinsic Functions ..
168  INTRINSIC abs, aimag, cmplx, real, sign
169 * ..
170 * .. Executable Statements ..
171 *
172 * Get grid parameters.
173 *
174  ictxt = descx( ctxt_ )
175  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
176 *
177  IF( incx.EQ.descx( m_ ) ) THEN
178 *
179 * sub( X ) is distributed across a process row.
180 *
181  CALL infog2l( ix, jax, descx, nprow, npcol, myrow, mycol,
182  $ iiax, jjax, ixrow, ixcol )
183 *
184  IF( myrow.NE.ixrow )
185  $ RETURN
186 *
187 * Broadcast X(IAX,JAX) across the process row.
188 *
189  IF( mycol.EQ.ixcol ) THEN
190  j = iiax+(jjax-1)*descx( lld_ )
191  CALL cgebs2d( ictxt, 'Rowwise', ' ', 1, 1, x( j ), 1 )
192  alpha = x( j )
193  ELSE
194  CALL cgebr2d( ictxt, 'Rowwise', ' ', 1, 1, alpha, 1,
195  $ myrow, ixcol )
196  END IF
197 *
198  indxtau = iiax
199 *
200  ELSE
201 *
202 * sub( X ) is distributed across a process column.
203 *
204  CALL infog2l( iax, jx, descx, nprow, npcol, myrow, mycol,
205  $ iiax, jjax, ixrow, ixcol )
206 *
207  IF( mycol.NE.ixcol )
208  $ RETURN
209 *
210 * Broadcast X(IAX,JAX) across the process column.
211 *
212  IF( myrow.EQ.ixrow ) THEN
213  j = iiax+(jjax-1)*descx( lld_ )
214  CALL cgebs2d( ictxt, 'Columnwise', ' ', 1, 1, x( j ), 1 )
215  alpha = x( j )
216  ELSE
217  CALL cgebr2d( ictxt, 'Columnwise', ' ', 1, 1, alpha, 1,
218  $ ixrow, mycol )
219  END IF
220 *
221  indxtau = jjax
222 *
223  END IF
224 *
225  IF( n.LE.0 ) THEN
226  tau( indxtau ) = zero
227  RETURN
228  END IF
229 *
230  CALL pscnrm2( n-1, xnorm, x, ix, jx, descx, incx )
231  alphr = real( alpha )
232  alphi = aimag( alpha )
233 *
234  IF( xnorm.EQ.zero .AND. alphi.EQ.zero ) THEN
235 *
236 * H = I
237 *
238  tau( indxtau ) = zero
239 *
240  ELSE
241 *
242 * General case
243 *
244  beta = -sign( slapy3( alphr, alphi, xnorm ), alphr )
245  safmin = slamch( 'S' )
246  rsafmn = one / safmin
247  IF( abs( beta ).LT.safmin ) THEN
248 *
249 * XNORM, BETA may be inaccurate; scale X and recompute them
250 *
251  knt = 0
252  10 CONTINUE
253  knt = knt + 1
254  CALL pcsscal( n-1, rsafmn, x, ix, jx, descx, incx )
255  beta = beta*rsafmn
256  alphi = alphi*rsafmn
257  alphr = alphr*rsafmn
258  IF( abs( beta ).LT.safmin )
259  $ GO TO 10
260 *
261 * New BETA is at most 1, at least SAFMIN
262 *
263  CALL pscnrm2( n-1, xnorm, x, ix, jx, descx, incx )
264  alpha = cmplx( alphr, alphi )
265  beta = -sign( slapy3( alphr, alphi, xnorm ), alphr )
266  tau( indxtau ) = cmplx( ( beta-alphr ) / beta,
267  $ -alphi / beta )
268  alpha = cladiv( cmplx( one ), alpha-beta )
269  CALL pcscal( n-1, alpha, x, ix, jx, descx, incx )
270 *
271 * If ALPHA is subnormal, it may lose relative accuracy
272 *
273  alpha = beta
274  DO 20 j = 1, knt
275  alpha = alpha*safmin
276  20 CONTINUE
277  ELSE
278  tau( indxtau ) = cmplx( ( beta-alphr ) / beta,
279  $ -alphi / beta )
280  alpha = cladiv( cmplx( one ), alpha-beta )
281  CALL pcscal( n-1, alpha, x, ix, jx, descx, incx )
282  alpha = beta
283  END IF
284  END IF
285 *
286  RETURN
287 *
288 * End of PCLARFG
289 *
290  END
cmplx
float cmplx[2]
Definition: pblas.h:132
pclarfg
subroutine pclarfg(N, ALPHA, IAX, JAX, X, IX, JX, DESCX, INCX, TAU)
Definition: pclarfg.f:3
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3