ScaLAPACK 2.1  2.1 ScaLAPACK: Scalable Linear Algebra PACKage
pzlarfg.f
Go to the documentation of this file.
1  SUBROUTINE pzlarfg( N, ALPHA, IAX, JAX, X, IX, JX, DESCX, INCX,
2  \$ TAU )
3 *
4 * -- ScaLAPACK auxiliary routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 1, 1997
8 *
9 * .. Scalar Arguments ..
10  INTEGER IAX, INCX, IX, JAX, JX, N
11  COMPLEX*16 ALPHA
12 * ..
13 * .. Array Arguments ..
14  INTEGER DESCX( * )
15  COMPLEX*16 TAU( * ), X( * )
16 * ..
17 *
18 * Purpose
19 * =======
20 *
21 * PZLARFG generates a complex elementary reflector H of order n, such
22 * that
23 *
24 * H * sub( X ) = H * ( x(iax,jax) ) = ( alpha ), H' * H = I.
25 * ( x ) ( 0 )
26 *
27 * where alpha is a real scalar, and sub( X ) is an (N-1)-element
28 * complex distributed vector X(IX:IX+N-2,JX) if INCX = 1 and
29 * X(IX,JX:JX+N-2) if INCX = DESCX(M_). H is represented in the form
30 *
31 * H = I - tau * ( 1 ) * ( 1 v' ) ,
32 * ( v )
33 *
34 * where tau is a complex scalar and v is a complex (N-1)-element
35 * vector. Note that H is not Hermitian.
36 *
37 * If the elements of sub( X ) are all zero and X(IAX,JAX) is real,
38 * then tau = 0 and H is taken to be the unit matrix.
39 *
40 * Otherwise 1 <= real(tau) <= 2 and abs(tau-1) <= 1.
41 *
42 * Notes
43 * =====
44 *
45 * Each global data object is described by an associated description
46 * vector. This vector stores the information required to establish
47 * the mapping between an object element and its corresponding process
48 * and memory location.
49 *
50 * Let A be a generic term for any 2D block cyclicly distributed array.
51 * Such a global array has an associated description vector DESCA.
52 * In the following comments, the character _ should be read as
53 * "of the global array".
54 *
55 * NOTATION STORED IN EXPLANATION
56 * --------------- -------------- --------------------------------------
57 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
58 * DTYPE_A = 1.
59 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
60 * the BLACS process grid A is distribu-
61 * ted over. The context itself is glo-
62 * bal, but the handle (the integer
63 * value) may vary.
64 * M_A (global) DESCA( M_ ) The number of rows in the global
65 * array A.
66 * N_A (global) DESCA( N_ ) The number of columns in the global
67 * array A.
68 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
69 * the rows of the array.
70 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
71 * the columns of the array.
72 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
73 * row of the array A is distributed.
74 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
75 * first column of the array A is
76 * distributed.
77 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
78 * array. LLD_A >= MAX(1,LOCr(M_A)).
79 *
80 * Let K be the number of rows or columns of a distributed matrix,
81 * and assume that its process grid has dimension p x q.
82 * LOCr( K ) denotes the number of elements of K that a process
83 * would receive if K were distributed over the p processes of its
84 * process column.
85 * Similarly, LOCc( K ) denotes the number of elements of K that a
86 * process would receive if K were distributed over the q processes of
87 * its process row.
88 * The values of LOCr() and LOCc() may be determined via a call to the
89 * ScaLAPACK tool function, NUMROC:
90 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
91 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
92 * An upper bound for these quantities may be computed by:
93 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
94 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
95 *
96 * Because vectors may be viewed as a subclass of matrices, a
97 * distributed vector is considered to be a distributed matrix.
98 *
99 * Arguments
100 * =========
101 *
102 * N (global input) INTEGER
103 * The global order of the elementary reflector. N >= 0.
104 *
105 * ALPHA (local output) COMPLEX*16
106 * On exit, alpha is computed in the process scope having the
107 * vector sub( X ).
108 *
109 * IAX (global input) INTEGER
110 * The global row index in X of X(IAX,JAX).
111 *
112 * JAX (global input) INTEGER
113 * The global column index in X of X(IAX,JAX).
114 *
115 * X (local input/local output) COMPLEX*16, pointer into the
116 * local memory to an array of dimension (LLD_X,*). This array
117 * contains the local pieces of the distributed vector sub( X ).
118 * Before entry, the incremented array sub( X ) must contain
119 * the vector x. On exit, it is overwritten with the vector v.
120 *
121 * IX (global input) INTEGER
122 * The row index in the global array X indicating the first
123 * row of sub( X ).
124 *
125 * JX (global input) INTEGER
126 * The column index in the global array X indicating the
127 * first column of sub( X ).
128 *
129 * DESCX (global and local input) INTEGER array of dimension DLEN_.
130 * The array descriptor for the distributed matrix X.
131 *
132 * INCX (global input) INTEGER
133 * The global increment for the elements of X. Only two values
134 * of INCX are supported in this version, namely 1 and M_X.
135 * INCX must not be zero.
136 *
137 * TAU (local output) COMPLEX*16, array, dimension LOCc(JX)
138 * if INCX = 1, and LOCr(IX) otherwise. This array contains the
139 * Householder scalars related to the Householder vectors.
140 * TAU is tied to the distributed matrix X.
141 *
142 * =====================================================================
143 *
144 * .. Parameters ..
145  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
146  \$ lld_, mb_, m_, nb_, n_, rsrc_
147  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
148  \$ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
149  \$ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
150  DOUBLE PRECISION ONE, ZERO
151  parameter( one = 1.0d+0, zero = 0.0d+0 )
152 * ..
153 * .. Local Scalars ..
154  INTEGER ICTXT, IIAX, INDXTAU, IXCOL, IXROW, J, JJAX,
155  \$ knt, mycol, myrow, npcol, nprow
156  DOUBLE PRECISION ALPHI, ALPHR, BETA, RSAFMN, SAFMIN, XNORM
157 * ..
158 * .. External Subroutines ..
159  EXTERNAL blacs_gridinfo, infog2l, pdznrm2,
160  \$ zgebr2d, zgebs2d, pzscal,
161  \$ pzdscal
162 * ..
163 * .. External Functions ..
164  DOUBLE PRECISION DLAMCH, DLAPY3
167 * ..
168 * .. Intrinsic Functions ..
169  INTRINSIC abs, dble, dcmplx, dimag, sign
170 * ..
171 * .. Executable Statements ..
172 *
173 * Get grid parameters.
174 *
175  ictxt = descx( ctxt_ )
176  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
177 *
178  IF( incx.EQ.descx( m_ ) ) THEN
179 *
180 * sub( X ) is distributed across a process row.
181 *
182  CALL infog2l( ix, jax, descx, nprow, npcol, myrow, mycol,
183  \$ iiax, jjax, ixrow, ixcol )
184 *
185  IF( myrow.NE.ixrow )
186  \$ RETURN
187 *
188 * Broadcast X(IAX,JAX) across the process row.
189 *
190  IF( mycol.EQ.ixcol ) THEN
191  j = iiax+(jjax-1)*descx( lld_ )
192  CALL zgebs2d( ictxt, 'Rowwise', ' ', 1, 1, x( j ), 1 )
193  alpha = x( j )
194  ELSE
195  CALL zgebr2d( ictxt, 'Rowwise', ' ', 1, 1, alpha, 1,
196  \$ myrow, ixcol )
197  END IF
198 *
199  indxtau = iiax
200 *
201  ELSE
202 *
203 * sub( X ) is distributed across a process column.
204 *
205  CALL infog2l( iax, jx, descx, nprow, npcol, myrow, mycol,
206  \$ iiax, jjax, ixrow, ixcol )
207 *
208  IF( mycol.NE.ixcol )
209  \$ RETURN
210 *
211 * Broadcast X(IAX,JAX) across the process column.
212 *
213  IF( myrow.EQ.ixrow ) THEN
214  j = iiax+(jjax-1)*descx( lld_ )
215  CALL zgebs2d( ictxt, 'Columnwise', ' ', 1, 1, x( j ), 1 )
216  alpha = x( j )
217  ELSE
218  CALL zgebr2d( ictxt, 'Columnwise', ' ', 1, 1, alpha, 1,
219  \$ ixrow, mycol )
220  END IF
221 *
222  indxtau = jjax
223 *
224  END IF
225 *
226  IF( n.LE.0 ) THEN
227  tau( indxtau ) = zero
228  RETURN
229  END IF
230 *
231  CALL pdznrm2( n-1, xnorm, x, ix, jx, descx, incx )
232  alphr = dble( alpha )
233  alphi = dimag( alpha )
234 *
235  IF( xnorm.EQ.zero .AND. alphi.EQ.zero ) THEN
236 *
237 * H = I
238 *
239  tau( indxtau ) = zero
240 *
241  ELSE
242 *
243 * General case
244 *
245  beta = -sign( dlapy3( alphr, alphi, xnorm ), alphr )
246  safmin = dlamch( 'S' )
247  rsafmn = one / safmin
248  IF( abs( beta ).LT.safmin ) THEN
249 *
250 * XNORM, BETA may be inaccurate; scale X and recompute them
251 *
252  knt = 0
253  10 CONTINUE
254  knt = knt + 1
255  CALL pzdscal( n-1, rsafmn, x, ix, jx, descx, incx )
256  beta = beta*rsafmn
257  alphi = alphi*rsafmn
258  alphr = alphr*rsafmn
259  IF( abs( beta ).LT.safmin )
260  \$ GO TO 10
261 *
262 * New BETA is at most 1, at least SAFMIN
263 *
264  CALL pdznrm2( n-1, xnorm, x, ix, jx, descx, incx )
265  alpha = dcmplx( alphr, alphi )
266  beta = -sign( dlapy3( alphr, alphi, xnorm ), alphr )
267  tau( indxtau ) = dcmplx( ( beta-alphr ) / beta,
268  \$ -alphi / beta )
269  alpha = zladiv( dcmplx( one ), alpha-beta )
270  CALL pzscal( n-1, alpha, x, ix, jx, descx, incx )
271 *
272 * If ALPHA is subnormal, it may lose relative accuracy
273 *
274  alpha = beta
275  DO 20 j = 1, knt
276  alpha = alpha*safmin
277  20 CONTINUE
278  ELSE
279  tau( indxtau ) = dcmplx( ( beta-alphr ) / beta,
280  \$ -alphi / beta )
281  alpha = zladiv( dcmplx( one ), alpha-beta )
282  CALL pzscal( n-1, alpha, x, ix, jx, descx, incx )
283  alpha = beta
284  END IF
285  END IF
286 *
287  RETURN
288 *
289 * End of PZLARFG
290 *
291  END
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pzlarfg
subroutine pzlarfg(N, ALPHA, IAX, JAX, X, IX, JX, DESCX, INCX, TAU)
Definition: pzlarfg.f:3