ScaLAPACK 2.1  2.1 ScaLAPACK: Scalable Linear Algebra PACKage
pclahrd.f
Go to the documentation of this file.
1  SUBROUTINE pclahrd( N, K, NB, A, IA, JA, DESCA, TAU, T, Y, IY, JY,
2  \$ DESCY, WORK )
3 *
4 * -- ScaLAPACK auxiliary routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 1, 1997
8 *
9 * .. Scalar Arguments ..
10  INTEGER IA, IY, JA, JY, K, N, NB
11 * ..
12 * .. Array Arguments ..
13  INTEGER DESCA( * ), DESCY( * )
14  COMPLEX A( * ), T( * ), TAU( * ), WORK( * ), Y( * )
15 * ..
16 *
17 * Purpose
18 * =======
19 *
20 * PCLAHRD reduces the first NB columns of a complex general
21 * N-by-(N-K+1) distributed matrix A(IA:IA+N-1,JA:JA+N-K) so that
22 * elements below the k-th subdiagonal are zero. The reduction is
23 * performed by an unitary similarity transformation Q' * A * Q. The
24 * routine returns the matrices V and T which determine Q as a block
25 * reflector I - V*T*V', and also the matrix Y = A * V * T.
26 *
27 * This is an auxiliary routine called by PCGEHRD. In the following
28 * comments sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1).
29 *
30 * Arguments
31 * =========
32 *
33 * N (global input) INTEGER
34 * The number of rows and columns to be operated on, i.e. the
35 * order of the distributed submatrix sub( A ).
36 * N >= 0.
37 *
38 * K (global input) INTEGER
39 * The offset for the reduction. Elements below the k-th
40 * subdiagonal in the first NB columns are reduced to zero.
41 *
42 * NB (global input) INTEGER
43 * The number of columns to be reduced.
44 *
45 * A (local input/local output) COMPLEX pointer into
46 * the local memory to an array of dimension (LLD_A,
47 * LOCc(JA+N-K)). On entry, this array contains the the local
48 * pieces of the N-by-(N-K+1) general distributed matrix
49 * A(IA:IA+N-1,JA:JA+N-K). On exit, the elements on and above
50 * the k-th subdiagonal in the first NB columns are overwritten
51 * with the corresponding elements of the reduced distributed
52 * matrix; the elements below the k-th subdiagonal, with the
53 * array TAU, represent the matrix Q as a product of elementary
54 * reflectors. The other columns of A(IA:IA+N-1,JA:JA+N-K) are
55 * unchanged. See Further Details.
56 *
57 * IA (global input) INTEGER
58 * The row index in the global array A indicating the first
59 * row of sub( A ).
60 *
61 * JA (global input) INTEGER
62 * The column index in the global array A indicating the
63 * first column of sub( A ).
64 *
65 * DESCA (global and local input) INTEGER array of dimension DLEN_.
66 * The array descriptor for the distributed matrix A.
67 *
68 * TAU (local output) COMPLEX array, dimension LOCc(JA+N-2)
69 * The scalar factors of the elementary reflectors (see Further
70 * Details). TAU is tied to the distributed matrix A.
71 *
72 * T (local output) COMPLEX array, dimension (NB_A,NB_A)
73 * The upper triangular matrix T.
74 *
75 * Y (local output) COMPLEX pointer into the local memory
76 * to an array of dimension (LLD_Y,NB_A). On exit, this array
77 * contains the local pieces of the N-by-NB distributed
78 * matrix Y. LLD_Y >= LOCr(IA+N-1).
79 *
80 * IY (global input) INTEGER
81 * The row index in the global array Y indicating the first
82 * row of sub( Y ).
83 *
84 * JY (global input) INTEGER
85 * The column index in the global array Y indicating the
86 * first column of sub( Y ).
87 *
88 * DESCY (global and local input) INTEGER array of dimension DLEN_.
89 * The array descriptor for the distributed matrix Y.
90 *
91 * WORK (local workspace) COMPLEX array, dimension (NB)
92 *
93 * Further Details
94 * ===============
95 *
96 * The matrix Q is represented as a product of nb elementary reflectors
97 *
98 * Q = H(1) H(2) . . . H(nb).
99 *
100 * Each H(i) has the form
101 *
102 * H(i) = I - tau * v * v'
103 *
104 * where tau is a complex scalar, and v is a complex vector with
105 * v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in
106 * A(ia+i+k:ia+n-1,ja+i-1), and tau in TAU(ja+i-1).
107 *
108 * The elements of the vectors v together form the (n-k+1)-by-nb matrix
109 * V which is needed, with T and Y, to apply the transformation to the
110 * unreduced part of the matrix, using an update of the form:
111 * A(ia:ia+n-1,ja:ja+n-k) := (I-V*T*V')*(A(ia:ia+n-1,ja:ja+n-k)-Y*V').
112 *
113 * The contents of A(ia:ia+n-1,ja:ja+n-k) on exit are illustrated by the
114 * following example with n = 7, k = 3 and nb = 2:
115 *
116 * ( a h a a a )
117 * ( a h a a a )
118 * ( a h a a a )
119 * ( h h a a a )
120 * ( v1 h a a a )
121 * ( v1 v2 a a a )
122 * ( v1 v2 a a a )
123 *
124 * where a denotes an element of the original matrix
125 * A(ia:ia+n-1,ja:ja+n-k), h denotes a modified element of the upper
126 * Hessenberg matrix H, and vi denotes an element of the vector
127 * defining H(i).
128 *
129 * =====================================================================
130 *
131 * .. Parameters ..
132  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
133  \$ lld_, mb_, m_, nb_, n_, rsrc_
134  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
135  \$ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
136  \$ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
137  COMPLEX ONE, ZERO
138  parameter( one = ( 1.0e+0, 0.0e+0 ),
139  \$ zero = ( 0.0e+0, 0.0e+0 ) )
140 * ..
141 * .. Local Scalars ..
142  LOGICAL IPROC
143  INTEGER I, IACOL, IAROW, ICTXT, IOFF, II, J, JJ, JL,
144  \$ jt, jw, l, myrow, mycol, npcol, nprow, nq
145  COMPLEX EI
146 * ..
147 * .. Local Arrays ..
148  INTEGER DESCW( DLEN_ )
149 * ..
150 * .. External Functions ..
151  INTEGER NUMROC
152  EXTERNAL numroc
153 * ..
154 * .. External Subroutines ..
155  EXTERNAL blacs_gridinfo, caxpy, ccopy, cscal,
156  \$ ctrmv, descset, infog2l, pcelset,
157  \$ pcgemv, pclacgv, pclarfg, pcscal
158 * ..
159 * .. Intrinsic Functions ..
160  INTRINSIC min, mod
161 * ..
162 * .. Executable Statements ..
163 *
164 * Quick return if possible
165 *
166  IF( n.LE.1 )
167  \$ RETURN
168 *
169  ictxt = desca( ctxt_ )
170  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
171 *
172  ioff = mod( ja-1, desca( nb_ ) )
173  CALL infog2l( ia+k, ja, desca, nprow, npcol, myrow, mycol, ii,
174  \$ jj, iarow, iacol )
175 *
176  iproc = ( myrow.EQ.iarow .AND. mycol.EQ.iacol )
177  nq = numroc( n+ja-1, desca( nb_ ), mycol, iacol, npcol )
178  IF( mycol.EQ.iacol )
179  \$ nq = nq - ioff
180 *
181  ei = zero
182
183  jw = ioff + 1
184  CALL descset( descw, 1, desca( mb_ ), 1, desca( mb_ ), iarow,
185  \$ iacol, ictxt, 1 )
186 *
187  DO 10 l = 1, nb
188  i = ia + k + l - 2
189  j = ja + l - 1
190 *
191  IF( l.GT.1 ) THEN
192 *
193 * Update A(ia:ia+n-1,j)
194 *
195 * Compute i-th column of A - Y * V'
196 *
197  CALL pclacgv( l-1, a, i, ja, desca, desca( m_ ) )
198  CALL pcgemv( 'No transpose', n, l-1, -one, y, iy, jy, descy,
199  \$ a, i, ja, desca, desca( m_ ), one, a, ia, j,
200  \$ desca, 1 )
201  CALL pclacgv( l-1, a, i, ja, desca, desca( m_ ) )
202 *
203 * Apply I - V * T' * V' to this column (call it b) from the
204 * left, using the last column of T as workspace
205 *
206 * Let V = ( V1 ) and b = ( b1 ) (first I-1 rows)
207 * ( V2 ) ( b2 )
208 *
209 * where V1 is unit lower triangular
210 *
211 * w := V1' * b1
212 *
213  IF( iproc ) THEN
214  CALL ccopy( l-1, a( (jj+l-2)*desca( lld_ )+ii ), 1,
215  \$ work( jw ), 1 )
216  CALL ctrmv( 'Lower', 'Conjugate transpose', 'Unit', l-1,
217  \$ a( (jj-1)*desca( lld_ )+ii ), desca( lld_ ),
218  \$ work( jw ), 1 )
219  END IF
220 *
221 * w := w + V2'*b2
222 *
223  CALL pcgemv( 'Conjugate transpose', n-k-l+1, l-1, one, a,
224  \$ i+1, ja, desca, a, i+1, j, desca, 1, one, work,
225  \$ 1, jw, descw, descw( m_ ) )
226 *
227 * w := T'*w
228 *
229  IF( iproc )
230  \$ CALL ctrmv( 'Upper', 'Conjugate transpose', 'Non-unit',
231  \$ l-1, t, desca( nb_ ), work( jw ), 1 )
232 *
233 * b2 := b2 - V2*w
234 *
235  CALL pcgemv( 'No transpose', n-k-l+1, l-1, -one, a, i+1, ja,
236  \$ desca, work, 1, jw, descw, descw( m_ ), one,
237  \$ a, i+1, j, desca, 1 )
238 *
239 * b1 := b1 - V1*w
240 *
241  IF( iproc ) THEN
242  CALL ctrmv( 'Lower', 'No transpose', 'Unit', l-1,
243  \$ a( (jj-1)*desca( lld_ )+ii ), desca( lld_ ),
244  \$ work( jw ), 1 )
245  CALL caxpy( l-1, -one, work( jw ), 1,
246  \$ a( ( jj+l-2 )*desca( lld_ )+ii ), 1 )
247  END IF
248  CALL pcelset( a, i, j-1, desca, ei )
249  END IF
250 *
251 * Generate the elementary reflector H(i) to annihilate
252 * A(ia+k+i:ia+n-1,j)
253 *
254  CALL pclarfg( n-k-l+1, ei, i+1, j, a, min( i+2, n+ia-1 ), j,
255  \$ desca, 1, tau )
256  CALL pcelset( a, i+1, j, desca, one )
257 *
258 * Compute Y(iy:y+n-1,jy+l-1)
259 *
260  CALL pcgemv( 'No transpose', n, n-k-l+1, one, a, ia, j+1,
261  \$ desca, a, i+1, j, desca, 1, zero, y, iy, jy+l-1,
262  \$ descy, 1 )
263  CALL pcgemv( 'Conjugate transpose', n-k-l+1, l-1, one, a, i+1,
264  \$ ja, desca, a, i+1, j, desca, 1, zero, work, 1, jw,
265  \$ descw, descw( m_ ) )
266  CALL pcgemv( 'No transpose', n, l-1, -one, y, iy, jy, descy,
267  \$ work, 1, jw, descw, descw( m_ ), one, y, iy,
268  \$ jy+l-1, descy, 1 )
269  jl = min( jj+l-1, ja+nq-1 )
270  CALL pcscal( n, tau( jl ), y, iy, jy+l-1, descy, 1 )
271 *
272 * Compute T(1:i,i)
273 *
274  IF( iproc ) THEN
275  jt = ( l-1 ) * desca( nb_ )
276  CALL cscal( l-1, -tau( jl ), work( jw ), 1 )
277  CALL ccopy( l-1, work( jw ), 1, t( jt+1 ), 1 )
278  CALL ctrmv( 'Upper', 'No transpose', 'Non-unit', l-1, t,
279  \$ desca( nb_ ), t( jt+1 ), 1 )
280  t( jt+l ) = tau( jl )
281  END IF
282  10 CONTINUE
283 *
284  CALL pcelset( a, k+nb+ia-1, j, desca, ei )
285 *
286  RETURN
287 *
288 * End of PCLAHRD
289 *
290  END
pclarfg
subroutine pclarfg(N, ALPHA, IAX, JAX, X, IX, JX, DESCX, INCX, TAU)
Definition: pclarfg.f:3
pclahrd
subroutine pclahrd(N, K, NB, A, IA, JA, DESCA, TAU, T, Y, IY, JY, DESCY, WORK)
Definition: pclahrd.f:3
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pcelset
subroutine pcelset(A, IA, JA, DESCA, ALPHA)
Definition: pcelset.f:2
pclacgv
subroutine pclacgv(N, X, IX, JX, DESCX, INCX)
Definition: pclacgv.f:2
descset
subroutine descset(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD)
Definition: descset.f:3
min
#define min(A, B)
Definition: pcgemr.c:181