ScaLAPACK 2.1  2.1 ScaLAPACK: Scalable Linear Algebra PACKage
pdlahrd.f
Go to the documentation of this file.
1  SUBROUTINE pdlahrd( N, K, NB, A, IA, JA, DESCA, TAU, T, Y, IY, JY,
2  \$ DESCY, WORK )
3 *
4 * -- ScaLAPACK auxiliary routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 1, 1997
8 *
9 * .. Scalar Arguments ..
10  INTEGER IA, IY, JA, JY, K, N, NB
11 * ..
12 * .. Array Arguments ..
13  INTEGER DESCA( * ), DESCY( * )
14  DOUBLE PRECISION A( * ), T( * ), TAU( * ), WORK( * ), Y( * )
15 * ..
16 *
17 * Purpose
18 * =======
19 *
20 * PDLAHRD reduces the first NB columns of a real general N-by-(N-K+1)
21 * distributed matrix A(IA:IA+N-1,JA:JA+N-K) so that elements below the
22 * k-th subdiagonal are zero. The reduction is performed by an orthogo-
23 * nal similarity transformation Q' * A * Q. The routine returns the
24 * matrices V and T which determine Q as a block reflector I - V*T*V',
25 * and also the matrix Y = A * V * T.
26 *
27 * This is an auxiliary routine called by PDGEHRD. In the following
28 * comments sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1).
29 *
30 * Arguments
31 * =========
32 *
33 * N (global input) INTEGER
34 * The number of rows and columns to be operated on, i.e. the
35 * order of the distributed submatrix sub( A ).
36 * N >= 0.
37 *
38 * K (global input) INTEGER
39 * The offset for the reduction. Elements below the k-th
40 * subdiagonal in the first NB columns are reduced to zero.
41 *
42 * NB (global input) INTEGER
43 * The number of columns to be reduced.
44 *
45 * A (local input/local output) DOUBLE PRECISION pointer into
46 * the local memory to an array of dimension (LLD_A,
47 * LOCc(JA+N-K)). On entry, this array contains the the local
48 * pieces of the N-by-(N-K+1) general distributed matrix
49 * A(IA:IA+N-1,JA:JA+N-K). On exit, the elements on and above
50 * the k-th subdiagonal in the first NB columns are overwritten
51 * with the corresponding elements of the reduced distributed
52 * matrix; the elements below the k-th subdiagonal, with the
53 * array TAU, represent the matrix Q as a product of elementary
54 * reflectors. The other columns of A(IA:IA+N-1,JA:JA+N-K) are
55 * unchanged. See Further Details.
56 *
57 * IA (global input) INTEGER
58 * The row index in the global array A indicating the first
59 * row of sub( A ).
60 *
61 * JA (global input) INTEGER
62 * The column index in the global array A indicating the
63 * first column of sub( A ).
64 *
65 * DESCA (global and local input) INTEGER array of dimension DLEN_.
66 * The array descriptor for the distributed matrix A.
67 *
68 * TAU (local output) DOUBLE PRECISION array, dimension LOCc(JA+N-2)
69 * The scalar factors of the elementary reflectors (see Further
70 * Details). TAU is tied to the distributed matrix A.
71 *
72 * T (local output) DOUBLE PRECISION array, dimension (NB_A,NB_A)
73 * The upper triangular matrix T.
74 *
75 * Y (local output) DOUBLE PRECISION pointer into the local memory
76 * to an array of dimension (LLD_Y,NB_A). On exit, this array
77 * contains the local pieces of the N-by-NB distributed
78 * matrix Y. LLD_Y >= LOCr(IA+N-1).
79 *
80 * IY (global input) INTEGER
81 * The row index in the global array Y indicating the first
82 * row of sub( Y ).
83 *
84 * JY (global input) INTEGER
85 * The column index in the global array Y indicating the
86 * first column of sub( Y ).
87 *
88 * DESCY (global and local input) INTEGER array of dimension DLEN_.
89 * The array descriptor for the distributed matrix Y.
90 *
91 * WORK (local workspace) DOUBLE PRECISION array, dimension (NB)
92 *
93 * Further Details
94 * ===============
95 *
96 * The matrix Q is represented as a product of nb elementary reflectors
97 *
98 * Q = H(1) H(2) . . . H(nb).
99 *
100 * Each H(i) has the form
101 *
102 * H(i) = I - tau * v * v'
103 *
104 * where tau is a real scalar, and v is a real vector with
105 * v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in
106 * A(ia+i+k:ia+n-1,ja+i-1), and tau in TAU(ja+i-1).
107 *
108 * The elements of the vectors v together form the (n-k+1)-by-nb matrix
109 * V which is needed, with T and Y, to apply the transformation to the
110 * unreduced part of the matrix, using an update of the form:
111 * A(ia:ia+n-1,ja:ja+n-k) := (I-V*T*V')*(A(ia:ia+n-1,ja:ja+n-k)-Y*V').
112 *
113 * The contents of A(ia:ia+n-1,ja:ja+n-k) on exit are illustrated by the
114 * following example with n = 7, k = 3 and nb = 2:
115 *
116 * ( a h a a a )
117 * ( a h a a a )
118 * ( a h a a a )
119 * ( h h a a a )
120 * ( v1 h a a a )
121 * ( v1 v2 a a a )
122 * ( v1 v2 a a a )
123 *
124 * where a denotes an element of the original matrix
125 * A(ia:ia+n-1,ja:ja+n-k), h denotes a modified element of the upper
126 * Hessenberg matrix H, and vi denotes an element of the vector
127 * defining H(i).
128 *
129 * =====================================================================
130 *
131 * .. Parameters ..
132  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
133  \$ lld_, mb_, m_, nb_, n_, rsrc_
134  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
135  \$ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
136  \$ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
137  DOUBLE PRECISION ONE, ZERO
138  parameter( one = 1.0d+0, zero = 0.0d+0 )
139 * ..
140 * .. Local Scalars ..
141  LOGICAL IPROC
142  INTEGER I, IACOL, IAROW, ICTXT, IOFF, II, J, JJ, JL,
143  \$ jt, jw, l, myrow, mycol, npcol, nprow, nq
144  DOUBLE PRECISION EI
145 * ..
146 * .. Local Arrays ..
147  INTEGER DESCW( DLEN_ )
148 * ..
149 * .. External Functions ..
150  INTEGER NUMROC
151  EXTERNAL numroc
152 * ..
153 * .. External Subroutines ..
154  EXTERNAL blacs_gridinfo, daxpy, descset, dcopy,
155  \$ dscal, dtrmv, infog2l, pdelset,
156  \$ pdgemv, pdlarfg, pdscal
157 * ..
158 * .. Intrinsic Functions ..
159  INTRINSIC min, mod
160 * ..
161 * .. Executable Statements ..
162 *
163 * Quick return if possible
164 *
165  IF( n.LE.1 )
166  \$ RETURN
167 *
168  ictxt = desca( ctxt_ )
169  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
170 *
171  ioff = mod( ja-1, desca( nb_ ) )
172  CALL infog2l( ia+k, ja, desca, nprow, npcol, myrow, mycol, ii,
173  \$ jj, iarow, iacol )
174 *
175  iproc = ( myrow.EQ.iarow .AND. mycol.EQ.iacol )
176  nq = numroc( n+ja-1, desca( nb_ ), mycol, iacol, npcol )
177  IF( mycol.EQ.iacol )
178  \$ nq = nq - ioff
179 *
180  ei = zero
181
182  jw = ioff + 1
183  CALL descset( descw, 1, desca( mb_ ), 1, desca( mb_ ), iarow,
184  \$ iacol, ictxt, 1 )
185 *
186  DO 10 l = 1, nb
187  i = ia + k + l - 2
188  j = ja + l - 1
189 *
190  IF( l.GT.1 ) THEN
191 *
192 * Update A(ia:ia+n-1,j)
193 *
194 * Compute i-th column of A - Y * V'
195 *
196  CALL pdgemv( 'No transpose', n, l-1, -one, y, iy, jy, descy,
197  \$ a, i, ja, desca, desca( m_ ), one, a, ia, j,
198  \$ desca, 1 )
199 *
200 * Apply I - V * T' * V' to this column (call it b) from the
201 * left, using the last column of T as workspace
202 *
203 * Let V = ( V1 ) and b = ( b1 ) (first I-1 rows)
204 * ( V2 ) ( b2 )
205 *
206 * where V1 is unit lower triangular
207 *
208 * w := V1' * b1
209 *
210  IF( iproc ) THEN
211  CALL dcopy( l-1, a( (jj+l-2)*desca( lld_ )+ii ), 1,
212  \$ work( jw ), 1 )
213  CALL dtrmv( 'Lower', 'Transpose', 'Unit', l-1,
214  \$ a( (jj-1)*desca( lld_ )+ii ), desca( lld_ ),
215  \$ work( jw ), 1 )
216  END IF
217 *
218 * w := w + V2'*b2
219 *
220  CALL pdgemv( 'Transpose', n-k-l+1, l-1, one, a, i+1, ja,
221  \$ desca, a, i+1, j, desca, 1, one, work, 1, jw,
222  \$ descw, descw( m_ ) )
223 *
224 * w := T'*w
225 *
226  IF( iproc )
227  \$ CALL dtrmv( 'Upper', 'Transpose', 'Non-unit', l-1, t,
228  \$ desca( nb_ ), work( jw ), 1 )
229 *
230 * b2 := b2 - V2*w
231 *
232  CALL pdgemv( 'No transpose', n-k-l+1, l-1, -one, a, i+1, ja,
233  \$ desca, work, 1, jw, descw, descw( m_ ), one,
234  \$ a, i+1, j, desca, 1 )
235 *
236 * b1 := b1 - V1*w
237 *
238  IF( iproc ) THEN
239  CALL dtrmv( 'Lower', 'No transpose', 'Unit', l-1,
240  \$ a( (jj-1)*desca( lld_ )+ii ), desca( lld_ ),
241  \$ work( jw ), 1 )
242  CALL daxpy( l-1, -one, work( jw ), 1,
243  \$ a( ( jj+l-2 )*desca( lld_ )+ii ), 1 )
244  END IF
245  CALL pdelset( a, i, j-1, desca, ei )
246  END IF
247 *
248 * Generate the elementary reflector H(i) to annihilate
249 * A(ia+k+i:ia+n-1,j)
250 *
251  CALL pdlarfg( n-k-l+1, ei, i+1, j, a, min( i+2, n+ia-1 ), j,
252  \$ desca, 1, tau )
253  CALL pdelset( a, i+1, j, desca, one )
254 *
255 * Compute Y(iy:y+n-1,jy+l-1)
256 *
257  CALL pdgemv( 'No transpose', n, n-k-l+1, one, a, ia, j+1,
258  \$ desca, a, i+1, j, desca, 1, zero, y, iy, jy+l-1,
259  \$ descy, 1 )
260  CALL pdgemv( 'Transpose', n-k-l+1, l-1, one, a, i+1, ja, desca,
261  \$ a, i+1, j, desca, 1, zero, work, 1, jw, descw,
262  \$ descw( m_ ) )
263  CALL pdgemv( 'No transpose', n, l-1, -one, y, iy, jy, descy,
264  \$ work, 1, jw, descw, descw( m_ ), one, y, iy,
265  \$ jy+l-1, descy, 1 )
266  jl = min( jj+l-1, ja+nq-1 )
267  CALL pdscal( n, tau( jl ), y, iy, jy+l-1, descy, 1 )
268 *
269 * Compute T(1:i,i)
270 *
271  IF( iproc ) THEN
272  jt = ( l-1 ) * desca( nb_ )
273  CALL dscal( l-1, -tau( jl ), work( jw ), 1 )
274  CALL dcopy( l-1, work( jw ), 1, t( jt+1 ), 1 )
275  CALL dtrmv( 'Upper', 'No transpose', 'Non-unit', l-1, t,
276  \$ desca( nb_ ), t( jt+1 ), 1 )
277  t( jt+l ) = tau( jl )
278  END IF
279  10 CONTINUE
280 *
281  CALL pdelset( a, k+nb+ia-1, j, desca, ei )
282 *
283  RETURN
284 *
285 * End of PDLAHRD
286 *
287  END
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
descset
subroutine descset(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD)
Definition: descset.f:3
pdlahrd
subroutine pdlahrd(N, K, NB, A, IA, JA, DESCA, TAU, T, Y, IY, JY, DESCY, WORK)
Definition: pdlahrd.f:3
pdlarfg
subroutine pdlarfg(N, ALPHA, IAX, JAX, X, IX, JX, DESCX, INCX, TAU)
Definition: pdlarfg.f:3
pdelset
subroutine pdelset(A, IA, JA, DESCA, ALPHA)
Definition: pdelset.f:2
min
#define min(A, B)
Definition: pcgemr.c:181