ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pslarzt.f
Go to the documentation of this file.
1  SUBROUTINE pslarzt( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU,
2  $ T, WORK )
3 *
4 * -- ScaLAPACK auxiliary routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 1, 1997
8 *
9 * .. Scalar Arguments ..
10  CHARACTER DIRECT, STOREV
11  INTEGER IV, JV, K, N
12 * ..
13 * .. Array Arguments ..
14  INTEGER DESCV( * )
15  REAL TAU( * ), T( * ), V( * ), WORK( * )
16 * ..
17 *
18 * Purpose
19 * =======
20 *
21 * PSLARZT forms the triangular factor T of a real block reflector
22 * H of order > n, which is defined as a product of k elementary
23 * reflectors as returned by PSTZRZF.
24 *
25 * If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
26 *
27 * If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
28 *
29 * If STOREV = 'C', the vector which defines the elementary reflector
30 * H(i) is stored in the i-th column of the array V, and
31 *
32 * H = I - V * T * V'
33 *
34 * If STOREV = 'R', the vector which defines the elementary reflector
35 * H(i) is stored in the i-th row of the array V, and
36 *
37 * H = I - V' * T * V
38 *
39 * Currently, only STOREV = 'R' and DIRECT = 'B' are supported.
40 *
41 * Notes
42 * =====
43 *
44 * Each global data object is described by an associated description
45 * vector. This vector stores the information required to establish
46 * the mapping between an object element and its corresponding process
47 * and memory location.
48 *
49 * Let A be a generic term for any 2D block cyclicly distributed array.
50 * Such a global array has an associated description vector DESCA.
51 * In the following comments, the character _ should be read as
52 * "of the global array".
53 *
54 * NOTATION STORED IN EXPLANATION
55 * --------------- -------------- --------------------------------------
56 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
57 * DTYPE_A = 1.
58 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
59 * the BLACS process grid A is distribu-
60 * ted over. The context itself is glo-
61 * bal, but the handle (the integer
62 * value) may vary.
63 * M_A (global) DESCA( M_ ) The number of rows in the global
64 * array A.
65 * N_A (global) DESCA( N_ ) The number of columns in the global
66 * array A.
67 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
68 * the rows of the array.
69 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
70 * the columns of the array.
71 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
72 * row of the array A is distributed.
73 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
74 * first column of the array A is
75 * distributed.
76 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
77 * array. LLD_A >= MAX(1,LOCr(M_A)).
78 *
79 * Let K be the number of rows or columns of a distributed matrix,
80 * and assume that its process grid has dimension p x q.
81 * LOCr( K ) denotes the number of elements of K that a process
82 * would receive if K were distributed over the p processes of its
83 * process column.
84 * Similarly, LOCc( K ) denotes the number of elements of K that a
85 * process would receive if K were distributed over the q processes of
86 * its process row.
87 * The values of LOCr() and LOCc() may be determined via a call to the
88 * ScaLAPACK tool function, NUMROC:
89 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
90 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
91 * An upper bound for these quantities may be computed by:
92 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
93 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
94 *
95 * Arguments
96 * =========
97 *
98 * DIRECT (global input) CHARACTER
99 * Specifies the order in which the elementary reflectors are
100 * multiplied to form the block reflector:
101 * = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet)
102 * = 'B': H = H(k) . . . H(2) H(1) (Backward)
103 *
104 * STOREV (global input) CHARACTER
105 * Specifies how the vectors which define the elementary
106 * reflectors are stored (see also Further Details):
107 * = 'C': columnwise (not supported yet)
108 * = 'R': rowwise
109 *
110 * N (global input) INTEGER
111 * The number of meaningful entries of the block reflector H.
112 * N >= 0.
113 *
114 * K (global input) INTEGER
115 * The order of the triangular factor T (= the number of
116 * elementary reflectors). 1 <= K <= MB_V (= NB_V).
117 *
118 * V (input/output) REAL pointer into the local memory
119 * to an array of local dimension (LOCr(IV+K-1),LOCc(JV+N-1)).
120 * The distributed matrix V contains the Householder vectors.
121 * See further details.
122 *
123 * IV (global input) INTEGER
124 * The row index in the global array V indicating the first
125 * row of sub( V ).
126 *
127 * JV (global input) INTEGER
128 * The column index in the global array V indicating the
129 * first column of sub( V ).
130 *
131 * DESCV (global and local input) INTEGER array of dimension DLEN_.
132 * The array descriptor for the distributed matrix V.
133 *
134 * TAU (local input) REAL, array, dimension LOCr(IV+K-1)
135 * if INCV = M_V, and LOCc(JV+K-1) otherwise. This array
136 * contains the Householder scalars related to the Householder
137 * vectors. TAU is tied to the distributed matrix V.
138 *
139 * T (local output) REAL array, dimension (MB_V,MB_V)
140 * It contains the k-by-k triangular factor of the block
141 * reflector associated with V. T is lower triangular.
142 *
143 * WORK (local workspace) REAL array,
144 * dimension (K*(K-1)/2)
145 *
146 * Further Details
147 * ===============
148 *
149 * The shape of the matrix V and the storage of the vectors which define
150 * the H(i) is best illustrated by the following example with n = 5 and
151 * k = 3. The elements equal to 1 are not stored; the corresponding
152 * array elements are modified but restored on exit. The rest of the
153 * array is not used.
154 *
155 * DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R':
156 *
157 * ______V_____
158 * ( v1 v2 v3 ) / \
159 * ( v1 v2 v3 ) ( v1 v1 v1 v1 v1 . . . . 1 )
160 * V = ( v1 v2 v3 ) ( v2 v2 v2 v2 v2 . . . 1 )
161 * ( v1 v2 v3 ) ( v3 v3 v3 v3 v3 . . 1 )
162 * ( v1 v2 v3 )
163 * . . .
164 * . . .
165 * 1 . .
166 * 1 .
167 * 1
168 *
169 * DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R':
170 *
171 * ______V_____
172 * 1 / \
173 * . 1 ( 1 . . . . v1 v1 v1 v1 v1 )
174 * . . 1 ( . 1 . . . v2 v2 v2 v2 v2 )
175 * . . . ( . . 1 . . v3 v3 v3 v3 v3 )
176 * . . .
177 * ( v1 v2 v3 )
178 * ( v1 v2 v3 )
179 * V = ( v1 v2 v3 )
180 * ( v1 v2 v3 )
181 * ( v1 v2 v3 )
182 *
183 * =====================================================================
184 *
185 * .. Parameters ..
186  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
187  $ lld_, mb_, m_, nb_, n_, rsrc_
188  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
189  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
190  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
191  REAL ZERO
192  parameter( zero = 0.0e+0 )
193 * ..
194 * .. Local Scalars ..
195  INTEGER ICOFF, ICTXT, II, IIV, INFO, IVCOL, IVROW,
196  $ itmp0, itmp1, iw, jjv, ldv, mycol, myrow,
197  $ npcol, nprow, nq
198 * ..
199 * .. External Subroutines ..
200  EXTERNAL blacs_abort, blacs_gridinfo, infog2l, pxerbla,
201  $ scopy, sgemv, sgsum2d, slaset,
202  $ strmv
203 * ..
204 * .. External Functions ..
205  LOGICAL LSAME
206  INTEGER NUMROC
207  EXTERNAL lsame, numroc
208 * ..
209 * .. Intrinsic Functions ..
210  INTRINSIC mod
211 * ..
212 * .. Executable Statements ..
213 *
214 * Get grid parameters
215 *
216  ictxt = descv( ctxt_ )
217  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
218 *
219 * Check for currently supported options
220 *
221  info = 0
222  IF( .NOT.lsame( direct, 'B' ) ) THEN
223  info = -1
224  ELSE IF( .NOT.lsame( storev, 'R' ) ) THEN
225  info = -2
226  END IF
227  IF( info.NE.0 ) THEN
228  CALL pxerbla( ictxt, 'PSLARZT', -info )
229  CALL blacs_abort( ictxt, 1 )
230  RETURN
231  END IF
232 *
233  CALL infog2l( iv, jv, descv, nprow, npcol, myrow, mycol,
234  $ iiv, jjv, ivrow, ivcol )
235 *
236  IF( myrow.EQ.ivrow ) THEN
237  iw = 1
238  itmp0 = 0
239  ldv = descv( lld_ )
240  icoff = mod( jv-1, descv( nb_ ) )
241  nq = numroc( n+icoff, descv( nb_ ), mycol, ivcol, npcol )
242  IF( mycol.EQ.ivcol )
243  $ nq = nq - icoff
244 *
245  DO 10 ii = iiv+k-2, iiv, -1
246 *
247 * T(i+1:k,i) = -tau( iv+i-1 ) *
248 * V(iv+i:iv+k-1,jv:jv+n-1) * V(iv+i-1,jv:jv+n-1)'
249 *
250  itmp0 = itmp0 + 1
251  IF( nq.GT.0 ) THEN
252  CALL sgemv( 'No transpose', itmp0, nq, -tau( ii ),
253  $ v( ii+1+(jjv-1)*ldv ), ldv,
254  $ v( ii+(jjv-1)*ldv ), ldv, zero, work( iw ),
255  $ 1 )
256  ELSE
257  CALL slaset( 'All', itmp0, 1, zero, zero, work( iw ),
258  $ itmp0 )
259  END IF
260  iw = iw + itmp0
261 *
262  10 CONTINUE
263 *
264  CALL sgsum2d( ictxt, 'Rowwise', ' ', iw-1, 1, work, iw-1,
265  $ myrow, ivcol )
266 *
267  IF( mycol.EQ.ivcol ) THEN
268 *
269  iw = 1
270  itmp0 = 0
271  itmp1 = k + 1 + (k-1) * descv( mb_ )
272 *
273  t( itmp1-1 ) = tau( iiv+k-1 )
274 *
275  DO 20 ii = iiv+k-2, iiv, -1
276 *
277 * T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i)
278 *
279  itmp0 = itmp0 + 1
280  itmp1 = itmp1 - descv( mb_ ) - 1
281  CALL scopy( itmp0, work( iw ), 1, t( itmp1 ), 1 )
282  iw = iw + itmp0
283 *
284  CALL strmv( 'Lower', 'No transpose', 'Non-unit', itmp0,
285  $ t( itmp1+descv( mb_ ) ), descv( mb_ ),
286  $ t( itmp1 ), 1 )
287  t( itmp1-1 ) = tau( ii )
288 *
289  20 CONTINUE
290 *
291  END IF
292 *
293  END IF
294 *
295  RETURN
296 *
297 * End of PSLARZT
298 *
299  END
pslarzt
subroutine pslarzt(DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU, T, WORK)
Definition: pslarzt.f:3
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2