ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pclarft.f
Go to the documentation of this file.
1  SUBROUTINE pclarft( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU,
2  $ T, WORK )
3 *
4 * -- ScaLAPACK auxiliary routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 1, 1997
8 *
9 * .. Scalar Arguments ..
10  CHARACTER DIRECT, STOREV
11  INTEGER IV, JV, K, N
12 * ..
13 * .. Array Arguments ..
14  INTEGER DESCV( * )
15  COMPLEX TAU( * ), T( * ), V( * ), WORK( * )
16 * ..
17 *
18 * Purpose
19 * =======
20 *
21 * PCLARFT forms the triangular factor T of a complex block reflector H
22 * of order n, which is defined as a product of k elementary reflectors.
23 *
24 * If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
25 *
26 * If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
27 *
28 * If STOREV = 'C', the vector which defines the elementary reflector
29 * H(i) is stored in the i-th column of the distributed matrix V, and
30 *
31 * H = I - V * T * V'
32 *
33 * If STOREV = 'R', the vector which defines the elementary reflector
34 * H(i) is stored in the i-th row of the distributed matrix V, and
35 *
36 * H = I - V' * T * V
37 *
38 * Notes
39 * =====
40 *
41 * Each global data object is described by an associated description
42 * vector. This vector stores the information required to establish
43 * the mapping between an object element and its corresponding process
44 * and memory location.
45 *
46 * Let A be a generic term for any 2D block cyclicly distributed array.
47 * Such a global array has an associated description vector DESCA.
48 * In the following comments, the character _ should be read as
49 * "of the global array".
50 *
51 * NOTATION STORED IN EXPLANATION
52 * --------------- -------------- --------------------------------------
53 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
54 * DTYPE_A = 1.
55 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
56 * the BLACS process grid A is distribu-
57 * ted over. The context itself is glo-
58 * bal, but the handle (the integer
59 * value) may vary.
60 * M_A (global) DESCA( M_ ) The number of rows in the global
61 * array A.
62 * N_A (global) DESCA( N_ ) The number of columns in the global
63 * array A.
64 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
65 * the rows of the array.
66 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
67 * the columns of the array.
68 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
69 * row of the array A is distributed.
70 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
71 * first column of the array A is
72 * distributed.
73 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
74 * array. LLD_A >= MAX(1,LOCr(M_A)).
75 *
76 * Let K be the number of rows or columns of a distributed matrix,
77 * and assume that its process grid has dimension p x q.
78 * LOCr( K ) denotes the number of elements of K that a process
79 * would receive if K were distributed over the p processes of its
80 * process column.
81 * Similarly, LOCc( K ) denotes the number of elements of K that a
82 * process would receive if K were distributed over the q processes of
83 * its process row.
84 * The values of LOCr() and LOCc() may be determined via a call to the
85 * ScaLAPACK tool function, NUMROC:
86 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
87 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
88 * An upper bound for these quantities may be computed by:
89 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
90 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
91 *
92 * Arguments
93 * =========
94 *
95 * DIRECT (global input) CHARACTER*1
96 * Specifies the order in which the elementary reflectors are
97 * multiplied to form the block reflector:
98 * = 'F': H = H(1) H(2) . . . H(k) (Forward)
99 * = 'B': H = H(k) . . . H(2) H(1) (Backward)
100 *
101 * STOREV (global input) CHARACTER*1
102 * Specifies how the vectors which define the elementary
103 * reflectors are stored (see also Further Details):
104 * = 'C': columnwise
105 * = 'R': rowwise
106 *
107 * N (global input) INTEGER
108 * The order of the block reflector H. N >= 0.
109 *
110 * K (global input) INTEGER
111 * The order of the triangular factor T (= the number of
112 * elementary reflectors). 1 <= K <= MB_V (= NB_V).
113 *
114 * V (input/output) COMPLEX pointer into the local memory
115 * to an array of local dimension (LOCr(IV+N-1),LOCc(JV+K-1))
116 * if STOREV = 'C', and (LOCr(IV+K-1),LOCc(JV+N-1)) if
117 * STOREV = 'R'. The distributed matrix V contains the
118 * Householder vectors. See further details.
119 *
120 * IV (global input) INTEGER
121 * The row index in the global array V indicating the first
122 * row of sub( V ).
123 *
124 * JV (global input) INTEGER
125 * The column index in the global array V indicating the
126 * first column of sub( V ).
127 *
128 * DESCV (global and local input) INTEGER array of dimension DLEN_.
129 * The array descriptor for the distributed matrix V.
130 *
131 * TAU (local input) COMPLEX, array, dimension LOCr(IV+K-1)
132 * if INCV = M_V, and LOCc(JV+K-1) otherwise. This array
133 * contains the Householder scalars related to the Householder
134 * vectors. TAU is tied to the distributed matrix V.
135 *
136 * T (local output) COMPLEX array, dimension (NB_V,NB_V)
137 * if STOREV = 'Col', and (MB_V,MB_V) otherwise. It contains
138 * the k-by-k triangular factor of the block reflector asso-
139 * ciated with V. If DIRECT = 'F', T is upper triangular;
140 * if DIRECT = 'B', T is lower triangular.
141 *
142 * WORK (local workspace) COMPLEX array,
143 * dimension (K*(K-1)/2)
144 *
145 * Further Details
146 * ===============
147 *
148 * The shape of the matrix V and the storage of the vectors which define
149 * the H(i) is best illustrated by the following example with n = 5 and
150 * k = 3. The elements equal to 1 are not stored; the corresponding
151 * array elements are modified but restored on exit. The rest of the
152 * array is not used.
153 *
154 * DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R':
155 *
156 * V( IV:IV+N-1, ( 1 ) V( IV:IV+K-1, ( 1 v1 v1 v1 v1 )
157 * JV:JV+K-1 ) = ( v1 1 ) JV:JV+N-1 ) = ( 1 v2 v2 v2 )
158 * ( v1 v2 1 ) ( 1 v3 v3 )
159 * ( v1 v2 v3 )
160 * ( v1 v2 v3 )
161 *
162 * DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R':
163 *
164 * V( IV:IV+N-1, ( v1 v2 v3 ) V( IV:IV+K-1, ( v1 v1 1 )
165 * JV:JV+K-1 ) = ( v1 v2 v3 ) JV:JV+N-1 ) = ( v2 v2 v2 1 )
166 * ( 1 v2 v3 ) ( v3 v3 v3 v3 1 )
167 * ( 1 v3 )
168 * ( 1 )
169 *
170 * =====================================================================
171 *
172 * .. Parameters ..
173  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
174  $ lld_, mb_, m_, nb_, n_, rsrc_
175  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
176  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
177  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
178  COMPLEX ONE, ZERO
179  parameter( one = ( 1.0e+0, 0.0e+0 ),
180  $ zero = ( 0.0e+0, 0.0e+0 ) )
181 * ..
182 * .. Local Scalars ..
183  LOGICAL FORWARD
184  INTEGER ICOFF, ICTXT, II, IIV, IROFF, IVCOL, IVROW,
185  $ itmp0, itmp1, iw, jj, jjv, ldv, micol, mirow,
186  $ mycol, myrow, np, npcol, nprow, nq
187  COMPLEX VII
188 * ..
189 * .. External Subroutines ..
190  EXTERNAL blacs_gridinfo, ccopy, cgemv, cgsum2d,
191  $ clacgv, claset, ctrmv, infog2l
192 * ..
193 * .. External Functions ..
194  LOGICAL LSAME
195  INTEGER INDXG2P, NUMROC
196  EXTERNAL indxg2p, lsame, numroc
197 * ..
198 * .. Intrinsic Functions ..
199  INTRINSIC mod
200 * ..
201 * .. Executable Statements ..
202 *
203 * Quick return if possible
204 *
205  IF( n.LE.0 .OR. k.LE.0 )
206  $ RETURN
207 *
208  ictxt = descv( ctxt_ )
209  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
210 *
211  forward = lsame( direct, 'F' )
212  CALL infog2l( iv, jv, descv, nprow, npcol, myrow, mycol,
213  $ iiv, jjv, ivrow, ivcol )
214 *
215  IF( lsame( storev, 'C' ) .AND. mycol.EQ.ivcol ) THEN
216 *
217  iw = 1
218  ldv = descv( lld_ )
219  iroff = mod( iv-1, descv( mb_ ) )
220 *
221  IF( forward ) THEN
222 *
223 * DIRECT = 'Forward', STOREV = 'Columnwise'
224 *
225  np = numroc( n+iroff, descv( mb_ ), myrow, ivrow, nprow )
226  IF( myrow.EQ.ivrow ) THEN
227  np = np - iroff
228  ii = iiv + 1
229  ELSE
230  ii = iiv
231  END IF
232  IF( iroff+1.EQ.descv( mb_ ) ) THEN
233  mirow = mod( ivrow+1, nprow )
234  ELSE
235  mirow = ivrow
236  END IF
237  itmp0 = 0
238 *
239  DO 10 jj = jjv+1, jjv+k-1
240 *
241  IF( myrow.EQ.mirow ) THEN
242  vii = v( ii+(jj-1)*ldv )
243  v( ii+(jj-1)*ldv ) = one
244  END IF
245 *
246 * T(1:i-1,i) = -tau( jv+i-1 ) *
247 * V(iv+i-1:iv+n-1,jv:jv+i-2)' * V(iv+i-1:iv+n-1,jv+i-1)
248 *
249  itmp0 = itmp0 + 1
250  IF( np-ii+iiv.GT.0 ) THEN
251  CALL cgemv( 'Conjugate transpose', np-ii+iiv, itmp0,
252  $ -tau( jj ), v( ii+(jjv-1)*ldv ), ldv,
253  $ v( ii+(jj-1)*ldv ), 1, zero,
254  $ work( iw ), 1 )
255  ELSE
256  CALL claset( 'All', itmp0, 1, zero, zero, work( iw ),
257  $ itmp0 )
258  END IF
259 *
260  iw = iw + itmp0
261  IF( myrow.EQ.mirow ) THEN
262  v( ii+(jj-1)*ldv ) = vii
263  ii = ii + 1
264  END IF
265 *
266  IF( mod( iv+itmp0, descv( mb_ ) ).EQ.0 )
267  $ mirow = mod( mirow+1, nprow )
268 *
269  10 CONTINUE
270 *
271  CALL cgsum2d( ictxt, 'Columnwise', ' ', iw-1, 1, work, iw-1,
272  $ ivrow, mycol )
273 *
274  IF( myrow.EQ.ivrow ) THEN
275 *
276  iw = 1
277  itmp0 = 0
278  itmp1 = 1
279 *
280  t( itmp1 ) = tau( jjv )
281 *
282  DO 20 jj = jjv+1, jjv+k-1
283 *
284 * T(1:j-1,j) = T(1:j-1,1:j-1) * T(1:j-1,j)
285 *
286  itmp0 = itmp0 + 1
287  itmp1 = itmp1 + descv( nb_ )
288  CALL ccopy( itmp0, work( iw ), 1, t( itmp1 ), 1 )
289  iw = iw + itmp0
290 *
291  CALL ctrmv( 'Upper', 'No transpose', 'Non-unit',
292  $ itmp0, t, descv( nb_ ), t( itmp1 ), 1 )
293  t(itmp1+itmp0) = tau( jj )
294 *
295  20 CONTINUE
296 *
297  END IF
298 *
299  ELSE
300 *
301 * DIRECT = 'Backward', STOREV = 'Columnwise'
302 *
303  np = numroc( n+iroff-1, descv( mb_ ), myrow, ivrow, nprow )
304  IF( myrow.EQ.ivrow )
305  $ np = np - iroff
306  mirow = indxg2p( iv+n-2, descv( mb_ ), myrow,
307  $ descv( rsrc_ ), nprow )
308  ii = iiv + np - 1
309  itmp0 = 0
310 *
311  DO 30 jj = jjv+k-2, jjv, -1
312 *
313  IF( myrow.EQ.mirow ) THEN
314  vii = v( ii+(jj-1)*ldv )
315  v( ii+(jj-1)*ldv ) = one
316  END IF
317 *
318 * T(1:i-1,i) = -tau( jv+i-1 ) *
319 * V(iv:iv+n-k+i-1,jv+i:jv+k-1)' * V(iv:iv+n-k+i-1,jv+i-1)
320 *
321  itmp0 = itmp0 + 1
322  IF( ii-iiv+1.GT.0 ) THEN
323  CALL cgemv( 'Conjugate transpose', ii-iiv+1, itmp0,
324  $ -tau( jj ), v( iiv+jj*ldv ), ldv,
325  $ v( iiv+(jj-1)*ldv ), 1, zero,
326  $ work( iw ), 1 )
327  ELSE
328  CALL claset( 'All', itmp0, 1, zero, zero, work( iw ),
329  $ itmp0 )
330  END IF
331 *
332  iw = iw + itmp0
333  IF( myrow.EQ.mirow ) THEN
334  v( ii+(jj-1)*ldv ) = vii
335  ii = ii - 1
336  END IF
337 *
338  IF( mod( iv+n-itmp0-2, descv(mb_) ).EQ.0 )
339  $ mirow = mod( mirow+nprow-1, nprow )
340 *
341  30 CONTINUE
342 *
343  CALL cgsum2d( ictxt, 'Columnwise', ' ', iw-1, 1, work, iw-1,
344  $ ivrow, mycol )
345 *
346  IF( myrow.EQ.ivrow ) THEN
347 *
348  iw = 1
349  itmp0 = 0
350  itmp1 = k + 1 + (k-1) * descv( nb_ )
351 *
352  t( itmp1-1 ) = tau( jjv+k-1 )
353 *
354  DO 40 jj = jjv+k-2, jjv, -1
355 *
356 * T(j+1:k,j) = T(j+1:k,j+1:k) * T(j+1:k,j)
357 *
358  itmp0 = itmp0 + 1
359  itmp1 = itmp1 - descv( nb_ ) - 1
360  CALL ccopy( itmp0, work( iw ), 1, t( itmp1 ), 1 )
361  iw = iw + itmp0
362 *
363  CALL ctrmv( 'Lower', 'No transpose', 'Non-unit',
364  $ itmp0, t( itmp1+descv( nb_ ) ),
365  $ descv( nb_ ), t( itmp1 ), 1 )
366  t( itmp1-1 ) = tau( jj )
367 *
368  40 CONTINUE
369 *
370  END IF
371 *
372  END IF
373 *
374  ELSE IF( lsame( storev, 'R' ) .AND. myrow.EQ.ivrow ) THEN
375 *
376  iw = 1
377  ldv = descv( lld_ )
378  icoff = mod( jv-1, descv( nb_ ) )
379 *
380  IF( forward ) THEN
381 *
382 * DIRECT = 'Forward', STOREV = 'Rowwise'
383 *
384  nq = numroc( n+icoff, descv( nb_ ), mycol, ivcol, npcol )
385  IF( mycol.EQ.ivcol ) THEN
386  nq = nq - icoff
387  jj = jjv + 1
388  ELSE
389  jj = jjv
390  END IF
391  IF( icoff+1.EQ.descv( nb_ ) ) THEN
392  micol = mod( ivcol+1, npcol )
393  ELSE
394  micol = ivcol
395  END IF
396  itmp0 = 0
397 *
398  DO 50 ii = iiv+1, iiv+k-1
399 *
400  IF( mycol.EQ.micol ) THEN
401  vii = v( ii+(jj-1)*ldv )
402  v( ii+(jj-1)*ldv ) = one
403  END IF
404 *
405 * T(1:i-1,i) = -tau( iv+i-1 ) *
406 * V(iv+i-1,jv+i-1:jv+n-1) * V(iv:iv+i-2,jv+i-1:jv+n-1)'
407 *
408  itmp0 = itmp0 + 1
409  IF( nq-jj+jjv.GT.0 ) THEN
410  CALL clacgv( nq-jj+jjv, v( ii+(jj-1)*ldv ), ldv )
411  CALL cgemv( 'No transpose', itmp0, nq-jj+jjv,
412  $ -tau(ii), v( iiv+(jj-1)*ldv ), ldv,
413  $ v( ii+(jj-1)*ldv ), ldv, zero,
414  $ work( iw ), 1 )
415  CALL clacgv( nq-jj+jjv, v( ii+(jj-1)*ldv ), ldv )
416  ELSE
417  CALL claset( 'All', itmp0, 1, zero, zero,
418  $ work( iw ), itmp0 )
419  END IF
420 *
421  iw = iw + itmp0
422  IF( mycol.EQ.micol ) THEN
423  v( ii+(jj-1)*ldv ) = vii
424  jj = jj + 1
425  END IF
426 *
427  IF( mod( jv+itmp0, descv( nb_ ) ).EQ.0 )
428  $ micol = mod( micol+1, npcol )
429 *
430  50 CONTINUE
431 *
432  CALL cgsum2d( ictxt, 'Rowwise', ' ', iw-1, 1, work, iw-1,
433  $ myrow, ivcol )
434 *
435  IF( mycol.EQ.ivcol ) THEN
436 *
437  iw = 1
438  itmp0 = 0
439  itmp1 = 1
440 *
441  t( itmp1 ) = tau( iiv )
442 *
443  DO 60 ii = iiv+1, iiv+k-1
444 *
445 * T(1:i-1,i) = T(1:i-1,1:i-1) * T(1:i-1,i)
446 *
447  itmp0 = itmp0 + 1
448  itmp1 = itmp1 + descv( mb_ )
449  CALL ccopy( itmp0, work( iw ), 1, t( itmp1 ), 1 )
450  iw = iw + itmp0
451 *
452  CALL ctrmv( 'Upper', 'No transpose', 'Non-unit',
453  $ itmp0, t, descv( mb_ ), t( itmp1 ), 1 )
454  t( itmp1+itmp0 ) = tau( ii )
455 *
456  60 CONTINUE
457 *
458  END IF
459 *
460  ELSE
461 *
462 * DIRECT = 'Backward', STOREV = 'Rowwise'
463 *
464  nq = numroc( n+icoff-1, descv( nb_ ), mycol, ivcol, npcol )
465  IF( mycol.EQ.ivcol )
466  $ nq = nq - icoff
467  micol = indxg2p( jv+n-2, descv( nb_ ), mycol,
468  $ descv( csrc_ ), npcol )
469  jj = jjv + nq - 1
470  itmp0 = 0
471 *
472  DO 70 ii = iiv+k-2, iiv, -1
473 *
474  IF( mycol.EQ.micol ) THEN
475  vii = v( ii+(jj-1)*ldv )
476  v( ii+(jj-1)*ldv ) = one
477  END IF
478 *
479 * T(i+1:k,i) = -tau( iv+i-1 ) *
480 * V(iv+i:iv+k-1,jv:jv+n-k+i-1)' * V(iv+i-1,jv:jv+n-k+i-1)'
481 *
482  itmp0 = itmp0 + 1
483  IF( jj-jjv+1.GT.0 ) THEN
484  CALL clacgv( jj-jjv+1, v( ii+(jjv-1)*ldv ), ldv )
485  CALL cgemv( 'No transpose', itmp0, jj-jjv+1,
486  $ -tau( ii ), v( ii+1+(jjv-1)*ldv ), ldv,
487  $ v( ii+(jjv-1)*ldv ), ldv, zero,
488  $ work( iw ), 1 )
489  CALL clacgv( jj-jjv+1, v( ii+(jjv-1)*ldv ), ldv )
490  ELSE
491  CALL claset( 'All', itmp0, 1, zero, zero,
492  $ work( iw ), itmp0 )
493  END IF
494 *
495  iw = iw + itmp0
496  IF( mycol.EQ.micol ) THEN
497  v( ii+(jj-1)*ldv ) = vii
498  jj = jj - 1
499  END IF
500 *
501  IF( mod( jv+n-itmp0-2, descv( nb_ ) ).EQ.0 )
502  $ micol = mod( micol+npcol-1, npcol )
503 *
504  70 CONTINUE
505 *
506  CALL cgsum2d( ictxt, 'Rowwise', ' ', iw-1, 1, work, iw-1,
507  $ myrow, ivcol )
508 *
509  IF( mycol.EQ.ivcol ) THEN
510 *
511  iw = 1
512  itmp0 = 0
513  itmp1 = k + 1 + (k-1) * descv( mb_ )
514 *
515  t( itmp1-1 ) = tau( iiv+k-1 )
516 *
517  DO 80 ii = iiv+k-2, iiv, -1
518 *
519 * T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i)
520 *
521  itmp0 = itmp0 + 1
522  itmp1 = itmp1 - descv( mb_ ) - 1
523  CALL ccopy( itmp0, work( iw ), 1, t( itmp1 ), 1 )
524  iw = iw + itmp0
525 *
526  CALL ctrmv( 'Lower', 'No transpose', 'Non-unit',
527  $ itmp0, t( itmp1+descv( mb_ ) ),
528  $ descv( mb_ ), t( itmp1 ), 1 )
529  t( itmp1-1 ) = tau( ii )
530 *
531  80 CONTINUE
532 *
533  END IF
534 *
535  END IF
536 *
537  END IF
538 *
539  RETURN
540 *
541 * End of PCLARFT
542 *
543  END
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pclarft
subroutine pclarft(DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU, T, WORK)
Definition: pclarft.f:3