ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pcgehd2.f
Go to the documentation of this file.
1  SUBROUTINE pcgehd2( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK,
2  $ LWORK, INFO )
3 *
4 * -- ScaLAPACK auxiliary routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 1, 1997
8 *
9 * .. Scalar Arguments ..
10  INTEGER IA, IHI, ILO, INFO, JA, LWORK, N
11 * ..
12 * .. Array Arguments ..
13  INTEGER DESCA( * )
14  COMPLEX A( * ), TAU( * ), WORK( * )
15 * ..
16 *
17 * Purpose
18 * =======
19 *
20 * PCGEHD2 reduces a complex general distributed matrix sub( A )
21 * to upper Hessenberg form H by an unitary similarity transformation:
22 * Q' * sub( A ) * Q = H, where
23 * sub( A ) = A(IA+N-1:IA+N-1,JA+N-1:JA+N-1).
24 *
25 * Notes
26 * =====
27 *
28 * Each global data object is described by an associated description
29 * vector. This vector stores the information required to establish
30 * the mapping between an object element and its corresponding process
31 * and memory location.
32 *
33 * Let A be a generic term for any 2D block cyclicly distributed array.
34 * Such a global array has an associated description vector DESCA.
35 * In the following comments, the character _ should be read as
36 * "of the global array".
37 *
38 * NOTATION STORED IN EXPLANATION
39 * --------------- -------------- --------------------------------------
40 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
41 * DTYPE_A = 1.
42 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
43 * the BLACS process grid A is distribu-
44 * ted over. The context itself is glo-
45 * bal, but the handle (the integer
46 * value) may vary.
47 * M_A (global) DESCA( M_ ) The number of rows in the global
48 * array A.
49 * N_A (global) DESCA( N_ ) The number of columns in the global
50 * array A.
51 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
52 * the rows of the array.
53 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
54 * the columns of the array.
55 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
56 * row of the array A is distributed.
57 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
58 * first column of the array A is
59 * distributed.
60 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
61 * array. LLD_A >= MAX(1,LOCr(M_A)).
62 *
63 * Let K be the number of rows or columns of a distributed matrix,
64 * and assume that its process grid has dimension p x q.
65 * LOCr( K ) denotes the number of elements of K that a process
66 * would receive if K were distributed over the p processes of its
67 * process column.
68 * Similarly, LOCc( K ) denotes the number of elements of K that a
69 * process would receive if K were distributed over the q processes of
70 * its process row.
71 * The values of LOCr() and LOCc() may be determined via a call to the
72 * ScaLAPACK tool function, NUMROC:
73 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
74 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
75 * An upper bound for these quantities may be computed by:
76 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
77 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
78 *
79 * Arguments
80 * =========
81 *
82 * N (global input) INTEGER
83 * The number of rows and columns to be operated on, i.e. the
84 * order of the distributed submatrix sub( A ). N >= 0.
85 *
86 * ILO (global input) INTEGER
87 * IHI (global input) INTEGER
88 * It is assumed that sub( A ) is already upper triangular in
89 * rows IA:IA+ILO-2 and IA+IHI:IA+N-1 and columns JA:JA+JLO-2
90 * and JA+JHI:JA+N-1. See Further Details. If N > 0,
91 * 1 <= ILO <= IHI <= N; otherwise set ILO = 1, IHI = N.
92 *
93 * A (local input/local output) COMPLEX pointer into the
94 * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)).
95 * On entry, this array contains the local pieces of the N-by-N
96 * general distributed matrix sub( A ) to be reduced. On exit,
97 * the upper triangle and the first subdiagonal of sub( A ) are
98 * overwritten with the upper Hessenberg matrix H, and the ele-
99 * ments below the first subdiagonal, with the array TAU, repre-
100 * sent the unitary matrix Q as a product of elementary
101 * reflectors. See Further Details.
102 *
103 * IA (global input) INTEGER
104 * The row index in the global array A indicating the first
105 * row of sub( A ).
106 *
107 * JA (global input) INTEGER
108 * The column index in the global array A indicating the
109 * first column of sub( A ).
110 *
111 * DESCA (global and local input) INTEGER array of dimension DLEN_.
112 * The array descriptor for the distributed matrix A.
113 *
114 * TAU (local output) COMPLEX array, dimension LOCc(JA+N-2)
115 * The scalar factors of the elementary reflectors (see Further
116 * Details). Elements JA:JA+ILO-2 and JA+IHI:JA+N-2 of TAU are
117 * set to zero. TAU is tied to the distributed matrix A.
118 *
119 * WORK (local workspace/local output) COMPLEX array,
120 * dimension (LWORK)
121 * On exit, WORK( 1 ) returns the minimal and optimal LWORK.
122 *
123 * LWORK (local or global input) INTEGER
124 * The dimension of the array WORK.
125 * LWORK is local input and must be at least
126 * LWORK >= NB + MAX( NpA0, NB )
127 *
128 * where NB = MB_A = NB_A, IROFFA = MOD( IA-1, NB ),
129 * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ),
130 * NpA0 = NUMROC( IHI+IROFFA, NB, MYROW, IAROW, NPROW ),
131 *
132 * INDXG2P and NUMROC are ScaLAPACK tool functions;
133 * MYROW, MYCOL, NPROW and NPCOL can be determined by calling
134 * the subroutine BLACS_GRIDINFO.
135 *
136 * If LWORK = -1, then LWORK is global input and a workspace
137 * query is assumed; the routine only calculates the minimum
138 * and optimal size for all work arrays. Each of these
139 * values is returned in the first entry of the corresponding
140 * work array, and no error message is issued by PXERBLA.
141 *
142 * INFO (local output) INTEGER
143 * = 0: successful exit
144 * < 0: If the i-th argument is an array and the j-entry had
145 * an illegal value, then INFO = -(i*100+j), if the i-th
146 * argument is a scalar and had an illegal value, then
147 * INFO = -i.
148 *
149 * Further Details
150 * ===============
151 *
152 * The matrix Q is represented as a product of (ihi-ilo) elementary
153 * reflectors
154 *
155 * Q = H(ilo) H(ilo+1) . . . H(ihi-1).
156 *
157 * Each H(i) has the form
158 *
159 * H(i) = I - tau * v * v'
160 *
161 * where tau is a complex scalar, and v is a complex vector with
162 * v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
163 * exit in A(ia+ilo+i:ia+ihi-1,ja+ilo+i-2), and tau in TAU(ja+ilo+i-2).
164 *
165 * The contents of A(IA:IA+N-1,JA:JA+N-1) are illustrated by the follo-
166 * wing example, with n = 7, ilo = 2 and ihi = 6:
167 *
168 * on entry on exit
169 *
170 * ( a a a a a a a ) ( a a h h h h a )
171 * ( a a a a a a ) ( a h h h h a )
172 * ( a a a a a a ) ( h h h h h h )
173 * ( a a a a a a ) ( v2 h h h h h )
174 * ( a a a a a a ) ( v2 v3 h h h h )
175 * ( a a a a a a ) ( v2 v3 v4 h h h )
176 * ( a ) ( a )
177 *
178 * where a denotes an element of the original matrix sub( A ), h denotes
179 * a modified element of the upper Hessenberg matrix H, and vi denotes
180 * an element of the vector defining H(ja+ilo+i-2).
181 *
182 * Alignment requirements
183 * ======================
184 *
185 * The distributed submatrix sub( A ) must verify some alignment proper-
186 * ties, namely the following expression should be true:
187 * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA )
188 *
189 * =====================================================================
190 *
191 * .. Parameters ..
192  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
193  $ lld_, mb_, m_, nb_, n_, rsrc_
194  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
195  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
196  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
197  COMPLEX ONE
198  parameter( one = ( 1.0e+0, 0.0e+0 ) )
199 * ..
200 * .. Local Scalars ..
201  LOGICAL LQUERY
202  INTEGER I, IAROW, ICOFFA, ICTXT, IROFFA, J, K, LWMIN,
203  $ mycol, myrow, npa0, npcol, nprow
204  COMPLEX AII
205 * ..
206 * .. External Subroutines ..
207  EXTERNAL blacs_abort, blacs_gridinfo, chk1mat, pcelset,
209 * ..
210 * .. External Functions ..
211  INTEGER INDXG2P, NUMROC
212  EXTERNAL indxg2p, numroc
213 * ..
214 * .. Intrinsic Functions ..
215  INTRINSIC cmplx, max, min, mod, real
216 * ..
217 * .. Executable Statements ..
218 *
219 * Get grid parameters
220 *
221  ictxt = desca( ctxt_ )
222  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
223 *
224 * Test the input parameters
225 *
226  info = 0
227  IF( nprow.EQ.-1 ) THEN
228  info = -(700+ctxt_)
229  ELSE
230  CALL chk1mat( n, 1, n, 1, ia, ja, desca, 7, info )
231  IF( info.EQ.0 ) THEN
232  iroffa = mod( ia-1, desca( mb_ ) )
233  icoffa = mod( ja-1, desca( nb_ ) )
234  iarow = indxg2p( ia, desca( mb_ ), myrow, desca( rsrc_ ),
235  $ nprow )
236  npa0 = numroc( ihi+iroffa, desca( mb_ ), myrow, iarow,
237  $ nprow )
238  lwmin = desca( nb_ ) + max( npa0, desca( nb_ ) )
239 *
240  work( 1 ) = cmplx( real( lwmin ) )
241  lquery = ( lwork.EQ.-1 )
242  IF( ilo.LT.1 .OR. ilo.GT.max( 1, n ) ) THEN
243  info = -2
244  ELSE IF( ihi.LT.min( ilo, n ) .OR. ihi.GT.n ) THEN
245  info = -3
246  ELSE IF( iroffa.NE.icoffa ) THEN
247  info = -6
248  ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
249  info = -(700+nb_)
250  ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
251  info = -10
252  END IF
253  END IF
254  END IF
255 *
256  IF( info.NE.0 ) THEN
257  CALL pxerbla( ictxt, 'PCGEHD2', -info )
258  CALL blacs_abort( ictxt, 1 )
259  RETURN
260  ELSE IF( lquery ) THEN
261  RETURN
262  END IF
263 *
264  DO 10 k = ilo, ihi-1
265  i = ia + k - 1
266  j = ja + k - 1
267 *
268 * Compute elementary reflector H(j) to annihilate
269 * A(i+2:ihi+ia-1,j)
270 *
271  CALL pclarfg( ihi-k, aii, i+1, j, a, min( i+2, n+ia-1 ), j,
272  $ desca, 1, tau )
273  CALL pcelset( a, i+1, j, desca, one )
274 *
275 * Apply H(k) to A(ia:ihi+ia-1,j+1:ihi+ja-1) from the right
276 *
277  CALL pclarf( 'Right', ihi, ihi-k, a, i+1, j, desca, 1, tau, a,
278  $ ia, j+1, desca, work )
279 *
280 * Apply H(j) to A(i+1:ia+ihi-1,j+1:ja+n-1) from the left
281 *
282  CALL pclarfc( 'Left', ihi-k, n-k, a, i+1, j, desca, 1, tau, a,
283  $ i+1, j+1, desca, work )
284 *
285  CALL pcelset( a, i+1, j, desca, aii )
286  10 CONTINUE
287 *
288  work( 1 ) = cmplx( real( lwmin ) )
289 *
290  RETURN
291 *
292 * End of PCGEHD2
293 *
294  END
cmplx
float cmplx[2]
Definition: pblas.h:132
max
#define max(A, B)
Definition: pcgemr.c:180
pclarfg
subroutine pclarfg(N, ALPHA, IAX, JAX, X, IX, JX, DESCX, INCX, TAU)
Definition: pclarfg.f:3
pclarfc
subroutine pclarfc(SIDE, M, N, V, IV, JV, DESCV, INCV, TAU, C, IC, JC, DESCC, WORK)
Definition: pclarfc.f:3
pcgehd2
subroutine pcgehd2(N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO)
Definition: pcgehd2.f:3
pcelset
subroutine pcelset(A, IA, JA, DESCA, ALPHA)
Definition: pcelset.f:2
pclarf
subroutine pclarf(SIDE, M, N, V, IV, JV, DESCV, INCV, TAU, C, IC, JC, DESCC, WORK)
Definition: pclarf.f:3
chk1mat
subroutine chk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, INFO)
Definition: chk1mat.f:3
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
min
#define min(A, B)
Definition: pcgemr.c:181