ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pchengst.f
Go to the documentation of this file.
1  SUBROUTINE pchengst( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB,
2  $ DESCB, SCALE, WORK, LWORK, INFO )
3 *
4 * -- ScaLAPACK routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * October 15, 1999
8 *
9 * .. Scalar Arguments ..
10  CHARACTER UPLO
11  INTEGER IA, IB, IBTYPE, INFO, JA, JB, LWORK, N
12  REAL SCALE
13 * ..
14 * .. Array Arguments ..
15  INTEGER DESCA( * ), DESCB( * )
16  COMPLEX A( * ), B( * ), WORK( * )
17 * ..
18 *
19 * Purpose
20 *
21 * =======
22 *
23 * PCHENGST reduces a complex Hermitian-definite generalized
24 * eigenproblem to standard form.
25 *
26 * PCHENGST performs the same function as PCHEGST, but is based on
27 * rank 2K updates, which are faster and more scalable than
28 * triangular solves (the basis of PCHENGST).
29 *
30 * PCHENGST calls PCHEGST when UPLO='U', hence PCHENGST provides
31 * improved performance only when UPLO='L', IBTYPE=1.
32 *
33 * PCHENGST also calls PCHEGST when insufficient workspace is
34 * provided, hence PCHENGST provides improved
35 * performance only when LWORK >= 2 * NP0 * NB + NQ0 * NB + NB * NB
36 *
37 * In the following sub( A ) denotes A( IA:IA+N-1, JA:JA+N-1 ) and
38 * sub( B ) denotes B( IB:IB+N-1, JB:JB+N-1 ).
39 *
40 * If IBTYPE = 1, the problem is sub( A )*x = lambda*sub( B )*x,
41 * and sub( A ) is overwritten by inv(U**H)*sub( A )*inv(U) or
42 * inv(L)*sub( A )*inv(L**H)
43 *
44 * If IBTYPE = 2 or 3, the problem is sub( A )*sub( B )*x = lambda*x or
45 * sub( B )*sub( A )*x = lambda*x, and sub( A ) is overwritten by
46 * U*sub( A )*U**H or L**H*sub( A )*L.
47 *
48 * sub( B ) must have been previously factorized as U**H*U or L*L**H by
49 * PCPOTRF.
50 *
51 * Notes
52 * =====
53 *
54 * Each global data object is described by an associated description
55 * vector. This vector stores the information required to establish
56 * the mapping between an object element and its corresponding process
57 * and memory location.
58 *
59 * Let A be a generic term for any 2D block cyclicly distributed array.
60 * Such a global array has an associated description vector DESCA.
61 * In the following comments, the character _ should be read as
62 * "of the global array".
63 *
64 * NOTATION STORED IN EXPLANATION
65 * --------------- -------------- --------------------------------------
66 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
67 * DTYPE_A = 1.
68 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
69 * the BLACS process grid A is distribu-
70 * ted over. The context itself is glo-
71 * bal, but the handle (the integer
72 * value) may vary.
73 * M_A (global) DESCA( M_ ) The number of rows in the global
74 * array A.
75 * N_A (global) DESCA( N_ ) The number of columns in the global
76 * array A.
77 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
78 * the rows of the array.
79 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
80 * the columns of the array.
81 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
82 * row of the array A is distributed.
83 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
84 * first column of the array A is
85 * distributed.
86 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
87 * array. LLD_A >= MAX(1,LOCr(M_A)).
88 *
89 * Let K be the number of rows or columns of a distributed matrix,
90 * and assume that its process grid has dimension p x q.
91 * LOCr( K ) denotes the number of elements of K that a process
92 * would receive if K were distributed over the p processes of its
93 * process column.
94 * Similarly, LOCc( K ) denotes the number of elements of K that a
95 * process would receive if K were distributed over the q processes of
96 * its process row.
97 * The values of LOCr() and LOCc() may be determined via a call to the
98 * ScaLAPACK tool function, NUMROC:
99 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
100 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
101 * An upper bound for these quantities may be computed by:
102 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
103 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
104 *
105 * Arguments
106 * =========
107 *
108 * IBTYPE (global input) INTEGER
109 * = 1: compute inv(U**H)*sub( A )*inv(U) or
110 * inv(L)*sub( A )*inv(L**H);
111 * = 2 or 3: compute U*sub( A )*U**H or L**H*sub( A )*L.
112 *
113 * UPLO (global input) CHARACTER
114 * = 'U': Upper triangle of sub( A ) is stored and sub( B ) is
115 * factored as U**H*U;
116 * = 'L': Lower triangle of sub( A ) is stored and sub( B ) is
117 * factored as L*L**H.
118 *
119 * N (global input) INTEGER
120 * The order of the matrices sub( A ) and sub( B ). N >= 0.
121 *
122 * A (local input/local output) COMPLEX pointer into the
123 * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
124 * On entry, this array contains the local pieces of the
125 * N-by-N Hermitian distributed matrix sub( A ). If UPLO = 'U',
126 * the leading N-by-N upper triangular part of sub( A ) contains
127 * the upper triangular part of the matrix, and its strictly
128 * lower triangular part is not referenced. If UPLO = 'L', the
129 * leading N-by-N lower triangular part of sub( A ) contains
130 * the lower triangular part of the matrix, and its strictly
131 * upper triangular part is not referenced.
132 *
133 * On exit, if INFO = 0, the transformed matrix, stored in the
134 * same format as sub( A ).
135 *
136 * IA (global input) INTEGER
137 * A's global row index, which points to the beginning of the
138 * submatrix which is to be operated on.
139 *
140 * JA (global input) INTEGER
141 * A's global column index, which points to the beginning of
142 * the submatrix which is to be operated on.
143 *
144 * DESCA (global and local input) INTEGER array of dimension DLEN_.
145 * The array descriptor for the distributed matrix A.
146 *
147 * B (local input) COMPLEX pointer into the local memory
148 * to an array of dimension (LLD_B, LOCc(JB+N-1)). On entry,
149 * this array contains the local pieces of the triangular factor
150 * from the Cholesky factorization of sub( B ), as returned by
151 * PCPOTRF.
152 *
153 * IB (global input) INTEGER
154 * B's global row index, which points to the beginning of the
155 * submatrix which is to be operated on.
156 *
157 * JB (global input) INTEGER
158 * B's global column index, which points to the beginning of
159 * the submatrix which is to be operated on.
160 *
161 * DESCB (global and local input) INTEGER array of dimension DLEN_.
162 * The array descriptor for the distributed matrix B.
163 *
164 * SCALE (global output) REAL
165 * Amount by which the eigenvalues should be scaled to
166 * compensate for the scaling performed in this routine.
167 * At present, SCALE is always returned as 1.0, it is
168 * returned here to allow for future enhancement.
169 *
170 * WORK (local workspace/local output) COMPLEX array,
171 * dimension (LWORK)
172 * On exit, WORK( 1 ) returns the minimal and optimal LWORK.
173 *
174 * LWORK (local or global input) INTEGER
175 * The dimension of the array WORK.
176 * LWORK is local input and must be at least
177 * LWORK >= MAX( NB * ( NP0 +1 ), 3 * NB )
178 *
179 * When IBTYPE = 1 and UPLO = 'L', PCHENGST provides improved
180 * performance when LWORK >= 2 * NP0 * NB + NQ0 * NB + NB * NB
181 *
182 * where NB = MB_A = NB_A,
183 * NP0 = NUMROC( N, NB, 0, 0, NPROW ),
184 * NQ0 = NUMROC( N, NB, 0, 0, NPROW ),
185 *
186 * NUMROC ia a ScaLAPACK tool functions
187 * MYROW, MYCOL, NPROW and NPCOL can be determined by calling
188 * the subroutine BLACS_GRIDINFO.
189 *
190 * If LWORK = -1, then LWORK is global input and a workspace
191 * query is assumed; the routine only calculates the
192 * optimal size for all work arrays. Each of these
193 * values is returned in the first entry of the corresponding
194 * work array, and no error message is issued by PXERBLA.
195 *
196 * INFO (global output) INTEGER
197 * = 0: successful exit
198 * < 0: If the i-th argument is an array and the j-entry had
199 * an illegal value, then INFO = -(i*100+j), if the i-th
200 * argument is a scalar and had an illegal value, then
201 * INFO = -i.
202 *
203 * =====================================================================
204 *
205 *
206 *
207 * .. Parameters ..
208  COMPLEX ONEHALF, ONE, MONE
209  REAL RONE
210  parameter( onehalf = ( 0.5e0, 0.0e0 ),
211  $ one = ( 1.0e0, 0.0e0 ),
212  $ mone = ( -1.0e0, 0.0e0 ), rone = 1.0e0 )
213  INTEGER DLEN_, CTXT_, MB_, NB_, RSRC_, CSRC_, LLD_
214  parameter( dlen_ = 9, ctxt_ = 2, mb_ = 5, nb_ = 6,
215  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
216 * ..
217 * .. Local Scalars ..
218  LOGICAL LQUERY, UPPER
219  INTEGER I, IACOL, IAROW, IBCOL, IBROW, ICOFFA, ICOFFB,
220  $ ictxt, indaa, indg, indr, indrt, iroffa,
221  $ iroffb, j, k, kb, lwmin, lwopt, mycol, myrow,
222  $ nb, np0, npcol, npk, nprow, nq0, postk
223 * ..
224 * .. Local Arrays ..
225  INTEGER DESCAA( DLEN_ ), DESCG( DLEN_ ),
226  $ descr( dlen_ ), descrt( dlen_ ), idum1( 2 ),
227  $ idum2( 2 )
228 * ..
229 * .. External Functions ..
230  LOGICAL LSAME
231  INTEGER INDXG2P, NUMROC
232  EXTERNAL lsame, indxg2p, numroc
233 * ..
234 * .. External Subroutines ..
235  EXTERNAL blacs_gridinfo, chk1mat, descset, pcgemm,
236  $ pchegst, pchemm, pcher2k, pchk2mat, pclacpy,
237  $ pctrsm, pxerbla
238 * ..
239 * .. Intrinsic Functions ..
240  INTRINSIC cmplx, conjg, ichar, max, min, mod, real
241 * ..
242 * .. Executable Statements ..
243  ictxt = desca( ctxt_ )
244  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
245  scale = 1.0e0
246 *
247  nb = desca( mb_ )
248 *
249 *
250 * Test the input parameters
251 *
252  info = 0
253  IF( nprow.EQ.-1 ) THEN
254  info = -( 700+ctxt_ )
255  ELSE
256  upper = lsame( uplo, 'U' )
257  CALL chk1mat( n, 3, n, 3, ia, ja, desca, 7, info )
258  CALL chk1mat( n, 3, n, 3, ib, jb, descb, 11, info )
259  IF( info.EQ.0 ) THEN
260  iarow = indxg2p( ia, desca( mb_ ), myrow, desca( rsrc_ ),
261  $ nprow )
262  ibrow = indxg2p( ib, descb( mb_ ), myrow, descb( rsrc_ ),
263  $ nprow )
264  iacol = indxg2p( ja, desca( nb_ ), mycol, desca( csrc_ ),
265  $ npcol )
266  ibcol = indxg2p( jb, descb( nb_ ), mycol, descb( csrc_ ),
267  $ npcol )
268  iroffa = mod( ia-1, desca( mb_ ) )
269  icoffa = mod( ja-1, desca( nb_ ) )
270  iroffb = mod( ib-1, descb( mb_ ) )
271  icoffb = mod( jb-1, descb( nb_ ) )
272  np0 = numroc( n, nb, 0, 0, nprow )
273  nq0 = numroc( n, nb, 0, 0, npcol )
274  lwmin = max( nb*( np0+1 ), 3*nb )
275  IF( ibtype.EQ.1 .AND. .NOT.upper ) THEN
276  lwopt = 2*np0*nb + nq0*nb + nb*nb
277  ELSE
278  lwopt = lwmin
279  END IF
280  work( 1 ) = cmplx( real( lwopt ) )
281  lquery = ( lwork.EQ.-1 )
282  IF( ibtype.LT.1 .OR. ibtype.GT.3 ) THEN
283  info = -1
284  ELSE IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
285  info = -2
286  ELSE IF( n.LT.0 ) THEN
287  info = -3
288  ELSE IF( iroffa.NE.0 ) THEN
289  info = -5
290  ELSE IF( icoffa.NE.0 ) THEN
291  info = -6
292  ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
293  info = -( 700+nb_ )
294  ELSE IF( iroffb.NE.0 .OR. ibrow.NE.iarow ) THEN
295  info = -9
296  ELSE IF( icoffb.NE.0 .OR. ibcol.NE.iacol ) THEN
297  info = -10
298  ELSE IF( descb( mb_ ).NE.desca( mb_ ) ) THEN
299  info = -( 1100+mb_ )
300  ELSE IF( descb( nb_ ).NE.desca( nb_ ) ) THEN
301  info = -( 1100+nb_ )
302  ELSE IF( ictxt.NE.descb( ctxt_ ) ) THEN
303  info = -( 1100+ctxt_ )
304  ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
305  info = -13
306  END IF
307  END IF
308  idum1( 1 ) = ibtype
309  idum2( 1 ) = 1
310  IF( upper ) THEN
311  idum1( 2 ) = ichar( 'U' )
312  ELSE
313  idum1( 2 ) = ichar( 'L' )
314  END IF
315  idum2( 2 ) = 2
316  CALL pchk2mat( n, 3, n, 3, ia, ja, desca, 7, n, 3, n, 3, ib,
317  $ jb, descb, 11, 2, idum1, idum2, info )
318  END IF
319 *
320  IF( info.NE.0 ) THEN
321  CALL pxerbla( ictxt, 'PCHENGST', -info )
322  RETURN
323  ELSE IF( lquery ) THEN
324  RETURN
325  END IF
326 *
327 * Quick return if possible
328 *
329  IF( n.EQ.0 )
330  $ RETURN
331 *
332 *
333  IF( ibtype.NE.1 .OR. upper .OR. lwork.LT.lwopt ) THEN
334  CALL pchegst( ibtype, uplo, n, a, ia, ja, desca, b, ib, jb,
335  $ descb, scale, info )
336  RETURN
337  END IF
338 *
339  CALL descset( descg, n, nb, nb, nb, iarow, iacol, ictxt, np0 )
340  CALL descset( descr, n, nb, nb, nb, iarow, iacol, ictxt, np0 )
341  CALL descset( descrt, nb, n, nb, nb, iarow, iacol, ictxt, nb )
342  CALL descset( descaa, nb, nb, nb, nb, iarow, iacol, ictxt, nb )
343 *
344  indg = 1
345  indr = indg + descg( lld_ )*nb
346  indaa = indr + descr( lld_ )*nb
347  indrt = indaa + descaa( lld_ )*nb
348 *
349  DO 30 k = 1, n, nb
350 *
351  kb = min( n-k+1, nb )
352  postk = k + kb
353  npk = n - postk + 1
354 *
355 *
356  CALL pclacpy( 'A', n-postk+1, kb, b, postk+ib-1, k+jb-1, descb,
357  $ work( indg ), postk, 1, descg )
358  CALL pclacpy( 'A', n-postk+1, kb, a, postk+ia-1, k+ja-1, desca,
359  $ work( indr ), postk, 1, descr )
360  CALL pclacpy( 'A', kb, k-1, a, k+ia-1, ja, desca,
361  $ work( indrt ), 1, 1, descrt )
362 *
363  CALL pclacpy( 'L', kb, kb, a, k+ia-1, k+ja-1, desca,
364  $ work( indr ), k, 1, descr )
365  CALL pctrsm( 'Right', 'L', 'N', 'N', npk, kb, mone, b, k+ib-1,
366  $ k+jb-1, descb, work( indg ), postk, 1, descg )
367 *
368  CALL pchemm( 'Right', 'L', npk, kb, onehalf, a, k+ia-1, k+ja-1,
369  $ desca, work( indg ), postk, 1, descg, one,
370  $ work( indr ), postk, 1, descr )
371 *
372  CALL pcher2k( 'Lower', 'No T', npk, kb, one, work( indg ),
373  $ postk, 1, descg, work( indr ), postk, 1, descr,
374  $ rone, a, postk+ia-1, postk+ja-1, desca )
375 *
376  CALL pcgemm( 'No T', 'No Conj', npk, k-1, kb, one,
377  $ work( indg ), postk, 1, descg, work( indrt ), 1,
378  $ 1, descrt, one, a, postk+ia-1, ja, desca )
379 *
380  CALL pchemm( 'Right', 'L', npk, kb, one, work( indr ), k, 1,
381  $ descr, work( indg ), postk, 1, descg, one, a,
382  $ postk+ia-1, k+ja-1, desca )
383 *
384  CALL pctrsm( 'Left', 'Lower', 'No Conj', 'Non-unit', kb, k-1,
385  $ one, b, k+ib-1, k+jb-1, descb, a, k+ia-1, ja,
386  $ desca )
387 *
388  CALL pclacpy( 'L', kb, kb, a, k+ia-1, k+ja-1, desca,
389  $ work( indaa ), 1, 1, descaa )
390 *
391  IF( myrow.EQ.descaa( rsrc_ ) .AND. mycol.EQ.descaa( csrc_ ) )
392  $ THEN
393  DO 20 i = 1, kb
394  DO 10 j = 1, i
395  work( indaa+j-1+( i-1 )*descaa( lld_ ) )
396  $ = conjg( work( indaa+i-1+( j-1 )*descaa( lld_ ) ) )
397  10 CONTINUE
398  20 CONTINUE
399  END IF
400 *
401  CALL pctrsm( 'Left', 'Lower', 'No Conj', 'Non-unit', kb, kb,
402  $ one, b, k+ib-1, k+jb-1, descb, work( indaa ), 1,
403  $ 1, descaa )
404 *
405  CALL pctrsm( 'Right', 'Lower', 'Conj', 'Non-unit', kb, kb, one,
406  $ b, k+ib-1, k+jb-1, descb, work( indaa ), 1, 1,
407  $ descaa )
408 *
409  CALL pclacpy( 'L', kb, kb, work( indaa ), 1, 1, descaa, a,
410  $ k+ia-1, k+ja-1, desca )
411 *
412  CALL pctrsm( 'Right', 'Lower', 'Conj', 'Non-unit', npk, kb,
413  $ one, b, k+ib-1, k+jb-1, descb, a, postk+ia-1,
414  $ k+ja-1, desca )
415 *
416  descr( csrc_ ) = mod( descr( csrc_ )+1, npcol )
417  descg( csrc_ ) = mod( descg( csrc_ )+1, npcol )
418  descrt( rsrc_ ) = mod( descrt( rsrc_ )+1, nprow )
419  descaa( rsrc_ ) = mod( descaa( rsrc_ )+1, nprow )
420  descaa( csrc_ ) = mod( descaa( csrc_ )+1, npcol )
421  30 CONTINUE
422 *
423  work( 1 ) = cmplx( real( lwopt ) )
424 *
425  RETURN
426  END
cmplx
float cmplx[2]
Definition: pblas.h:132
max
#define max(A, B)
Definition: pcgemr.c:180
pchk2mat
subroutine pchk2mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, MB, MBPOS0, NB, NBPOS0, IB, JB, DESCB, DESCBPOS0, NEXTRA, EX, EXPOS, INFO)
Definition: pchkxmat.f:175
pchegst
subroutine pchegst(IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, DESCB, SCALE, INFO)
Definition: pchegst.f:5
descset
subroutine descset(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD)
Definition: descset.f:3
pclacpy
subroutine pclacpy(UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, DESCB)
Definition: pclacpy.f:3
pchengst
subroutine pchengst(IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, DESCB, SCALE, WORK, LWORK, INFO)
Definition: pchengst.f:3
chk1mat
subroutine chk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, INFO)
Definition: chk1mat.f:3
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
min
#define min(A, B)
Definition: pcgemr.c:181