ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdsyngst.f
Go to the documentation of this file.
1  SUBROUTINE pdsyngst( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB,
2  $ DESCB, SCALE, WORK, LWORK, INFO )
3 *
4 * -- ScaLAPACK routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * October 15, 1999
8 *
9 * .. Scalar Arguments ..
10  CHARACTER UPLO
11  INTEGER IA, IB, IBTYPE, INFO, JA, JB, LWORK, N
12  DOUBLE PRECISION SCALE
13 * ..
14 * .. Array Arguments ..
15  INTEGER DESCA( * ), DESCB( * )
16  DOUBLE PRECISION A( * ), B( * ), WORK( * )
17 * ..
18 *
19 * Purpose
20 *
21 * =======
22 *
23 * PDSYNGST reduces a complex Hermitian-definite generalized
24 * eigenproblem to standard form.
25 *
26 * PDSYNGST performs the same function as PDHEGST, but is based on
27 * rank 2K updates, which are faster and more scalable than
28 * triangular solves (the basis of PDSYNGST).
29 *
30 * PDSYNGST calls PDHEGST when UPLO='U', hence PDHENGST provides
31 * improved performance only when UPLO='L', IBTYPE=1.
32 *
33 * PDSYNGST also calls PDHEGST when insufficient workspace is
34 * provided, hence PDSYNGST provides improved
35 * performance only when LWORK >= 2 * NP0 * NB + NQ0 * NB + NB * NB
36 *
37 * In the following sub( A ) denotes A( IA:IA+N-1, JA:JA+N-1 ) and
38 * sub( B ) denotes B( IB:IB+N-1, JB:JB+N-1 ).
39 *
40 * If IBTYPE = 1, the problem is sub( A )*x = lambda*sub( B )*x,
41 * and sub( A ) is overwritten by inv(U**H)*sub( A )*inv(U) or
42 * inv(L)*sub( A )*inv(L**H)
43 *
44 * If IBTYPE = 2 or 3, the problem is sub( A )*sub( B )*x = lambda*x or
45 * sub( B )*sub( A )*x = lambda*x, and sub( A ) is overwritten by
46 * U*sub( A )*U**H or L**H*sub( A )*L.
47 *
48 * sub( B ) must have been previously factorized as U**H*U or L*L**H by
49 * PDPOTRF.
50 *
51 * Notes
52 * =====
53 *
54 * Each global data object is described by an associated description
55 * vector. This vector stores the information required to establish
56 * the mapping between an object element and its corresponding process
57 * and memory location.
58 *
59 * Let A be a generic term for any 2D block cyclicly distributed array.
60 * Such a global array has an associated description vector DESCA.
61 * In the following comments, the character _ should be read as
62 * "of the global array".
63 *
64 * NOTATION STORED IN EXPLANATION
65 * --------------- -------------- --------------------------------------
66 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
67 * DTYPE_A = 1.
68 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
69 * the BLACS process grid A is distribu-
70 * ted over. The context itself is glo-
71 * bal, but the handle (the integer
72 * value) may vary.
73 * M_A (global) DESCA( M_ ) The number of rows in the global
74 * array A.
75 * N_A (global) DESCA( N_ ) The number of columns in the global
76 * array A.
77 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
78 * the rows of the array.
79 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
80 * the columns of the array.
81 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
82 * row of the array A is distributed.
83 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
84 * first column of the array A is
85 * distributed.
86 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
87 * array. LLD_A >= MAX(1,LOCr(M_A)).
88 *
89 * Let K be the number of rows or columns of a distributed matrix,
90 * and assume that its process grid has dimension p x q.
91 * LOCr( K ) denotes the number of elements of K that a process
92 * would receive if K were distributed over the p processes of its
93 * process column.
94 * Similarly, LOCc( K ) denotes the number of elements of K that a
95 * process would receive if K were distributed over the q processes of
96 * its process row.
97 * The values of LOCr() and LOCc() may be determined via a call to the
98 * ScaLAPACK tool function, NUMROC:
99 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
100 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
101 * An upper bound for these quantities may be computed by:
102 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
103 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
104 *
105 * Arguments
106 * =========
107 *
108 * IBTYPE (global input) INTEGER
109 * = 1: compute inv(U**H)*sub( A )*inv(U) or
110 * inv(L)*sub( A )*inv(L**H);
111 * = 2 or 3: compute U*sub( A )*U**H or L**H*sub( A )*L.
112 *
113 * UPLO (global input) CHARACTER
114 * = 'U': Upper triangle of sub( A ) is stored and sub( B ) is
115 * factored as U**H*U;
116 * = 'L': Lower triangle of sub( A ) is stored and sub( B ) is
117 * factored as L*L**H.
118 *
119 * N (global input) INTEGER
120 * The order of the matrices sub( A ) and sub( B ). N >= 0.
121 *
122 * A (local input/local output) DOUBLE PRECISION pointer into the
123 * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
124 * On entry, this array contains the local pieces of the
125 * N-by-N Hermitian distributed matrix sub( A ). If UPLO = 'U',
126 * the leading N-by-N upper triangular part of sub( A ) contains
127 * the upper triangular part of the matrix, and its strictly
128 * lower triangular part is not referenced. If UPLO = 'L', the
129 * leading N-by-N lower triangular part of sub( A ) contains
130 * the lower triangular part of the matrix, and its strictly
131 * upper triangular part is not referenced.
132 *
133 * On exit, if INFO = 0, the transformed matrix, stored in the
134 * same format as sub( A ).
135 *
136 * IA (global input) INTEGER
137 * A's global row index, which points to the beginning of the
138 * submatrix which is to be operated on.
139 *
140 * JA (global input) INTEGER
141 * A's global column index, which points to the beginning of
142 * the submatrix which is to be operated on.
143 *
144 * DESCA (global and local input) INTEGER array of dimension DLEN_.
145 * The array descriptor for the distributed matrix A.
146 *
147 * B (local input) DOUBLE PRECISION pointer into the local memory
148 * to an array of dimension (LLD_B, LOCc(JB+N-1)). On entry,
149 * this array contains the local pieces of the triangular factor
150 * from the Cholesky factorization of sub( B ), as returned by
151 * PDPOTRF.
152 *
153 * IB (global input) INTEGER
154 * B's global row index, which points to the beginning of the
155 * submatrix which is to be operated on.
156 *
157 * JB (global input) INTEGER
158 * B's global column index, which points to the beginning of
159 * the submatrix which is to be operated on.
160 *
161 * DESCB (global and local input) INTEGER array of dimension DLEN_.
162 * The array descriptor for the distributed matrix B.
163 *
164 * SCALE (global output) DOUBLE PRECISION
165 * Amount by which the eigenvalues should be scaled to
166 * compensate for the scaling performed in this routine.
167 * At present, SCALE is always returned as 1.0, it is
168 * returned here to allow for future enhancement.
169 *
170 * WORK (local workspace/local output) DOUBLE PRECISION array,
171 * dimension (LWORK)
172 * On exit, WORK( 1 ) returns the minimal and optimal LWORK.
173 *
174 * LWORK (local or global input) INTEGER
175 * The dimension of the array WORK.
176 * LWORK is local input and must be at least
177 * LWORK >= MAX( NB * ( NP0 +1 ), 3 * NB )
178 *
179 * When IBTYPE = 1 and UPLO = 'L', PDSYNGST provides improved
180 * performance when LWORK >= 2 * NP0 * NB + NQ0 * NB + NB * NB
181 *
182 * where NB = MB_A = NB_A,
183 * NP0 = NUMROC( N, NB, 0, 0, NPROW ),
184 * NQ0 = NUMROC( N, NB, 0, 0, NPROW ),
185 *
186 * NUMROC ia a ScaLAPACK tool functions
187 * MYROW, MYCOL, NPROW and NPCOL can be determined by calling
188 * the subroutine BLACS_GRIDINFO.
189 *
190 * If LWORK = -1, then LWORK is global input and a workspace
191 * query is assumed; the routine only calculates the
192 * optimal size for all work arrays. Each of these
193 * values is returned in the first entry of the corresponding
194 * work array, and no error message is issued by PXERBLA.
195 *
196 * INFO (global output) INTEGER
197 * = 0: successful exit
198 * < 0: If the i-th argument is an array and the j-entry had
199 * an illegal value, then INFO = -(i*100+j), if the i-th
200 * argument is a scalar and had an illegal value, then
201 * INFO = -i.
202 *
203 * =====================================================================
204 *
205 *
206 *
207 * .. Parameters ..
208  DOUBLE PRECISION ONEHALF, ONE, MONE
209  parameter( onehalf = 0.5d0, one = 1.0d0, mone = -1.0d0 )
210  INTEGER DLEN_, CTXT_, MB_, NB_, RSRC_, CSRC_, LLD_
211  parameter( dlen_ = 9, ctxt_ = 2, mb_ = 5, nb_ = 6,
212  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
213 * ..
214 * .. Local Scalars ..
215  LOGICAL LQUERY, UPPER
216  INTEGER I, IACOL, IAROW, IBCOL, IBROW, ICOFFA, ICOFFB,
217  $ ictxt, indaa, indg, indr, indrt, iroffa,
218  $ iroffb, j, k, kb, lwmin, lwopt, mycol, myrow,
219  $ nb, np0, npcol, npk, nprow, nq0, postk
220 * ..
221 * .. Local Arrays ..
222  INTEGER DESCAA( DLEN_ ), DESCG( DLEN_ ),
223  $ descr( dlen_ ), descrt( dlen_ ), idum1( 2 ),
224  $ idum2( 2 )
225 * ..
226 * .. External Functions ..
227  LOGICAL LSAME
228  INTEGER INDXG2P, NUMROC
229  EXTERNAL lsame, indxg2p, numroc
230 * ..
231 * .. External Subroutines ..
232  EXTERNAL blacs_gridinfo, chk1mat, descset, pchk2mat,
233  $ pdgemm, pdlacpy, pdsygst, pdsymm, pdsyr2k,
234  $ pdtrsm, pxerbla
235 * ..
236 * .. Intrinsic Functions ..
237  INTRINSIC dble, ichar, max, min, mod
238 * ..
239 * .. Executable Statements ..
240  ictxt = desca( ctxt_ )
241  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
242  scale = 1.0d0
243 *
244  nb = desca( mb_ )
245 *
246 *
247 * Test the input parameters
248 *
249  info = 0
250  IF( nprow.EQ.-1 ) THEN
251  info = -( 700+ctxt_ )
252  ELSE
253  upper = lsame( uplo, 'U' )
254  CALL chk1mat( n, 3, n, 3, ia, ja, desca, 7, info )
255  CALL chk1mat( n, 3, n, 3, ib, jb, descb, 11, info )
256  IF( info.EQ.0 ) THEN
257  iarow = indxg2p( ia, desca( mb_ ), myrow, desca( rsrc_ ),
258  $ nprow )
259  ibrow = indxg2p( ib, descb( mb_ ), myrow, descb( rsrc_ ),
260  $ nprow )
261  iacol = indxg2p( ja, desca( nb_ ), mycol, desca( csrc_ ),
262  $ npcol )
263  ibcol = indxg2p( jb, descb( nb_ ), mycol, descb( csrc_ ),
264  $ npcol )
265  iroffa = mod( ia-1, desca( mb_ ) )
266  icoffa = mod( ja-1, desca( nb_ ) )
267  iroffb = mod( ib-1, descb( mb_ ) )
268  icoffb = mod( jb-1, descb( nb_ ) )
269  np0 = numroc( n, nb, 0, 0, nprow )
270  nq0 = numroc( n, nb, 0, 0, npcol )
271  lwmin = max( nb*( np0+1 ), 3*nb )
272  IF( ibtype.EQ.1 .AND. .NOT.upper ) THEN
273  lwopt = 2*np0*nb + nq0*nb + nb*nb
274  ELSE
275  lwopt = lwmin
276  END IF
277  work( 1 ) = dble( lwopt )
278  lquery = ( lwork.EQ.-1 )
279  IF( ibtype.LT.1 .OR. ibtype.GT.3 ) THEN
280  info = -1
281  ELSE IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
282  info = -2
283  ELSE IF( n.LT.0 ) THEN
284  info = -3
285  ELSE IF( iroffa.NE.0 ) THEN
286  info = -5
287  ELSE IF( icoffa.NE.0 ) THEN
288  info = -6
289  ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
290  info = -( 700+nb_ )
291  ELSE IF( iroffb.NE.0 .OR. ibrow.NE.iarow ) THEN
292  info = -9
293  ELSE IF( icoffb.NE.0 .OR. ibcol.NE.iacol ) THEN
294  info = -10
295  ELSE IF( descb( mb_ ).NE.desca( mb_ ) ) THEN
296  info = -( 1100+mb_ )
297  ELSE IF( descb( nb_ ).NE.desca( nb_ ) ) THEN
298  info = -( 1100+nb_ )
299  ELSE IF( ictxt.NE.descb( ctxt_ ) ) THEN
300  info = -( 1100+ctxt_ )
301  ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
302  info = -13
303  END IF
304  END IF
305  idum1( 1 ) = ibtype
306  idum2( 1 ) = 1
307  IF( upper ) THEN
308  idum1( 2 ) = ichar( 'U' )
309  ELSE
310  idum1( 2 ) = ichar( 'L' )
311  END IF
312  idum2( 2 ) = 2
313  CALL pchk2mat( n, 3, n, 3, ia, ja, desca, 7, n, 3, n, 3, ib,
314  $ jb, descb, 11, 2, idum1, idum2, info )
315  END IF
316 *
317  IF( info.NE.0 ) THEN
318  CALL pxerbla( ictxt, 'PDSYNGST', -info )
319  RETURN
320  ELSE IF( lquery ) THEN
321  RETURN
322  END IF
323 *
324 * Quick return if possible
325 *
326  IF( n.EQ.0 )
327  $ RETURN
328 *
329 *
330  IF( ibtype.NE.1 .OR. upper .OR. lwork.LT.lwopt ) THEN
331  CALL pdsygst( ibtype, uplo, n, a, ia, ja, desca, b, ib, jb,
332  $ descb, scale, info )
333  RETURN
334  END IF
335 *
336  CALL descset( descg, n, nb, nb, nb, iarow, iacol, ictxt, np0 )
337  CALL descset( descr, n, nb, nb, nb, iarow, iacol, ictxt, np0 )
338  CALL descset( descrt, nb, n, nb, nb, iarow, iacol, ictxt, nb )
339  CALL descset( descaa, nb, nb, nb, nb, iarow, iacol, ictxt, nb )
340 *
341  indg = 1
342  indr = indg + descg( lld_ )*nb
343  indaa = indr + descr( lld_ )*nb
344  indrt = indaa + descaa( lld_ )*nb
345 *
346  DO 30 k = 1, n, nb
347 *
348  kb = min( n-k+1, nb )
349  postk = k + kb
350  npk = n - postk + 1
351 *
352 *
353  CALL pdlacpy( 'A', n-postk+1, kb, b, postk+ib-1, k+jb-1, descb,
354  $ work( indg ), postk, 1, descg )
355  CALL pdlacpy( 'A', n-postk+1, kb, a, postk+ia-1, k+ja-1, desca,
356  $ work( indr ), postk, 1, descr )
357  CALL pdlacpy( 'A', kb, k-1, a, k+ia-1, ja, desca,
358  $ work( indrt ), 1, 1, descrt )
359 *
360  CALL pdlacpy( 'L', kb, kb, a, k+ia-1, k+ja-1, desca,
361  $ work( indr ), k, 1, descr )
362  CALL pdtrsm( 'Right', 'L', 'N', 'N', npk, kb, mone, b, k+ib-1,
363  $ k+jb-1, descb, work( indg ), postk, 1, descg )
364 *
365  CALL pdsymm( 'Right', 'L', npk, kb, onehalf, a, k+ia-1, k+ja-1,
366  $ desca, work( indg ), postk, 1, descg, one,
367  $ work( indr ), postk, 1, descr )
368 *
369  CALL pdsyr2k( 'Lower', 'No T', npk, kb, one, work( indg ),
370  $ postk, 1, descg, work( indr ), postk, 1, descr,
371  $ one, a, postk+ia-1, postk+ja-1, desca )
372 *
373  CALL pdgemm( 'No T', 'No Conj', npk, k-1, kb, one,
374  $ work( indg ), postk, 1, descg, work( indrt ), 1,
375  $ 1, descrt, one, a, postk+ia-1, ja, desca )
376 *
377  CALL pdsymm( 'Right', 'L', npk, kb, one, work( indr ), k, 1,
378  $ descr, work( indg ), postk, 1, descg, one, a,
379  $ postk+ia-1, k+ja-1, desca )
380 *
381  CALL pdtrsm( 'Left', 'Lower', 'No Conj', 'Non-unit', kb, k-1,
382  $ one, b, k+ib-1, k+jb-1, descb, a, k+ia-1, ja,
383  $ desca )
384 *
385  CALL pdlacpy( 'L', kb, kb, a, k+ia-1, k+ja-1, desca,
386  $ work( indaa ), 1, 1, descaa )
387 *
388  IF( myrow.EQ.descaa( rsrc_ ) .AND. mycol.EQ.descaa( csrc_ ) )
389  $ THEN
390  DO 20 i = 1, kb
391  DO 10 j = 1, i
392  work( indaa+j-1+( i-1 )*descaa( lld_ ) )
393  $ = work( indaa+i-1+( j-1 )*descaa( lld_ ) )
394  10 CONTINUE
395  20 CONTINUE
396  END IF
397 *
398  CALL pdtrsm( 'Left', 'Lower', 'No Conj', 'Non-unit', kb, kb,
399  $ one, b, k+ib-1, k+jb-1, descb, work( indaa ), 1,
400  $ 1, descaa )
401 *
402  CALL pdtrsm( 'Right', 'Lower', 'Conj', 'Non-unit', kb, kb, one,
403  $ b, k+ib-1, k+jb-1, descb, work( indaa ), 1, 1,
404  $ descaa )
405 *
406  CALL pdlacpy( 'L', kb, kb, work( indaa ), 1, 1, descaa, a,
407  $ k+ia-1, k+ja-1, desca )
408 *
409  CALL pdtrsm( 'Right', 'Lower', 'Conj', 'Non-unit', npk, kb,
410  $ one, b, k+ib-1, k+jb-1, descb, a, postk+ia-1,
411  $ k+ja-1, desca )
412 *
413  descr( csrc_ ) = mod( descr( csrc_ )+1, npcol )
414  descg( csrc_ ) = mod( descg( csrc_ )+1, npcol )
415  descrt( rsrc_ ) = mod( descrt( rsrc_ )+1, nprow )
416  descaa( rsrc_ ) = mod( descaa( rsrc_ )+1, nprow )
417  descaa( csrc_ ) = mod( descaa( csrc_ )+1, npcol )
418  30 CONTINUE
419 *
420  work( 1 ) = dble( lwopt )
421 *
422  RETURN
423  END
max
#define max(A, B)
Definition: pcgemr.c:180
pchk2mat
subroutine pchk2mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, MB, MBPOS0, NB, NBPOS0, IB, JB, DESCB, DESCBPOS0, NEXTRA, EX, EXPOS, INFO)
Definition: pchkxmat.f:175
descset
subroutine descset(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD)
Definition: descset.f:3
pdsyngst
subroutine pdsyngst(IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, DESCB, SCALE, WORK, LWORK, INFO)
Definition: pdsyngst.f:3
chk1mat
subroutine chk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, INFO)
Definition: chk1mat.f:3
pdsygst
subroutine pdsygst(IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, DESCB, SCALE, INFO)
Definition: pdsygst.f:5
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
pdlacpy
subroutine pdlacpy(UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, DESCB)
Definition: pdlacpy.f:3
min
#define min(A, B)
Definition: pcgemr.c:181