ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pssytd2.f
Go to the documentation of this file.
1  SUBROUTINE pssytd2( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK,
2  $ LWORK, INFO )
3 *
4 * -- ScaLAPACK auxiliary routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 1, 1997
8 *
9 * .. Scalar Arguments ..
10  CHARACTER UPLO
11  INTEGER IA, INFO, JA, LWORK, N
12 * ..
13 * .. Array Arguments ..
14  INTEGER DESCA( * )
15  REAL A( * ), D( * ), E( * ), TAU( * ), WORK( * )
16 * ..
17 *
18 * Purpose
19 * =======
20 *
21 * PSSYTD2 reduces a real symmetric matrix sub( A ) to symmetric
22 * tridiagonal form T by an orthogonal similarity transformation:
23 * Q' * sub( A ) * Q = T, where sub( A ) = A(IA:IA+N-1,JA:JA+N-1).
24 *
25 * Notes
26 * =====
27 *
28 * Each global data object is described by an associated description
29 * vector. This vector stores the information required to establish
30 * the mapping between an object element and its corresponding process
31 * and memory location.
32 *
33 * Let A be a generic term for any 2D block cyclicly distributed array.
34 * Such a global array has an associated description vector DESCA.
35 * In the following comments, the character _ should be read as
36 * "of the global array".
37 *
38 * NOTATION STORED IN EXPLANATION
39 * --------------- -------------- --------------------------------------
40 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
41 * DTYPE_A = 1.
42 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
43 * the BLACS process grid A is distribu-
44 * ted over. The context itself is glo-
45 * bal, but the handle (the integer
46 * value) may vary.
47 * M_A (global) DESCA( M_ ) The number of rows in the global
48 * array A.
49 * N_A (global) DESCA( N_ ) The number of columns in the global
50 * array A.
51 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
52 * the rows of the array.
53 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
54 * the columns of the array.
55 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
56 * row of the array A is distributed.
57 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
58 * first column of the array A is
59 * distributed.
60 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
61 * array. LLD_A >= MAX(1,LOCr(M_A)).
62 *
63 * Let K be the number of rows or columns of a distributed matrix,
64 * and assume that its process grid has dimension p x q.
65 * LOCr( K ) denotes the number of elements of K that a process
66 * would receive if K were distributed over the p processes of its
67 * process column.
68 * Similarly, LOCc( K ) denotes the number of elements of K that a
69 * process would receive if K were distributed over the q processes of
70 * its process row.
71 * The values of LOCr() and LOCc() may be determined via a call to the
72 * ScaLAPACK tool function, NUMROC:
73 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
74 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
75 * An upper bound for these quantities may be computed by:
76 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
77 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
78 *
79 * Arguments
80 * =========
81 *
82 * UPLO (global input) CHARACTER
83 * Specifies whether the upper or lower triangular part of the
84 * symmetric matrix sub( A ) is stored:
85 * = 'U': Upper triangular
86 * = 'L': Lower triangular
87 *
88 * N (global input) INTEGER
89 * The number of rows and columns to be operated on, i.e. the
90 * order of the distributed submatrix sub( A ). N >= 0.
91 *
92 * A (local input/local output) REAL pointer into the
93 * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)).
94 * On entry, this array contains the local pieces of the
95 * symmetric distributed matrix sub( A ). If UPLO = 'U', the
96 * leading N-by-N upper triangular part of sub( A ) contains
97 * the upper triangular part of the matrix, and its strictly
98 * lower triangular part is not referenced. If UPLO = 'L', the
99 * leading N-by-N lower triangular part of sub( A ) contains the
100 * lower triangular part of the matrix, and its strictly upper
101 * triangular part is not referenced. On exit, if UPLO = 'U',
102 * the diagonal and first superdiagonal of sub( A ) are over-
103 * written by the corresponding elements of the tridiagonal
104 * matrix T, and the elements above the first superdiagonal,
105 * with the array TAU, represent the orthogonal matrix Q as a
106 * product of elementary reflectors; if UPLO = 'L', the diagonal
107 * and first subdiagonal of sub( A ) are overwritten by the
108 * corresponding elements of the tridiagonal matrix T, and the
109 * elements below the first subdiagonal, with the array TAU,
110 * represent the orthogonal matrix Q as a product of elementary
111 * reflectors. See Further Details.
112 *
113 * IA (global input) INTEGER
114 * The row index in the global array A indicating the first
115 * row of sub( A ).
116 *
117 * JA (global input) INTEGER
118 * The column index in the global array A indicating the
119 * first column of sub( A ).
120 *
121 * DESCA (global and local input) INTEGER array of dimension DLEN_.
122 * The array descriptor for the distributed matrix A.
123 *
124 * D (local output) REAL array, dimension LOCc(JA+N-1)
125 * The diagonal elements of the tridiagonal matrix T:
126 * D(i) = A(i,i). D is tied to the distributed matrix A.
127 *
128 * E (local output) REAL array, dimension LOCc(JA+N-1)
129 * if UPLO = 'U', LOCc(JA+N-2) otherwise. The off-diagonal
130 * elements of the tridiagonal matrix T: E(i) = A(i,i+1) if
131 * UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the
132 * distributed matrix A.
133 *
134 * TAU (local output) REAL, array, dimension
135 * LOCc(JA+N-1). This array contains the scalar factors TAU of
136 * the elementary reflectors. TAU is tied to the distributed
137 * matrix A.
138 *
139 * WORK (local workspace/local output) REAL array,
140 * dimension (LWORK)
141 * On exit, WORK( 1 ) returns the minimal and optimal LWORK.
142 *
143 * LWORK (local or global input) INTEGER
144 * The dimension of the array WORK.
145 * LWORK is local input and must be at least
146 * LWORK >= 3*N.
147 *
148 * If LWORK = -1, then LWORK is global input and a workspace
149 * query is assumed; the routine only calculates the minimum
150 * and optimal size for all work arrays. Each of these
151 * values is returned in the first entry of the corresponding
152 * work array, and no error message is issued by PXERBLA.
153 *
154 * INFO (local output) INTEGER
155 * = 0: successful exit
156 * < 0: If the i-th argument is an array and the j-entry had
157 * an illegal value, then INFO = -(i*100+j), if the i-th
158 * argument is a scalar and had an illegal value, then
159 * INFO = -i.
160 *
161 * Further Details
162 * ===============
163 *
164 * If UPLO = 'U', the matrix Q is represented as a product of elementary
165 * reflectors
166 *
167 * Q = H(n-1) . . . H(2) H(1).
168 *
169 * Each H(i) has the form
170 *
171 * H(i) = I - tau * v * v'
172 *
173 * where tau is a real scalar, and v is a real vector with
174 * v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
175 * A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1).
176 *
177 * If UPLO = 'L', the matrix Q is represented as a product of elementary
178 * reflectors
179 *
180 * Q = H(1) H(2) . . . H(n-1).
181 *
182 * Each H(i) has the form
183 *
184 * H(i) = I - tau * v * v'
185 *
186 * where tau is a real scalar, and v is a real vector with
187 * v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in
188 * A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1).
189 *
190 * The contents of sub( A ) on exit are illustrated by the following
191 * examples with n = 5:
192 *
193 * if UPLO = 'U': if UPLO = 'L':
194 *
195 * ( d e v2 v3 v4 ) ( d )
196 * ( d e v3 v4 ) ( e d )
197 * ( d e v4 ) ( v1 e d )
198 * ( d e ) ( v1 v2 e d )
199 * ( d ) ( v1 v2 v3 e d )
200 *
201 * where d and e denote diagonal and off-diagonal elements of T, and vi
202 * denotes an element of the vector defining H(i).
203 *
204 * Alignment requirements
205 * ======================
206 *
207 * The distributed submatrix sub( A ) must verify some alignment proper-
208 * ties, namely the following expression should be true:
209 * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA ) with
210 * IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ).
211 *
212 * =====================================================================
213 *
214 * .. Parameters ..
215  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
216  $ lld_, mb_, m_, nb_, n_, rsrc_
217  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
218  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
219  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
220  REAL HALF, ONE, ZERO
221  parameter( half = 0.5e+0, one = 1.0e+0, zero = 0.0e+0 )
222 * ..
223 * .. Local Scalars ..
224  LOGICAL LQUERY, UPPER
225  INTEGER IACOL, IAROW, ICOFFA, ICTXT, II, IK, IROFFA, J,
226  $ jj, jk, jn, lda, lwmin, mycol, myrow, npcol,
227  $ nprow
228  REAL ALPHA, TAUI
229 * ..
230 * .. External Subroutines ..
231  EXTERNAL blacs_abort, blacs_gridinfo, chk1mat, infog2l,
232  $ pxerbla, saxpy, sgebr2d, sgebs2d,
233  $ slarfg, ssymv, ssyr2
234 * ..
235 * .. External Functions ..
236  LOGICAL LSAME
237  REAL SDOT
238  EXTERNAL lsame, sdot
239 * ..
240 * .. Intrinsic Functions ..
241  INTRINSIC real
242 * ..
243 * .. Executable Statements ..
244 *
245 * Get grid parameters
246 *
247  ictxt = desca( ctxt_ )
248  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
249 *
250 * Test the input parameters
251 *
252  info = 0
253  IF( nprow.EQ.-1 ) THEN
254  info = -(600+ctxt_)
255  ELSE
256  upper = lsame( uplo, 'U' )
257  CALL chk1mat( n, 2, n, 2, ia, ja, desca, 6, info )
258  lwmin = 3 * n
259 *
260  work( 1 ) = real( lwmin )
261  lquery = ( lwork.EQ.-1 )
262  IF( info.EQ.0 ) THEN
263  iroffa = mod( ia-1, desca( mb_ ) )
264  icoffa = mod( ja-1, desca( nb_ ) )
265  IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
266  info = -1
267  ELSE IF( iroffa.NE.icoffa ) THEN
268  info = -5
269  ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
270  info = -(600+nb_)
271  ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
272  info = -11
273  END IF
274  END IF
275  END IF
276 *
277  IF( info.NE.0 ) THEN
278  CALL pxerbla( ictxt, 'PSSYTD2', -info )
279  CALL blacs_abort( ictxt, 1 )
280  RETURN
281  ELSE IF( lquery ) THEN
282  RETURN
283  END IF
284 *
285 * Quick return if possible
286 *
287  IF( n.LE.0 )
288  $ RETURN
289 *
290 * Compute local information
291 *
292  lda = desca( lld_ )
293  CALL infog2l( ia, ja, desca, nprow, npcol, myrow, mycol, ii, jj,
294  $ iarow, iacol )
295 *
296  IF( upper ) THEN
297 *
298 * Process(IAROW, IACOL) owns block to be reduced
299 *
300  IF( mycol.EQ.iacol ) THEN
301  IF( myrow.EQ.iarow ) THEN
302 *
303 * Reduce the upper triangle of sub( A )
304 *
305  DO 10 j = n-1, 1, -1
306  ik = ii + j - 1
307  jk = jj + j - 1
308 *
309 * Generate elementary reflector H(i) = I - tau * v * v'
310 * to annihilate A(IA:IA+J-1,JA:JA+J-1)
311 *
312  CALL slarfg( j, a( ik+jk*lda ), a( ii+jk*lda ), 1,
313  $ taui )
314  e( jk+1 ) = a( ik+jk*lda )
315 *
316  IF( taui.NE.zero ) THEN
317 *
318 * Apply H(i) from both sides to
319 * A(IA:IA+J-1,JA:JA+J-1)
320 *
321  a( ik+jk*lda ) = one
322 *
323 * Compute x := tau * A * v storing x in TAU(1:i)
324 *
325  CALL ssymv( uplo, j, taui, a( ii+(jj-1)*lda ),
326  $ lda, a( ii+jk*lda ), 1, zero,
327  $ tau( jj ), 1 )
328 *
329 * Compute w := x - 1/2 * tau * (x'*v) * v
330 *
331  alpha = -half*taui*sdot( j, tau( jj ), 1,
332  $ a( ii+jk*lda ), 1 )
333  CALL saxpy( j, alpha, a( ii+jk*lda ), 1,
334  $ tau( jj ), 1 )
335 *
336 * Apply the transformation as a rank-2 update:
337 * A := A - v * w' - w * v'
338 *
339  CALL ssyr2( uplo, j, -one, a( ii+jk*lda ), 1,
340  $ tau( jj ), 1, a( ii+(jj-1)*lda ),
341  $ lda )
342  a( ik+jk*lda ) = e( jk+1 )
343  END IF
344 *
345 * Copy D, E, TAU to broadcast them columnwise.
346 *
347  d( jk+1 ) = a( ik+1+jk*lda )
348  work( j+1 ) = d( jk+1 )
349  work( n+j+1 ) = e( jk+1 )
350  tau( jk+1 ) = taui
351  work( 2*n+j+1 ) = tau( jk+1 )
352 *
353  10 CONTINUE
354  d( jj ) = a( ii+(jj-1)*lda )
355  work( 1 ) = d( jj )
356  work( n+1 ) = zero
357  work( 2*n+1 ) = zero
358 *
359  CALL sgebs2d( ictxt, 'Columnwise', ' ', 1, 3*n, work, 1 )
360 *
361  ELSE
362  CALL sgebr2d( ictxt, 'Columnwise', ' ', 1, 3*n, work, 1,
363  $ iarow, iacol )
364  DO 20 j = 2, n
365  jn = jj + j - 1
366  d( jn ) = work( j )
367  e( jn ) = work( n+j )
368  tau( jn ) = work( 2*n+j )
369  20 CONTINUE
370  d( jj ) = work( 1 )
371  END IF
372  END IF
373 *
374  ELSE
375 *
376 * Process (IAROW, IACOL) owns block to be factorized
377 *
378  IF( mycol.EQ.iacol ) THEN
379  IF( myrow.EQ.iarow ) THEN
380 *
381 * Reduce the lower triangle of sub( A )
382 *
383  DO 30 j = 1, n - 1
384  ik = ii + j - 1
385  jk = jj + j - 1
386 *
387 * Generate elementary reflector H(i) = I - tau * v * v'
388 * to annihilate A(IA+J-JA+2:IA+N-1,JA+J-1)
389 *
390  CALL slarfg( n-j, a( ik+1+(jk-1)*lda ),
391  $ a( ik+2+(jk-1)*lda ), 1, taui )
392  e( jk ) = a( ik+1+(jk-1)*lda )
393 *
394  IF( taui.NE.zero ) THEN
395 *
396 * Apply H(i) from both sides to
397 * A(IA+J-JA+1:IA+N-1,JA+J+1:JA+N-1)
398 *
399  a( ik+1+(jk-1)*lda ) = one
400 *
401 * Compute x := tau * A * v storing y in TAU(i:n-1)
402 *
403  CALL ssymv( uplo, n-j, taui, a( ik+1+jk*lda ),
404  $ lda, a( ik+1+(jk-1)*lda ), 1,
405  $ zero, tau( jk ), 1 )
406 *
407 * Compute w := x - 1/2 * tau * (x'*v) * v
408 *
409  alpha = -half*taui*sdot( n-j, tau( jk ), 1,
410  $ a( ik+1+(jk-1)*lda ), 1 )
411  CALL saxpy( n-j, alpha, a( ik+1+(jk-1)*lda ),
412  $ 1, tau( jk ), 1 )
413 *
414 * Apply the transformation as a rank-2 update:
415 * A := A - v * w' - w * v'
416 *
417  CALL ssyr2( uplo, n-j, -one,
418  $ a( ik+1+(jk-1)*lda ), 1,
419  $ tau( jk ), 1, a( ik+1+jk*lda ),
420  $ lda )
421  a( ik+1+(jk-1)*lda ) = e( jk )
422  END IF
423 *
424 * Copy D(JK), E(JK), TAU(JK) to broadcast them
425 * columnwise.
426 *
427  d( jk ) = a( ik+(jk-1)*lda )
428  work( j ) = d( jk )
429  work( n+j ) = e( jk )
430  tau( jk ) = taui
431  work( 2*n+j ) = tau( jk )
432  30 CONTINUE
433  jn = jj + n - 1
434  d( jn ) = a( ii+n-1+(jn-1)*lda )
435  work( n ) = d( jn )
436  tau( jn ) = zero
437  work( 2*n ) = zero
438 *
439  CALL sgebs2d( ictxt, 'Columnwise', ' ', 1, 3*n-1, work,
440  $ 1 )
441 *
442  ELSE
443  CALL sgebr2d( ictxt, 'Columnwise', ' ', 1, 3*n-1, work,
444  $ 1, iarow, iacol )
445  DO 40 j = 1, n - 1
446  jn = jj + j - 1
447  d( jn ) = work( j )
448  e( jn ) = work( n+j )
449  tau( jn ) = work( 2*n+j )
450  40 CONTINUE
451  jn = jj + n - 1
452  d( jn ) = work( n )
453  tau( jn ) = zero
454  END IF
455  END IF
456  END IF
457 *
458  work( 1 ) = real( lwmin )
459 *
460  RETURN
461 *
462 * End of PSSYTD2
463 *
464  END
pssytd2
subroutine pssytd2(UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, LWORK, INFO)
Definition: pssytd2.f:3
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
chk1mat
subroutine chk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, INFO)
Definition: chk1mat.f:3
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2