ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pchetd2.f
Go to the documentation of this file.
1  SUBROUTINE pchetd2( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK,
2  $ LWORK, INFO )
3 *
4 * -- ScaLAPACK auxiliary routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 1, 1997
8 *
9 * .. Scalar Arguments ..
10  CHARACTER UPLO
11  INTEGER IA, INFO, JA, LWORK, N
12 * ..
13 * .. Array Arguments ..
14  INTEGER DESCA( * )
15  REAL D( * ), E( * )
16  COMPLEX A( * ), TAU( * ), WORK( * )
17 * ..
18 *
19 * Purpose
20 * =======
21 *
22 * PCHETD2 reduces a complex Hermitian matrix sub( A ) to Hermitian
23 * tridiagonal form T by an unitary similarity transformation:
24 * Q' * sub( A ) * Q = T, where sub( A ) = A(IA:IA+N-1,JA:JA+N-1).
25 *
26 * Notes
27 * =====
28 *
29 * Each global data object is described by an associated description
30 * vector. This vector stores the information required to establish
31 * the mapping between an object element and its corresponding process
32 * and memory location.
33 *
34 * Let A be a generic term for any 2D block cyclicly distributed array.
35 * Such a global array has an associated description vector DESCA.
36 * In the following comments, the character _ should be read as
37 * "of the global array".
38 *
39 * NOTATION STORED IN EXPLANATION
40 * --------------- -------------- --------------------------------------
41 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
42 * DTYPE_A = 1.
43 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
44 * the BLACS process grid A is distribu-
45 * ted over. The context itself is glo-
46 * bal, but the handle (the integer
47 * value) may vary.
48 * M_A (global) DESCA( M_ ) The number of rows in the global
49 * array A.
50 * N_A (global) DESCA( N_ ) The number of columns in the global
51 * array A.
52 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
53 * the rows of the array.
54 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
55 * the columns of the array.
56 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
57 * row of the array A is distributed.
58 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
59 * first column of the array A is
60 * distributed.
61 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
62 * array. LLD_A >= MAX(1,LOCr(M_A)).
63 *
64 * Let K be the number of rows or columns of a distributed matrix,
65 * and assume that its process grid has dimension p x q.
66 * LOCr( K ) denotes the number of elements of K that a process
67 * would receive if K were distributed over the p processes of its
68 * process column.
69 * Similarly, LOCc( K ) denotes the number of elements of K that a
70 * process would receive if K were distributed over the q processes of
71 * its process row.
72 * The values of LOCr() and LOCc() may be determined via a call to the
73 * ScaLAPACK tool function, NUMROC:
74 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
75 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
76 * An upper bound for these quantities may be computed by:
77 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
78 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
79 *
80 * Arguments
81 * =========
82 *
83 * UPLO (global input) CHARACTER
84 * Specifies whether the upper or lower triangular part of the
85 * Hermitian matrix sub( A ) is stored:
86 * = 'U': Upper triangular
87 * = 'L': Lower triangular
88 *
89 * N (global input) INTEGER
90 * The number of rows and columns to be operated on, i.e. the
91 * order of the distributed submatrix sub( A ). N >= 0.
92 *
93 * A (local input/local output) COMPLEX pointer into the
94 * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)).
95 * On entry, this array contains the local pieces of the
96 * Hermitian distributed matrix sub( A ). If UPLO = 'U', the
97 * leading N-by-N upper triangular part of sub( A ) contains
98 * the upper triangular part of the matrix, and its strictly
99 * lower triangular part is not referenced. If UPLO = 'L', the
100 * leading N-by-N lower triangular part of sub( A ) contains the
101 * lower triangular part of the matrix, and its strictly upper
102 * triangular part is not referenced. On exit, if UPLO = 'U',
103 * the diagonal and first superdiagonal of sub( A ) are over-
104 * written by the corresponding elements of the tridiagonal
105 * matrix T, and the elements above the first superdiagonal,
106 * with the array TAU, represent the unitary matrix Q as a
107 * product of elementary reflectors; if UPLO = 'L', the diagonal
108 * and first subdiagonal of sub( A ) are overwritten by the
109 * corresponding elements of the tridiagonal matrix T, and the
110 * elements below the first subdiagonal, with the array TAU,
111 * represent the unitary matrix Q as a product of elementary
112 * reflectors. See Further Details.
113 *
114 * IA (global input) INTEGER
115 * The row index in the global array A indicating the first
116 * row of sub( A ).
117 *
118 * JA (global input) INTEGER
119 * The column index in the global array A indicating the
120 * first column of sub( A ).
121 *
122 * DESCA (global and local input) INTEGER array of dimension DLEN_.
123 * The array descriptor for the distributed matrix A.
124 *
125 * D (local output) REAL array, dimension LOCc(JA+N-1)
126 * The diagonal elements of the tridiagonal matrix T:
127 * D(i) = A(i,i). D is tied to the distributed matrix A.
128 *
129 * E (local output) REAL array, dimension LOCc(JA+N-1)
130 * if UPLO = 'U', LOCc(JA+N-2) otherwise. The off-diagonal
131 * elements of the tridiagonal matrix T: E(i) = A(i,i+1) if
132 * UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the
133 * distributed matrix A.
134 *
135 * TAU (local output) COMPLEX, array, dimension
136 * LOCc(JA+N-1). This array contains the scalar factors TAU of
137 * the elementary reflectors. TAU is tied to the distributed
138 * matrix A.
139 *
140 * WORK (local workspace/local output) COMPLEX array,
141 * dimension (LWORK)
142 * On exit, WORK( 1 ) returns the minimal and optimal LWORK.
143 *
144 * LWORK (local or global input) INTEGER
145 * The dimension of the array WORK.
146 * LWORK is local input and must be at least
147 * LWORK >= 3*N.
148 *
149 * If LWORK = -1, then LWORK is global input and a workspace
150 * query is assumed; the routine only calculates the minimum
151 * and optimal size for all work arrays. Each of these
152 * values is returned in the first entry of the corresponding
153 * work array, and no error message is issued by PXERBLA.
154 *
155 * INFO (local output) INTEGER
156 * = 0: successful exit
157 * < 0: If the i-th argument is an array and the j-entry had
158 * an illegal value, then INFO = -(i*100+j), if the i-th
159 * argument is a scalar and had an illegal value, then
160 * INFO = -i.
161 *
162 * Further Details
163 * ===============
164 *
165 * If UPLO = 'U', the matrix Q is represented as a product of elementary
166 * reflectors
167 *
168 * Q = H(n-1) . . . H(2) H(1).
169 *
170 * Each H(i) has the form
171 *
172 * H(i) = I - tau * v * v'
173 *
174 * where tau is a complex scalar, and v is a complex vector with
175 * v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
176 * A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1).
177 *
178 * If UPLO = 'L', the matrix Q is represented as a product of elementary
179 * reflectors
180 *
181 * Q = H(1) H(2) . . . H(n-1).
182 *
183 * Each H(i) has the form
184 *
185 * H(i) = I - tau * v * v'
186 *
187 * where tau is a complex scalar, and v is a complex vector with
188 * v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in
189 * A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1).
190 *
191 * The contents of sub( A ) on exit are illustrated by the following
192 * examples with n = 5:
193 *
194 * if UPLO = 'U': if UPLO = 'L':
195 *
196 * ( d e v2 v3 v4 ) ( d )
197 * ( d e v3 v4 ) ( e d )
198 * ( d e v4 ) ( v1 e d )
199 * ( d e ) ( v1 v2 e d )
200 * ( d ) ( v1 v2 v3 e d )
201 *
202 * where d and e denote diagonal and off-diagonal elements of T, and vi
203 * denotes an element of the vector defining H(i).
204 *
205 * Alignment requirements
206 * ======================
207 *
208 * The distributed submatrix sub( A ) must verify some alignment proper-
209 * ties, namely the following expression should be true:
210 * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA ) with
211 * IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ).
212 *
213 * =====================================================================
214 *
215 * .. Parameters ..
216  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
217  $ lld_, mb_, m_, nb_, n_, rsrc_
218  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
219  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
220  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
221  COMPLEX HALF, ONE, ZERO
222  parameter( half = ( 0.5e+0, 0.0e+0 ),
223  $ one = ( 1.0e+0, 0.0e+0 ),
224  $ zero = ( 0.0e+0, 0.0e+0 ) )
225 * ..
226 * .. Local Scalars ..
227  LOGICAL LQUERY, UPPER
228  INTEGER IACOL, IAROW, ICOFFA, ICTXT, II, IK, IROFFA, J,
229  $ jj, jk, jn, lda, lwmin, mycol, myrow, npcol,
230  $ nprow
231  COMPLEX ALPHA, TAUI
232 * ..
233 * .. External Subroutines ..
234  EXTERNAL blacs_abort, blacs_gridinfo, caxpy, cgebr2d,
235  $ cgebs2d, chk1mat, chemv,
236  $ cher2, clarfg, infog2l, pxerbla
237 * ..
238 * .. External Functions ..
239  LOGICAL LSAME
240  COMPLEX CDOTC
241  EXTERNAL lsame, cdotc
242 * ..
243 * .. Intrinsic Functions ..
244  INTRINSIC cmplx, real
245 * ..
246 * .. Executable Statements ..
247 *
248 * Get grid parameters
249 *
250  ictxt = desca( ctxt_ )
251  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
252 *
253 * Test the input parameters
254 *
255  info = 0
256  IF( nprow.EQ.-1 ) THEN
257  info = -(600+ctxt_)
258  ELSE
259  upper = lsame( uplo, 'U' )
260  CALL chk1mat( n, 2, n, 2, ia, ja, desca, 6, info )
261  lwmin = 3 * n
262 *
263  work( 1 ) = cmplx( real( lwmin ) )
264  lquery = ( lwork.EQ.-1 )
265  IF( info.EQ.0 ) THEN
266  iroffa = mod( ia-1, desca( mb_ ) )
267  icoffa = mod( ja-1, desca( nb_ ) )
268  IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
269  info = -1
270  ELSE IF( iroffa.NE.icoffa ) THEN
271  info = -5
272  ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
273  info = -(600+nb_)
274  ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
275  info = -11
276  END IF
277  END IF
278  END IF
279 *
280  IF( info.NE.0 ) THEN
281  CALL pxerbla( ictxt, 'PCHETD2', -info )
282  CALL blacs_abort( ictxt, 1 )
283  RETURN
284  ELSE IF( lquery ) THEN
285  RETURN
286  END IF
287 *
288 * Quick return if possible
289 *
290  IF( n.LE.0 )
291  $ RETURN
292 *
293 * Compute local information
294 *
295  lda = desca( lld_ )
296  CALL infog2l( ia, ja, desca, nprow, npcol, myrow, mycol, ii, jj,
297  $ iarow, iacol )
298 *
299  IF( upper ) THEN
300 *
301 * Process(IAROW, IACOL) owns block to be reduced
302 *
303  IF( mycol.EQ.iacol ) THEN
304  IF( myrow.EQ.iarow ) THEN
305 *
306 * Reduce the upper triangle of sub( A )
307 *
308  ik = ii+n-1+(jj+n-2)*lda
309  a( ik ) = real( a( ik ) )
310  DO 10 j = n-1, 1, -1
311  ik = ii + j - 1
312  jk = jj + j - 1
313 *
314 * Generate elementary reflector H(i) = I - tau * v * v'
315 * to annihilate A(IA:IA+J-1,JA:JA+J-1)
316 *
317  alpha = a( ik+jk*lda )
318  CALL clarfg( j, alpha, a( ii+jk*lda ), 1, taui )
319  e( jk+1 ) = real( alpha )
320 *
321  IF( taui.NE.zero ) THEN
322 *
323 * Apply H(i) from both sides to
324 * A(IA:IA+J-1,JA:JA+J-1)
325 *
326  a( ik+jk*lda ) = one
327 *
328 * Compute x := tau * A * v storing x in TAU(1:i)
329 *
330  CALL chemv( uplo, j, taui, a( ii+(jj-1)*lda ),
331  $ lda, a( ii+jk*lda ), 1, zero,
332  $ tau( jj ), 1 )
333 *
334 * Compute w := x - 1/2 * tau * (x'*v) * v
335 *
336  alpha = -half*taui*cdotc( j, tau( jj ), 1,
337  $ a( ii+jk*lda ), 1 )
338  CALL caxpy( j, alpha, a( ii+jk*lda ), 1,
339  $ tau( jj ), 1 )
340 *
341 * Apply the transformation as a rank-2 update:
342 * A := A - v * w' - w * v'
343 *
344  CALL cher2( uplo, j, -one, a( ii+jk*lda ), 1,
345  $ tau( jj ), 1, a( ii+(jj-1)*lda ),
346  $ lda )
347  END IF
348 *
349 * Copy D, E, TAU to broadcast them columnwise.
350 *
351  a( ik+jk*lda ) = cmplx( e( jk+1 ) )
352  d( jk+1 ) = real( a( ik+1+jk*lda ) )
353  work( j+1 ) = cmplx( d( jk+1 ) )
354  work( n+j+1 ) = cmplx( e( jk+1 ) )
355  tau( jk+1 ) = taui
356  work( 2*n+j+1 ) = tau( jk+1 )
357 *
358  10 CONTINUE
359  d( jj ) = real( a( ii+(jj-1)*lda ) )
360  work( 1 ) = cmplx( d( jj ) )
361  work( n+1 ) = zero
362  work( 2*n+1 ) = zero
363 *
364  CALL cgebs2d( ictxt, 'Columnwise', ' ', 1, 3*n, work, 1 )
365 *
366  ELSE
367  CALL cgebr2d( ictxt, 'Columnwise', ' ', 1, 3*n, work, 1,
368  $ iarow, iacol )
369  DO 20 j = 2, n
370  jn = jj + j - 1
371  d( jn ) = real( work( j ) )
372  e( jn ) = real( work( n+j ) )
373  tau( jn ) = work( 2*n+j )
374  20 CONTINUE
375  d( jj ) = real( work( 1 ) )
376  END IF
377  END IF
378 *
379  ELSE
380 *
381 * Process (IAROW, IACOL) owns block to be factorized
382 *
383  IF( mycol.EQ.iacol ) THEN
384  IF( myrow.EQ.iarow ) THEN
385 *
386 * Reduce the lower triangle of sub( A )
387 *
388  a( ii+(jj-1)*lda ) = real( a( ii+(jj-1)*lda ) )
389  DO 30 j = 1, n - 1
390  ik = ii + j - 1
391  jk = jj + j - 1
392 *
393 * Generate elementary reflector H(i) = I - tau * v * v'
394 * to annihilate A(IA+J-JA+2:IA+N-1,JA+J-1)
395 *
396  alpha = a( ik+1+(jk-1)*lda )
397  CALL clarfg( n-j, alpha, a( ik+2+(jk-1)*lda ), 1,
398  $ taui )
399  e( jk ) = real( alpha )
400 *
401  IF( taui.NE.zero ) THEN
402 *
403 * Apply H(i) from both sides to
404 * A(IA+J-JA+1:IA+N-1,JA+J+1:JA+N-1)
405 *
406  a( ik+1+(jk-1)*lda ) = one
407 *
408 * Compute x := tau * A * v storing y in TAU(i:n-1)
409 *
410  CALL chemv( uplo, n-j, taui, a( ik+1+jk*lda ),
411  $ lda, a( ik+1+(jk-1)*lda ), 1,
412  $ zero, tau( jk ), 1 )
413 *
414 * Compute w := x - 1/2 * tau * (x'*v) * v
415 *
416  alpha = -half*taui*cdotc( n-j, tau( jk ), 1,
417  $ a( ik+1+(jk-1)*lda ), 1 )
418  CALL caxpy( n-j, alpha, a( ik+1+(jk-1)*lda ),
419  $ 1, tau( jk ), 1 )
420 *
421 * Apply the transformation as a rank-2 update:
422 * A := A - v * w' - w * v'
423 *
424  CALL cher2( uplo, n-j, -one,
425  $ a( ik+1+(jk-1)*lda ), 1,
426  $ tau( jk ), 1, a( ik+1+jk*lda ),
427  $ lda )
428  END IF
429 *
430 * Copy D(JK), E(JK), TAU(JK) to broadcast them
431 * columnwise.
432 *
433  a( ik+1+(jk-1)*lda ) = cmplx( e( jk ) )
434  d( jk ) = real( a( ik+(jk-1)*lda ) )
435  work( j ) = cmplx( d( jk ) )
436  work( n+j ) = cmplx( e( jk ) )
437  tau( jk ) = taui
438  work( 2*n+j ) = tau( jk )
439  30 CONTINUE
440  jn = jj + n - 1
441  d( jn ) = real( a( ii+n-1+(jn-1)*lda ) )
442  work( n ) = cmplx( d( jn ) )
443  tau( jn ) = zero
444  work( 2*n ) = zero
445 *
446  CALL cgebs2d( ictxt, 'Columnwise', ' ', 1, 3*n-1, work,
447  $ 1 )
448 *
449  ELSE
450  CALL cgebr2d( ictxt, 'Columnwise', ' ', 1, 3*n-1, work,
451  $ 1, iarow, iacol )
452  DO 40 j = 1, n - 1
453  jn = jj + j - 1
454  d( jn ) = real( work( j ) )
455  e( jn ) = real( work( n+j ) )
456  tau( jn ) = work( 2*n+j )
457  40 CONTINUE
458  jn = jj + n - 1
459  d( jn ) = real( work( n ) )
460  tau( jn ) = zero
461  END IF
462  END IF
463  END IF
464 *
465  work( 1 ) = cmplx( real( lwmin ) )
466 *
467  RETURN
468 *
469 * End of PCHETD2
470 *
471  END
cmplx
float cmplx[2]
Definition: pblas.h:132
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pchetd2
subroutine pchetd2(UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, LWORK, INFO)
Definition: pchetd2.f:3
chk1mat
subroutine chk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, INFO)
Definition: chk1mat.f:3
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2