ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pchettrd.f
Go to the documentation of this file.
1  SUBROUTINE pchettrd( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK,
2  $ LWORK, INFO )
3 *
4 * -- ScaLAPACK routine (version 2.0.2) --
5 * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
6 * May 1 2012
7 *
8 * .. Scalar Arguments ..
9  CHARACTER UPLO
10  INTEGER IA, INFO, JA, LWORK, N
11 * ..
12 * .. Array Arguments ..
13  INTEGER DESCA( * )
14  REAL D( * ), E( * )
15  COMPLEX A( * ), TAU( * ), WORK( * )
16 * ..
17 *
18 * Purpose
19 *
20 * =======
21 *
22 * PCHETTRD reduces a complex Hermitian matrix sub( A ) to Hermitian
23 * tridiagonal form T by an unitary similarity transformation:
24 * Q' * sub( A ) * Q = T, where sub( A ) = A(IA:IA+N-1,JA:JA+N-1).
25 *
26 * Notes
27 * =====
28 *
29 * Each global data object is described by an associated description
30 * vector. This vector stores the information required to establish
31 * the mapping between an object element and its corresponding
32 * process and memory location.
33 *
34 * Let A be a generic term for any 2D block cyclicly distributed
35 * array.
36 * Such a global array has an associated description vector DESCA.
37 * In the following comments, the character _ should be read as
38 * "of the global array".
39 *
40 * NOTATION STORED IN EXPLANATION
41 * --------------- -------------- -----------------------------------
42 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
43 * DTYPE_A = 1.
44 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle,
45 * indicating the BLACS process grid A is distribu-
46 * ted over. The context itself is glo-
47 * bal, but the handle (the integer
48 * value) may vary.
49 * M_A (global) DESCA( M_ ) The number of rows in the global
50 * array A.
51 * N_A (global) DESCA( N_ ) The number of columns in the global
52 * array A.
53 * MB_A (global) DESCA( MB_ ) The blocking factor used to
54 * distribute the rows of the array.
55 * NB_A (global) DESCA( NB_ ) The blocking factor used to
56 * distribute the columns of the array.
57 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the
58 * first row of the array A is distributed.
59 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
60 * first column of the array A is
61 * distributed.
62 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
63 * array. LLD_A >= MAX(1,LOCp(M_A)).
64 *
65 * Let K be the number of rows or columns of a distributed matrix,
66 * and assume that its process grid has dimension p x q.
67 * LOCp( K ) denotes the number of elements of K that a process
68 * would receive if K were distributed over the p processes of its
69 * process column.
70 * Similarly, LOCq( K ) denotes the number of elements of K that a
71 * process would receive if K were distributed over the q processes
72 * of its process row.
73 * The values of LOCp() and LOCq() may be determined via a call to
74 * the ScaLAPACK tool function, NUMROC:
75 * LOCp( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
76 * LOCq( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
77 * An upper bound for these quantities may be computed by:
78 * LOCp( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
79 * LOCq( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
80 *
81 * Arguments
82 * =========
83 *
84 * UPLO (global input) CHARACTER
85 * Specifies whether the upper or lower triangular part of the
86 * Hermitian matrix sub( A ) is stored:
87 * = 'U': Upper triangular
88 * = 'L': Lower triangular
89 *
90 * N (global input) INTEGER
91 * The number of rows and columns to be operated on, i.e. the
92 * order of the distributed submatrix sub( A ). N >= 0.
93 *
94 * A (local input/local output) COMPLEX pointer into the
95 * local memory to an array of dimension (LLD_A,LOCq(JA+N-1)).
96 * On entry, this array contains the local pieces of the
97 * Hermitian distributed matrix sub( A ). If UPLO = 'U', the
98 * leading N-by-N upper triangular part of sub( A ) contains
99 * the upper triangular part of the matrix, and its strictly
100 * lower triangular part is not referenced. If UPLO = 'L', the
101 * leading N-by-N lower triangular part of sub( A ) contains the
102 * lower triangular part of the matrix, and its strictly upper
103 * triangular part is not referenced. On exit, if UPLO = 'U',
104 * the diagonal and first superdiagonal of sub( A ) are over-
105 * written by the corresponding elements of the tridiagonal
106 * matrix T, and the elements above the first superdiagonal,
107 * with the array TAU, represent the unitary matrix Q as a
108 * product of elementary reflectors; if UPLO = 'L', the diagonal
109 * and first subdiagonal of sub( A ) are overwritten by the
110 * corresponding elements of the tridiagonal matrix T, and the
111 * elements below the first subdiagonal, with the array TAU,
112 * represent the unitary matrix Q as a product of elementary
113 * reflectors. See Further Details.
114 *
115 * IA (global input) INTEGER
116 * The row index in the global array A indicating the first
117 * row of sub( A ).
118 *
119 * JA (global input) INTEGER
120 * The column index in the global array A indicating the
121 * first column of sub( A ).
122 *
123 * DESCA (global and local input) INTEGER array of dimension DLEN_.
124 * The array descriptor for the distributed matrix A.
125 *
126 * D (local output) REAL array, dim LOCq(JA+N-1)
127 * The diagonal elements of the tridiagonal matrix T:
128 * D(i) = A(i,i). D is tied to the distributed matrix A.
129 *
130 * E (local output) REAL array, dim LOCq(JA+N-1)
131 * if UPLO = 'U', LOCq(JA+N-2) otherwise. The off-diagonal
132 * elements of the tridiagonal matrix T: E(i) = A(i,i+1) if
133 * UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the
134 * distributed matrix A.
135 *
136 * TAU (local output) COMPLEX, array, dimension
137 * LOCq(JA+N-1). This array contains the scalar factors TAU of
138 * the elementary reflectors. TAU is tied to the distributed
139 * matrix A.
140 *
141 * WORK (local workspace) COMPLEX array, dimension (LWORK)
142 * On exit, WORK( 1 ) returns the minimal and optimal workspace
143 *
144 * LWORK (local input) INTEGER
145 * The dimension of the array WORK.
146 * LWORK >= 2*( ANB+1 )*( 4*NPS+2 ) + NPS
147 * Where:
148 * NPS = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB )
149 * ANB = PJLAENV( DESCA( CTXT_ ), 3, 'PCHETTRD', 'L', 0, 0,
150 * 0, 0 )
151 *
152 * NUMROC is a ScaLAPACK tool function;
153 * PJLAENV is a ScaLAPACK envionmental inquiry function
154 * MYROW, MYCOL, NPROW and NPCOL can be determined by calling
155 * the subroutine BLACS_GRIDINFO.
156 *
157 * INFO (global output) INTEGER
158 * = 0: successful exit
159 * < 0: If the i-th argument is an array and the j-entry had
160 * an illegal value, then INFO = -(i*100+j), if the i-th
161 * argument is a scalar and had an illegal value, then
162 * INFO = -i.
163 *
164 * Further Details
165 * ===============
166 *
167 * If UPLO = 'U', the matrix Q is represented as a product of
168 * elementary reflectors
169 *
170 * Q = H(n-1) . . . H(2) H(1).
171 *
172 * Each H(i) has the form
173 *
174 * H(i) = I - tau * v * v'
175 *
176 * where tau is a complex scalar, and v is a complex vector with
177 * v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
178 * A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1).
179 *
180 * If UPLO = 'L', the matrix Q is represented as a product of
181 * elementary reflectors
182 *
183 * Q = H(1) H(2) . . . H(n-1).
184 *
185 * Each H(i) has the form
186 *
187 * H(i) = I - tau * v * v'
188 *
189 * where tau is a complex scalar, and v is a complex vector with
190 * v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in
191 * A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1).
192 *
193 * The contents of sub( A ) on exit are illustrated by the following
194 * examples with n = 5:
195 *
196 * if UPLO = 'U': if UPLO = 'L':
197 *
198 * ( d e v2 v3 v4 ) ( d )
199 * ( d e v3 v4 ) ( e d )
200 * ( d e v4 ) ( v1 e d )
201 * ( d e ) ( v1 v2 e d )
202 * ( d ) ( v1 v2 v3 e d )
203 *
204 * where d and e denote diagonal and off-diagonal elements of T, and
205 * vi denotes an element of the vector defining H(i).
206 *
207 * Data storage requirements
208 * =========================
209 *
210 * PCHETTRD is not intended to be called directly. All users are
211 * encourage to call PCHETRD which will then call PCHETTRD if
212 * appropriate. A must be in cyclic format (i.e. MB = NB = 1),
213 * the process grid must be square ( i.e. NPROW = NPCOL ) and
214 * only lower triangular storage is supported.
215 *
216 * Local variables
217 * ===============
218 *
219 * PCHETTRD uses five local arrays:
220 * WORK ( InV ) dimension ( NP, ANB+1): array V
221 * WORK ( InH ) dimension ( NP, ANB+1): array H
222 * WORK ( InVT ) dimension ( NQ, ANB+1): transpose of the array V
223 * WORK ( InHT ) dimension ( NQ, ANB+1): transpose of the array H
224 * WORK ( InVTT ) dimension ( NQ, 1): transpose of the array VT
225 *
226 * Arrays V and H are replicated across all processor columns.
227 * Arrays V^T and H^T are replicated across all processor rows.
228 *
229 * WORK ( InVT ), or V^T, is stored as a tall skinny
230 * array ( NQ x ANB-1 ) for efficiency. Since only the lower
231 * triangular portion of A is updated, Av is computed as:
232 * tril(A) * v + v^T * tril(A,-1). This is performed as
233 * two local triangular matrix-vector multiplications (both in
234 * MVR2) followed by a transpose and a sum across the columns.
235 * In the local computation, WORK( InVT ) is used to compute
236 * tril(A) * v and WORK( InV ) is used to compute
237 * v^T * tril(A,-1)
238 *
239 * The following variables are global indices into A:
240 * INDEX: The current global row and column number.
241 * MAXINDEX: The global row and column for the first row and
242 * column in the trailing block of A.
243 * LIIB, LIJB: The first row, column in
244 *
245 * The following variables point into the arrays A, V, H, V^T, H^T:
246 * BINDEX =INDEX-MININDEX: The column index in V, H, V^T, H^T.
247 * LII: local index I: The local row number for row INDEX
248 * LIJ: local index J: The local column number for column INDEX
249 * LIIP1: local index I+1: The local row number for row INDEX+1
250 * LIJP1: local index J+1: The local col number for col INDEX+1
251 * LTLI: lower triangular local index I: The local row for the
252 * upper left entry in tril( A(INDEX, INDEX) )
253 * LTLIP1: lower triangular local index I+1: The local row for the
254 * upper left entry in tril( A(INDEX+1, INDEX+1) )
255 *
256 * Details: The distinction between LII and LTLI (and between
257 * LIIP1 and LTLIP1) is subtle. Within the current processor
258 * column (i.e. MYCOL .eq. CURCOL) they are the same. However,
259 * on some processors, A( LII, LIJ ) points to an element
260 * above the diagonal, on these processors, LTLI = LII+1.
261 *
262 * The following variables give the number of rows and/or columns
263 * in various matrices:
264 * NP: The number of local rows in A( 1:N, 1:N )
265 * NQ: The number of local columns in A( 1:N, 1:N )
266 * NPM0: The number of local rows in A( INDEX:N, INDEX:N )
267 * NQM0: The number of local columns in A( INDEX:N, INDEX:N )
268 * NPM1: The number of local rows in A( INDEX+1:N, INDEX:N )
269 * NQM1: The number of local columns in A( INDEX+1:N, INDEX:N )
270 * LTNM0: The number of local rows & columns in
271 * tril( A( INDEX:N, INDEX:N ) )
272 * LTNM1: The number of local rows & columns in
273 * tril( A( INDEX+1:N, INDEX+1:N ) )
274 * NOTE: LTNM0 == LTNM1 on all processors except the diagonal
275 * processors, i.e. those where MYCOL == MYROW.
276 *
277 * Invariants:
278 * NP = NPM0 + LII - 1
279 * NQ = NQM0 + LIJ - 1
280 * NP = NPM1 + LIIP1 - 1
281 * NQ = NQM1 + LIJP1 - 1
282 * NP = LTLI + LTNM0 - 1
283 * NP = LTLIP1 + LTNM1 - 1
284 *
285 * Temporary variables. The following variables are used within
286 * a few lines after they are set and do hold state from one loop
287 * iteration to the next:
288 *
289 * The matrix A:
290 * The matrix A does not hold the same values that it would
291 * in an unblocked code nor the values that it would hold in
292 * in a blocked code.
293 *
294 * The value of A is confusing. It is easiest to state the
295 * difference between trueA and A at the point that MVR2 is called,
296 * so we will start there.
297 *
298 * Let trueA be the value that A would
299 * have at a given point in an unblocked code and A
300 * be the value that A has in this code at the same point.
301 *
302 * At the time of the call to MVR2,
303 * trueA = A + V' * H + H' * V
304 * where H = H( MAXINDEX:N, 1:BINDEX ) and
305 * V = V( MAXINDEX:N, 1:BINDEX ).
306 *
307 * At the bottom of the inner loop,
308 * trueA = A + V' * H + H' * V + v' * h + h' * v
309 * where H = H( MAXINDEX:N, 1:BINDEX ) and
310 * V = V( MAXINDEX:N, 1:BINDEX ) and
311 * v = V( liip1:N, BINDEX+1 ) and
312 * h = H( liip1:N, BINDEX+1 )
313 *
314 * At the top of the loop, BINDEX gets incremented, hence:
315 * trueA = A + V' * H + H' * V + v' * h + h' * v
316 * where H = H( MAXINDEX:N, 1:BINDEX-1 ) and
317 * V = V( MAXINDEX:N, 1:BINDEX-1 ) and
318 * v = V( liip1:N, BINDEX ) and
319 * h = H( liip1:N, BINDEX )
320 *
321 *
322 * A gets updated at the bottom of the outer loop
323 * After this update, trueA = A + v' * h + h' * v
324 * where v = V( liip1:N, BINDEX ) and
325 * h = H( liip1:N, BINDEX ) and BINDEX = 0
326 * Indeed, the previous loop invariant as stated above for the
327 * top of the loop still holds, but with BINDEX = 0, H and V
328 * are null matrices.
329 *
330 * After the current column of A is updated,
331 * trueA( INDEX, INDEX:N ) = A( INDEX, INDEX:N )
332 * the rest of A is untouched.
333 *
334 * After the current block column of A is updated,
335 * trueA = A + V' * H + H' * V
336 * where H = H( MAXINDEX:N, 1:BINDEX ) and
337 * V = V( MAXINDEX:N, 1:BINDEX )
338 *
339 * This brings us back to the point at which mvr2 is called.
340 *
341 *
342 * Details of the parallelization:
343 *
344 * We delay spreading v across to all processor columns (which
345 * would naturally happen at the bottom of the loop) in order to
346 * combine the spread of v( : , i-1 ) with the spread of h( : , i )
347 *
348 * In order to compute h( :, i ), we must update A( :, i )
349 * which means that the processor column owning A( :, i ) must
350 * have: c, tau, v( i, i ) and h( i, i ).
351 *
352 * The traditional
353 * way of computing v (and the one used in pzlatrd.f and
354 * zlatrd.f) is:
355 * v = tau * v
356 * c = v' * h
357 * alpha = - tau * c / 2
358 * v = v + alpha * h
359 * However, the traditional way of computing v requires that tau
360 * be broadcast to all processors in the current column (to compute
361 * v = tau * v) and then a sum-to-all is required (to
362 * compute v' * h ). We use the following formula instead:
363 * c = v' * h
364 * v = tau * ( v - c * tau' * h / 2 )
365 * The above formula allows tau to be spread down in the
366 * same call to SGSUM2D which performs the sum-to-all of c.
367 *
368 * The computation of v, which could be performed in any processor
369 * column (or other procesor subsets), is performed in the
370 * processor column that owns A( :, i+1 ) so that A( :, i+1 )
371 * can be updated prior to spreading v across.
372 *
373 * We keep the block column of A up-to-date to minimize the
374 * work required in updating the current column of A. Updating
375 * the block column of A is reasonably load balanced whereas
376 * updating the current column of A is not (only the current
377 * processor column is involved).
378 *
379 * In the following overview of the steps performed, M in the
380 * margin indicates message traffic and C indicates O(n^2 nb/sqrt(p))
381 * or more flops per processor.
382 *
383 * Inner loop:
384 * A( index:n, index ) -= ( v * ht(bindex) + h * vt( bindex) )
385 *M h = house( A(index:n, index) )
386 *M Spread v, h across
387 *M vt = v^T; ht = h^T
388 * A( index+1:n, index+1:maxindex ) -=
389 * ( v * ht(index+1:maxindex) + h *vt(index+1:maxindex) )
390 *C v = tril(A) * h; vt = ht * tril(A,-1)
391 *MorC v = v - H*V*h - V*H*h
392 *M v = v + vt^T
393 *M c = v' * h
394 * v = tau * ( v - c * tau' * h / 2 )
395 *C A = A - H*V - V*H
396 *
397 *
398 *
399 * =================================================================
400 *
401 * .. Parameters ..
402  INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
403  $ mb_, nb_, rsrc_, csrc_, lld_
404  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
405  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
406  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
407  REAL ONE
408  parameter( one = 1.0e0 )
409  COMPLEX Z_ONE, Z_NEGONE, Z_ZERO
410  parameter( z_one = 1.0e0, z_negone = -1.0e0,
411  $ z_zero = 0.0e0 )
412  REAL ZERO
413  parameter( zero = 0.0e+0 )
414 * ..
415 *
416 *
417 * .. Local Scalars ..
418 *
419 *
420  LOGICAL BALANCED, INTERLEAVE, TWOGEMMS, UPPER
421  INTEGER ANB, BINDEX, CURCOL, CURROW, I, ICTXT, INDEX,
422  $ indexa, indexinh, indexinv, inh, inhb, inht,
423  $ inhtb, intmp, inv, invb, invt, invtb, j, lda,
424  $ ldv, ldzg, lii, liib, liip1, lij, lijb, lijp1,
425  $ ltlip1, ltnm1, lwmin, maxindex, minindex,
426  $ mycol, myfirstrow, myrow, mysetnum, nbzg, np,
427  $ npb, npcol, npm0, npm1, nprow, nps, npset, nq,
428  $ nqb, nqm1, numrows, nxtcol, nxtrow, pbmax,
429  $ pbmin, pbsize, pnb, rowsperproc
430  REAL NORM, SAFMAX, SAFMIN
431  COMPLEX ALPHA, BETA, C, ONEOVERBETA, TOPH, TOPNV,
432  $ toptau, topv, ttoph, ttopv
433 * ..
434 * .. Local Arrays ..
435 *
436 *
437 *
438 *
439  INTEGER IDUM1( 1 ), IDUM2( 1 )
440  REAL DTMP( 5 )
441  COMPLEX CC( 3 )
442 * ..
443 * .. External Subroutines ..
444  EXTERNAL blacs_gridinfo, cgebr2d, cgebs2d, cgemm, cgemv,
445  $ cgerv2d, cgesd2d, cgsum2d, chk1mat, clamov,
446  $ cscal, ctrmvt, pchk1mat, pstreecomb, pxerbla,
447  $ scombnrm2, sgebr2d, sgebs2d, sgsum2d
448 * ..
449 * .. External Functions ..
450 *
451  LOGICAL LSAME
452  INTEGER ICEIL, NUMROC, PJLAENV
453  REAL PSLAMCH, SCNRM2
454  EXTERNAL lsame, iceil, numroc, pjlaenv, pslamch, scnrm2
455 * ..
456 * .. Intrinsic Functions ..
457  INTRINSIC aimag, cmplx, conjg, ichar, max, min, mod,
458  $ real, sign, sqrt
459 * ..
460 *
461 *
462 * .. Executable Statements ..
463 * This is just to keep ftnchek and toolpack/1 happy
464  IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
465  $ rsrc_.LT.0 )RETURN
466 *
467 *
468 *
469 * Further details
470 * ===============
471 *
472 * At the top of the loop, v and nh have been computed but not
473 * spread across. Hence, A is out-of-date even after the
474 * rank 2k update. Furthermore, we compute the next v before
475 * nh is spread across.
476 *
477 * I claim that if we used a sum-to-all on NV, by summing CC within
478 * each column, that we could compute NV locally and could avoid
479 * spreading V across. Bruce claims that sum-to-all can be made
480 * to cost no more than sum-to-one on the Paragon. If that is
481 * true, this would be a win. But,
482 * the BLACS sum-to-all is just a sum-to-one followed by a broadcast,
483 * and hence the present scheme is better for now.
484 *
485 * Get grid parameters
486 *
487  ictxt = desca( ctxt_ )
488  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
489 *
490  safmax = sqrt( pslamch( ictxt, 'O' ) ) / n
491  safmin = sqrt( pslamch( ictxt, 'S' ) )
492 *
493 * Test the input parameters
494 *
495  info = 0
496  IF( nprow.EQ.-1 ) THEN
497  info = -( 600+ctxt_ )
498  ELSE
499 *
500 * Here we set execution options for PCHETTRD
501 *
502  pnb = pjlaenv( ictxt, 2, 'PCHETTRD', 'L', 0, 0, 0, 0 )
503  anb = pjlaenv( ictxt, 3, 'PCHETTRD', 'L', 0, 0, 0, 0 )
504 *
505  interleave = ( pjlaenv( ictxt, 4, 'PCHETTRD', 'L', 1, 0, 0,
506  $ 0 ).EQ.1 )
507  twogemms = ( pjlaenv( ictxt, 4, 'PCHETTRD', 'L', 2, 0, 0,
508  $ 0 ).EQ.1 )
509  balanced = ( pjlaenv( ictxt, 4, 'PCHETTRD', 'L', 3, 0, 0,
510  $ 0 ).EQ.1 )
511 *
512  CALL chk1mat( n, 2, n, 2, ia, ja, desca, 6, info )
513 *
514 *
515  upper = lsame( uplo, 'U' )
516  IF( info.EQ.0 .AND. desca( nb_ ).NE.1 )
517  $ info = 600 + nb_
518  IF( info.EQ.0 ) THEN
519 *
520 *
521 * Here is the arithmetic:
522 * Let maxnpq = max( np, nq, 2 * ANB )
523 * LDV = 4 * max( np, nq ) + 2
524 * LWMIN = 2 * ( ANB + 1 ) * LDV + MAX( np, 2 * ANB )
525 * = 2 * ( ANB + 1 ) * ( 4 * NPS + 2 ) + NPS
526 *
527 * This overestimates memory requirements when ANB > NP/2
528 * Memory requirements are lower when interleave = .false.
529 * Hence, we could have two sets of memory requirements,
530 * one for interleave and one for
531 *
532 *
533  nps = max( numroc( n, 1, 0, 0, nprow ), 2*anb )
534  lwmin = 2*( anb+1 )*( 4*nps+2 ) + nps
535 *
536  work( 1 ) = cmplx( lwmin )
537  IF( .NOT.lsame( uplo, 'L' ) ) THEN
538  info = -1
539  ELSE IF( ia.NE.1 ) THEN
540  info = -4
541  ELSE IF( ja.NE.1 ) THEN
542  info = -5
543  ELSE IF( nprow.NE.npcol ) THEN
544  info = -( 600+ctxt_ )
545  ELSE IF( desca( dtype_ ).NE.1 ) THEN
546  info = -( 600+dtype_ )
547  ELSE IF( desca( mb_ ).NE.1 ) THEN
548  info = -( 600+mb_ )
549  ELSE IF( desca( nb_ ).NE.1 ) THEN
550  info = -( 600+nb_ )
551  ELSE IF( desca( rsrc_ ).NE.0 ) THEN
552  info = -( 600+rsrc_ )
553  ELSE IF( desca( csrc_ ).NE.0 ) THEN
554  info = -( 600+csrc_ )
555  ELSE IF( lwork.LT.lwmin ) THEN
556  info = -11
557  END IF
558  END IF
559  IF( upper ) THEN
560  idum1( 1 ) = ichar( 'U' )
561  ELSE
562  idum1( 1 ) = ichar( 'L' )
563  END IF
564  idum2( 1 ) = 1
565 *
566  CALL pchk1mat( n, 2, n, 2, ia, ja, desca, 6, 1, idum1, idum2,
567  $ info )
568  END IF
569 *
570  IF( info.NE.0 ) THEN
571  CALL pxerbla( ictxt, 'PCHETTRD', -info )
572  RETURN
573  END IF
574 *
575 * Quick return if possible
576 *
577  IF( n.EQ.0 )
578  $ RETURN
579 *
580 *
581 *
582 * Reduce the lower triangle of sub( A )
583  np = numroc( n, 1, myrow, 0, nprow )
584  nq = numroc( n, 1, mycol, 0, npcol )
585 *
586  nxtrow = 0
587  nxtcol = 0
588 *
589  liip1 = 1
590  lijp1 = 1
591  npm1 = np
592  nqm1 = nq
593 *
594  lda = desca( lld_ )
595  ictxt = desca( ctxt_ )
596 *
597 *
598 *
599 * Miscellaneous details:
600 * Put tau, D and E in the right places
601 * Check signs
602 * Place all the arrays in WORK, control their placement
603 * in memory.
604 *
605 *
606 *
607 * Loop invariants
608 * A(LIIP1, LIJ) points to the first element of A(I+1,J)
609 * NPM1,NQM1 = the number of rows, cols in A( LII+1:N,LIJ+1:N )
610 * A(LII:N,LIJ:N) is one step out of date.
611 * proc( CURROW, CURCOL ) owns A(LII,LIJ)
612 * proc( NXTROW, CURCOL ) owns A(LIIP1,LIJ)
613 *
614  inh = 1
615 *
616  IF( interleave ) THEN
617 *
618 * H and V are interleaved to minimize memory movement
619 * LDV has to be twice as large to accomodate interleaving.
620 * In addition, LDV is doubled again to allow v, h and
621 * toptau to be spreaad across and transposed in a
622 * single communication operation with minimum memory
623 * movement.
624 *
625 * We could reduce LDV back to 2*MAX(NPM1,NQM1)
626 * by increasing the memory movement required in
627 * the spread and transpose of v, h and toptau.
628 * However, since the non-interleaved path already
629 * provides a mear minimum memory requirement option,
630 * we did not provide this additional path.
631 *
632  ldv = 4*( max( npm1, nqm1 ) ) + 2
633 *
634  inh = 1
635 *
636  inv = inh + ldv / 2
637  invt = inh + ( anb+1 )*ldv
638 *
639  inht = invt + ldv / 2
640  intmp = invt + ldv*( anb+1 )
641 *
642  ELSE
643  ldv = max( npm1, nqm1 )
644 *
645  inht = inh + ldv*( anb+1 )
646  inv = inht + ldv*( anb+1 )
647 *
648 * The code works without this +1, but only because of a
649 * coincidence. Without the +1, WORK(INVT) gets trashed, but
650 * WORK(INVT) is only used once and when it is used, it is
651 * multiplied by WORK( INH ) which is zero. Hence, the fact
652 * that WORK(INVT) is trashed has no effect.
653 *
654  invt = inv + ldv*( anb+1 ) + 1
655  intmp = invt + ldv*( 2*anb )
656 *
657  END IF
658 *
659  IF( info.NE.0 ) THEN
660  CALL pxerbla( ictxt, 'PCHETTRD', -info )
661  work( 1 ) = cmplx( lwmin )
662  RETURN
663  END IF
664 *
665 *
666 * The satisfies the loop invariant: trueA = A - V * HT - H * VT,
667 * (where V, H, VT and HT all have BINDEX+1 rows/columns)
668 * the first ANB times through the loop.
669 *
670 *
671 *
672 * Setting either ( InH and InHT ) or InV to Z_ZERO
673 * is adequate except in the face of NaNs.
674 *
675 *
676  DO 10 i = 1, np
677  work( inh+i-1 ) = z_zero
678  work( inv+i-1 ) = z_zero
679  10 CONTINUE
680  DO 20 i = 1, nq
681  work( inht+i-1 ) = z_zero
682  20 CONTINUE
683 *
684 *
685 *
686  topnv = z_zero
687 *
688  ltlip1 = lijp1
689  ltnm1 = npm1
690  IF( mycol.GT.myrow ) THEN
691  ltlip1 = ltlip1 + 1
692  ltnm1 = ltnm1 - 1
693  END IF
694 *
695 *
696  DO 210 minindex = 1, n - 1, anb
697 *
698 *
699  maxindex = min( minindex+anb-1, n )
700  lijb = numroc( maxindex, 1, mycol, 0, npcol ) + 1
701  liib = numroc( maxindex, 1, myrow, 0, nprow ) + 1
702 *
703  nqb = nq - lijb + 1
704  npb = np - liib + 1
705  inhtb = inht + lijb - 1
706  invtb = invt + lijb - 1
707  inhb = inh + liib - 1
708  invb = inv + liib - 1
709 *
710 *
711 *
712 *
713  DO 160 index = minindex, min( maxindex, n-1 )
714 *
715  bindex = index - minindex
716 *
717  currow = nxtrow
718  curcol = nxtcol
719 *
720  nxtrow = mod( currow+1, nprow )
721  nxtcol = mod( curcol+1, npcol )
722 *
723  lii = liip1
724  lij = lijp1
725  npm0 = npm1
726 *
727  IF( myrow.EQ.currow ) THEN
728  npm1 = npm1 - 1
729  liip1 = liip1 + 1
730  END IF
731  IF( mycol.EQ.curcol ) THEN
732  nqm1 = nqm1 - 1
733  lijp1 = lijp1 + 1
734  ltlip1 = ltlip1 + 1
735  ltnm1 = ltnm1 - 1
736  END IF
737 *
738 *
739 *
740 *
741 * V = NV, VT = NVT, H = NH, HT = NHT
742 *
743 *
744 * Update the current column of A
745 *
746 *
747  IF( mycol.EQ.curcol ) THEN
748 *
749  indexa = lii + ( lij-1 )*lda
750  indexinv = inv + lii - 1 + ( bindex-1 )*ldv
751  indexinh = inh + lii - 1 + ( bindex-1 )*ldv
752  ttoph = conjg( work( inht+lij-1+bindex*ldv ) )
753  ttopv = conjg( topnv )
754 *
755  IF( index.GT.1 ) THEN
756  DO 30 i = 0, npm0 - 1
757 * A( INDEXA+I ) = A( INDEXA+I )
758  a( indexa+i ) = a( indexa+i ) -
759  $ work( indexinv+ldv+i )*ttoph -
760  $ work( indexinh+ldv+i )*ttopv
761  30 CONTINUE
762  END IF
763 *
764 *
765  END IF
766 *
767 *
768  IF( mycol.EQ.curcol ) THEN
769 *
770 * Compute the householder vector
771 *
772  IF( myrow.EQ.currow ) THEN
773  dtmp( 2 ) = real( a( lii+( lij-1 )*lda ) )
774  ELSE
775  dtmp( 2 ) = zero
776  END IF
777  IF( myrow.EQ.nxtrow ) THEN
778  dtmp( 3 ) = real( a( liip1+( lij-1 )*lda ) )
779  dtmp( 4 ) = aimag( a( liip1+( lij-1 )*lda ) )
780  ELSE
781  dtmp( 3 ) = zero
782  dtmp( 4 ) = zero
783  END IF
784 *
785  norm = scnrm2( npm1, a( liip1+( lij-1 )*lda ), 1 )
786  dtmp( 1 ) = norm
787 *
788 * IF DTMP(5) = 1.0, NORM is too large and might cause
789 * overflow, hence PSTREECOMB must be called. IF DTMP(5)
790 * is zero on output, DTMP(1) can be trusted.
791 *
792  dtmp( 5 ) = zero
793  IF( dtmp( 1 ).GE.safmax .OR. dtmp( 1 ).LT.safmin ) THEN
794  dtmp( 5 ) = one
795  dtmp( 1 ) = zero
796  END IF
797 *
798  dtmp( 1 ) = dtmp( 1 )*dtmp( 1 )
799  CALL sgsum2d( ictxt, 'C', ' ', 5, 1, dtmp, 5, -1,
800  $ curcol )
801  IF( dtmp( 5 ).EQ.zero ) THEN
802  dtmp( 1 ) = sqrt( dtmp( 1 ) )
803  ELSE
804  dtmp( 1 ) = norm
805  CALL pstreecomb( ictxt, 'C', 1, dtmp, -1, mycol,
806  $ scombnrm2 )
807  END IF
808 *
809  norm = dtmp( 1 )
810 *
811  d( lij ) = dtmp( 2 )
812  IF( myrow.EQ.currow .AND. mycol.EQ.curcol ) THEN
813  a( lii+( lij-1 )*lda ) = cmplx( d( lij ), zero )
814  END IF
815 *
816 *
817  alpha = cmplx( dtmp( 3 ), dtmp( 4 ) )
818 *
819  norm = sign( norm, real( alpha ) )
820 *
821  IF( norm.EQ.zero ) THEN
822  toptau = zero
823  ELSE
824  beta = norm + alpha
825  toptau = beta / norm
826  oneoverbeta = 1.0e0 / beta
827 *
828  CALL cscal( npm1, oneoverbeta,
829  $ a( liip1+( lij-1 )*lda ), 1 )
830  END IF
831 *
832  IF( myrow.EQ.nxtrow ) THEN
833  a( liip1+( lij-1 )*lda ) = z_one
834  END IF
835 *
836  tau( lij ) = toptau
837  e( lij ) = -norm
838 *
839  END IF
840 *
841 *
842 * Spread v, nh, toptau across
843 *
844  DO 40 i = 0, npm1 - 1
845  work( inv+liip1-1+bindex*ldv+npm1+i ) = a( liip1+i+
846  $ ( lij-1 )*lda )
847  40 CONTINUE
848 *
849  IF( mycol.EQ.curcol ) THEN
850  work( inv+liip1-1+bindex*ldv+npm1+npm1 ) = toptau
851  CALL cgebs2d( ictxt, 'R', ' ', npm1+npm1+1, 1,
852  $ work( inv+liip1-1+bindex*ldv ),
853  $ npm1+npm1+1 )
854  ELSE
855  CALL cgebr2d( ictxt, 'R', ' ', npm1+npm1+1, 1,
856  $ work( inv+liip1-1+bindex*ldv ),
857  $ npm1+npm1+1, myrow, curcol )
858  toptau = work( inv+liip1-1+bindex*ldv+npm1+npm1 )
859  END IF
860  DO 50 i = 0, npm1 - 1
861  work( inh+liip1-1+( bindex+1 )*ldv+i ) = work( inv+liip1-
862  $ 1+bindex*ldv+npm1+i )
863  50 CONTINUE
864 *
865  IF( index.LT.n ) THEN
866  IF( myrow.EQ.nxtrow .AND. mycol.EQ.curcol )
867  $ a( liip1+( lij-1 )*lda ) = e( lij )
868  END IF
869 *
870 * Transpose v, nh
871 *
872 *
873  IF( myrow.EQ.mycol ) THEN
874  DO 60 i = 0, npm1 + npm1
875  work( invt+lijp1-1+bindex*ldv+i ) = work( inv+liip1-1+
876  $ bindex*ldv+i )
877  60 CONTINUE
878  ELSE
879  CALL cgesd2d( ictxt, npm1+npm1, 1,
880  $ work( inv+liip1-1+bindex*ldv ), npm1+npm1,
881  $ mycol, myrow )
882  CALL cgerv2d( ictxt, nqm1+nqm1, 1,
883  $ work( invt+lijp1-1+bindex*ldv ), nqm1+nqm1,
884  $ mycol, myrow )
885  END IF
886 *
887  DO 70 i = 0, nqm1 - 1
888  work( inht+lijp1-1+( bindex+1 )*ldv+i ) = work( invt+
889  $ lijp1-1+bindex*ldv+nqm1+i )
890  70 CONTINUE
891 *
892 *
893 * Update the current block column of A
894 *
895  IF( index.GT.1 ) THEN
896  DO 90 j = lijp1, lijb - 1
897  DO 80 i = 0, npm1 - 1
898 *
899  a( liip1+i+( j-1 )*lda ) = a( liip1+i+( j-1 )*lda )
900  $ - work( inv+liip1-1+bindex*ldv+i )*
901  $ conjg( work( inht+j-1+bindex*ldv ) ) -
902  $ work( inh+liip1-1+bindex*ldv+i )*
903  $ conjg( work( invt+j-1+bindex*ldv ) )
904  80 CONTINUE
905  90 CONTINUE
906  END IF
907 *
908 *
909 *
910 * Compute NV = A * NHT; NVT = A * NH
911 *
912 * These two lines are necessary because these elements
913 * are not always involved in the calls to CTRMVT
914 * for two reasons:
915 * 1) On diagonal processors, the call to TRMVT
916 * involves only LTNM1-1 elements
917 * 2) On some processes, NQM1 < LTM1 or LIIP1 < LTLIP1
918 * and when the results are combined across all processes,
919 * uninitialized values may be included.
920  work( inv+liip1-1+( bindex+1 )*ldv ) = z_zero
921  work( invt+lijp1-1+( bindex+1 )*ldv+nqm1-1 ) = z_zero
922 *
923 *
924  IF( myrow.EQ.mycol ) THEN
925  IF( ltnm1.GT.1 ) THEN
926  CALL ctrmvt( 'L', ltnm1-1,
927  $ a( ltlip1+1+( lijp1-1 )*lda ), lda,
928  $ work( invt+lijp1-1+( bindex+1 )*ldv ), 1,
929  $ work( inh+ltlip1+1-1+( bindex+1 )*ldv ),
930  $ 1, work( inv+ltlip1+1-1+( bindex+1 )*
931  $ ldv ), 1, work( inht+lijp1-1+( bindex+
932  $ 1 )*ldv ), 1 )
933  END IF
934  DO 100 i = 1, ltnm1
935  work( invt+lijp1+i-1-1+( bindex+1 )*ldv )
936  $ = work( invt+lijp1+i-1-1+( bindex+1 )*ldv ) +
937  $ a( ltlip1+i-1+( lijp1+i-1-1 )*lda )*
938  $ work( inh+ltlip1+i-1-1+( bindex+1 )*ldv )
939  100 CONTINUE
940  ELSE
941  IF( ltnm1.GT.0 )
942  $ CALL ctrmvt( 'L', ltnm1, a( ltlip1+( lijp1-1 )*lda ),
943  $ lda, work( invt+lijp1-1+( bindex+1 )*
944  $ ldv ), 1, work( inh+ltlip1-1+( bindex+
945  $ 1 )*ldv ), 1, work( inv+ltlip1-1+
946  $ ( bindex+1 )*ldv ), 1,
947  $ work( inht+lijp1-1+( bindex+1 )*ldv ),
948  $ 1 )
949 *
950  END IF
951 *
952 *
953 * We take advantage of the fact that:
954 * A * sum( B ) = sum ( A * B ) for matrices A,B
955 *
956 * trueA = A + V * HT + H * VT
957 * hence: (trueA)v = Av' + V * HT * v + H * VT * v
958 * VT * v = sum_p_in_NPROW ( VTp * v )
959 * H * VT * v = H * sum (VTp * v) = sum ( H * VTp * v )
960 *
961 * v = v + V * HT * h + H * VT * h
962 *
963 *
964 *
965 * tmp = HT * nh1
966  DO 110 i = 1, 2*( bindex+1 )
967  work( intmp-1+i ) = 0
968  110 CONTINUE
969 *
970  IF( balanced ) THEN
971  npset = nprow
972  mysetnum = myrow
973  rowsperproc = iceil( nqb, npset )
974  myfirstrow = min( nqb+1, 1+rowsperproc*mysetnum )
975  numrows = min( rowsperproc, nqb-myfirstrow+1 )
976 *
977 *
978 * tmp = HT * v
979 *
980  CALL cgemv( 'C', numrows, bindex+1, z_one,
981  $ work( inhtb+myfirstrow-1 ), ldv,
982  $ work( inhtb+myfirstrow-1+( bindex+1 )*ldv ),
983  $ 1, z_zero, work( intmp ), 1 )
984 * tmp2 = VT * v
985  CALL cgemv( 'C', numrows, bindex+1, z_one,
986  $ work( invtb+myfirstrow-1 ), ldv,
987  $ work( inhtb+myfirstrow-1+( bindex+1 )*ldv ),
988  $ 1, z_zero, work( intmp+bindex+1 ), 1 )
989 *
990 *
991  CALL cgsum2d( ictxt, 'C', ' ', 2*( bindex+1 ), 1,
992  $ work( intmp ), 2*( bindex+1 ), -1, -1 )
993  ELSE
994 * tmp = HT * v
995 *
996  CALL cgemv( 'C', nqb, bindex+1, z_one, work( inhtb ),
997  $ ldv, work( inhtb+( bindex+1 )*ldv ), 1,
998  $ z_zero, work( intmp ), 1 )
999 * tmp2 = VT * v
1000  CALL cgemv( 'C', nqb, bindex+1, z_one, work( invtb ),
1001  $ ldv, work( inhtb+( bindex+1 )*ldv ), 1,
1002  $ z_zero, work( intmp+bindex+1 ), 1 )
1003 *
1004  END IF
1005 *
1006 *
1007 *
1008  IF( balanced ) THEN
1009  mysetnum = mycol
1010 *
1011  rowsperproc = iceil( npb, npset )
1012  myfirstrow = min( npb+1, 1+rowsperproc*mysetnum )
1013  numrows = min( rowsperproc, npb-myfirstrow+1 )
1014 *
1015  CALL cgsum2d( ictxt, 'R', ' ', 2*( bindex+1 ), 1,
1016  $ work( intmp ), 2*( bindex+1 ), -1, -1 )
1017 *
1018 *
1019 * v = v + V * tmp
1020  IF( index.GT.1. ) THEN
1021  CALL cgemv( 'N', numrows, bindex+1, z_negone,
1022  $ work( invb+myfirstrow-1 ), ldv,
1023  $ work( intmp ), 1, z_one,
1024  $ work( invb+myfirstrow-1+( bindex+1 )*
1025  $ ldv ), 1 )
1026 *
1027 * v = v + H * tmp2
1028  CALL cgemv( 'N', numrows, bindex+1, z_negone,
1029  $ work( inhb+myfirstrow-1 ), ldv,
1030  $ work( intmp+bindex+1 ), 1, z_one,
1031  $ work( invb+myfirstrow-1+( bindex+1 )*
1032  $ ldv ), 1 )
1033  END IF
1034 *
1035  ELSE
1036 * v = v + V * tmp
1037  CALL cgemv( 'N', npb, bindex+1, z_negone, work( invb ),
1038  $ ldv, work( intmp ), 1, z_one,
1039  $ work( invb+( bindex+1 )*ldv ), 1 )
1040 *
1041 *
1042 * v = v + H * tmp2
1043  CALL cgemv( 'N', npb, bindex+1, z_negone, work( inhb ),
1044  $ ldv, work( intmp+bindex+1 ), 1, z_one,
1045  $ work( invb+( bindex+1 )*ldv ), 1 )
1046 *
1047  END IF
1048 *
1049 *
1050 * Transpose NV and add it back into NVT
1051 *
1052  IF( myrow.EQ.mycol ) THEN
1053  DO 120 i = 0, nqm1 - 1
1054  work( intmp+i ) = work( invt+lijp1-1+( bindex+1 )*ldv+
1055  $ i )
1056  120 CONTINUE
1057  ELSE
1058  CALL cgesd2d( ictxt, nqm1, 1,
1059  $ work( invt+lijp1-1+( bindex+1 )*ldv ),
1060  $ nqm1, mycol, myrow )
1061  CALL cgerv2d( ictxt, npm1, 1, work( intmp ), npm1, mycol,
1062  $ myrow )
1063 *
1064  END IF
1065  DO 130 i = 0, npm1 - 1
1066  work( inv+liip1-1+( bindex+1 )*ldv+i ) = work( inv+liip1-
1067  $ 1+( bindex+1 )*ldv+i ) + work( intmp+i )
1068  130 CONTINUE
1069 *
1070 * Sum-to-one NV rowwise (within a row)
1071 *
1072  CALL cgsum2d( ictxt, 'R', ' ', npm1, 1,
1073  $ work( inv+liip1-1+( bindex+1 )*ldv ), npm1,
1074  $ myrow, nxtcol )
1075 *
1076 *
1077 * Dot product c = NV * NH
1078 * Sum-to-all c within next processor column
1079 *
1080 *
1081  IF( mycol.EQ.nxtcol ) THEN
1082  cc( 1 ) = z_zero
1083  DO 140 i = 0, npm1 - 1
1084  cc( 1 ) = cc( 1 ) + conjg( work( inv+liip1-1+( bindex+
1085  $ 1 )*ldv+i ) )*work( inh+liip1-1+
1086  $ ( bindex+1 )*ldv+i )
1087  140 CONTINUE
1088  IF( myrow.EQ.nxtrow ) THEN
1089  cc( 2 ) = work( inv+liip1-1+( bindex+1 )*ldv )
1090  cc( 3 ) = work( inh+liip1-1+( bindex+1 )*ldv )
1091  ELSE
1092  cc( 2 ) = z_zero
1093  cc( 3 ) = z_zero
1094  END IF
1095  CALL cgsum2d( ictxt, 'C', ' ', 3, 1, cc, 3, -1, nxtcol )
1096 *
1097  topv = cc( 2 )
1098  c = cc( 1 )
1099  toph = cc( 3 )
1100 *
1101  topnv = toptau*( topv-c*conjg( toptau ) / 2*toph )
1102 *
1103 *
1104 * Compute V = Tau * (V - C * Tau' / 2 * H )
1105 *
1106 *
1107  DO 150 i = 0, npm1 - 1
1108  work( inv+liip1-1+( bindex+1 )*ldv+i ) = toptau*
1109  $ ( work( inv+liip1-1+( bindex+1 )*ldv+i )-c*
1110  $ conjg( toptau ) / 2*work( inh+liip1-1+( bindex+1 )*
1111  $ ldv+i ) )
1112  150 CONTINUE
1113 *
1114  END IF
1115 *
1116 *
1117  160 CONTINUE
1118 *
1119 *
1120 * Perform the rank2k update
1121 *
1122  IF( maxindex.LT.n ) THEN
1123 *
1124  DO 170 i = 0, npm1 - 1
1125  work( intmp+i ) = work( inh+liip1-1+anb*ldv+i )
1126  170 CONTINUE
1127 *
1128 *
1129 *
1130  IF( .NOT.twogemms ) THEN
1131  IF( interleave ) THEN
1132  ldzg = ldv / 2
1133  ELSE
1134  CALL clamov( 'A', ltnm1, anb, work( inht+lijp1-1 ),
1135  $ ldv, work( invt+lijp1-1+anb*ldv ), ldv )
1136 *
1137  CALL clamov( 'A', ltnm1, anb, work( inv+ltlip1-1 ),
1138  $ ldv, work( inh+ltlip1-1+anb*ldv ), ldv )
1139  ldzg = ldv
1140  END IF
1141  nbzg = anb*2
1142  ELSE
1143  ldzg = ldv
1144  nbzg = anb
1145  END IF
1146 *
1147 *
1148  DO 180 pbmin = 1, ltnm1, pnb
1149 *
1150  pbsize = min( pnb, ltnm1-pbmin+1 )
1151  pbmax = min( ltnm1, pbmin+pnb-1 )
1152  CALL cgemm( 'N', 'C', pbsize, pbmax, nbzg, z_negone,
1153  $ work( inh+ltlip1-1+pbmin-1 ), ldzg,
1154  $ work( invt+lijp1-1 ), ldzg, z_one,
1155  $ a( ltlip1+pbmin-1+( lijp1-1 )*lda ), lda )
1156  IF( twogemms ) THEN
1157  CALL cgemm( 'N', 'C', pbsize, pbmax, anb, z_negone,
1158  $ work( inv+ltlip1-1+pbmin-1 ), ldzg,
1159  $ work( inht+lijp1-1 ), ldzg, z_one,
1160  $ a( ltlip1+pbmin-1+( lijp1-1 )*lda ), lda )
1161  END IF
1162  180 CONTINUE
1163 *
1164 *
1165 *
1166  DO 190 i = 0, npm1 - 1
1167  work( inv+liip1-1+i ) = work( inv+liip1-1+anb*ldv+i )
1168  work( inh+liip1-1+i ) = work( intmp+i )
1169  190 CONTINUE
1170  DO 200 i = 0, nqm1 - 1
1171  work( inht+lijp1-1+i ) = work( inht+lijp1-1+anb*ldv+i )
1172  200 CONTINUE
1173 *
1174 *
1175  END IF
1176 *
1177 * End of the update A code
1178 *
1179  210 CONTINUE
1180 *
1181  IF( mycol.EQ.nxtcol ) THEN
1182  IF( myrow.EQ.nxtrow ) THEN
1183 *
1184  d( nq ) = real( a( np+( nq-1 )*lda ) )
1185  a( np+( nq-1 )*lda ) = d( nq )
1186 *
1187  CALL sgebs2d( ictxt, 'C', ' ', 1, 1, d( nq ), 1 )
1188  ELSE
1189  CALL sgebr2d( ictxt, 'C', ' ', 1, 1, d( nq ), 1, nxtrow,
1190  $ nxtcol )
1191  END IF
1192  END IF
1193 *
1194 *
1195 *
1196 *
1197  work( 1 ) = cmplx( lwmin )
1198  RETURN
1199 *
1200 * End of PCHETTRD
1201 *
1202 *
1203  END
cmplx
float cmplx[2]
Definition: pblas.h:132
max
#define max(A, B)
Definition: pcgemr.c:180
pchettrd
subroutine pchettrd(UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, LWORK, INFO)
Definition: pchettrd.f:3
pchk1mat
subroutine pchk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, NEXTRA, EX, EXPOS, INFO)
Definition: pchkxmat.f:3
pstreecomb
subroutine pstreecomb(ICTXT, SCOPE, N, MINE, RDEST0, CDEST0, SUBPTR)
Definition: pstreecomb.f:3
chk1mat
subroutine chk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, INFO)
Definition: chk1mat.f:3
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
scombnrm2
subroutine scombnrm2(X, Y)
Definition: pstreecomb.f:306
ctrmvt
subroutine ctrmvt(UPLO, N, T, LDT, X, INCX, Y, INCY, W, INCW, Z, INCZ)
Definition: ctrmvt.f:3
min
#define min(A, B)
Definition: pcgemr.c:181