ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdsyttrd.f
Go to the documentation of this file.
1  SUBROUTINE pdsyttrd( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK,
2  $ LWORK, INFO )
3 *
4 * -- ScaLAPACK routine (version 2.0.2) --
5 * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
6 * May 1 2012
7 *
8 * .. Scalar Arguments ..
9  CHARACTER UPLO
10  INTEGER IA, INFO, JA, LWORK, N
11 * ..
12 * .. Array Arguments ..
13  INTEGER DESCA( * )
14  DOUBLE PRECISION A( * ), D( * ), E( * ), TAU( * ), WORK( * )
15 * ..
16 *
17 * Purpose
18 *
19 * =======
20 *
21 * PDSYTTRD reduces a complex Hermitian matrix sub( A ) to Hermitian
22 * tridiagonal form T by an unitary similarity transformation:
23 * Q' * sub( A ) * Q = T, where sub( A ) = A(IA:IA+N-1,JA:JA+N-1).
24 *
25 * Notes
26 * =====
27 *
28 * Each global data object is described by an associated description
29 * vector. This vector stores the information required to establish
30 * the mapping between an object element and its corresponding
31 * process and memory location.
32 *
33 * Let A be a generic term for any 2D block cyclicly distributed
34 * array.
35 * Such a global array has an associated description vector DESCA.
36 * In the following comments, the character _ should be read as
37 * "of the global array".
38 *
39 * NOTATION STORED IN EXPLANATION
40 * --------------- -------------- -----------------------------------
41 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
42 * DTYPE_A = 1.
43 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle,
44 * indicating the BLACS process grid A is distribu-
45 * ted over. The context itself is glo-
46 * bal, but the handle (the integer
47 * value) may vary.
48 * M_A (global) DESCA( M_ ) The number of rows in the global
49 * array A.
50 * N_A (global) DESCA( N_ ) The number of columns in the global
51 * array A.
52 * MB_A (global) DESCA( MB_ ) The blocking factor used to
53 * distribute the rows of the array.
54 * NB_A (global) DESCA( NB_ ) The blocking factor used to
55 * distribute the columns of the array.
56 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the
57 * first row of the array A is distributed.
58 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
59 * first column of the array A is
60 * distributed.
61 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
62 * array. LLD_A >= MAX(1,LOCp(M_A)).
63 *
64 * Let K be the number of rows or columns of a distributed matrix,
65 * and assume that its process grid has dimension p x q.
66 * LOCp( K ) denotes the number of elements of K that a process
67 * would receive if K were distributed over the p processes of its
68 * process column.
69 * Similarly, LOCq( K ) denotes the number of elements of K that a
70 * process would receive if K were distributed over the q processes
71 * of its process row.
72 * The values of LOCp() and LOCq() may be determined via a call to
73 * the ScaLAPACK tool function, NUMROC:
74 * LOCp( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
75 * LOCq( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
76 * An upper bound for these quantities may be computed by:
77 * LOCp( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
78 * LOCq( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
79 *
80 * Arguments
81 * =========
82 *
83 * UPLO (global input) CHARACTER
84 * Specifies whether the upper or lower triangular part of the
85 * Hermitian matrix sub( A ) is stored:
86 * = 'U': Upper triangular
87 * = 'L': Lower triangular
88 *
89 * N (global input) INTEGER
90 * The number of rows and columns to be operated on, i.e. the
91 * order of the distributed submatrix sub( A ). N >= 0.
92 *
93 * A (local input/local output) DOUBLE PRECISION pointer into the
94 * local memory to an array of dimension (LLD_A,LOCq(JA+N-1)).
95 * On entry, this array contains the local pieces of the
96 * Hermitian distributed matrix sub( A ). If UPLO = 'U', the
97 * leading N-by-N upper triangular part of sub( A ) contains
98 * the upper triangular part of the matrix, and its strictly
99 * lower triangular part is not referenced. If UPLO = 'L', the
100 * leading N-by-N lower triangular part of sub( A ) contains the
101 * lower triangular part of the matrix, and its strictly upper
102 * triangular part is not referenced. On exit, if UPLO = 'U',
103 * the diagonal and first superdiagonal of sub( A ) are over-
104 * written by the corresponding elements of the tridiagonal
105 * matrix T, and the elements above the first superdiagonal,
106 * with the array TAU, represent the unitary matrix Q as a
107 * product of elementary reflectors; if UPLO = 'L', the diagonal
108 * and first subdiagonal of sub( A ) are overwritten by the
109 * corresponding elements of the tridiagonal matrix T, and the
110 * elements below the first subdiagonal, with the array TAU,
111 * represent the unitary matrix Q as a product of elementary
112 * reflectors. See Further Details.
113 *
114 * IA (global input) INTEGER
115 * The row index in the global array A indicating the first
116 * row of sub( A ).
117 *
118 * JA (global input) INTEGER
119 * The column index in the global array A indicating the
120 * first column of sub( A ).
121 *
122 * DESCA (global and local input) INTEGER array of dimension DLEN_.
123 * The array descriptor for the distributed matrix A.
124 *
125 * D (local output) DOUBLE PRECISION array, dim LOCq(JA+N-1)
126 * The diagonal elements of the tridiagonal matrix T:
127 * D(i) = A(i,i). D is tied to the distributed matrix A.
128 *
129 * E (local output) DOUBLE PRECISION array, dim LOCq(JA+N-1)
130 * if UPLO = 'U', LOCq(JA+N-2) otherwise. The off-diagonal
131 * elements of the tridiagonal matrix T: E(i) = A(i,i+1) if
132 * UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the
133 * distributed matrix A.
134 *
135 * TAU (local output) DOUBLE PRECISION array, dimension
136 * LOCq(JA+N-1). This array contains the scalar factors TAU of
137 * the elementary reflectors. TAU is tied to the distributed
138 * matrix A.
139 *
140 * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK)
141 * On exit, WORK( 1 ) returns the minimal and optimal workspace
142 *
143 * LWORK (local input) INTEGER
144 * The dimension of the array WORK.
145 * LWORK >= 2*( ANB+1 )*( 4*NPS+2 ) + NPS
146 * Where:
147 * NPS = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB )
148 * ANB = PJLAENV( DESCA( CTXT_ ), 3, 'PDSYTTRD', 'L', 0, 0,
149 * 0, 0 )
150 *
151 * NUMROC is a ScaLAPACK tool function;
152 * PJLAENV is a ScaLAPACK envionmental inquiry function
153 * MYROW, MYCOL, NPROW and NPCOL can be determined by calling
154 * the subroutine BLACS_GRIDINFO.
155 *
156 * INFO (global output) INTEGER
157 * = 0: successful exit
158 * < 0: If the i-th argument is an array and the j-entry had
159 * an illegal value, then INFO = -(i*100+j), if the i-th
160 * argument is a scalar and had an illegal value, then
161 * INFO = -i.
162 *
163 * Further Details
164 * ===============
165 *
166 * If UPLO = 'U', the matrix Q is represented as a product of
167 * elementary reflectors
168 *
169 * Q = H(n-1) . . . H(2) H(1).
170 *
171 * Each H(i) has the form
172 *
173 * H(i) = I - tau * v * v'
174 *
175 * where tau is a complex scalar, and v is a complex vector with
176 * v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
177 * A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1).
178 *
179 * If UPLO = 'L', the matrix Q is represented as a product of
180 * elementary reflectors
181 *
182 * Q = H(1) H(2) . . . H(n-1).
183 *
184 * Each H(i) has the form
185 *
186 * H(i) = I - tau * v * v'
187 *
188 * where tau is a complex scalar, and v is a complex vector with
189 * v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in
190 * A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1).
191 *
192 * The contents of sub( A ) on exit are illustrated by the following
193 * examples with n = 5:
194 *
195 * if UPLO = 'U': if UPLO = 'L':
196 *
197 * ( d e v2 v3 v4 ) ( d )
198 * ( d e v3 v4 ) ( e d )
199 * ( d e v4 ) ( v1 e d )
200 * ( d e ) ( v1 v2 e d )
201 * ( d ) ( v1 v2 v3 e d )
202 *
203 * where d and e denote diagonal and off-diagonal elements of T, and
204 * vi denotes an element of the vector defining H(i).
205 *
206 * Data storage requirements
207 * =========================
208 *
209 * PDSYTTRD is not intended to be called directly. All users are
210 * encourage to call PDSYTRD which will then call PDHETTRD if
211 * appropriate. A must be in cyclic format (i.e. MB = NB = 1),
212 * the process grid must be square ( i.e. NPROW = NPCOL ) and
213 * only lower triangular storage is supported.
214 *
215 * Local variables
216 * ===============
217 *
218 * PDSYTTRD uses five local arrays:
219 * WORK ( InV ) dimension ( NP, ANB+1): array V
220 * WORK ( InH ) dimension ( NP, ANB+1): array H
221 * WORK ( InVT ) dimension ( NQ, ANB+1): transpose of the array V
222 * WORK ( InHT ) dimension ( NQ, ANB+1): transpose of the array H
223 * WORK ( InVTT ) dimension ( NQ, 1): transpose of the array VT
224 *
225 * Arrays V and H are replicated across all processor columns.
226 * Arrays V^T and H^T are replicated across all processor rows.
227 *
228 * WORK ( InVT ), or V^T, is stored as a tall skinny
229 * array ( NQ x ANB-1 ) for efficiency. Since only the lower
230 * triangular portion of A is updated, Av is computed as:
231 * tril(A) * v + v^T * tril(A,-1). This is performed as
232 * two local triangular matrix-vector multiplications (both in
233 * MVR2) followed by a transpose and a sum across the columns.
234 * In the local computation, WORK( InVT ) is used to compute
235 * tril(A) * v and WORK( InV ) is used to compute
236 * v^T * tril(A,-1)
237 *
238 * The following variables are global indices into A:
239 * INDEX: The current global row and column number.
240 * MAXINDEX: The global row and column for the first row and
241 * column in the trailing block of A.
242 * LIIB, LIJB: The first row, column in
243 *
244 * The following variables point into the arrays A, V, H, V^T, H^T:
245 * BINDEX =INDEX-MININDEX: The column index in V, H, V^T, H^T.
246 * LII: local index I: The local row number for row INDEX
247 * LIJ: local index J: The local column number for column INDEX
248 * LIIP1: local index I+1: The local row number for row INDEX+1
249 * LIJP1: local index J+1: The local col number for col INDEX+1
250 * LTLI: lower triangular local index I: The local row for the
251 * upper left entry in tril( A(INDEX, INDEX) )
252 * LTLIP1: lower triangular local index I+1: The local row for the
253 * upper left entry in tril( A(INDEX+1, INDEX+1) )
254 *
255 * Details: The distinction between LII and LTLI (and between
256 * LIIP1 and LTLIP1) is subtle. Within the current processor
257 * column (i.e. MYCOL .eq. CURCOL) they are the same. However,
258 * on some processors, A( LII, LIJ ) points to an element
259 * above the diagonal, on these processors, LTLI = LII+1.
260 *
261 * The following variables give the number of rows and/or columns
262 * in various matrices:
263 * NP: The number of local rows in A( 1:N, 1:N )
264 * NQ: The number of local columns in A( 1:N, 1:N )
265 * NPM0: The number of local rows in A( INDEX:N, INDEX:N )
266 * NQM0: The number of local columns in A( INDEX:N, INDEX:N )
267 * NPM1: The number of local rows in A( INDEX+1:N, INDEX:N )
268 * NQM1: The number of local columns in A( INDEX+1:N, INDEX:N )
269 * LTNM0: The number of local rows & columns in
270 * tril( A( INDEX:N, INDEX:N ) )
271 * LTNM1: The number of local rows & columns in
272 * tril( A( INDEX+1:N, INDEX+1:N ) )
273 * NOTE: LTNM0 == LTNM1 on all processors except the diagonal
274 * processors, i.e. those where MYCOL == MYROW.
275 *
276 * Invariants:
277 * NP = NPM0 + LII - 1
278 * NQ = NQM0 + LIJ - 1
279 * NP = NPM1 + LIIP1 - 1
280 * NQ = NQM1 + LIJP1 - 1
281 * NP = LTLI + LTNM0 - 1
282 * NP = LTLIP1 + LTNM1 - 1
283 *
284 * Temporary variables. The following variables are used within
285 * a few lines after they are set and do hold state from one loop
286 * iteration to the next:
287 *
288 * The matrix A:
289 * The matrix A does not hold the same values that it would
290 * in an unblocked code nor the values that it would hold in
291 * in a blocked code.
292 *
293 * The value of A is confusing. It is easiest to state the
294 * difference between trueA and A at the point that MVR2 is called,
295 * so we will start there.
296 *
297 * Let trueA be the value that A would
298 * have at a given point in an unblocked code and A
299 * be the value that A has in this code at the same point.
300 *
301 * At the time of the call to MVR2,
302 * trueA = A + V' * H + H' * V
303 * where H = H( MAXINDEX:N, 1:BINDEX ) and
304 * V = V( MAXINDEX:N, 1:BINDEX ).
305 *
306 * At the bottom of the inner loop,
307 * trueA = A + V' * H + H' * V + v' * h + h' * v
308 * where H = H( MAXINDEX:N, 1:BINDEX ) and
309 * V = V( MAXINDEX:N, 1:BINDEX ) and
310 * v = V( liip1:N, BINDEX+1 ) and
311 * h = H( liip1:N, BINDEX+1 )
312 *
313 * At the top of the loop, BINDEX gets incremented, hence:
314 * trueA = A + V' * H + H' * V + v' * h + h' * v
315 * where H = H( MAXINDEX:N, 1:BINDEX-1 ) and
316 * V = V( MAXINDEX:N, 1:BINDEX-1 ) and
317 * v = V( liip1:N, BINDEX ) and
318 * h = H( liip1:N, BINDEX )
319 *
320 *
321 * A gets updated at the bottom of the outer loop
322 * After this update, trueA = A + v' * h + h' * v
323 * where v = V( liip1:N, BINDEX ) and
324 * h = H( liip1:N, BINDEX ) and BINDEX = 0
325 * Indeed, the previous loop invariant as stated above for the
326 * top of the loop still holds, but with BINDEX = 0, H and V
327 * are null matrices.
328 *
329 * After the current column of A is updated,
330 * trueA( INDEX, INDEX:N ) = A( INDEX, INDEX:N )
331 * the rest of A is untouched.
332 *
333 * After the current block column of A is updated,
334 * trueA = A + V' * H + H' * V
335 * where H = H( MAXINDEX:N, 1:BINDEX ) and
336 * V = V( MAXINDEX:N, 1:BINDEX )
337 *
338 * This brings us back to the point at which mvr2 is called.
339 *
340 *
341 * Details of the parallelization:
342 *
343 * We delay spreading v across to all processor columns (which
344 * would naturally happen at the bottom of the loop) in order to
345 * combine the spread of v( : , i-1 ) with the spread of h( : , i )
346 *
347 * In order to compute h( :, i ), we must update A( :, i )
348 * which means that the processor column owning A( :, i ) must
349 * have: c, tau, v( i, i ) and h( i, i ).
350 *
351 * The traditional
352 * way of computing v (and the one used in pzlatrd.f and
353 * zlatrd.f) is:
354 * v = tau * v
355 * c = v' * h
356 * alpha = - tau * c / 2
357 * v = v + alpha * h
358 * However, the traditional way of computing v requires that tau
359 * be broadcast to all processors in the current column (to compute
360 * v = tau * v) and then a sum-to-all is required (to
361 * compute v' * h ). We use the following formula instead:
362 * c = v' * h
363 * v = tau * ( v - c * tau' * h / 2 )
364 * The above formula allows tau to be spread down in the
365 * same call to DGSUM2D which performs the sum-to-all of c.
366 *
367 * The computation of v, which could be performed in any processor
368 * column (or other procesor subsets), is performed in the
369 * processor column that owns A( :, i+1 ) so that A( :, i+1 )
370 * can be updated prior to spreading v across.
371 *
372 * We keep the block column of A up-to-date to minimize the
373 * work required in updating the current column of A. Updating
374 * the block column of A is reasonably load balanced whereas
375 * updating the current column of A is not (only the current
376 * processor column is involved).
377 *
378 * In the following overview of the steps performed, M in the
379 * margin indicates message traffic and C indicates O(n^2 nb/sqrt(p))
380 * or more flops per processor.
381 *
382 * Inner loop:
383 * A( index:n, index ) -= ( v * ht(bindex) + h * vt( bindex) )
384 *M h = house( A(index:n, index) )
385 *M Spread v, h across
386 *M vt = v^T; ht = h^T
387 * A( index+1:n, index+1:maxindex ) -=
388 * ( v * ht(index+1:maxindex) + h *vt(index+1:maxindex) )
389 *C v = tril(A) * h; vt = ht * tril(A,-1)
390 *MorC v = v - H*V*h - V*H*h
391 *M v = v + vt^T
392 *M c = v' * h
393 * v = tau * ( v - c * tau' * h / 2 )
394 *C A = A - H*V - V*H
395 *
396 *
397 *
398 * =================================================================
399 *
400 * .. Parameters ..
401  INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
402  $ mb_, nb_, rsrc_, csrc_, lld_
403  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
404  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
405  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
406  DOUBLE PRECISION ONE
407  parameter( one = 1.0d0 )
408  DOUBLE PRECISION Z_ONE, Z_NEGONE, Z_ZERO
409  parameter( z_one = 1.0d0, z_negone = -1.0d0,
410  $ z_zero = 0.0d0 )
411  DOUBLE PRECISION ZERO
412  parameter( zero = 0.0d+0 )
413 * ..
414 *
415 *
416 * .. Local Scalars ..
417 *
418 *
419  LOGICAL BALANCED, INTERLEAVE, TWOGEMMS, UPPER
420  INTEGER ANB, BINDEX, CURCOL, CURROW, I, ICTXT, INDEX,
421  $ indexa, indexinh, indexinv, inh, inhb, inht,
422  $ inhtb, intmp, inv, invb, invt, invtb, j, lda,
423  $ ldv, ldzg, lii, liib, liip1, lij, lijb, lijp1,
424  $ ltlip1, ltnm1, lwmin, maxindex, minindex,
425  $ mycol, myfirstrow, myrow, mysetnum, nbzg, np,
426  $ npb, npcol, npm0, npm1, nprow, nps, npset, nq,
427  $ nqb, nqm1, numrows, nxtcol, nxtrow, pbmax,
428  $ pbmin, pbsize, pnb, rowsperproc
429  DOUBLE PRECISION ALPHA, BETA, C, CONJTOPH, CONJTOPV, NORM,
430  $ oneoverbeta, safmax, safmin, toph, topnv,
431  $ toptau, topv
432 * ..
433 * .. Local Arrays ..
434 *
435 *
436 *
437 *
438  INTEGER IDUM1( 1 ), IDUM2( 1 )
439  DOUBLE PRECISION CC( 3 ), DTMP( 5 )
440 * ..
441 * .. External Subroutines ..
442  EXTERNAL blacs_gridinfo, chk1mat, dcombnrm2, dgebr2d,
443  $ dgebs2d, dgemm, dgemv, dgerv2d, dgesd2d,
444  $ dgsum2d, dlamov, dscal, dtrmvt, pchk1mat,
446 * ..
447 * .. External Functions ..
448 *
449  LOGICAL LSAME
450  INTEGER ICEIL, NUMROC, PJLAENV
451  DOUBLE PRECISION DNRM2, PDLAMCH
452  EXTERNAL lsame, iceil, numroc, pjlaenv, dnrm2, pdlamch
453 * ..
454 * .. Intrinsic Functions ..
455  INTRINSIC dble, ichar, max, min, mod, sign, sqrt
456 * ..
457 *
458 *
459 * .. Executable Statements ..
460 * This is just to keep ftnchek and toolpack/1 happy
461  IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
462  $ rsrc_.LT.0 )RETURN
463 *
464 *
465 *
466 * Further details
467 * ===============
468 *
469 * At the top of the loop, v and nh have been computed but not
470 * spread across. Hence, A is out-of-date even after the
471 * rank 2k update. Furthermore, we compute the next v before
472 * nh is spread across.
473 *
474 * I claim that if we used a sum-to-all on NV, by summing CC within
475 * each column, that we could compute NV locally and could avoid
476 * spreading V across. Bruce claims that sum-to-all can be made
477 * to cost no more than sum-to-one on the Paragon. If that is
478 * true, this would be a win. But,
479 * the BLACS sum-to-all is just a sum-to-one followed by a broadcast,
480 * and hence the present scheme is better for now.
481 *
482 * Get grid parameters
483 *
484  ictxt = desca( ctxt_ )
485  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
486 *
487  safmax = sqrt( pdlamch( ictxt, 'O' ) ) / n
488  safmin = sqrt( pdlamch( ictxt, 'S' ) )
489 *
490 * Test the input parameters
491 *
492  info = 0
493  IF( nprow.EQ.-1 ) THEN
494  info = -( 600+ctxt_ )
495  ELSE
496 *
497 * Here we set execution options for PDSYTTRD
498 *
499  pnb = pjlaenv( ictxt, 2, 'PDSYTTRD', 'L', 0, 0, 0, 0 )
500  anb = pjlaenv( ictxt, 3, 'PDSYTTRD', 'L', 0, 0, 0, 0 )
501 *
502  interleave = ( pjlaenv( ictxt, 4, 'PDSYTTRD', 'L', 1, 0, 0,
503  $ 0 ).EQ.1 )
504  twogemms = ( pjlaenv( ictxt, 4, 'PDSYTTRD', 'L', 2, 0, 0,
505  $ 0 ).EQ.1 )
506  balanced = ( pjlaenv( ictxt, 4, 'PDSYTTRD', 'L', 3, 0, 0,
507  $ 0 ).EQ.1 )
508 *
509  CALL chk1mat( n, 2, n, 2, ia, ja, desca, 6, info )
510 *
511 *
512  upper = lsame( uplo, 'U' )
513  IF( info.EQ.0 .AND. desca( nb_ ).NE.1 )
514  $ info = 600 + nb_
515  IF( info.EQ.0 ) THEN
516 *
517 *
518 * Here is the arithmetic:
519 * Let maxnpq = max( np, nq, 2 * ANB )
520 * LDV = 4 * max( np, nq ) + 2
521 * LWMIN = 2 * ( ANB + 1 ) * LDV + MAX( np, 2 * ANB )
522 * = 2 * ( ANB + 1 ) * ( 4 * NPS + 2 ) + NPS
523 *
524 * This overestimates memory requirements when ANB > NP/2
525 * Memory requirements are lower when interleave = .false.
526 * Hence, we could have two sets of memory requirements,
527 * one for interleave and one for
528 *
529 *
530  nps = max( numroc( n, 1, 0, 0, nprow ), 2*anb )
531  lwmin = 2*( anb+1 )*( 4*nps+2 ) + nps
532 *
533  work( 1 ) = dble( lwmin )
534  IF( .NOT.lsame( uplo, 'L' ) ) THEN
535  info = -1
536  ELSE IF( ia.NE.1 ) THEN
537  info = -4
538  ELSE IF( ja.NE.1 ) THEN
539  info = -5
540  ELSE IF( nprow.NE.npcol ) THEN
541  info = -( 600+ctxt_ )
542  ELSE IF( desca( dtype_ ).NE.1 ) THEN
543  info = -( 600+dtype_ )
544  ELSE IF( desca( mb_ ).NE.1 ) THEN
545  info = -( 600+mb_ )
546  ELSE IF( desca( nb_ ).NE.1 ) THEN
547  info = -( 600+nb_ )
548  ELSE IF( desca( rsrc_ ).NE.0 ) THEN
549  info = -( 600+rsrc_ )
550  ELSE IF( desca( csrc_ ).NE.0 ) THEN
551  info = -( 600+csrc_ )
552  ELSE IF( lwork.LT.lwmin ) THEN
553  info = -11
554  END IF
555  END IF
556  IF( upper ) THEN
557  idum1( 1 ) = ichar( 'U' )
558  ELSE
559  idum1( 1 ) = ichar( 'L' )
560  END IF
561  idum2( 1 ) = 1
562 *
563  CALL pchk1mat( n, 2, n, 2, ia, ja, desca, 6, 1, idum1, idum2,
564  $ info )
565  END IF
566 *
567  IF( info.NE.0 ) THEN
568  CALL pxerbla( ictxt, 'PDSYTTRD', -info )
569  RETURN
570  END IF
571 *
572 * Quick return if possible
573 *
574  IF( n.EQ.0 )
575  $ RETURN
576 *
577 *
578 *
579 * Reduce the lower triangle of sub( A )
580  np = numroc( n, 1, myrow, 0, nprow )
581  nq = numroc( n, 1, mycol, 0, npcol )
582 *
583  nxtrow = 0
584  nxtcol = 0
585 *
586  liip1 = 1
587  lijp1 = 1
588  npm1 = np
589  nqm1 = nq
590 *
591  lda = desca( lld_ )
592  ictxt = desca( ctxt_ )
593 *
594 *
595 *
596 * Miscellaneous details:
597 * Put tau, D and E in the right places
598 * Check signs
599 * Place all the arrays in WORK, control their placement
600 * in memory.
601 *
602 *
603 *
604 * Loop invariants
605 * A(LIIP1, LIJ) points to the first element of A(I+1,J)
606 * NPM1,NQM1 = the number of rows, cols in A( LII+1:N,LIJ+1:N )
607 * A(LII:N,LIJ:N) is one step out of date.
608 * proc( CURROW, CURCOL ) owns A(LII,LIJ)
609 * proc( NXTROW, CURCOL ) owns A(LIIP1,LIJ)
610 *
611  inh = 1
612 *
613  IF( interleave ) THEN
614 *
615 * H and V are interleaved to minimize memory movement
616 * LDV has to be twice as large to accomodate interleaving.
617 * In addition, LDV is doubled again to allow v, h and
618 * toptau to be spreaad across and transposed in a
619 * single communication operation with minimum memory
620 * movement.
621 *
622 * We could reduce LDV back to 2*MAX(NPM1,NQM1)
623 * by increasing the memory movement required in
624 * the spread and transpose of v, h and toptau.
625 * However, since the non-interleaved path already
626 * provides a mear minimum memory requirement option,
627 * we did not provide this additional path.
628 *
629  ldv = 4*( max( npm1, nqm1 ) ) + 2
630 *
631  inh = 1
632 *
633  inv = inh + ldv / 2
634  invt = inh + ( anb+1 )*ldv
635 *
636  inht = invt + ldv / 2
637  intmp = invt + ldv*( anb+1 )
638 *
639  ELSE
640  ldv = max( npm1, nqm1 )
641 *
642  inht = inh + ldv*( anb+1 )
643  inv = inht + ldv*( anb+1 )
644 *
645 * The code works without this +1, but only because of a
646 * coincidence. Without the +1, WORK(INVT) gets trashed, but
647 * WORK(INVT) is only used once and when it is used, it is
648 * multiplied by WORK( INH ) which is zero. Hence, the fact
649 * that WORK(INVT) is trashed has no effect.
650 *
651  invt = inv + ldv*( anb+1 ) + 1
652  intmp = invt + ldv*( 2*anb )
653 *
654  END IF
655 *
656  IF( info.NE.0 ) THEN
657  CALL pxerbla( ictxt, 'PDSYTTRD', -info )
658  work( 1 ) = dble( lwmin )
659  RETURN
660  END IF
661 *
662 *
663 * The satisfies the loop invariant: trueA = A - V * HT - H * VT,
664 * (where V, H, VT and HT all have BINDEX+1 rows/columns)
665 * the first ANB times through the loop.
666 *
667 *
668 *
669 * Setting either ( InH and InHT ) or InV to Z_ZERO
670 * is adequate except in the face of NaNs.
671 *
672 *
673  DO 10 i = 1, np
674  work( inh+i-1 ) = z_zero
675  work( inv+i-1 ) = z_zero
676  10 CONTINUE
677  DO 20 i = 1, nq
678  work( inht+i-1 ) = z_zero
679  20 CONTINUE
680 *
681 *
682 *
683  topnv = z_zero
684 *
685  ltlip1 = lijp1
686  ltnm1 = npm1
687  IF( mycol.GT.myrow ) THEN
688  ltlip1 = ltlip1 + 1
689  ltnm1 = ltnm1 - 1
690  END IF
691 *
692 *
693  DO 210 minindex = 1, n - 1, anb
694 *
695 *
696  maxindex = min( minindex+anb-1, n )
697  lijb = numroc( maxindex, 1, mycol, 0, npcol ) + 1
698  liib = numroc( maxindex, 1, myrow, 0, nprow ) + 1
699 *
700  nqb = nq - lijb + 1
701  npb = np - liib + 1
702  inhtb = inht + lijb - 1
703  invtb = invt + lijb - 1
704  inhb = inh + liib - 1
705  invb = inv + liib - 1
706 *
707 *
708 *
709 *
710  DO 160 index = minindex, min( maxindex, n-1 )
711 *
712  bindex = index - minindex
713 *
714  currow = nxtrow
715  curcol = nxtcol
716 *
717  nxtrow = mod( currow+1, nprow )
718  nxtcol = mod( curcol+1, npcol )
719 *
720  lii = liip1
721  lij = lijp1
722  npm0 = npm1
723 *
724  IF( myrow.EQ.currow ) THEN
725  npm1 = npm1 - 1
726  liip1 = liip1 + 1
727  END IF
728  IF( mycol.EQ.curcol ) THEN
729  nqm1 = nqm1 - 1
730  lijp1 = lijp1 + 1
731  ltlip1 = ltlip1 + 1
732  ltnm1 = ltnm1 - 1
733  END IF
734 *
735 *
736 *
737 *
738 * V = NV, VT = NVT, H = NH, HT = NHT
739 *
740 *
741 * Update the current column of A
742 *
743 *
744  IF( mycol.EQ.curcol ) THEN
745 *
746  indexa = lii + ( lij-1 )*lda
747  indexinv = inv + lii - 1 + ( bindex-1 )*ldv
748  indexinh = inh + lii - 1 + ( bindex-1 )*ldv
749  conjtoph = work( inht+lij-1+bindex*ldv )
750  conjtopv = topnv
751 *
752  IF( index.GT.1 ) THEN
753  DO 30 i = 0, npm0 - 1
754 * A( INDEXA+I ) = A( INDEXA+I )
755  a( indexa+i ) = a( indexa+i ) -
756  $ work( indexinv+ldv+i )*conjtoph -
757  $ work( indexinh+ldv+i )*conjtopv
758  30 CONTINUE
759  END IF
760 *
761 *
762  END IF
763 *
764 *
765  IF( mycol.EQ.curcol ) THEN
766 *
767 * Compute the householder vector
768 *
769  IF( myrow.EQ.currow ) THEN
770  dtmp( 2 ) = a( lii+( lij-1 )*lda )
771  ELSE
772  dtmp( 2 ) = zero
773  END IF
774  IF( myrow.EQ.nxtrow ) THEN
775  dtmp( 3 ) = a( liip1+( lij-1 )*lda )
776  dtmp( 4 ) = zero
777  ELSE
778  dtmp( 3 ) = zero
779  dtmp( 4 ) = zero
780  END IF
781 *
782  norm = dnrm2( npm1, a( liip1+( lij-1 )*lda ), 1 )
783  dtmp( 1 ) = norm
784 *
785 * IF DTMP(5) = 1.0, NORM is too large and might cause
786 * overflow, hence PDTREECOMB must be called. IF DTMP(5)
787 * is zero on output, DTMP(1) can be trusted.
788 *
789  dtmp( 5 ) = zero
790  IF( dtmp( 1 ).GE.safmax .OR. dtmp( 1 ).LT.safmin ) THEN
791  dtmp( 5 ) = one
792  dtmp( 1 ) = zero
793  END IF
794 *
795  dtmp( 1 ) = dtmp( 1 )*dtmp( 1 )
796  CALL dgsum2d( ictxt, 'C', ' ', 5, 1, dtmp, 5, -1,
797  $ curcol )
798  IF( dtmp( 5 ).EQ.zero ) THEN
799  dtmp( 1 ) = sqrt( dtmp( 1 ) )
800  ELSE
801  dtmp( 1 ) = norm
802  CALL pdtreecomb( ictxt, 'C', 1, dtmp, -1, mycol,
803  $ dcombnrm2 )
804  END IF
805 *
806  norm = dtmp( 1 )
807 *
808  d( lij ) = dtmp( 2 )
809  IF( myrow.EQ.currow .AND. mycol.EQ.curcol ) THEN
810  a( lii+( lij-1 )*lda ) = d( lij )
811  END IF
812 *
813 *
814  alpha = dtmp( 3 )
815 *
816  norm = sign( norm, alpha )
817 *
818  IF( norm.EQ.zero ) THEN
819  toptau = zero
820  ELSE
821  beta = norm + alpha
822  toptau = beta / norm
823  oneoverbeta = 1.0d0 / beta
824 *
825  CALL dscal( npm1, oneoverbeta,
826  $ a( liip1+( lij-1 )*lda ), 1 )
827  END IF
828 *
829  IF( myrow.EQ.nxtrow ) THEN
830  a( liip1+( lij-1 )*lda ) = z_one
831  END IF
832 *
833  tau( lij ) = toptau
834  e( lij ) = -norm
835 *
836  END IF
837 *
838 *
839 * Spread v, nh, toptau across
840 *
841  DO 40 i = 0, npm1 - 1
842  work( inv+liip1-1+bindex*ldv+npm1+i ) = a( liip1+i+
843  $ ( lij-1 )*lda )
844  40 CONTINUE
845 *
846  IF( mycol.EQ.curcol ) THEN
847  work( inv+liip1-1+bindex*ldv+npm1+npm1 ) = toptau
848  CALL dgebs2d( ictxt, 'R', ' ', npm1+npm1+1, 1,
849  $ work( inv+liip1-1+bindex*ldv ),
850  $ npm1+npm1+1 )
851  ELSE
852  CALL dgebr2d( ictxt, 'R', ' ', npm1+npm1+1, 1,
853  $ work( inv+liip1-1+bindex*ldv ),
854  $ npm1+npm1+1, myrow, curcol )
855  toptau = work( inv+liip1-1+bindex*ldv+npm1+npm1 )
856  END IF
857  DO 50 i = 0, npm1 - 1
858  work( inh+liip1-1+( bindex+1 )*ldv+i ) = work( inv+liip1-
859  $ 1+bindex*ldv+npm1+i )
860  50 CONTINUE
861 *
862  IF( index.LT.n ) THEN
863  IF( myrow.EQ.nxtrow .AND. mycol.EQ.curcol )
864  $ a( liip1+( lij-1 )*lda ) = e( lij )
865  END IF
866 *
867 * Transpose v, nh
868 *
869 *
870  IF( myrow.EQ.mycol ) THEN
871  DO 60 i = 0, npm1 + npm1
872  work( invt+lijp1-1+bindex*ldv+i ) = work( inv+liip1-1+
873  $ bindex*ldv+i )
874  60 CONTINUE
875  ELSE
876  CALL dgesd2d( ictxt, npm1+npm1, 1,
877  $ work( inv+liip1-1+bindex*ldv ), npm1+npm1,
878  $ mycol, myrow )
879  CALL dgerv2d( ictxt, nqm1+nqm1, 1,
880  $ work( invt+lijp1-1+bindex*ldv ), nqm1+nqm1,
881  $ mycol, myrow )
882  END IF
883 *
884  DO 70 i = 0, nqm1 - 1
885  work( inht+lijp1-1+( bindex+1 )*ldv+i ) = work( invt+
886  $ lijp1-1+bindex*ldv+nqm1+i )
887  70 CONTINUE
888 *
889 *
890 * Update the current block column of A
891 *
892  IF( index.GT.1 ) THEN
893  DO 90 j = lijp1, lijb - 1
894  DO 80 i = 0, npm1 - 1
895 *
896  a( liip1+i+( j-1 )*lda ) = a( liip1+i+( j-1 )*lda )
897  $ - work( inv+liip1-1+bindex*ldv+i )*
898  $ work( inht+j-1+bindex*ldv ) -
899  $ work( inh+liip1-1+bindex*ldv+i )*
900  $ work( invt+j-1+bindex*ldv )
901  80 CONTINUE
902  90 CONTINUE
903  END IF
904 *
905 *
906 *
907 * Compute NV = A * NHT; NVT = A * NH
908 *
909 * These two lines are necessary because these elements
910 * are not always involved in the calls to DTRMVT
911 * for two reasons:
912 * 1) On diagonal processors, the call to TRMVT
913 * involves only LTNM1-1 elements
914 * 2) On some processes, NQM1 < LTM1 or LIIP1 < LTLIP1
915 * and when the results are combined across all processes,
916 * uninitialized values may be included.
917  work( inv+liip1-1+( bindex+1 )*ldv ) = z_zero
918  work( invt+lijp1-1+( bindex+1 )*ldv+nqm1-1 ) = z_zero
919 *
920 *
921  IF( myrow.EQ.mycol ) THEN
922  IF( ltnm1.GT.1 ) THEN
923  CALL dtrmvt( 'L', ltnm1-1,
924  $ a( ltlip1+1+( lijp1-1 )*lda ), lda,
925  $ work( invt+lijp1-1+( bindex+1 )*ldv ), 1,
926  $ work( inh+ltlip1+1-1+( bindex+1 )*ldv ),
927  $ 1, work( inv+ltlip1+1-1+( bindex+1 )*
928  $ ldv ), 1, work( inht+lijp1-1+( bindex+
929  $ 1 )*ldv ), 1 )
930  END IF
931  DO 100 i = 1, ltnm1
932  work( invt+lijp1+i-1-1+( bindex+1 )*ldv )
933  $ = work( invt+lijp1+i-1-1+( bindex+1 )*ldv ) +
934  $ a( ltlip1+i-1+( lijp1+i-1-1 )*lda )*
935  $ work( inh+ltlip1+i-1-1+( bindex+1 )*ldv )
936  100 CONTINUE
937  ELSE
938  IF( ltnm1.GT.0 )
939  $ CALL dtrmvt( 'L', ltnm1, a( ltlip1+( lijp1-1 )*lda ),
940  $ lda, work( invt+lijp1-1+( bindex+1 )*
941  $ ldv ), 1, work( inh+ltlip1-1+( bindex+
942  $ 1 )*ldv ), 1, work( inv+ltlip1-1+
943  $ ( bindex+1 )*ldv ), 1,
944  $ work( inht+lijp1-1+( bindex+1 )*ldv ),
945  $ 1 )
946 *
947  END IF
948 *
949 *
950 * We take advantage of the fact that:
951 * A * sum( B ) = sum ( A * B ) for matrices A,B
952 *
953 * trueA = A + V * HT + H * VT
954 * hence: (trueA)v = Av' + V * HT * v + H * VT * v
955 * VT * v = sum_p_in_NPROW ( VTp * v )
956 * H * VT * v = H * sum (VTp * v) = sum ( H * VTp * v )
957 *
958 * v = v + V * HT * h + H * VT * h
959 *
960 *
961 *
962 * tmp = HT * nh1
963  DO 110 i = 1, 2*( bindex+1 )
964  work( intmp-1+i ) = 0
965  110 CONTINUE
966 *
967  IF( balanced ) THEN
968  npset = nprow
969  mysetnum = myrow
970  rowsperproc = iceil( nqb, npset )
971  myfirstrow = min( nqb+1, 1+rowsperproc*mysetnum )
972  numrows = min( rowsperproc, nqb-myfirstrow+1 )
973 *
974 *
975 * tmp = HT * v
976 *
977  CALL dgemv( 'C', numrows, bindex+1, z_one,
978  $ work( inhtb+myfirstrow-1 ), ldv,
979  $ work( inhtb+myfirstrow-1+( bindex+1 )*ldv ),
980  $ 1, z_zero, work( intmp ), 1 )
981 * tmp2 = VT * v
982  CALL dgemv( 'C', numrows, bindex+1, z_one,
983  $ work( invtb+myfirstrow-1 ), ldv,
984  $ work( inhtb+myfirstrow-1+( bindex+1 )*ldv ),
985  $ 1, z_zero, work( intmp+bindex+1 ), 1 )
986 *
987 *
988  CALL dgsum2d( ictxt, 'C', ' ', 2*( bindex+1 ), 1,
989  $ work( intmp ), 2*( bindex+1 ), -1, -1 )
990  ELSE
991 * tmp = HT * v
992 *
993  CALL dgemv( 'C', nqb, bindex+1, z_one, work( inhtb ),
994  $ ldv, work( inhtb+( bindex+1 )*ldv ), 1,
995  $ z_zero, work( intmp ), 1 )
996 * tmp2 = VT * v
997  CALL dgemv( 'C', nqb, bindex+1, z_one, work( invtb ),
998  $ ldv, work( inhtb+( bindex+1 )*ldv ), 1,
999  $ z_zero, work( intmp+bindex+1 ), 1 )
1000 *
1001  END IF
1002 *
1003 *
1004 *
1005  IF( balanced ) THEN
1006  mysetnum = mycol
1007 *
1008  rowsperproc = iceil( npb, npset )
1009  myfirstrow = min( npb+1, 1+rowsperproc*mysetnum )
1010  numrows = min( rowsperproc, npb-myfirstrow+1 )
1011 *
1012  CALL dgsum2d( ictxt, 'R', ' ', 2*( bindex+1 ), 1,
1013  $ work( intmp ), 2*( bindex+1 ), -1, -1 )
1014 *
1015 *
1016 * v = v + V * tmp
1017  IF( index.GT.1. ) THEN
1018  CALL dgemv( 'N', numrows, bindex+1, z_negone,
1019  $ work( invb+myfirstrow-1 ), ldv,
1020  $ work( intmp ), 1, z_one,
1021  $ work( invb+myfirstrow-1+( bindex+1 )*
1022  $ ldv ), 1 )
1023 *
1024 * v = v + H * tmp2
1025  CALL dgemv( 'N', numrows, bindex+1, z_negone,
1026  $ work( inhb+myfirstrow-1 ), ldv,
1027  $ work( intmp+bindex+1 ), 1, z_one,
1028  $ work( invb+myfirstrow-1+( bindex+1 )*
1029  $ ldv ), 1 )
1030  END IF
1031 *
1032  ELSE
1033 * v = v + V * tmp
1034  CALL dgemv( 'N', npb, bindex+1, z_negone, work( invb ),
1035  $ ldv, work( intmp ), 1, z_one,
1036  $ work( invb+( bindex+1 )*ldv ), 1 )
1037 *
1038 *
1039 * v = v + H * tmp2
1040  CALL dgemv( 'N', npb, bindex+1, z_negone, work( inhb ),
1041  $ ldv, work( intmp+bindex+1 ), 1, z_one,
1042  $ work( invb+( bindex+1 )*ldv ), 1 )
1043 *
1044  END IF
1045 *
1046 *
1047 * Transpose NV and add it back into NVT
1048 *
1049  IF( myrow.EQ.mycol ) THEN
1050  DO 120 i = 0, nqm1 - 1
1051  work( intmp+i ) = work( invt+lijp1-1+( bindex+1 )*ldv+
1052  $ i )
1053  120 CONTINUE
1054  ELSE
1055  CALL dgesd2d( ictxt, nqm1, 1,
1056  $ work( invt+lijp1-1+( bindex+1 )*ldv ),
1057  $ nqm1, mycol, myrow )
1058  CALL dgerv2d( ictxt, npm1, 1, work( intmp ), npm1, mycol,
1059  $ myrow )
1060 *
1061  END IF
1062  DO 130 i = 0, npm1 - 1
1063  work( inv+liip1-1+( bindex+1 )*ldv+i ) = work( inv+liip1-
1064  $ 1+( bindex+1 )*ldv+i ) + work( intmp+i )
1065  130 CONTINUE
1066 *
1067 * Sum-to-one NV rowwise (within a row)
1068 *
1069  CALL dgsum2d( ictxt, 'R', ' ', npm1, 1,
1070  $ work( inv+liip1-1+( bindex+1 )*ldv ), npm1,
1071  $ myrow, nxtcol )
1072 *
1073 *
1074 * Dot product c = NV * NH
1075 * Sum-to-all c within next processor column
1076 *
1077 *
1078  IF( mycol.EQ.nxtcol ) THEN
1079  cc( 1 ) = z_zero
1080  DO 140 i = 0, npm1 - 1
1081  cc( 1 ) = cc( 1 ) + work( inv+liip1-1+( bindex+1 )*
1082  $ ldv+i )*work( inh+liip1-1+( bindex+1 )*ldv+
1083  $ i )
1084  140 CONTINUE
1085  IF( myrow.EQ.nxtrow ) THEN
1086  cc( 2 ) = work( inv+liip1-1+( bindex+1 )*ldv )
1087  cc( 3 ) = work( inh+liip1-1+( bindex+1 )*ldv )
1088  ELSE
1089  cc( 2 ) = z_zero
1090  cc( 3 ) = z_zero
1091  END IF
1092  CALL dgsum2d( ictxt, 'C', ' ', 3, 1, cc, 3, -1, nxtcol )
1093 *
1094  topv = cc( 2 )
1095  c = cc( 1 )
1096  toph = cc( 3 )
1097 *
1098  topnv = toptau*( topv-c*toptau / 2*toph )
1099 *
1100 *
1101 * Compute V = Tau * (V - C * Tau' / 2 * H )
1102 *
1103 *
1104  DO 150 i = 0, npm1 - 1
1105  work( inv+liip1-1+( bindex+1 )*ldv+i ) = toptau*
1106  $ ( work( inv+liip1-1+( bindex+1 )*ldv+i )-c*toptau /
1107  $ 2*work( inh+liip1-1+( bindex+1 )*ldv+i ) )
1108  150 CONTINUE
1109 *
1110  END IF
1111 *
1112 *
1113  160 CONTINUE
1114 *
1115 *
1116 * Perform the rank2k update
1117 *
1118  IF( maxindex.LT.n ) THEN
1119 *
1120  DO 170 i = 0, npm1 - 1
1121  work( intmp+i ) = work( inh+liip1-1+anb*ldv+i )
1122  170 CONTINUE
1123 *
1124 *
1125 *
1126  IF( .NOT.twogemms ) THEN
1127  IF( interleave ) THEN
1128  ldzg = ldv / 2
1129  ELSE
1130  CALL dlamov( 'A', ltnm1, anb, work( inht+lijp1-1 ),
1131  $ ldv, work( invt+lijp1-1+anb*ldv ), ldv )
1132 *
1133  CALL dlamov( 'A', ltnm1, anb, work( inv+ltlip1-1 ),
1134  $ ldv, work( inh+ltlip1-1+anb*ldv ), ldv )
1135  ldzg = ldv
1136  END IF
1137  nbzg = anb*2
1138  ELSE
1139  ldzg = ldv
1140  nbzg = anb
1141  END IF
1142 *
1143 *
1144  DO 180 pbmin = 1, ltnm1, pnb
1145 *
1146  pbsize = min( pnb, ltnm1-pbmin+1 )
1147  pbmax = min( ltnm1, pbmin+pnb-1 )
1148  CALL dgemm( 'N', 'C', pbsize, pbmax, nbzg, z_negone,
1149  $ work( inh+ltlip1-1+pbmin-1 ), ldzg,
1150  $ work( invt+lijp1-1 ), ldzg, z_one,
1151  $ a( ltlip1+pbmin-1+( lijp1-1 )*lda ), lda )
1152  IF( twogemms ) THEN
1153  CALL dgemm( 'N', 'C', pbsize, pbmax, anb, z_negone,
1154  $ work( inv+ltlip1-1+pbmin-1 ), ldzg,
1155  $ work( inht+lijp1-1 ), ldzg, z_one,
1156  $ a( ltlip1+pbmin-1+( lijp1-1 )*lda ), lda )
1157  END IF
1158  180 CONTINUE
1159 *
1160 *
1161 *
1162  DO 190 i = 0, npm1 - 1
1163  work( inv+liip1-1+i ) = work( inv+liip1-1+anb*ldv+i )
1164  work( inh+liip1-1+i ) = work( intmp+i )
1165  190 CONTINUE
1166  DO 200 i = 0, nqm1 - 1
1167  work( inht+lijp1-1+i ) = work( inht+lijp1-1+anb*ldv+i )
1168  200 CONTINUE
1169 *
1170 *
1171  END IF
1172 *
1173 * End of the update A code
1174 *
1175  210 CONTINUE
1176 *
1177  IF( mycol.EQ.nxtcol ) THEN
1178  IF( myrow.EQ.nxtrow ) THEN
1179 *
1180  d( nq ) = a( np+( nq-1 )*lda )
1181 *
1182  CALL dgebs2d( ictxt, 'C', ' ', 1, 1, d( nq ), 1 )
1183  ELSE
1184  CALL dgebr2d( ictxt, 'C', ' ', 1, 1, d( nq ), 1, nxtrow,
1185  $ nxtcol )
1186  END IF
1187  END IF
1188 *
1189 *
1190 *
1191 *
1192  work( 1 ) = dble( lwmin )
1193  RETURN
1194 *
1195 * End of PDSYTTRD
1196 *
1197 *
1198  END
max
#define max(A, B)
Definition: pcgemr.c:180
pdtreecomb
subroutine pdtreecomb(ICTXT, SCOPE, N, MINE, RDEST0, CDEST0, SUBPTR)
Definition: pdtreecomb.f:3
pchk1mat
subroutine pchk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, NEXTRA, EX, EXPOS, INFO)
Definition: pchkxmat.f:3
pdsyttrd
subroutine pdsyttrd(UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, LWORK, INFO)
Definition: pdsyttrd.f:3
chk1mat
subroutine chk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, INFO)
Definition: chk1mat.f:3
dcombnrm2
subroutine dcombnrm2(X, Y)
Definition: pdtreecomb.f:307
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
min
#define min(A, B)
Definition: pcgemr.c:181
dtrmvt
subroutine dtrmvt(UPLO, N, T, LDT, X, INCX, Y, INCY, W, INCW, Z, INCZ)
Definition: dtrmvt.f:3