ScaLAPACK 2.1: Scalable Linear Algebra PACKage
pssyttrd.f
1  SUBROUTINE pssyttrd( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK,
2  $ LWORK, INFO )
3 *
4 * -- ScaLAPACK routine (version 2.0.2) --
5 * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
6 * May 1 2012
7 *
8 * .. Scalar Arguments ..
9  CHARACTER UPLO
10  INTEGER IA, INFO, JA, LWORK, N
11 * ..
12 * .. Array Arguments ..
13  INTEGER DESCA( * )
14  REAL A( * ), D( * ), E( * ), TAU( * ), WORK( * )
15 * ..
16 *
17 * Purpose
18 *
19 * =======
20 *
21 * PSSYTTRD reduces a real symmetric matrix sub( A ) to symmetric
22 * tridiagonal form T by an orthogonal similarity transformation:
23 * Q' * sub( A ) * Q = T, where sub( A ) = A(IA:IA+N-1,JA:JA+N-1).
24 *
25 * Notes
26 * =====
27 *
28 * Each global data object is described by an associated description
29 * vector. This vector stores the information required to establish
30 * the mapping between an object element and its corresponding
31 * process and memory location.
32 *
33 * Let A be a generic term for any 2D block cyclically distributed
34 * array.
35 * Such a global array has an associated description vector DESCA.
36 * In the following comments, the character _ should be read as
37 * "of the global array".
38 *
39 * NOTATION STORED IN EXPLANATION
40 * --------------- -------------- -----------------------------------
41 * DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. In this case,
42 * DTYPE_A = 1.
43 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle,
44 * indicating the BLACS process grid A is distribu-
45 * ted over. The context itself is glo-
46 * bal, but the handle (the integer
47 * value) may vary.
48 * M_A (global) DESCA( M_ ) The number of rows in the global
49 * array A.
50 * N_A (global) DESCA( N_ ) The number of columns in the global
51 * array A.
52 * MB_A (global) DESCA( MB_ ) The blocking factor used to
53 * distribute the rows of the array.
54 * NB_A (global) DESCA( NB_ ) The blocking factor used to
55 * distribute the columns of the array.
56 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the
57 * first row of the array A is distributed.
58 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
59 * first column of the array A is
60 * distributed.
61 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
62 * array. LLD_A >= MAX(1,LOCp(M_A)).
63 *
64 * Let K be the number of rows or columns of a distributed matrix,
65 * and assume that its process grid has dimension p x q.
66 * LOCp( K ) denotes the number of elements of K that a process
67 * would receive if K were distributed over the p processes of its
68 * process column.
69 * Similarly, LOCq( K ) denotes the number of elements of K that a
70 * process would receive if K were distributed over the q processes
71 * of its process row.
72 * The values of LOCp() and LOCq() may be determined via a call to
73 * the ScaLAPACK tool function, NUMROC:
74 * LOCp( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
75 * LOCq( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
76 * An upper bound for these quantities may be computed by:
77 * LOCp( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
78 * LOCq( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
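*
* For example, with M = 100, MB_A = 1, RSRC_A = 0 and NPROW = 4
* (the cyclic layout this routine requires), process row 0 owns
* LOCp( 100 ) = NUMROC( 100, 1, 0, 0, 4 ) = 25
* rows, and the bound above gives ceil( ceil(100/1)/4 )*1 = 25.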
79 *
80 * Arguments
81 * =========
82 *
83 * UPLO (global input) CHARACTER
84 * Specifies whether the upper or lower triangular part of the
85 * symmetric matrix sub( A ) is stored:
86 * = 'U': Upper triangular
87 * = 'L': Lower triangular
88 *
89 * N (global input) INTEGER
90 * The number of rows and columns to be operated on, i.e. the
91 * order of the distributed submatrix sub( A ). N >= 0.
92 *
93 * A (local input/local output) REAL pointer into the
94 * local memory to an array of dimension (LLD_A,LOCq(JA+N-1)).
95 * On entry, this array contains the local pieces of the
96 * symmetric distributed matrix sub( A ). If UPLO = 'U', the
97 * leading N-by-N upper triangular part of sub( A ) contains
98 * the upper triangular part of the matrix, and its strictly
99 * lower triangular part is not referenced. If UPLO = 'L', the
100 * leading N-by-N lower triangular part of sub( A ) contains the
101 * lower triangular part of the matrix, and its strictly upper
102 * triangular part is not referenced. On exit, if UPLO = 'U',
103 * the diagonal and first superdiagonal of sub( A ) are over-
104 * written by the corresponding elements of the tridiagonal
105 * matrix T, and the elements above the first superdiagonal,
106 * with the array TAU, represent the orthogonal matrix Q as a
107 * product of elementary reflectors; if UPLO = 'L', the diagonal
108 * and first subdiagonal of sub( A ) are overwritten by the
109 * corresponding elements of the tridiagonal matrix T, and the
110 * elements below the first subdiagonal, with the array TAU,
111 * represent the orthogonal matrix Q as a product of elementary
112 * reflectors. See Further Details.
113 *
114 * IA (global input) INTEGER
115 * The row index in the global array A indicating the first
116 * row of sub( A ).
117 *
118 * JA (global input) INTEGER
119 * The column index in the global array A indicating the
120 * first column of sub( A ).
121 *
122 * DESCA (global and local input) INTEGER array of dimension DLEN_.
123 * The array descriptor for the distributed matrix A.
124 *
125 * D (local output) REAL array, dim LOCq(JA+N-1)
126 * The diagonal elements of the tridiagonal matrix T:
127 * D(i) = A(i,i). D is tied to the distributed matrix A.
128 *
129 * E (local output) REAL array, dim LOCq(JA+N-1)
130 * if UPLO = 'U', LOCq(JA+N-2) otherwise. The off-diagonal
131 * elements of the tridiagonal matrix T: E(i) = A(i,i+1) if
132 * UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the
133 * distributed matrix A.
134 *
135 * TAU (local output) REAL, array, dimension
136 * LOCq(JA+N-1). This array contains the scalar factors TAU of
137 * the elementary reflectors. TAU is tied to the distributed
138 * matrix A.
139 *
140 * WORK (local workspace) REAL array, dimension (LWORK)
141 * On exit, WORK( 1 ) returns the minimal and optimal LWORK.
142 *
143 * LWORK (local input) INTEGER
144 * The dimension of the array WORK.
145 * LWORK >= 2*( ANB+1 )*( 4*NPS+2 ) + NPS
146 * Where:
147 * NPS = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB )
148 * ANB = PJLAENV( DESCA( CTXT_ ), 3, 'PSSYTTRD', 'L', 0, 0,
149 * 0, 0 )
150 *
151 * NUMROC is a ScaLAPACK tool function;
152 * PJLAENV is a ScaLAPACK environmental inquiry function;
153 * MYROW, MYCOL, NPROW and NPCOL can be determined by calling
154 * the subroutine BLACS_GRIDINFO.
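*
*          For illustration, a caller might size WORK with a sketch
*          along these lines (ICTXT = DESCA( CTXT_ ); NPROW from
*          BLACS_GRIDINFO):
*             ANB   = PJLAENV( ICTXT, 3, 'PSSYTTRD', 'L', 0, 0, 0, 0 )
*             NPS   = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB )
*             LWORK = 2*( ANB+1 )*( 4*NPS+2 ) + NPS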
155 *
156 * INFO (global output) INTEGER
157 * = 0: successful exit
158 * < 0: If the i-th argument is an array and the j-th entry had
159 * an illegal value, then INFO = -(i*100+j); if the i-th
160 * argument is a scalar and had an illegal value, then
161 * INFO = -i.
162 *
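* A representative call, assuming the restrictions listed under
* Data storage requirements below (IA = JA = 1, UPLO = 'L',
* MB_A = NB_A = 1, square process grid), might look like:
*    CALL PSSYTTRD( 'L', N, A, 1, 1, DESCA, D, E, TAU, WORK, LWORK,
*                   INFO )
*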
163 * Further Details
164 * ===============
165 *
166 * If UPLO = 'U', the matrix Q is represented as a product of
167 * elementary reflectors
168 *
169 * Q = H(n-1) . . . H(2) H(1).
170 *
171 * Each H(i) has the form
172 *
173 * H(i) = I - tau * v * v'
174 *
175 * where tau is a real scalar, and v is a real vector with
176 * v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
177 * A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1).
178 *
179 * If UPLO = 'L', the matrix Q is represented as a product of
180 * elementary reflectors
181 *
182 * Q = H(1) H(2) . . . H(n-1).
183 *
184 * Each H(i) has the form
185 *
186 * H(i) = I - tau * v * v'
187 *
188 * where tau is a real scalar, and v is a real vector with
189 * v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in
190 * A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1).
191 *
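* (In either case, applying one such reflector to a vector x amounts
* to x := x - tau * v * ( v' * x ); the two-sided update of sub( A )
* performed below is built from this operation.)
*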
192 * The contents of sub( A ) on exit are illustrated by the following
193 * examples with n = 5:
194 *
195 * if UPLO = 'U': if UPLO = 'L':
196 *
197 * ( d e v2 v3 v4 ) ( d )
198 * ( d e v3 v4 ) ( e d )
199 * ( d e v4 ) ( v1 e d )
200 * ( d e ) ( v1 v2 e d )
201 * ( d ) ( v1 v2 v3 e d )
202 *
203 * where d and e denote diagonal and off-diagonal elements of T, and
204 * vi denotes an element of the vector defining H(i).
205 *
206 * Data storage requirements
207 * =========================
208 *
209 * PSSYTTRD is not intended to be called directly. All users are
210 * encouraged to call PSSYTRD, which will then call PSSYTTRD if
211 * appropriate. A must be in cyclic format (i.e. MB = NB = 1),
212 * the process grid must be square ( i.e. NPROW = NPCOL ) and
213 * only lower triangular storage is supported.
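*
* For illustration, a descriptor meeting these requirements could be
* created with a call such as (LLDA >= the local row count):
*    CALL DESCINIT( DESCA, N, N, 1, 1, 0, 0, ICTXT, LLDA, INFO )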
214 *
215 * Local variables
216 * ===============
217 *
218 * PSSYTTRD uses five local arrays:
219 * WORK ( InV ) dimension ( NP, ANB+1): array V
220 * WORK ( InH ) dimension ( NP, ANB+1): array H
221 * WORK ( InVT ) dimension ( NQ, ANB+1): transpose of the array V
222 * WORK ( InHT ) dimension ( NQ, ANB+1): transpose of the array H
223 * WORK ( InVTT ) dimension ( NQ, 1): transpose of the array VT
224 *
225 * Arrays V and H are replicated across all processor columns.
226 * Arrays V^T and H^T are replicated across all processor rows.
227 *
228 * WORK ( InVT ), or V^T, is stored as a tall skinny
229 * array ( NQ x ANB-1 ) for efficiency. Since only the lower
230 * triangular portion of A is updated, Av is computed as:
231 * tril(A) * v + v^T * tril(A,-1). This is performed as
232 * two local triangular matrix-vector multiplications (both in
233 * MVR2) followed by a transpose and a sum across the columns.
234 * In the local computation, WORK( InVT ) is used to compute
235 * tril(A) * v and WORK( InV ) is used to compute
236 * v^T * tril(A,-1)
237 *
238 * The following variables are global indices into A:
239 * INDEX: The current global row and column number.
240 * MAXINDEX: The global row and column for the first row and
241 * column in the trailing block of A.
242 * LIIB, LIJB: The first local row, column in the trailing block of A.
243 *
244 * The following variables point into the arrays A, V, H, V^T, H^T:
245 * BINDEX = INDEX-MININDEX: The column index in V, H, V^T, H^T.
246 * LII: local index I: The local row number for row INDEX
247 * LIJ: local index J: The local column number for column INDEX
248 * LIIP1: local index I+1: The local row number for row INDEX+1
249 * LIJP1: local index J+1: The local col number for col INDEX+1
250 * LTLI: lower triangular local index I: The local row for the
251 * upper left entry in tril( A(INDEX, INDEX) )
252 * LTLIP1: lower triangular local index I+1: The local row for the
253 * upper left entry in tril( A(INDEX+1, INDEX+1) )
254 *
255 * Details: The distinction between LII and LTLI (and between
256 * LIIP1 and LTLIP1) is subtle. Within the current processor
257 * column (i.e. MYCOL .eq. CURCOL) they are the same. However,
258 * on some processors, A( LII, LIJ ) points to an element
259 * above the diagonal; on these processors, LTLI = LII+1.
260 *
261 * The following variables give the number of rows and/or columns
262 * in various matrices:
263 * NP: The number of local rows in A( 1:N, 1:N )
264 * NQ: The number of local columns in A( 1:N, 1:N )
265 * NPM0: The number of local rows in A( INDEX:N, INDEX:N )
266 * NQM0: The number of local columns in A( INDEX:N, INDEX:N )
267 * NPM1: The number of local rows in A( INDEX+1:N, INDEX:N )
268 * NQM1: The number of local columns in A( INDEX+1:N, INDEX:N )
269 * LTNM0: The number of local rows & columns in
270 * tril( A( INDEX:N, INDEX:N ) )
271 * LTNM1: The number of local rows & columns in
272 * tril( A( INDEX+1:N, INDEX+1:N ) )
273 * NOTE: LTNM0 == LTNM1 on all processors except the diagonal
274 * processors, i.e. those where MYCOL == MYROW.
275 *
276 * Invariants:
277 * NP = NPM0 + LII - 1
278 * NQ = NQM0 + LIJ - 1
279 * NP = NPM1 + LIIP1 - 1
280 * NQ = NQM1 + LIJP1 - 1
281 * NP = LTLI + LTNM0 - 1
282 * NP = LTLIP1 + LTNM1 - 1
283 *
284 * Temporary variables. The following variables are used within
285 * a few lines after they are set and do not hold state from one
286 * loop iteration to the next:
287 *
288 * The matrix A:
289 * The matrix A does not hold the same values that it would
290 * in an unblocked code nor the values that it would hold
291 * in a blocked code.
292 *
293 * The contents of A are easiest to describe in terms of the
294 * difference between trueA and A at the point that MVR2 is called,
295 * so we will start there.
296 *
297 * Let trueA be the value that A would
298 * have at a given point in an unblocked code and A
299 * be the value that A has in this code at the same point.
300 *
301 * At the time of the call to MVR2,
302 * trueA = A + V' * H + H' * V
303 * where H = H( MAXINDEX:N, 1:BINDEX ) and
304 * V = V( MAXINDEX:N, 1:BINDEX ).
305 *
306 * At the bottom of the inner loop,
307 * trueA = A + V' * H + H' * V + v' * h + h' * v
308 * where H = H( MAXINDEX:N, 1:BINDEX ) and
309 * V = V( MAXINDEX:N, 1:BINDEX ) and
310 * v = V( liip1:N, BINDEX+1 ) and
311 * h = H( liip1:N, BINDEX+1 )
312 *
313 * At the top of the loop, BINDEX gets incremented, hence:
314 * trueA = A + V' * H + H' * V + v' * h + h' * v
315 * where H = H( MAXINDEX:N, 1:BINDEX-1 ) and
316 * V = V( MAXINDEX:N, 1:BINDEX-1 ) and
317 * v = V( liip1:N, BINDEX ) and
318 * h = H( liip1:N, BINDEX )
319 *
320 *
321 * A gets updated at the bottom of the outer loop
322 * After this update, trueA = A + v' * h + h' * v
323 * where v = V( liip1:N, BINDEX ) and
324 * h = H( liip1:N, BINDEX ) and BINDEX = 0
325 * Indeed, the previous loop invariant as stated above for the
326 * top of the loop still holds, but with BINDEX = 0, H and V
327 * are null matrices.
328 *
329 * After the current column of A is updated,
330 * trueA( INDEX, INDEX:N ) = A( INDEX, INDEX:N )
331 * the rest of A is untouched.
332 *
333 * After the current block column of A is updated,
334 * trueA = A + V' * H + H' * V
335 * where H = H( MAXINDEX:N, 1:BINDEX ) and
336 * V = V( MAXINDEX:N, 1:BINDEX )
337 *
338 * This brings us back to the point at which mvr2 is called.
339 *
340 *
341 * Details of the parallelization:
342 *
343 * We delay spreading v across to all processor columns (which
344 * would naturally happen at the bottom of the loop) in order to
345 * combine the spread of v( : , i-1 ) with the spread of h( : , i )
346 *
347 * In order to compute h( :, i ), we must update A( :, i )
348 * which means that the processor column owning A( :, i ) must
349 * have: c, tau, v( i, i ) and h( i, i ).
350 *
351 * The traditional
352 * way of computing v (and the one used in pzlatrd.f and
353 * zlatrd.f) is:
354 * v = tau * v
355 * c = v' * h
356 * alpha = - tau * c / 2
357 * v = v + alpha * h
358 * However, the traditional way of computing v requires that tau
359 * be broadcast to all processors in the current column (to compute
360 * v = tau * v) and then a sum-to-all is required (to
361 * compute v' * h ). We use the following formula instead:
362 * c = v' * h
363 * v = tau * ( v - c * tau' * h / 2 )
364 * The above formula allows tau to be spread down in the
365 * same call to SGSUM2D which performs the sum-to-all of c.
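*
*     (To see the equivalence for real data, where tau' = tau: the
*     traditional path scales w = tau * v, forms c_t = w' * h = tau * c
*     and alpha = - tau * c_t / 2, so that
*        w + alpha * h = tau * v - ( tau * c * tau / 2 ) * h
*                      = tau * ( v - c * tau' * h / 2 ),
*     which is exactly the fused formula above.)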
366 *
367 * The computation of v, which could be performed in any processor
368 * column (or other processor subsets), is performed in the
369 * processor column that owns A( :, i+1 ) so that A( :, i+1 )
370 * can be updated prior to spreading v across.
371 *
372 * We keep the block column of A up-to-date to minimize the
373 * work required in updating the current column of A. Updating
374 * the block column of A is reasonably load balanced whereas
375 * updating the current column of A is not (only the current
376 * processor column is involved).
377 *
378 * In the following overview of the steps performed, M in the
379 * margin indicates message traffic and C indicates O(n^2 nb/sqrt(p))
380 * or more flops per processor.
381 *
382 * Inner loop:
383 * A( index:n, index ) -= ( v * ht(bindex) + h * vt( bindex) )
384 *M h = house( A(index:n, index) )
385 *M Spread v, h across
386 *M vt = v^T; ht = h^T
387 * A( index+1:n, index+1:maxindex ) -=
388 * ( v * ht(index+1:maxindex) + h * vt(index+1:maxindex) )
389 *C v = tril(A) * h; vt = ht * tril(A,-1)
390 *MorC v = v - H*V*h - V*H*h
391 *M v = v + vt^T
392 *M c = v' * h
393 * v = tau * ( v - c * tau' * h / 2 )
394 *C A = A - H*V - V*H
395 *
396 *
397 *
398 * =================================================================
399 *
400 * .. Parameters ..
401  INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
402  $ mb_, nb_, rsrc_, csrc_, lld_
403  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
404  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
405  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
406  REAL ONE
407  parameter( one = 1.0e0 )
408  REAL Z_ONE, Z_NEGONE, Z_ZERO
409  parameter( z_one = 1.0e0, z_negone = -1.0e0,
410  $ z_zero = 0.0e0 )
411  REAL ZERO
412  parameter( zero = 0.0e+0 )
413 * ..
414 *
415 *
416 * .. Local Scalars ..
417 *
418 *
419  LOGICAL BALANCED, INTERLEAVE, TWOGEMMS, UPPER
420  INTEGER ANB, BINDEX, CURCOL, CURROW, I, ICTXT, INDEX,
421  $ indexa, indexinh, indexinv, inh, inhb, inht,
422  $ inhtb, intmp, inv, invb, invt, invtb, j, lda,
423  $ ldv, ldzg, lii, liib, liip1, lij, lijb, lijp1,
424  $ ltlip1, ltnm1, lwmin, maxindex, minindex,
425  $ mycol, myfirstrow, myrow, mysetnum, nbzg, np,
426  $ npb, npcol, npm0, npm1, nprow, nps, npset, nq,
427  $ nqb, nqm1, numrows, nxtcol, nxtrow, pbmax,
428  $ pbmin, pbsize, pnb, rowsperproc
429  REAL ALPHA, BETA, C, NORM, ONEOVERBETA, SAFMAX,
430  $ safmin, toph, topnv, toptau, topv, ttoph, ttopv
431 * ..
432 * .. Local Arrays ..
433 *
434 *
435 *
436 *
437  INTEGER IDUM1( 1 ), IDUM2( 1 )
438  REAL CC( 3 ), DTMP( 5 )
439 * ..
440 * .. External Subroutines ..
441  EXTERNAL blacs_gridinfo, chk1mat, pchk1mat, pstreecomb,
442  $ pxerbla, scombnrm2, sgebr2d, sgebs2d, sgemm,
443  $ sgemv, sgerv2d, sgesd2d, sgsum2d, slamov,
444  $ sscal, strmvt
445 * ..
446 * .. External Functions ..
447 *
448  LOGICAL LSAME
449  INTEGER ICEIL, NUMROC, PJLAENV
450  REAL PSLAMCH, SNRM2
451  EXTERNAL lsame, iceil, numroc, pjlaenv, pslamch, snrm2
452 * ..
453 * .. Intrinsic Functions ..
454  INTRINSIC ichar, max, min, mod, real, sign, sqrt
455 * ..
456 *
457 *
458 * .. Executable Statements ..
459 * This is just to keep ftnchek and toolpack/1 happy
460  IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
461  $ rsrc_.LT.0 )RETURN
462 *
463 *
464 *
465 * Further details
466 * ===============
467 *
468 * At the top of the loop, v and nh have been computed but not
469 * spread across. Hence, A is out-of-date even after the
470 * rank 2k update. Furthermore, we compute the next v before
471 * nh is spread across.
472 *
473 * I claim that if we used a sum-to-all on NV, by summing CC within
474 * each column, that we could compute NV locally and could avoid
475 * spreading V across. Bruce claims that sum-to-all can be made
476 * to cost no more than sum-to-one on the Paragon. If that is
477 * true, this would be a win. But,
478 * the BLACS sum-to-all is just a sum-to-one followed by a broadcast,
479 * and hence the present scheme is better for now.
480 *
481 * Get grid parameters
482 *
483  ictxt = desca( ctxt_ )
484  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
485 *
486  safmax = sqrt( pslamch( ictxt, 'O' ) ) / n
487  safmin = sqrt( pslamch( ictxt, 'S' ) )
488 *
489 * Test the input parameters
490 *
491  info = 0
492  IF( nprow.EQ.-1 ) THEN
493  info = -( 600+ctxt_ )
494  ELSE
495 *
496 * Here we set execution options for PSSYTTRD
497 *
498  pnb = pjlaenv( ictxt, 2, 'PSSYTTRD', 'L', 0, 0, 0, 0 )
499  anb = pjlaenv( ictxt, 3, 'PSSYTTRD', 'L', 0, 0, 0, 0 )
500 *
501  interleave = ( pjlaenv( ictxt, 4, 'PSSYTTRD', 'L', 1, 0, 0,
502  $ 0 ).EQ.1 )
503  twogemms = ( pjlaenv( ictxt, 4, 'PSSYTTRD', 'L', 2, 0, 0,
504  $ 0 ).EQ.1 )
505  balanced = ( pjlaenv( ictxt, 4, 'PSSYTTRD', 'L', 3, 0, 0,
506  $ 0 ).EQ.1 )
507 *
508  CALL chk1mat( n, 2, n, 2, ia, ja, desca, 6, info )
509 *
510 *
511  upper = lsame( uplo, 'U' )
512  IF( info.EQ.0 .AND. desca( nb_ ).NE.1 )
513  $ info = 600 + nb_
514  IF( info.EQ.0 ) THEN
515 *
516 *
517 * Here is the arithmetic:
518 * Let maxnpq = max( np, nq, 2 * ANB )
519 * LDV = 4 * max( np, nq ) + 2
520 * LWMIN = 2 * ( ANB + 1 ) * LDV + MAX( np, 2 * ANB )
521 * = 2 * ( ANB + 1 ) * ( 4 * NPS + 2 ) + NPS
522 *
523 * This overestimates memory requirements when ANB > NP/2
524 * Memory requirements are lower when interleave = .false.
525 * Hence, we could have two sets of memory requirements,
526 * one for interleave and one for the non-interleaved case.
527 *
528 *
529  nps = max( numroc( n, 1, 0, 0, nprow ), 2*anb )
530  lwmin = 2*( anb+1 )*( 4*nps+2 ) + nps
531 *
532  work( 1 ) = real( lwmin )
533  IF( .NOT.lsame( uplo, 'L' ) ) THEN
534  info = -1
535  ELSE IF( ia.NE.1 ) THEN
536  info = -4
537  ELSE IF( ja.NE.1 ) THEN
538  info = -5
539  ELSE IF( nprow.NE.npcol ) THEN
540  info = -( 600+ctxt_ )
541  ELSE IF( desca( dtype_ ).NE.1 ) THEN
542  info = -( 600+dtype_ )
543  ELSE IF( desca( mb_ ).NE.1 ) THEN
544  info = -( 600+mb_ )
545  ELSE IF( desca( nb_ ).NE.1 ) THEN
546  info = -( 600+nb_ )
547  ELSE IF( desca( rsrc_ ).NE.0 ) THEN
548  info = -( 600+rsrc_ )
549  ELSE IF( desca( csrc_ ).NE.0 ) THEN
550  info = -( 600+csrc_ )
551  ELSE IF( lwork.LT.lwmin ) THEN
552  info = -11
553  END IF
554  END IF
555  IF( upper ) THEN
556  idum1( 1 ) = ichar( 'U' )
557  ELSE
558  idum1( 1 ) = ichar( 'L' )
559  END IF
560  idum2( 1 ) = 1
561 *
562  CALL pchk1mat( n, 2, n, 2, ia, ja, desca, 6, 1, idum1, idum2,
563  $ info )
564  END IF
565 *
566  IF( info.NE.0 ) THEN
567  CALL pxerbla( ictxt, 'PSSYTTRD', -info )
568  RETURN
569  END IF
570 *
571 * Quick return if possible
572 *
573  IF( n.EQ.0 )
574  $ RETURN
575 *
576 *
577 *
578 * Reduce the lower triangle of sub( A )
579  np = numroc( n, 1, myrow, 0, nprow )
580  nq = numroc( n, 1, mycol, 0, npcol )
581 *
582  nxtrow = 0
583  nxtcol = 0
584 *
585  liip1 = 1
586  lijp1 = 1
587  npm1 = np
588  nqm1 = nq
589 *
590  lda = desca( lld_ )
591  ictxt = desca( ctxt_ )
592 *
593 *
594 *
595 * Miscellaneous details:
596 * Put tau, D and E in the right places
597 * Check signs
598 * Place all the arrays in WORK, control their placement
599 * in memory.
600 *
601 *
602 *
603 * Loop invariants
604 * A(LIIP1, LIJ) points to the first element of A(I+1,J)
605 * NPM1,NQM1 = the number of rows, cols in A( LII+1:N,LIJ+1:N )
606 * A(LII:N,LIJ:N) is one step out of date.
607 * proc( CURROW, CURCOL ) owns A(LII,LIJ)
608 * proc( NXTROW, CURCOL ) owns A(LIIP1,LIJ)
609 *
610  inh = 1
611 *
612  IF( interleave ) THEN
613 *
614 * H and V are interleaved to minimize memory movement
615 * LDV has to be twice as large to accommodate interleaving.
616 * In addition, LDV is doubled again to allow v, h and
617 * toptau to be spread across and transposed in a
618 * single communication operation with minimum memory
619 * movement.
620 *
621 * We could reduce LDV back to 2*MAX(NPM1,NQM1)
622 * by increasing the memory movement required in
623 * the spread and transpose of v, h and toptau.
624 * However, since the non-interleaved path already
625 * provides a near-minimum memory requirement option,
626 * we did not provide this additional path.
627 *
628  ldv = 4*( max( npm1, nqm1 ) ) + 2
629 *
630  inh = 1
631 *
632  inv = inh + ldv / 2
633  invt = inh + ( anb+1 )*ldv
634 *
635  inht = invt + ldv / 2
636  intmp = invt + ldv*( anb+1 )
637 *
638  ELSE
639  ldv = max( npm1, nqm1 )
640 *
641  inht = inh + ldv*( anb+1 )
642  inv = inht + ldv*( anb+1 )
643 *
644 * The code works without this +1, but only because of a
645 * coincidence. Without the +1, WORK(INVT) gets trashed, but
646 * WORK(INVT) is only used once and when it is used, it is
647 * multiplied by WORK( INH ) which is zero. Hence, the fact
648 * that WORK(INVT) is trashed has no effect.
649 *
650  invt = inv + ldv*( anb+1 ) + 1
651  intmp = invt + ldv*( 2*anb )
652 *
653  END IF
654 *
655  IF( info.NE.0 ) THEN
656  CALL pxerbla( ictxt, 'PSSYTTRD', -info )
657  work( 1 ) = real( lwmin )
658  RETURN
659  END IF
660 *
661 *
662 * This satisfies the loop invariant: trueA = A - V * HT - H * VT,
663 * (where V, H, VT and HT all have BINDEX+1 rows/columns)
664 * the first ANB times through the loop.
665 *
666 *
667 *
668 * Setting either ( InH and InHT ) or InV to Z_ZERO
669 * is adequate except in the face of NaNs.
670 *
671 *
672  DO 10 i = 1, np
673  work( inh+i-1 ) = z_zero
674  work( inv+i-1 ) = z_zero
675  10 CONTINUE
676  DO 20 i = 1, nq
677  work( inht+i-1 ) = z_zero
678  20 CONTINUE
679 *
680 *
681 *
682  topnv = z_zero
683 *
684  ltlip1 = lijp1
685  ltnm1 = npm1
686  IF( mycol.GT.myrow ) THEN
687  ltlip1 = ltlip1 + 1
688  ltnm1 = ltnm1 - 1
689  END IF
690 *
691 *
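*     Outer loop over panels of ANB columns; MININDEX:MAXINDEX is the
*     current panel and LIIB, LIJB are the first local row and column
*     of the trailing block that follows it.
*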
692  DO 210 minindex = 1, n - 1, anb
693 *
694 *
695  maxindex = min( minindex+anb-1, n )
696  lijb = numroc( maxindex, 1, mycol, 0, npcol ) + 1
697  liib = numroc( maxindex, 1, myrow, 0, nprow ) + 1
698 *
699  nqb = nq - lijb + 1
700  npb = np - liib + 1
701  inhtb = inht + lijb - 1
702  invtb = invt + lijb - 1
703  inhb = inh + liib - 1
704  invb = inv + liib - 1
705 *
706 *
707 *
708 *
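*        Inner loop over the columns of the current panel: each pass
*        forms one Householder vector and the new v, h (and their
*        transposes) for column INDEX.
*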
709  DO 160 index = minindex, min( maxindex, n-1 )
710 *
711  bindex = index - minindex
712 *
713  currow = nxtrow
714  curcol = nxtcol
715 *
716  nxtrow = mod( currow+1, nprow )
717  nxtcol = mod( curcol+1, npcol )
718 *
719  lii = liip1
720  lij = lijp1
721  npm0 = npm1
722 *
723  IF( myrow.EQ.currow ) THEN
724  npm1 = npm1 - 1
725  liip1 = liip1 + 1
726  END IF
727  IF( mycol.EQ.curcol ) THEN
728  nqm1 = nqm1 - 1
729  lijp1 = lijp1 + 1
730  ltlip1 = ltlip1 + 1
731  ltnm1 = ltnm1 - 1
732  END IF
733 *
734 *
735 *
736 *
737 * V = NV, VT = NVT, H = NH, HT = NHT
738 *
739 *
740 * Update the current column of A
741 *
742 *
743  IF( mycol.EQ.curcol ) THEN
744 *
745  indexa = lii + ( lij-1 )*lda
746  indexinv = inv + lii - 1 + ( bindex-1 )*ldv
747  indexinh = inh + lii - 1 + ( bindex-1 )*ldv
748  ttoph = work( inht+lij-1+bindex*ldv )
749  ttopv = topnv
750 *
751  IF( index.GT.1 ) THEN
752  DO 30 i = 0, npm0 - 1
753 * A( INDEXA+I ) = A( INDEXA+I )
754  a( indexa+i ) = a( indexa+i ) -
755  $ work( indexinv+ldv+i )*ttoph -
756  $ work( indexinh+ldv+i )*ttopv
757  30 CONTINUE
758  END IF
759 *
760 *
761  END IF
762 *
763 *
764  IF( mycol.EQ.curcol ) THEN
765 *
766 * Compute the householder vector
767 *
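*           DTMP(1:5) is assembled for a single column reduction
*           (SGSUM2D below): the local contribution to the norm, the
*           diagonal element A(i,i), the subdiagonal element A(i+1,i),
*           an unused slot, and a flag requesting the overflow-safe
*           PSTREECOMB path when the local norm is out of range.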
768  IF( myrow.EQ.currow ) THEN
769  dtmp( 2 ) = a( lii+( lij-1 )*lda )
770  ELSE
771  dtmp( 2 ) = zero
772  END IF
773  IF( myrow.EQ.nxtrow ) THEN
774  dtmp( 3 ) = a( liip1+( lij-1 )*lda )
775  dtmp( 4 ) = zero
776  ELSE
777  dtmp( 3 ) = zero
778  dtmp( 4 ) = zero
779  END IF
780 *
781  norm = snrm2( npm1, a( liip1+( lij-1 )*lda ), 1 )
782  dtmp( 1 ) = norm
783 *
784 * IF DTMP(5) = 1.0, NORM is too large or too small and squaring it
785 * might cause overflow or underflow, hence PSTREECOMB must be
786 * called. IF DTMP(5) is zero on output, DTMP(1) can be trusted.
787 *
788  dtmp( 5 ) = zero
789  IF( dtmp( 1 ).GE.safmax .OR. dtmp( 1 ).LT.safmin ) THEN
790  dtmp( 5 ) = one
791  dtmp( 1 ) = zero
792  END IF
793 *
794  dtmp( 1 ) = dtmp( 1 )*dtmp( 1 )
795  CALL sgsum2d( ictxt, 'C', ' ', 5, 1, dtmp, 5, -1,
796  $ curcol )
797  IF( dtmp( 5 ).EQ.zero ) THEN
798  dtmp( 1 ) = sqrt( dtmp( 1 ) )
799  ELSE
800  dtmp( 1 ) = norm
801  CALL pstreecomb( ictxt, 'C', 1, dtmp, -1, mycol,
802  $ scombnrm2 )
803  END IF
804 *
805  norm = dtmp( 1 )
806 *
807  d( lij ) = dtmp( 2 )
808  IF( myrow.EQ.currow .AND. mycol.EQ.curcol ) THEN
809  a( lii+( lij-1 )*lda ) = d( lij )
810  END IF
811 *
812 *
813  alpha = dtmp( 3 )
814 *
815  norm = sign( norm, alpha )
816 *
817  IF( norm.EQ.zero ) THEN
818  toptau = zero
819  ELSE
820  beta = norm + alpha
821  toptau = beta / norm
822  oneoverbeta = 1.0e0 / beta
823 *
824  CALL sscal( npm1, oneoverbeta,
825  $ a( liip1+( lij-1 )*lda ), 1 )
826  END IF
827 *
828  IF( myrow.EQ.nxtrow ) THEN
829  a( liip1+( lij-1 )*lda ) = z_one
830  END IF
831 *
832  tau( lij ) = toptau
833  e( lij ) = -norm
834 *
835  END IF
836 *
837 *
838 * Spread v, nh, toptau across
839 *
840  DO 40 i = 0, npm1 - 1
841  work( inv+liip1-1+bindex*ldv+npm1+i ) = a( liip1+i+
842  $ ( lij-1 )*lda )
843  40 CONTINUE
844 *
845  IF( mycol.EQ.curcol ) THEN
846  work( inv+liip1-1+bindex*ldv+npm1+npm1 ) = toptau
847  CALL sgebs2d( ictxt, 'R', ' ', npm1+npm1+1, 1,
848  $ work( inv+liip1-1+bindex*ldv ),
849  $ npm1+npm1+1 )
850  ELSE
851  CALL sgebr2d( ictxt, 'R', ' ', npm1+npm1+1, 1,
852  $ work( inv+liip1-1+bindex*ldv ),
853  $ npm1+npm1+1, myrow, curcol )
854  toptau = work( inv+liip1-1+bindex*ldv+npm1+npm1 )
855  END IF
856  DO 50 i = 0, npm1 - 1
857  work( inh+liip1-1+( bindex+1 )*ldv+i ) = work( inv+liip1-
858  $ 1+bindex*ldv+npm1+i )
859  50 CONTINUE
860 *
861  IF( index.LT.n ) THEN
862  IF( myrow.EQ.nxtrow .AND. mycol.EQ.curcol )
863  $ a( liip1+( lij-1 )*lda ) = e( lij )
864  END IF
865 *
866 * Transpose v, nh
867 *
868 *
869  IF( myrow.EQ.mycol ) THEN
870  DO 60 i = 0, npm1 + npm1
871  work( invt+lijp1-1+bindex*ldv+i ) = work( inv+liip1-1+
872  $ bindex*ldv+i )
873  60 CONTINUE
874  ELSE
875  CALL sgesd2d( ictxt, npm1+npm1, 1,
876  $ work( inv+liip1-1+bindex*ldv ), npm1+npm1,
877  $ mycol, myrow )
878  CALL sgerv2d( ictxt, nqm1+nqm1, 1,
879  $ work( invt+lijp1-1+bindex*ldv ), nqm1+nqm1,
880  $ mycol, myrow )
881  END IF
882 *
883  DO 70 i = 0, nqm1 - 1
884  work( inht+lijp1-1+( bindex+1 )*ldv+i ) = work( invt+
885  $ lijp1-1+bindex*ldv+nqm1+i )
886  70 CONTINUE
887 *
888 *
889 * Update the current block column of A
890 *
891  IF( index.GT.1 ) THEN
892  DO 90 j = lijp1, lijb - 1
893  DO 80 i = 0, npm1 - 1
894 *
895  a( liip1+i+( j-1 )*lda ) = a( liip1+i+( j-1 )*lda )
896  $ - work( inv+liip1-1+bindex*ldv+i )*
897  $ work( inht+j-1+bindex*ldv ) -
898  $ work( inh+liip1-1+bindex*ldv+i )*
899  $ work( invt+j-1+bindex*ldv )
900  80 CONTINUE
901  90 CONTINUE
902  END IF
903 *
904 *
905 *
906 * Compute NV = A * NHT; NVT = A * NH
907 *
908 * These two lines are necessary because these elements
909 * are not always involved in the calls to STRMVT
910 * for two reasons:
911 * 1) On diagonal processors, the call to STRMVT
912 * involves only LTNM1-1 elements
913 * 2) On some processes, NQM1 < LTNM1 or LIIP1 < LTLIP1
914 * and when the results are combined across all processes,
915 * uninitialized values may be included.
916  work( inv+liip1-1+( bindex+1 )*ldv ) = z_zero
917  work( invt+lijp1-1+( bindex+1 )*ldv+nqm1-1 ) = z_zero
918 *
919 *
920  IF( myrow.EQ.mycol ) THEN
921  IF( ltnm1.GT.1 ) THEN
922  CALL strmvt( 'L', ltnm1-1,
923  $ a( ltlip1+1+( lijp1-1 )*lda ), lda,
924  $ work( invt+lijp1-1+( bindex+1 )*ldv ), 1,
925  $ work( inh+ltlip1+1-1+( bindex+1 )*ldv ),
926  $ 1, work( inv+ltlip1+1-1+( bindex+1 )*
927  $ ldv ), 1, work( inht+lijp1-1+( bindex+
928  $ 1 )*ldv ), 1 )
929  END IF
930  DO 100 i = 1, ltnm1
931  work( invt+lijp1+i-1-1+( bindex+1 )*ldv )
932  $ = work( invt+lijp1+i-1-1+( bindex+1 )*ldv ) +
933  $ a( ltlip1+i-1+( lijp1+i-1-1 )*lda )*
934  $ work( inh+ltlip1+i-1-1+( bindex+1 )*ldv )
935  100 CONTINUE
936  ELSE
937  IF( ltnm1.GT.0 )
938  $ CALL strmvt( 'L', ltnm1, a( ltlip1+( lijp1-1 )*lda ),
939  $ lda, work( invt+lijp1-1+( bindex+1 )*
940  $ ldv ), 1, work( inh+ltlip1-1+( bindex+
941  $ 1 )*ldv ), 1, work( inv+ltlip1-1+
942  $ ( bindex+1 )*ldv ), 1,
943  $ work( inht+lijp1-1+( bindex+1 )*ldv ),
944  $ 1 )
945 *
946  END IF
947 *
948 *
949 * We take advantage of the fact that:
950 * A * sum( B ) = sum ( A * B ) for matrices A,B
951 *
952 * trueA = A + V * HT + H * VT
953 * hence: (trueA)v = Av + V * HT * v + H * VT * v
954 * VT * v = sum_p_in_NPROW ( VTp * v )
955 * H * VT * v = H * sum (VTp * v) = sum ( H * VTp * v )
956 *
957 * v = v + V * HT * h + H * VT * h
958 *
959 *
960 *
961 * tmp = HT * nh1
962  DO 110 i = 1, 2*( bindex+1 )
963  work( intmp-1+i ) = 0
964  110 CONTINUE
965 *
966  IF( balanced ) THEN
967  npset = nprow
968  mysetnum = myrow
969  rowsperproc = iceil( nqb, npset )
970  myfirstrow = min( nqb+1, 1+rowsperproc*mysetnum )
971  numrows = min( rowsperproc, nqb-myfirstrow+1 )
972 *
973 *
974 * tmp = HT * v
975 *
976  CALL sgemv( 'C', numrows, bindex+1, z_one,
977  $ work( inhtb+myfirstrow-1 ), ldv,
978  $ work( inhtb+myfirstrow-1+( bindex+1 )*ldv ),
979  $ 1, z_zero, work( intmp ), 1 )
980 * tmp2 = VT * v
981  CALL sgemv( 'C', numrows, bindex+1, z_one,
982  $ work( invtb+myfirstrow-1 ), ldv,
983  $ work( inhtb+myfirstrow-1+( bindex+1 )*ldv ),
984  $ 1, z_zero, work( intmp+bindex+1 ), 1 )
985 *
986 *
987  CALL sgsum2d( ictxt, 'C', ' ', 2*( bindex+1 ), 1,
988  $ work( intmp ), 2*( bindex+1 ), -1, -1 )
989  ELSE
990 * tmp = HT * v
991 *
992  CALL sgemv( 'C', nqb, bindex+1, z_one, work( inhtb ),
993  $ ldv, work( inhtb+( bindex+1 )*ldv ), 1,
994  $ z_zero, work( intmp ), 1 )
995 * tmp2 = VT * v
996  CALL sgemv( 'C', nqb, bindex+1, z_one, work( invtb ),
997  $ ldv, work( inhtb+( bindex+1 )*ldv ), 1,
998  $ z_zero, work( intmp+bindex+1 ), 1 )
999 *
1000  END IF
1001 *
1002 *
1003 *
1004  IF( balanced ) THEN
1005  mysetnum = mycol
1006 *
1007  rowsperproc = iceil( npb, npset )
1008  myfirstrow = min( npb+1, 1+rowsperproc*mysetnum )
1009  numrows = min( rowsperproc, npb-myfirstrow+1 )
1010 *
1011  CALL sgsum2d( ictxt, 'R', ' ', 2*( bindex+1 ), 1,
1012  $ work( intmp ), 2*( bindex+1 ), -1, -1 )
1013 *
1014 *
1015 * v = v + V * tmp
1016  IF( index.GT.1 ) THEN
1017  CALL sgemv( 'N', numrows, bindex+1, z_negone,
1018  $ work( invb+myfirstrow-1 ), ldv,
1019  $ work( intmp ), 1, z_one,
1020  $ work( invb+myfirstrow-1+( bindex+1 )*
1021  $ ldv ), 1 )
1022 *
1023 * v = v + H * tmp2
1024  CALL sgemv( 'N', numrows, bindex+1, z_negone,
1025  $ work( inhb+myfirstrow-1 ), ldv,
1026  $ work( intmp+bindex+1 ), 1, z_one,
1027  $ work( invb+myfirstrow-1+( bindex+1 )*
1028  $ ldv ), 1 )
1029  END IF
1030 *
1031  ELSE
1032 * v = v + V * tmp
1033  CALL sgemv( 'N', npb, bindex+1, z_negone, work( invb ),
1034  $ ldv, work( intmp ), 1, z_one,
1035  $ work( invb+( bindex+1 )*ldv ), 1 )
1036 *
1037 *
1038 * v = v + H * tmp2
1039  CALL sgemv( 'N', npb, bindex+1, z_negone, work( inhb ),
1040  $ ldv, work( intmp+bindex+1 ), 1, z_one,
1041  $ work( invb+( bindex+1 )*ldv ), 1 )
1042 *
1043  END IF
1044 *
1045 *
1046 * Transpose NV and add it back into NVT
1047 *
1048  IF( myrow.EQ.mycol ) THEN
1049  DO 120 i = 0, nqm1 - 1
1050  work( intmp+i ) = work( invt+lijp1-1+( bindex+1 )*ldv+
1051  $ i )
1052  120 CONTINUE
1053  ELSE
1054  CALL sgesd2d( ictxt, nqm1, 1,
1055  $ work( invt+lijp1-1+( bindex+1 )*ldv ),
1056  $ nqm1, mycol, myrow )
1057  CALL sgerv2d( ictxt, npm1, 1, work( intmp ), npm1, mycol,
1058  $ myrow )
1059 *
1060  END IF
1061  DO 130 i = 0, npm1 - 1
1062  work( inv+liip1-1+( bindex+1 )*ldv+i ) = work( inv+liip1-
1063  $ 1+( bindex+1 )*ldv+i ) + work( intmp+i )
1064  130 CONTINUE
1065 *
1066 * Sum-to-one NV rowwise (within a row)
1067 *
1068  CALL sgsum2d( ictxt, 'R', ' ', npm1, 1,
1069  $ work( inv+liip1-1+( bindex+1 )*ldv ), npm1,
1070  $ myrow, nxtcol )
1071 *
1072 *
1073 * Dot product c = NV * NH
1074 * Sum-to-all c within next processor column
1075 *
1076 *
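* CC(1) accumulates the local contribution to c = v' * h, while
* CC(2) and CC(3) carry the leading entries of v and h so that the
* column sum below delivers them to every process in the column.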
1077  IF( mycol.EQ.nxtcol ) THEN
1078  cc( 1 ) = z_zero
1079  DO 140 i = 0, npm1 - 1
1080  cc( 1 ) = cc( 1 ) + work( inv+liip1-1+( bindex+1 )*
1081  $ ldv+i )*work( inh+liip1-1+( bindex+1 )*ldv+
1082  $ i )
1083  140 CONTINUE
1084  IF( myrow.EQ.nxtrow ) THEN
1085  cc( 2 ) = work( inv+liip1-1+( bindex+1 )*ldv )
1086  cc( 3 ) = work( inh+liip1-1+( bindex+1 )*ldv )
1087  ELSE
1088  cc( 2 ) = z_zero
1089  cc( 3 ) = z_zero
1090  END IF
1091  CALL sgsum2d( ictxt, 'C', ' ', 3, 1, cc, 3, -1, nxtcol )
1092 *
1093  topv = cc( 2 )
1094  c = cc( 1 )
1095  toph = cc( 3 )
1096 *
1097  topnv = toptau*( topv-c*toptau / 2*toph )
1098 *
1099 *
1100 * Compute V = Tau * (V - C * Tau' / 2 * H )
1101 *
1102 *
1103  DO 150 i = 0, npm1 - 1
1104  work( inv+liip1-1+( bindex+1 )*ldv+i ) = toptau*
1105  $ ( work( inv+liip1-1+( bindex+1 )*ldv+i )-c*toptau /
1106  $ 2*work( inh+liip1-1+( bindex+1 )*ldv+i ) )
1107  150 CONTINUE
1108 *
1109  END IF
1110 *
1111 *
1112  160 CONTINUE
1113 *
1114 *
1115 * Perform the rank2k update
1116 *
1117  IF( maxindex.LT.n ) THEN
1118 *
1119  DO 170 i = 0, npm1 - 1
1120  work( intmp+i ) = work( inh+liip1-1+anb*ldv+i )
1121  170 CONTINUE
1122 *
1123 *
1124 *
1125  IF( .NOT.twogemms ) THEN
1126  IF( interleave ) THEN
1127  ldzg = ldv / 2
1128  ELSE
1129  CALL slamov( 'A', ltnm1, anb, work( inht+lijp1-1 ),
1130  $ ldv, work( invt+lijp1-1+anb*ldv ), ldv )
1131 *
1132  CALL slamov( 'A', ltnm1, anb, work( inv+ltlip1-1 ),
1133  $ ldv, work( inh+ltlip1-1+anb*ldv ), ldv )
1134  ldzg = ldv
1135  END IF
1136  nbzg = anb*2
1137  ELSE
1138  ldzg = ldv
1139  nbzg = anb
1140  END IF
1141 *
1142 *
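*        Apply the rank-2*ANB update to the trailing triangle in
*        panels of at most PNB rows. When TWOGEMMS is .FALSE., V and H
*        (and their transposes) are laid out as the columns of a
*        single 2*ANB-column matrix, via the interleaved layout or the
*        SLAMOV copies above, so one SGEMM per panel performs both
*        halves of the update.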
1143  DO 180 pbmin = 1, ltnm1, pnb
1144 *
1145  pbsize = min( pnb, ltnm1-pbmin+1 )
1146  pbmax = min( ltnm1, pbmin+pnb-1 )
1147  CALL sgemm( 'N', 'C', pbsize, pbmax, nbzg, z_negone,
1148  $ work( inh+ltlip1-1+pbmin-1 ), ldzg,
1149  $ work( invt+lijp1-1 ), ldzg, z_one,
1150  $ a( ltlip1+pbmin-1+( lijp1-1 )*lda ), lda )
1151  IF( twogemms ) THEN
1152  CALL sgemm( 'N', 'C', pbsize, pbmax, anb, z_negone,
1153  $ work( inv+ltlip1-1+pbmin-1 ), ldzg,
1154  $ work( inht+lijp1-1 ), ldzg, z_one,
1155  $ a( ltlip1+pbmin-1+( lijp1-1 )*lda ), lda )
1156  END IF
1157  180 CONTINUE
1158 *
1159 *
1160 *
1161  DO 190 i = 0, npm1 - 1
1162  work( inv+liip1-1+i ) = work( inv+liip1-1+anb*ldv+i )
1163  work( inh+liip1-1+i ) = work( intmp+i )
1164  190 CONTINUE
1165  DO 200 i = 0, nqm1 - 1
1166  work( inht+lijp1-1+i ) = work( inht+lijp1-1+anb*ldv+i )
1167  200 CONTINUE
1168 *
1169 *
1170  END IF
1171 *
1172 * End of the update A code
1173 *
1174  210 CONTINUE
1175 *
1176  IF( mycol.EQ.nxtcol ) THEN
1177  IF( myrow.EQ.nxtrow ) THEN
1178 *
1179  d( nq ) = a( np+( nq-1 )*lda )
1180 *
1181  CALL sgebs2d( ictxt, 'C', ' ', 1, 1, d( nq ), 1 )
1182  ELSE
1183  CALL sgebr2d( ictxt, 'C', ' ', 1, 1, d( nq ), 1, nxtrow,
1184  $ nxtcol )
1185  END IF
1186  END IF
1187 *
1188 *
1189 *
1190 *
1191  work( 1 ) = real( lwmin )
1192  RETURN
1193 *
1194 * End of PSSYTTRD
1195 *
1196 *
1197  END