LAPACK  3.8.0
LAPACK: Linear Algebra PACKage
dgghd3.f
Go to the documentation of this file.
1 *> \brief \b DGGHD3
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download DGGHD3 + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgghd3.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgghd3.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgghd3.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE DGGHD3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q,
22 * LDQ, Z, LDZ, WORK, LWORK, INFO )
23 *
24 * .. Scalar Arguments ..
25 * CHARACTER COMPQ, COMPZ
26 * INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK
27 * ..
28 * .. Array Arguments ..
29 * DOUBLE PRECISION A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
30 * $ Z( LDZ, * ), WORK( * )
31 * ..
32 *
33 *
34 *> \par Purpose:
35 * =============
36 *>
37 *> \verbatim
38 *>
39 *> DGGHD3 reduces a pair of real matrices (A,B) to generalized upper
40 *> Hessenberg form using orthogonal transformations, where A is a
41 *> general matrix and B is upper triangular. The form of the
42 *> generalized eigenvalue problem is
43 *> A*x = lambda*B*x,
44 *> and B is typically made upper triangular by computing its QR
45 *> factorization and moving the orthogonal matrix Q to the left side
46 *> of the equation.
47 *>
48 *> This subroutine simultaneously reduces A to a Hessenberg matrix H:
49 *> Q**T*A*Z = H
50 *> and transforms B to another upper triangular matrix T:
51 *> Q**T*B*Z = T
52 *> in order to reduce the problem to its standard form
53 *> H*y = lambda*T*y
54 *> where y = Z**T*x.
55 *>
56 *> The orthogonal matrices Q and Z are determined as products of Givens
57 *> rotations. They may either be formed explicitly, or they may be
58 *> postmultiplied into input matrices Q1 and Z1, so that
59 *>
60 *> Q1 * A * Z1**T = (Q1*Q) * H * (Z1*Z)**T
61 *>
62 *> Q1 * B * Z1**T = (Q1*Q) * T * (Z1*Z)**T
63 *>
64 *> If Q1 is the orthogonal matrix from the QR factorization of B in the
65 *> original equation A*x = lambda*B*x, then DGGHD3 reduces the original
66 *> problem to generalized Hessenberg form.
67 *>
68 *> This is a blocked variant of DGGHRD, using matrix-matrix
69 *> multiplications for parts of the computation to enhance performance.
70 *> \endverbatim
71 *
72 * Arguments:
73 * ==========
74 *
75 *> \param[in] COMPQ
76 *> \verbatim
77 *> COMPQ is CHARACTER*1
78 *> = 'N': do not compute Q;
79 *> = 'I': Q is initialized to the unit matrix, and the
80 *> orthogonal matrix Q is returned;
81 *> = 'V': Q must contain an orthogonal matrix Q1 on entry,
82 *> and the product Q1*Q is returned.
83 *> \endverbatim
84 *>
85 *> \param[in] COMPZ
86 *> \verbatim
87 *> COMPZ is CHARACTER*1
88 *> = 'N': do not compute Z;
89 *> = 'I': Z is initialized to the unit matrix, and the
90 *> orthogonal matrix Z is returned;
91 *> = 'V': Z must contain an orthogonal matrix Z1 on entry,
92 *> and the product Z1*Z is returned.
93 *> \endverbatim
94 *>
95 *> \param[in] N
96 *> \verbatim
97 *> N is INTEGER
98 *> The order of the matrices A and B. N >= 0.
99 *> \endverbatim
100 *>
101 *> \param[in] ILO
102 *> \verbatim
103 *> ILO is INTEGER
104 *> \endverbatim
105 *>
106 *> \param[in] IHI
107 *> \verbatim
108 *> IHI is INTEGER
109 *>
110 *> ILO and IHI mark the rows and columns of A which are to be
111 *> reduced. It is assumed that A is already upper triangular
112 *> in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are
113 *> normally set by a previous call to DGGBAL; otherwise they
114 *> should be set to 1 and N respectively.
115 *> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
116 *> \endverbatim
117 *>
118 *> \param[in,out] A
119 *> \verbatim
120 *> A is DOUBLE PRECISION array, dimension (LDA, N)
121 *> On entry, the N-by-N general matrix to be reduced.
122 *> On exit, the upper triangle and the first subdiagonal of A
123 *> are overwritten with the upper Hessenberg matrix H, and the
124 *> rest is set to zero.
125 *> \endverbatim
126 *>
127 *> \param[in] LDA
128 *> \verbatim
129 *> LDA is INTEGER
130 *> The leading dimension of the array A. LDA >= max(1,N).
131 *> \endverbatim
132 *>
133 *> \param[in,out] B
134 *> \verbatim
135 *> B is DOUBLE PRECISION array, dimension (LDB, N)
136 *> On entry, the N-by-N upper triangular matrix B.
137 *> On exit, the upper triangular matrix T = Q**T B Z. The
138 *> elements below the diagonal are set to zero.
139 *> \endverbatim
140 *>
141 *> \param[in] LDB
142 *> \verbatim
143 *> LDB is INTEGER
144 *> The leading dimension of the array B. LDB >= max(1,N).
145 *> \endverbatim
146 *>
147 *> \param[in,out] Q
148 *> \verbatim
149 *> Q is DOUBLE PRECISION array, dimension (LDQ, N)
150 *> On entry, if COMPQ = 'V', the orthogonal matrix Q1,
151 *> typically from the QR factorization of B.
152 *> On exit, if COMPQ='I', the orthogonal matrix Q, and if
153 *> COMPQ = 'V', the product Q1*Q.
154 *> Not referenced if COMPQ='N'.
155 *> \endverbatim
156 *>
157 *> \param[in] LDQ
158 *> \verbatim
159 *> LDQ is INTEGER
160 *> The leading dimension of the array Q.
161 *> LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise.
162 *> \endverbatim
163 *>
164 *> \param[in,out] Z
165 *> \verbatim
166 *> Z is DOUBLE PRECISION array, dimension (LDZ, N)
167 *> On entry, if COMPZ = 'V', the orthogonal matrix Z1.
168 *> On exit, if COMPZ='I', the orthogonal matrix Z, and if
169 *> COMPZ = 'V', the product Z1*Z.
170 *> Not referenced if COMPZ='N'.
171 *> \endverbatim
172 *>
173 *> \param[in] LDZ
174 *> \verbatim
175 *> LDZ is INTEGER
176 *> The leading dimension of the array Z.
177 *> LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise.
178 *> \endverbatim
179 *>
180 *> \param[out] WORK
181 *> \verbatim
182 *> WORK is DOUBLE PRECISION array, dimension (LWORK)
183 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
184 *> \endverbatim
185 *>
186 *> \param[in] LWORK
187 *> \verbatim
188 *> LWORK is INTEGER
189 *> The length of the array WORK. LWORK >= 1.
190 *> For optimum performance LWORK >= 6*N*NB, where NB is the
191 *> optimal blocksize.
192 *>
193 *> If LWORK = -1, then a workspace query is assumed; the routine
194 *> only calculates the optimal size of the WORK array, returns
195 *> this value as the first entry of the WORK array, and no error
196 *> message related to LWORK is issued by XERBLA.
197 *> \endverbatim
198 *>
199 *> \param[out] INFO
200 *> \verbatim
201 *> INFO is INTEGER
202 *> = 0: successful exit.
203 *> < 0: if INFO = -i, the i-th argument had an illegal value.
204 *> \endverbatim
205 *
206 * Authors:
207 * ========
208 *
209 *> \author Univ. of Tennessee
210 *> \author Univ. of California Berkeley
211 *> \author Univ. of Colorado Denver
212 *> \author NAG Ltd.
213 *
214 *> \date January 2015
215 *
216 *> \ingroup doubleOTHERcomputational
217 *
218 *> \par Further Details:
219 * =====================
220 *>
221 *> \verbatim
222 *>
223 *> This routine reduces A to Hessenberg form and maintains B in
224 *> using a blocked variant of Moler and Stewart's original algorithm,
225 *> as described by Kagstrom, Kressner, Quintana-Orti, and Quintana-Orti
226 *> (BIT 2008).
227 *> \endverbatim
228 *>
229 * =====================================================================
230  SUBROUTINE dgghd3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q,
231  $ LDQ, Z, LDZ, WORK, LWORK, INFO )
232 *
233 * -- LAPACK computational routine (version 3.8.0) --
234 * -- LAPACK is a software package provided by Univ. of Tennessee, --
235 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
236 * January 2015
237 *
238  IMPLICIT NONE
239 *
240 * .. Scalar Arguments ..
241  CHARACTER COMPQ, COMPZ
242  INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK
243 * ..
244 * .. Array Arguments ..
245  DOUBLE PRECISION A( lda, * ), B( ldb, * ), Q( ldq, * ),
246  $ z( ldz, * ), work( * )
247 * ..
248 *
249 * =====================================================================
250 *
251 * .. Parameters ..
252  DOUBLE PRECISION ZERO, ONE
253  parameter( zero = 0.0d+0, one = 1.0d+0 )
254 * ..
255 * .. Local Scalars ..
256  LOGICAL BLK22, INITQ, INITZ, LQUERY, WANTQ, WANTZ
257  CHARACTER*1 COMPQ2, COMPZ2
258  INTEGER COLA, I, IERR, J, J0, JCOL, JJ, JROW, K,
259  $ kacc22, len, lwkopt, n2nb, nb, nblst, nbmin,
260  $ nh, nnb, nx, ppw, ppwo, pw, top, topq
261  DOUBLE PRECISION C, C1, C2, S, S1, S2, TEMP, TEMP1, TEMP2, TEMP3
262 * ..
263 * .. External Functions ..
264  LOGICAL LSAME
265  INTEGER ILAENV
266  EXTERNAL ilaenv, lsame
267 * ..
268 * .. External Subroutines ..
269  EXTERNAL dgghrd, dlartg, dlaset, dorm22, drot, dgemm,
270  $ dgemv, dtrmv, dlacpy, xerbla
271 * ..
272 * .. Intrinsic Functions ..
273  INTRINSIC dble, max
274 * ..
275 * .. Executable Statements ..
276 *
277 * Decode and test the input parameters.
278 *
279  info = 0
280  nb = ilaenv( 1, 'DGGHD3', ' ', n, ilo, ihi, -1 )
281  lwkopt = max( 6*n*nb, 1 )
282  work( 1 ) = dble( lwkopt )
283  initq = lsame( compq, 'I' )
284  wantq = initq .OR. lsame( compq, 'V' )
285  initz = lsame( compz, 'I' )
286  wantz = initz .OR. lsame( compz, 'V' )
287  lquery = ( lwork.EQ.-1 )
288 *
289  IF( .NOT.lsame( compq, 'N' ) .AND. .NOT.wantq ) THEN
290  info = -1
291  ELSE IF( .NOT.lsame( compz, 'N' ) .AND. .NOT.wantz ) THEN
292  info = -2
293  ELSE IF( n.LT.0 ) THEN
294  info = -3
295  ELSE IF( ilo.LT.1 ) THEN
296  info = -4
297  ELSE IF( ihi.GT.n .OR. ihi.LT.ilo-1 ) THEN
298  info = -5
299  ELSE IF( lda.LT.max( 1, n ) ) THEN
300  info = -7
301  ELSE IF( ldb.LT.max( 1, n ) ) THEN
302  info = -9
303  ELSE IF( ( wantq .AND. ldq.LT.n ) .OR. ldq.LT.1 ) THEN
304  info = -11
305  ELSE IF( ( wantz .AND. ldz.LT.n ) .OR. ldz.LT.1 ) THEN
306  info = -13
307  ELSE IF( lwork.LT.1 .AND. .NOT.lquery ) THEN
308  info = -15
309  END IF
310  IF( info.NE.0 ) THEN
311  CALL xerbla( 'DGGHD3', -info )
312  RETURN
313  ELSE IF( lquery ) THEN
314  RETURN
315  END IF
316 *
317 * Initialize Q and Z if desired.
318 *
319  IF( initq )
320  $ CALL dlaset( 'All', n, n, zero, one, q, ldq )
321  IF( initz )
322  $ CALL dlaset( 'All', n, n, zero, one, z, ldz )
323 *
324 * Zero out lower triangle of B.
325 *
326  IF( n.GT.1 )
327  $ CALL dlaset( 'Lower', n-1, n-1, zero, zero, b(2, 1), ldb )
328 *
329 * Quick return if possible
330 *
331  nh = ihi - ilo + 1
332  IF( nh.LE.1 ) THEN
333  work( 1 ) = one
334  RETURN
335  END IF
336 *
337 * Determine the blocksize.
338 *
339  nbmin = ilaenv( 2, 'DGGHD3', ' ', n, ilo, ihi, -1 )
340  IF( nb.GT.1 .AND. nb.LT.nh ) THEN
341 *
342 * Determine when to use unblocked instead of blocked code.
343 *
344  nx = max( nb, ilaenv( 3, 'DGGHD3', ' ', n, ilo, ihi, -1 ) )
345  IF( nx.LT.nh ) THEN
346 *
347 * Determine if workspace is large enough for blocked code.
348 *
349  IF( lwork.LT.lwkopt ) THEN
350 *
351 * Not enough workspace to use optimal NB: determine the
352 * minimum value of NB, and reduce NB or force use of
353 * unblocked code.
354 *
355  nbmin = max( 2, ilaenv( 2, 'DGGHD3', ' ', n, ilo, ihi,
356  $ -1 ) )
357  IF( lwork.GE.6*n*nbmin ) THEN
358  nb = lwork / ( 6*n )
359  ELSE
360  nb = 1
361  END IF
362  END IF
363  END IF
364  END IF
365 *
366  IF( nb.LT.nbmin .OR. nb.GE.nh ) THEN
367 *
368 * Use unblocked code below
369 *
370  jcol = ilo
371 *
372  ELSE
373 *
374 * Use blocked code
375 *
376  kacc22 = ilaenv( 16, 'DGGHD3', ' ', n, ilo, ihi, -1 )
377  blk22 = kacc22.EQ.2
378  DO jcol = ilo, ihi-2, nb
379  nnb = min( nb, ihi-jcol-1 )
380 *
381 * Initialize small orthogonal factors that will hold the
382 * accumulated Givens rotations in workspace.
383 * N2NB denotes the number of 2*NNB-by-2*NNB factors
384 * NBLST denotes the (possibly smaller) order of the last
385 * factor.
386 *
387  n2nb = ( ihi-jcol-1 ) / nnb - 1
388  nblst = ihi - jcol - n2nb*nnb
389  CALL dlaset( 'All', nblst, nblst, zero, one, work, nblst )
390  pw = nblst * nblst + 1
391  DO i = 1, n2nb
392  CALL dlaset( 'All', 2*nnb, 2*nnb, zero, one,
393  $ work( pw ), 2*nnb )
394  pw = pw + 4*nnb*nnb
395  END DO
396 *
397 * Reduce columns JCOL:JCOL+NNB-1 of A to Hessenberg form.
398 *
399  DO j = jcol, jcol+nnb-1
400 *
401 * Reduce Jth column of A. Store cosines and sines in Jth
402 * column of A and B, respectively.
403 *
404  DO i = ihi, j+2, -1
405  temp = a( i-1, j )
406  CALL dlartg( temp, a( i, j ), c, s, a( i-1, j ) )
407  a( i, j ) = c
408  b( i, j ) = s
409  END DO
410 *
411 * Accumulate Givens rotations into workspace array.
412 *
413  ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1
414  len = 2 + j - jcol
415  jrow = j + n2nb*nnb + 2
416  DO i = ihi, jrow, -1
417  c = a( i, j )
418  s = b( i, j )
419  DO jj = ppw, ppw+len-1
420  temp = work( jj + nblst )
421  work( jj + nblst ) = c*temp - s*work( jj )
422  work( jj ) = s*temp + c*work( jj )
423  END DO
424  len = len + 1
425  ppw = ppw - nblst - 1
426  END DO
427 *
428  ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb
429  j0 = jrow - nnb
430  DO jrow = j0, j+2, -nnb
431  ppw = ppwo
432  len = 2 + j - jcol
433  DO i = jrow+nnb-1, jrow, -1
434  c = a( i, j )
435  s = b( i, j )
436  DO jj = ppw, ppw+len-1
437  temp = work( jj + 2*nnb )
438  work( jj + 2*nnb ) = c*temp - s*work( jj )
439  work( jj ) = s*temp + c*work( jj )
440  END DO
441  len = len + 1
442  ppw = ppw - 2*nnb - 1
443  END DO
444  ppwo = ppwo + 4*nnb*nnb
445  END DO
446 *
447 * TOP denotes the number of top rows in A and B that will
448 * not be updated during the next steps.
449 *
450  IF( jcol.LE.2 ) THEN
451  top = 0
452  ELSE
453  top = jcol
454  END IF
455 *
456 * Propagate transformations through B and replace stored
457 * left sines/cosines by right sines/cosines.
458 *
459  DO jj = n, j+1, -1
460 *
461 * Update JJth column of B.
462 *
463  DO i = min( jj+1, ihi ), j+2, -1
464  c = a( i, j )
465  s = b( i, j )
466  temp = b( i, jj )
467  b( i, jj ) = c*temp - s*b( i-1, jj )
468  b( i-1, jj ) = s*temp + c*b( i-1, jj )
469  END DO
470 *
471 * Annihilate B( JJ+1, JJ ).
472 *
473  IF( jj.LT.ihi ) THEN
474  temp = b( jj+1, jj+1 )
475  CALL dlartg( temp, b( jj+1, jj ), c, s,
476  $ b( jj+1, jj+1 ) )
477  b( jj+1, jj ) = zero
478  CALL drot( jj-top, b( top+1, jj+1 ), 1,
479  $ b( top+1, jj ), 1, c, s )
480  a( jj+1, j ) = c
481  b( jj+1, j ) = -s
482  END IF
483  END DO
484 *
485 * Update A by transformations from right.
486 * Explicit loop unrolling provides better performance
487 * compared to DLASR.
488 * CALL DLASR( 'Right', 'Variable', 'Backward', IHI-TOP,
489 * $ IHI-J, A( J+2, J ), B( J+2, J ),
490 * $ A( TOP+1, J+1 ), LDA )
491 *
492  jj = mod( ihi-j-1, 3 )
493  DO i = ihi-j-3, jj+1, -3
494  c = a( j+1+i, j )
495  s = -b( j+1+i, j )
496  c1 = a( j+2+i, j )
497  s1 = -b( j+2+i, j )
498  c2 = a( j+3+i, j )
499  s2 = -b( j+3+i, j )
500 *
501  DO k = top+1, ihi
502  temp = a( k, j+i )
503  temp1 = a( k, j+i+1 )
504  temp2 = a( k, j+i+2 )
505  temp3 = a( k, j+i+3 )
506  a( k, j+i+3 ) = c2*temp3 + s2*temp2
507  temp2 = -s2*temp3 + c2*temp2
508  a( k, j+i+2 ) = c1*temp2 + s1*temp1
509  temp1 = -s1*temp2 + c1*temp1
510  a( k, j+i+1 ) = c*temp1 + s*temp
511  a( k, j+i ) = -s*temp1 + c*temp
512  END DO
513  END DO
514 *
515  IF( jj.GT.0 ) THEN
516  DO i = jj, 1, -1
517  CALL drot( ihi-top, a( top+1, j+i+1 ), 1,
518  $ a( top+1, j+i ), 1, a( j+1+i, j ),
519  $ -b( j+1+i, j ) )
520  END DO
521  END IF
522 *
523 * Update (J+1)th column of A by transformations from left.
524 *
525  IF ( j .LT. jcol + nnb - 1 ) THEN
526  len = 1 + j - jcol
527 *
528 * Multiply with the trailing accumulated orthogonal
529 * matrix, which takes the form
530 *
531 * [ U11 U12 ]
532 * U = [ ],
533 * [ U21 U22 ]
534 *
535 * where U21 is a LEN-by-LEN matrix and U12 is lower
536 * triangular.
537 *
538  jrow = ihi - nblst + 1
539  CALL dgemv( 'Transpose', nblst, len, one, work,
540  $ nblst, a( jrow, j+1 ), 1, zero,
541  $ work( pw ), 1 )
542  ppw = pw + len
543  DO i = jrow, jrow+nblst-len-1
544  work( ppw ) = a( i, j+1 )
545  ppw = ppw + 1
546  END DO
547  CALL dtrmv( 'Lower', 'Transpose', 'Non-unit',
548  $ nblst-len, work( len*nblst + 1 ), nblst,
549  $ work( pw+len ), 1 )
550  CALL dgemv( 'Transpose', len, nblst-len, one,
551  $ work( (len+1)*nblst - len + 1 ), nblst,
552  $ a( jrow+nblst-len, j+1 ), 1, one,
553  $ work( pw+len ), 1 )
554  ppw = pw
555  DO i = jrow, jrow+nblst-1
556  a( i, j+1 ) = work( ppw )
557  ppw = ppw + 1
558  END DO
559 *
560 * Multiply with the other accumulated orthogonal
561 * matrices, which take the form
562 *
563 * [ U11 U12 0 ]
564 * [ ]
565 * U = [ U21 U22 0 ],
566 * [ ]
567 * [ 0 0 I ]
568 *
569 * where I denotes the (NNB-LEN)-by-(NNB-LEN) identity
570 * matrix, U21 is a LEN-by-LEN upper triangular matrix
571 * and U12 is an NNB-by-NNB lower triangular matrix.
572 *
573  ppwo = 1 + nblst*nblst
574  j0 = jrow - nnb
575  DO jrow = j0, jcol+1, -nnb
576  ppw = pw + len
577  DO i = jrow, jrow+nnb-1
578  work( ppw ) = a( i, j+1 )
579  ppw = ppw + 1
580  END DO
581  ppw = pw
582  DO i = jrow+nnb, jrow+nnb+len-1
583  work( ppw ) = a( i, j+1 )
584  ppw = ppw + 1
585  END DO
586  CALL dtrmv( 'Upper', 'Transpose', 'Non-unit', len,
587  $ work( ppwo + nnb ), 2*nnb, work( pw ),
588  $ 1 )
589  CALL dtrmv( 'Lower', 'Transpose', 'Non-unit', nnb,
590  $ work( ppwo + 2*len*nnb ),
591  $ 2*nnb, work( pw + len ), 1 )
592  CALL dgemv( 'Transpose', nnb, len, one,
593  $ work( ppwo ), 2*nnb, a( jrow, j+1 ), 1,
594  $ one, work( pw ), 1 )
595  CALL dgemv( 'Transpose', len, nnb, one,
596  $ work( ppwo + 2*len*nnb + nnb ), 2*nnb,
597  $ a( jrow+nnb, j+1 ), 1, one,
598  $ work( pw+len ), 1 )
599  ppw = pw
600  DO i = jrow, jrow+len+nnb-1
601  a( i, j+1 ) = work( ppw )
602  ppw = ppw + 1
603  END DO
604  ppwo = ppwo + 4*nnb*nnb
605  END DO
606  END IF
607  END DO
608 *
609 * Apply accumulated orthogonal matrices to A.
610 *
611  cola = n - jcol - nnb + 1
612  j = ihi - nblst + 1
613  CALL dgemm( 'Transpose', 'No Transpose', nblst,
614  $ cola, nblst, one, work, nblst,
615  $ a( j, jcol+nnb ), lda, zero, work( pw ),
616  $ nblst )
617  CALL dlacpy( 'All', nblst, cola, work( pw ), nblst,
618  $ a( j, jcol+nnb ), lda )
619  ppwo = nblst*nblst + 1
620  j0 = j - nnb
621  DO j = j0, jcol+1, -nnb
622  IF ( blk22 ) THEN
623 *
624 * Exploit the structure of
625 *
626 * [ U11 U12 ]
627 * U = [ ]
628 * [ U21 U22 ],
629 *
630 * where all blocks are NNB-by-NNB, U21 is upper
631 * triangular and U12 is lower triangular.
632 *
633  CALL dorm22( 'Left', 'Transpose', 2*nnb, cola, nnb,
634  $ nnb, work( ppwo ), 2*nnb,
635  $ a( j, jcol+nnb ), lda, work( pw ),
636  $ lwork-pw+1, ierr )
637  ELSE
638 *
639 * Ignore the structure of U.
640 *
641  CALL dgemm( 'Transpose', 'No Transpose', 2*nnb,
642  $ cola, 2*nnb, one, work( ppwo ), 2*nnb,
643  $ a( j, jcol+nnb ), lda, zero, work( pw ),
644  $ 2*nnb )
645  CALL dlacpy( 'All', 2*nnb, cola, work( pw ), 2*nnb,
646  $ a( j, jcol+nnb ), lda )
647  END IF
648  ppwo = ppwo + 4*nnb*nnb
649  END DO
650 *
651 * Apply accumulated orthogonal matrices to Q.
652 *
653  IF( wantq ) THEN
654  j = ihi - nblst + 1
655  IF ( initq ) THEN
656  topq = max( 2, j - jcol + 1 )
657  nh = ihi - topq + 1
658  ELSE
659  topq = 1
660  nh = n
661  END IF
662  CALL dgemm( 'No Transpose', 'No Transpose', nh,
663  $ nblst, nblst, one, q( topq, j ), ldq,
664  $ work, nblst, zero, work( pw ), nh )
665  CALL dlacpy( 'All', nh, nblst, work( pw ), nh,
666  $ q( topq, j ), ldq )
667  ppwo = nblst*nblst + 1
668  j0 = j - nnb
669  DO j = j0, jcol+1, -nnb
670  IF ( initq ) THEN
671  topq = max( 2, j - jcol + 1 )
672  nh = ihi - topq + 1
673  END IF
674  IF ( blk22 ) THEN
675 *
676 * Exploit the structure of U.
677 *
678  CALL dorm22( 'Right', 'No Transpose', nh, 2*nnb,
679  $ nnb, nnb, work( ppwo ), 2*nnb,
680  $ q( topq, j ), ldq, work( pw ),
681  $ lwork-pw+1, ierr )
682  ELSE
683 *
684 * Ignore the structure of U.
685 *
686  CALL dgemm( 'No Transpose', 'No Transpose', nh,
687  $ 2*nnb, 2*nnb, one, q( topq, j ), ldq,
688  $ work( ppwo ), 2*nnb, zero, work( pw ),
689  $ nh )
690  CALL dlacpy( 'All', nh, 2*nnb, work( pw ), nh,
691  $ q( topq, j ), ldq )
692  END IF
693  ppwo = ppwo + 4*nnb*nnb
694  END DO
695  END IF
696 *
697 * Accumulate right Givens rotations if required.
698 *
699  IF ( wantz .OR. top.GT.0 ) THEN
700 *
701 * Initialize small orthogonal factors that will hold the
702 * accumulated Givens rotations in workspace.
703 *
704  CALL dlaset( 'All', nblst, nblst, zero, one, work,
705  $ nblst )
706  pw = nblst * nblst + 1
707  DO i = 1, n2nb
708  CALL dlaset( 'All', 2*nnb, 2*nnb, zero, one,
709  $ work( pw ), 2*nnb )
710  pw = pw + 4*nnb*nnb
711  END DO
712 *
713 * Accumulate Givens rotations into workspace array.
714 *
715  DO j = jcol, jcol+nnb-1
716  ppw = ( nblst + 1 )*( nblst - 2 ) - j + jcol + 1
717  len = 2 + j - jcol
718  jrow = j + n2nb*nnb + 2
719  DO i = ihi, jrow, -1
720  c = a( i, j )
721  a( i, j ) = zero
722  s = b( i, j )
723  b( i, j ) = zero
724  DO jj = ppw, ppw+len-1
725  temp = work( jj + nblst )
726  work( jj + nblst ) = c*temp - s*work( jj )
727  work( jj ) = s*temp + c*work( jj )
728  END DO
729  len = len + 1
730  ppw = ppw - nblst - 1
731  END DO
732 *
733  ppwo = nblst*nblst + ( nnb+j-jcol-1 )*2*nnb + nnb
734  j0 = jrow - nnb
735  DO jrow = j0, j+2, -nnb
736  ppw = ppwo
737  len = 2 + j - jcol
738  DO i = jrow+nnb-1, jrow, -1
739  c = a( i, j )
740  a( i, j ) = zero
741  s = b( i, j )
742  b( i, j ) = zero
743  DO jj = ppw, ppw+len-1
744  temp = work( jj + 2*nnb )
745  work( jj + 2*nnb ) = c*temp - s*work( jj )
746  work( jj ) = s*temp + c*work( jj )
747  END DO
748  len = len + 1
749  ppw = ppw - 2*nnb - 1
750  END DO
751  ppwo = ppwo + 4*nnb*nnb
752  END DO
753  END DO
754  ELSE
755 *
756  CALL dlaset( 'Lower', ihi - jcol - 1, nnb, zero, zero,
757  $ a( jcol + 2, jcol ), lda )
758  CALL dlaset( 'Lower', ihi - jcol - 1, nnb, zero, zero,
759  $ b( jcol + 2, jcol ), ldb )
760  END IF
761 *
762 * Apply accumulated orthogonal matrices to A and B.
763 *
764  IF ( top.GT.0 ) THEN
765  j = ihi - nblst + 1
766  CALL dgemm( 'No Transpose', 'No Transpose', top,
767  $ nblst, nblst, one, a( 1, j ), lda,
768  $ work, nblst, zero, work( pw ), top )
769  CALL dlacpy( 'All', top, nblst, work( pw ), top,
770  $ a( 1, j ), lda )
771  ppwo = nblst*nblst + 1
772  j0 = j - nnb
773  DO j = j0, jcol+1, -nnb
774  IF ( blk22 ) THEN
775 *
776 * Exploit the structure of U.
777 *
778  CALL dorm22( 'Right', 'No Transpose', top, 2*nnb,
779  $ nnb, nnb, work( ppwo ), 2*nnb,
780  $ a( 1, j ), lda, work( pw ),
781  $ lwork-pw+1, ierr )
782  ELSE
783 *
784 * Ignore the structure of U.
785 *
786  CALL dgemm( 'No Transpose', 'No Transpose', top,
787  $ 2*nnb, 2*nnb, one, a( 1, j ), lda,
788  $ work( ppwo ), 2*nnb, zero,
789  $ work( pw ), top )
790  CALL dlacpy( 'All', top, 2*nnb, work( pw ), top,
791  $ a( 1, j ), lda )
792  END IF
793  ppwo = ppwo + 4*nnb*nnb
794  END DO
795 *
796  j = ihi - nblst + 1
797  CALL dgemm( 'No Transpose', 'No Transpose', top,
798  $ nblst, nblst, one, b( 1, j ), ldb,
799  $ work, nblst, zero, work( pw ), top )
800  CALL dlacpy( 'All', top, nblst, work( pw ), top,
801  $ b( 1, j ), ldb )
802  ppwo = nblst*nblst + 1
803  j0 = j - nnb
804  DO j = j0, jcol+1, -nnb
805  IF ( blk22 ) THEN
806 *
807 * Exploit the structure of U.
808 *
809  CALL dorm22( 'Right', 'No Transpose', top, 2*nnb,
810  $ nnb, nnb, work( ppwo ), 2*nnb,
811  $ b( 1, j ), ldb, work( pw ),
812  $ lwork-pw+1, ierr )
813  ELSE
814 *
815 * Ignore the structure of U.
816 *
817  CALL dgemm( 'No Transpose', 'No Transpose', top,
818  $ 2*nnb, 2*nnb, one, b( 1, j ), ldb,
819  $ work( ppwo ), 2*nnb, zero,
820  $ work( pw ), top )
821  CALL dlacpy( 'All', top, 2*nnb, work( pw ), top,
822  $ b( 1, j ), ldb )
823  END IF
824  ppwo = ppwo + 4*nnb*nnb
825  END DO
826  END IF
827 *
828 * Apply accumulated orthogonal matrices to Z.
829 *
830  IF( wantz ) THEN
831  j = ihi - nblst + 1
832  IF ( initq ) THEN
833  topq = max( 2, j - jcol + 1 )
834  nh = ihi - topq + 1
835  ELSE
836  topq = 1
837  nh = n
838  END IF
839  CALL dgemm( 'No Transpose', 'No Transpose', nh,
840  $ nblst, nblst, one, z( topq, j ), ldz,
841  $ work, nblst, zero, work( pw ), nh )
842  CALL dlacpy( 'All', nh, nblst, work( pw ), nh,
843  $ z( topq, j ), ldz )
844  ppwo = nblst*nblst + 1
845  j0 = j - nnb
846  DO j = j0, jcol+1, -nnb
847  IF ( initq ) THEN
848  topq = max( 2, j - jcol + 1 )
849  nh = ihi - topq + 1
850  END IF
851  IF ( blk22 ) THEN
852 *
853 * Exploit the structure of U.
854 *
855  CALL dorm22( 'Right', 'No Transpose', nh, 2*nnb,
856  $ nnb, nnb, work( ppwo ), 2*nnb,
857  $ z( topq, j ), ldz, work( pw ),
858  $ lwork-pw+1, ierr )
859  ELSE
860 *
861 * Ignore the structure of U.
862 *
863  CALL dgemm( 'No Transpose', 'No Transpose', nh,
864  $ 2*nnb, 2*nnb, one, z( topq, j ), ldz,
865  $ work( ppwo ), 2*nnb, zero, work( pw ),
866  $ nh )
867  CALL dlacpy( 'All', nh, 2*nnb, work( pw ), nh,
868  $ z( topq, j ), ldz )
869  END IF
870  ppwo = ppwo + 4*nnb*nnb
871  END DO
872  END IF
873  END DO
874  END IF
875 *
876 * Use unblocked code to reduce the rest of the matrix
877 * Avoid re-initialization of modified Q and Z.
878 *
879  compq2 = compq
880  compz2 = compz
881  IF ( jcol.NE.ilo ) THEN
882  IF ( wantq )
883  $ compq2 = 'V'
884  IF ( wantz )
885  $ compz2 = 'V'
886  END IF
887 *
888  IF ( jcol.LT.ihi )
889  $ CALL dgghrd( compq2, compz2, n, jcol, ihi, a, lda, b, ldb, q,
890  $ ldq, z, ldz, ierr )
891  work( 1 ) = dble( lwkopt )
892 *
893  RETURN
894 *
895 * End of DGGHD3
896 *
897  END
subroutine dlacpy(UPLO, M, N, A, LDA, B, LDB)
DLACPY copies all or part of one two-dimensional array to another.
Definition: dlacpy.f:105
subroutine dgemv(TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY)
DGEMV
Definition: dgemv.f:158
subroutine dgghrd(COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, LDQ, Z, LDZ, INFO)
DGGHRD
Definition: dgghrd.f:209
subroutine drot(N, DX, INCX, DY, INCY, C, S)
DROT
Definition: drot.f:94
subroutine dlartg(F, G, CS, SN, R)
DLARTG generates a plane rotation with real cosine and real sine.
Definition: dlartg.f:99
subroutine dlaset(UPLO, M, N, ALPHA, BETA, A, LDA)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values...
Definition: dlaset.f:112
subroutine dgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
DGEMM
Definition: dgemm.f:189
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
subroutine dorm22(SIDE, TRANS, M, N, N1, N2, Q, LDQ, C, LDC, WORK, LWORK, INFO)
DORM22 multiplies a general matrix by a banded orthogonal matrix.
Definition: dorm22.f:165
subroutine dgghd3(COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, LDQ, Z, LDZ, WORK, LWORK, INFO)
DGGHD3
Definition: dgghd3.f:232
subroutine dtrmv(UPLO, TRANS, DIAG, N, A, LDA, X, INCX)
DTRMV
Definition: dtrmv.f:149