LAPACK  3.10.1
LAPACK: Linear Algebra PACKage
chb2st_kernels.f
Go to the documentation of this file.
1 *> \brief \b CHB2ST_KERNELS
2 *
3 * @generated from zhb2st_kernels.f, fortran z -> c, Wed Dec 7 08:22:40 2016
4 *
5 * =========== DOCUMENTATION ===========
6 *
7 * Online html documentation available at
8 * http://www.netlib.org/lapack/explore-html/
9 *
10 *> \htmlonly
11 *> Download CHB2ST_KERNELS + dependencies
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chb2st_kernels.f">
13 *> [TGZ]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chb2st_kernels.f">
15 *> [ZIP]</a>
16 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chb2st_kernels.f">
17 *> [TXT]</a>
18 *> \endhtmlonly
19 *
20 * Definition:
21 * ===========
22 *
23 * SUBROUTINE CHB2ST_KERNELS( UPLO, WANTZ, TTYPE,
24 * ST, ED, SWEEP, N, NB, IB,
25 * A, LDA, V, TAU, LDVT, WORK)
26 *
27 * IMPLICIT NONE
28 *
29 * .. Scalar Arguments ..
30 * CHARACTER UPLO
31 * LOGICAL WANTZ
32 * INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
33 * ..
34 * .. Array Arguments ..
35 * COMPLEX A( LDA, * ), V( * ),
36 * TAU( * ), WORK( * )
37 *
38 *> \par Purpose:
39 * =============
40 *>
41 *> \verbatim
42 *>
43 *> CHB2ST_KERNELS is an internal routine used by the CHETRD_HB2ST
44 *> subroutine.
45 *> \endverbatim
46 *
47 * Arguments:
48 * ==========
49 *
50 *> \param[in] UPLO
51 *> \verbatim
52 *> UPLO is CHARACTER*1
53 *> \endverbatim
54 *>
55 *> \param[in] WANTZ
56 *> \verbatim
57 *> WANTZ is LOGICAL which indicate if Eigenvalue are requested or both
58 *> Eigenvalue/Eigenvectors.
59 *> \endverbatim
60 *>
61 *> \param[in] TTYPE
62 *> \verbatim
63 *> TTYPE is INTEGER
64 *> \endverbatim
65 *>
66 *> \param[in] ST
67 *> \verbatim
68 *> ST is INTEGER
69 *> internal parameter for indices.
70 *> \endverbatim
71 *>
72 *> \param[in] ED
73 *> \verbatim
74 *> ED is INTEGER
75 *> internal parameter for indices.
76 *> \endverbatim
77 *>
78 *> \param[in] SWEEP
79 *> \verbatim
80 *> SWEEP is INTEGER
81 *> internal parameter for indices.
82 *> \endverbatim
83 *>
84 *> \param[in] N
85 *> \verbatim
86 *> N is INTEGER. The order of the matrix A.
87 *> \endverbatim
88 *>
89 *> \param[in] NB
90 *> \verbatim
91 *> NB is INTEGER. The size of the band.
92 *> \endverbatim
93 *>
94 *> \param[in] IB
95 *> \verbatim
96 *> IB is INTEGER.
97 *> \endverbatim
98 *>
99 *> \param[in, out] A
100 *> \verbatim
101 *> A is COMPLEX array. A pointer to the matrix A.
102 *> \endverbatim
103 *>
104 *> \param[in] LDA
105 *> \verbatim
106 *> LDA is INTEGER. The leading dimension of the matrix A.
107 *> \endverbatim
108 *>
109 *> \param[out] V
110 *> \verbatim
111 *> V is COMPLEX array, dimension 2*n if eigenvalues only are
112 *> requested or to be queried for vectors.
113 *> \endverbatim
114 *>
115 *> \param[out] TAU
116 *> \verbatim
117 *> TAU is COMPLEX array, dimension (2*n).
118 *> The scalar factors of the Householder reflectors are stored
119 *> in this array.
120 *> \endverbatim
121 *>
122 *> \param[in] LDVT
123 *> \verbatim
124 *> LDVT is INTEGER.
125 *> \endverbatim
126 *>
127 *> \param[out] WORK
128 *> \verbatim
129 *> WORK is COMPLEX array. Workspace of size nb.
130 *> \endverbatim
131 *>
132 *> \par Further Details:
133 * =====================
134 *>
135 *> \verbatim
136 *>
137 *> Implemented by Azzam Haidar.
138 *>
139 *> All details are available on technical report, SC11, SC13 papers.
140 *>
141 *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
142 *> Parallel reduction to condensed forms for symmetric eigenvalue problems
143 *> using aggregated fine-grained and memory-aware kernels. In Proceedings
144 *> of 2011 International Conference for High Performance Computing,
145 *> Networking, Storage and Analysis (SC '11), New York, NY, USA,
146 *> Article 8 , 11 pages.
147 *> http://doi.acm.org/10.1145/2063384.2063394
148 *>
149 *> A. Haidar, J. Kurzak, P. Luszczek, 2013.
150 *> An improved parallel singular value algorithm and its implementation
151 *> for multicore hardware, In Proceedings of 2013 International Conference
152 *> for High Performance Computing, Networking, Storage and Analysis (SC '13).
153 *> Denver, Colorado, USA, 2013.
154 *> Article 90, 12 pages.
155 *> http://doi.acm.org/10.1145/2503210.2503292
156 *>
157 *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
158 *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
159 *> calculations based on fine-grained memory aware tasks.
160 *> International Journal of High Performance Computing Applications.
161 *> Volume 28 Issue 2, Pages 196-209, May 2014.
162 *> http://hpc.sagepub.com/content/28/2/196
163 *>
164 *> \endverbatim
165 *>
166 * =====================================================================
167  SUBROUTINE chb2st_kernels( UPLO, WANTZ, TTYPE,
168  $ ST, ED, SWEEP, N, NB, IB,
169  $ A, LDA, V, TAU, LDVT, WORK)
170 *
171  IMPLICIT NONE
172 *
173 * -- LAPACK computational routine --
174 * -- LAPACK is a software package provided by Univ. of Tennessee, --
175 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
176 *
177 * .. Scalar Arguments ..
178  CHARACTER UPLO
179  LOGICAL WANTZ
180  INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
181 * ..
182 * .. Array Arguments ..
183  COMPLEX A( LDA, * ), V( * ),
184  $ TAU( * ), WORK( * )
185 * ..
186 *
187 * =====================================================================
188 *
189 * .. Parameters ..
190  COMPLEX ZERO, ONE
191  PARAMETER ( ZERO = ( 0.0e+0, 0.0e+0 ),
192  $ one = ( 1.0e+0, 0.0e+0 ) )
193 * ..
194 * .. Local Scalars ..
195  LOGICAL UPPER
196  INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS,
197  $ dpos, ofdpos, ajeter
198  COMPLEX CTMP
199 * ..
200 * .. External Subroutines ..
201  EXTERNAL clarfg, clarfx, clarfy
202 * ..
203 * .. Intrinsic Functions ..
204  INTRINSIC conjg, mod
205 * .. External Functions ..
206  LOGICAL LSAME
207  EXTERNAL LSAME
208 * ..
209 * ..
210 * .. Executable Statements ..
211 *
212  ajeter = ib + ldvt
213  upper = lsame( uplo, 'U' )
214 
215  IF( upper ) THEN
216  dpos = 2 * nb + 1
217  ofdpos = 2 * nb
218  ELSE
219  dpos = 1
220  ofdpos = 2
221  ENDIF
222 
223 *
224 * Upper case
225 *
226  IF( upper ) THEN
227 *
228  IF( wantz ) THEN
229  vpos = mod( sweep-1, 2 ) * n + st
230  taupos = mod( sweep-1, 2 ) * n + st
231  ELSE
232  vpos = mod( sweep-1, 2 ) * n + st
233  taupos = mod( sweep-1, 2 ) * n + st
234  ENDIF
235 *
236  IF( ttype.EQ.1 ) THEN
237  lm = ed - st + 1
238 *
239  v( vpos ) = one
240  DO 10 i = 1, lm-1
241  v( vpos+i ) = conjg( a( ofdpos-i, st+i ) )
242  a( ofdpos-i, st+i ) = zero
243  10 CONTINUE
244  ctmp = conjg( a( ofdpos, st ) )
245  CALL clarfg( lm, ctmp, v( vpos+1 ), 1,
246  $ tau( taupos ) )
247  a( ofdpos, st ) = ctmp
248 *
249  lm = ed - st + 1
250  CALL clarfy( uplo, lm, v( vpos ), 1,
251  $ conjg( tau( taupos ) ),
252  $ a( dpos, st ), lda-1, work)
253  ENDIF
254 *
255  IF( ttype.EQ.3 ) THEN
256 *
257  lm = ed - st + 1
258  CALL clarfy( uplo, lm, v( vpos ), 1,
259  $ conjg( tau( taupos ) ),
260  $ a( dpos, st ), lda-1, work)
261  ENDIF
262 *
263  IF( ttype.EQ.2 ) THEN
264  j1 = ed+1
265  j2 = min( ed+nb, n )
266  ln = ed-st+1
267  lm = j2-j1+1
268  IF( lm.GT.0) THEN
269  CALL clarfx( 'Left', ln, lm, v( vpos ),
270  $ conjg( tau( taupos ) ),
271  $ a( dpos-nb, j1 ), lda-1, work)
272 *
273  IF( wantz ) THEN
274  vpos = mod( sweep-1, 2 ) * n + j1
275  taupos = mod( sweep-1, 2 ) * n + j1
276  ELSE
277  vpos = mod( sweep-1, 2 ) * n + j1
278  taupos = mod( sweep-1, 2 ) * n + j1
279  ENDIF
280 *
281  v( vpos ) = one
282  DO 30 i = 1, lm-1
283  v( vpos+i ) =
284  $ conjg( a( dpos-nb-i, j1+i ) )
285  a( dpos-nb-i, j1+i ) = zero
286  30 CONTINUE
287  ctmp = conjg( a( dpos-nb, j1 ) )
288  CALL clarfg( lm, ctmp, v( vpos+1 ), 1, tau( taupos ) )
289  a( dpos-nb, j1 ) = ctmp
290 *
291  CALL clarfx( 'Right', ln-1, lm, v( vpos ),
292  $ tau( taupos ),
293  $ a( dpos-nb+1, j1 ), lda-1, work)
294  ENDIF
295  ENDIF
296 *
297 * Lower case
298 *
299  ELSE
300 *
301  IF( wantz ) THEN
302  vpos = mod( sweep-1, 2 ) * n + st
303  taupos = mod( sweep-1, 2 ) * n + st
304  ELSE
305  vpos = mod( sweep-1, 2 ) * n + st
306  taupos = mod( sweep-1, 2 ) * n + st
307  ENDIF
308 *
309  IF( ttype.EQ.1 ) THEN
310  lm = ed - st + 1
311 *
312  v( vpos ) = one
313  DO 20 i = 1, lm-1
314  v( vpos+i ) = a( ofdpos+i, st-1 )
315  a( ofdpos+i, st-1 ) = zero
316  20 CONTINUE
317  CALL clarfg( lm, a( ofdpos, st-1 ), v( vpos+1 ), 1,
318  $ tau( taupos ) )
319 *
320  lm = ed - st + 1
321 *
322  CALL clarfy( uplo, lm, v( vpos ), 1,
323  $ conjg( tau( taupos ) ),
324  $ a( dpos, st ), lda-1, work)
325 
326  ENDIF
327 *
328  IF( ttype.EQ.3 ) THEN
329  lm = ed - st + 1
330 *
331  CALL clarfy( uplo, lm, v( vpos ), 1,
332  $ conjg( tau( taupos ) ),
333  $ a( dpos, st ), lda-1, work)
334 
335  ENDIF
336 *
337  IF( ttype.EQ.2 ) THEN
338  j1 = ed+1
339  j2 = min( ed+nb, n )
340  ln = ed-st+1
341  lm = j2-j1+1
342 *
343  IF( lm.GT.0) THEN
344  CALL clarfx( 'Right', lm, ln, v( vpos ),
345  $ tau( taupos ), a( dpos+nb, st ),
346  $ lda-1, work)
347 *
348  IF( wantz ) THEN
349  vpos = mod( sweep-1, 2 ) * n + j1
350  taupos = mod( sweep-1, 2 ) * n + j1
351  ELSE
352  vpos = mod( sweep-1, 2 ) * n + j1
353  taupos = mod( sweep-1, 2 ) * n + j1
354  ENDIF
355 *
356  v( vpos ) = one
357  DO 40 i = 1, lm-1
358  v( vpos+i ) = a( dpos+nb+i, st )
359  a( dpos+nb+i, st ) = zero
360  40 CONTINUE
361  CALL clarfg( lm, a( dpos+nb, st ), v( vpos+1 ), 1,
362  $ tau( taupos ) )
363 *
364  CALL clarfx( 'Left', lm, ln-1, v( vpos ),
365  $ conjg( tau( taupos ) ),
366  $ a( dpos+nb-1, st+1 ), lda-1, work)
367 
368  ENDIF
369  ENDIF
370  ENDIF
371 *
372  RETURN
373 *
374 * End of CHB2ST_KERNELS
375 *
376  END
subroutine chb2st_kernels(UPLO, WANTZ, TTYPE, ST, ED, SWEEP, N, NB, IB, A, LDA, V, TAU, LDVT, WORK)
CHB2ST_KERNELS
subroutine clarfx(SIDE, M, N, V, TAU, C, LDC, WORK)
CLARFX applies an elementary reflector to a general rectangular matrix, with loop unrolling when the ...
Definition: clarfx.f:119
subroutine clarfy(UPLO, N, V, INCV, TAU, C, LDC, WORK)
CLARFY
Definition: clarfy.f:108
subroutine clarfg(N, ALPHA, X, INCX, TAU)
CLARFG generates an elementary reflector (Householder matrix).
Definition: clarfg.f:106