LAPACK  3.8.0
LAPACK: Linear Algebra PACKage
dsb2st_kernels.f
Go to the documentation of this file.
1 *> \brief \b DSB2ST_KERNELS
2 *
3 * @generated from zhb2st_kernels.f, fortran z -> d, Wed Dec 7 08:22:39 2016
4 *
5 * =========== DOCUMENTATION ===========
6 *
7 * Online html documentation available at
8 * http://www.netlib.org/lapack/explore-html/
9 *
10 *> \htmlonly
11 *> Download DSB2ST_KERNELS + dependencies
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsb2st_kernels.f">
13 *> [TGZ]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsb2st_kernels.f">
15 *> [ZIP]</a>
16 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsb2st_kernels.f">
17 *> [TXT]</a>
18 *> \endhtmlonly
19 *
20 * Definition:
21 * ===========
22 *
23 * SUBROUTINE DSB2ST_KERNELS( UPLO, WANTZ, TTYPE,
24 * ST, ED, SWEEP, N, NB, IB,
25 * A, LDA, V, TAU, LDVT, WORK)
26 *
27 * IMPLICIT NONE
28 *
29 * .. Scalar Arguments ..
30 * CHARACTER UPLO
31 * LOGICAL WANTZ
32 * INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
33 * ..
34 * .. Array Arguments ..
35 * DOUBLE PRECISION A( LDA, * ), V( * ),
36 * TAU( * ), WORK( * )
37 *
38 *> \par Purpose:
39 * =============
40 *>
41 *> \verbatim
42 *>
43 *> DSB2ST_KERNELS is an internal routine used by the DSYTRD_SB2ST
44 *> subroutine.
45 *> \endverbatim
46 *
47 * Arguments:
48 * ==========
49 *
50 *> \param[in] UPLO
51 *> \verbatim
52 *> UPLO is CHARACTER*1
53 *> \endverbatim
54 *>
55 *> \param[in] WANTZ
56 *> \verbatim
57 *> WANTZ is LOGICAL which indicate if Eigenvalue are requested or both
58 *> Eigenvalue/Eigenvectors.
59 *> \endverbatim
60 *>
61 *> \param[in] TTYPE
62 *> \verbatim
63 *> TTYPE is INTEGER
64 *> \endverbatim
65 *>
66 *> \param[in] ST
67 *> \verbatim
68 *> ST is INTEGER
69 *> internal parameter for indices.
70 *> \endverbatim
71 *>
72 *> \param[in] ED
73 *> \verbatim
74 *> ED is INTEGER
75 *> internal parameter for indices.
76 *> \endverbatim
77 *>
78 *> \param[in] SWEEP
79 *> \verbatim
80 *> SWEEP is INTEGER
81 *> internal parameter for indices.
82 *> \endverbatim
83 *>
84 *> \param[in] N
85 *> \verbatim
86 *> N is INTEGER. The order of the matrix A.
87 *> \endverbatim
88 *>
89 *> \param[in] NB
90 *> \verbatim
91 *> NB is INTEGER. The size of the band.
92 *> \endverbatim
93 *>
94 *> \param[in] IB
95 *> \verbatim
96 *> IB is INTEGER.
97 *> \endverbatim
98 *>
99 *> \param[in, out] A
100 *> \verbatim
101 *> A is DOUBLE PRECISION array. A pointer to the matrix A.
102 *> \endverbatim
103 *>
104 *> \param[in] LDA
105 *> \verbatim
106 *> LDA is INTEGER. The leading dimension of the matrix A.
107 *> \endverbatim
108 *>
109 *> \param[out] V
110 *> \verbatim
111 *> V is DOUBLE PRECISION array, dimension 2*n if eigenvalues only are
112 *> requested or to be queried for vectors.
113 *> \endverbatim
114 *>
115 *> \param[out] TAU
116 *> \verbatim
117 *> TAU is DOUBLE PRECISION array, dimension (2*n).
118 *> The scalar factors of the Householder reflectors are stored
119 *> in this array.
120 *> \endverbatim
121 *>
122 *> \param[in] LDVT
123 *> \verbatim
124 *> LDVT is INTEGER.
125 *> \endverbatim
126 *>
127 *> \param[in] WORK
128 *> \verbatim
129 *> WORK is DOUBLE PRECISION array. Workspace of size nb.
130 *> \endverbatim
131 *>
132 *> \par Further Details:
133 * =====================
134 *>
135 *> \verbatim
136 *>
137 *> Implemented by Azzam Haidar.
138 *>
139 *> All details are available on technical report, SC11, SC13 papers.
140 *>
141 *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
142 *> Parallel reduction to condensed forms for symmetric eigenvalue problems
143 *> using aggregated fine-grained and memory-aware kernels. In Proceedings
144 *> of 2011 International Conference for High Performance Computing,
145 *> Networking, Storage and Analysis (SC '11), New York, NY, USA,
146 *> Article 8 , 11 pages.
147 *> http://doi.acm.org/10.1145/2063384.2063394
148 *>
149 *> A. Haidar, J. Kurzak, P. Luszczek, 2013.
150 *> An improved parallel singular value algorithm and its implementation
151 *> for multicore hardware, In Proceedings of 2013 International Conference
152 *> for High Performance Computing, Networking, Storage and Analysis (SC '13).
153 *> Denver, Colorado, USA, 2013.
154 *> Article 90, 12 pages.
155 *> http://doi.acm.org/10.1145/2503210.2503292
156 *>
157 *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
158 *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
159 *> calculations based on fine-grained memory aware tasks.
160 *> International Journal of High Performance Computing Applications.
161 *> Volume 28 Issue 2, Pages 196-209, May 2014.
162 *> http://hpc.sagepub.com/content/28/2/196
163 *>
164 *> \endverbatim
165 *>
166 * =====================================================================
167  SUBROUTINE dsb2st_kernels( UPLO, WANTZ, TTYPE,
168  $ ST, ED, SWEEP, N, NB, IB,
169  $ A, LDA, V, TAU, LDVT, WORK)
170 *
171  IMPLICIT NONE
172 *
173 * -- LAPACK computational routine (version 3.7.1) --
174 * -- LAPACK is a software package provided by Univ. of Tennessee, --
175 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
176 * June 2017
177 *
178 * .. Scalar Arguments ..
179  CHARACTER UPLO
180  LOGICAL WANTZ
181  INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
182 * ..
183 * .. Array Arguments ..
184  DOUBLE PRECISION A( lda, * ), V( * ),
185  $ tau( * ), work( * )
186 * ..
187 *
188 * =====================================================================
189 *
190 * .. Parameters ..
191  DOUBLE PRECISION ZERO, ONE
192  parameter( zero = 0.0d+0,
193  $ one = 1.0d+0 )
194 * ..
195 * .. Local Scalars ..
196  LOGICAL UPPER
197  INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS,
198  $ dpos, ofdpos, ajeter
199  DOUBLE PRECISION CTMP
200 * ..
201 * .. External Subroutines ..
202  EXTERNAL dlarfg, dlarfx, dlarfy
203 * ..
204 * .. Intrinsic Functions ..
205  INTRINSIC mod
206 * .. External Functions ..
207  LOGICAL LSAME
208  EXTERNAL lsame
209 * ..
210 * ..
211 * .. Executable Statements ..
212 *
213  ajeter = ib + ldvt
214  upper = lsame( uplo, 'U' )
215 
216  IF( upper ) THEN
217  dpos = 2 * nb + 1
218  ofdpos = 2 * nb
219  ELSE
220  dpos = 1
221  ofdpos = 2
222  ENDIF
223 
224 *
225 * Upper case
226 *
227  IF( upper ) THEN
228 *
229  IF( wantz ) THEN
230  vpos = mod( sweep-1, 2 ) * n + st
231  taupos = mod( sweep-1, 2 ) * n + st
232  ELSE
233  vpos = mod( sweep-1, 2 ) * n + st
234  taupos = mod( sweep-1, 2 ) * n + st
235  ENDIF
236 *
237  IF( ttype.EQ.1 ) THEN
238  lm = ed - st + 1
239 *
240  v( vpos ) = one
241  DO 10 i = 1, lm-1
242  v( vpos+i ) = ( a( ofdpos-i, st+i ) )
243  a( ofdpos-i, st+i ) = zero
244  10 CONTINUE
245  ctmp = ( a( ofdpos, st ) )
246  CALL dlarfg( lm, ctmp, v( vpos+1 ), 1,
247  $ tau( taupos ) )
248  a( ofdpos, st ) = ctmp
249 *
250  lm = ed - st + 1
251  CALL dlarfy( uplo, lm, v( vpos ), 1,
252  $ ( tau( taupos ) ),
253  $ a( dpos, st ), lda-1, work)
254  ENDIF
255 *
256  IF( ttype.EQ.3 ) THEN
257 *
258  lm = ed - st + 1
259  CALL dlarfy( uplo, lm, v( vpos ), 1,
260  $ ( tau( taupos ) ),
261  $ a( dpos, st ), lda-1, work)
262  ENDIF
263 *
264  IF( ttype.EQ.2 ) THEN
265  j1 = ed+1
266  j2 = min( ed+nb, n )
267  ln = ed-st+1
268  lm = j2-j1+1
269  IF( lm.GT.0) THEN
270  CALL dlarfx( 'Left', ln, lm, v( vpos ),
271  $ ( tau( taupos ) ),
272  $ a( dpos-nb, j1 ), lda-1, work)
273 *
274  IF( wantz ) THEN
275  vpos = mod( sweep-1, 2 ) * n + j1
276  taupos = mod( sweep-1, 2 ) * n + j1
277  ELSE
278  vpos = mod( sweep-1, 2 ) * n + j1
279  taupos = mod( sweep-1, 2 ) * n + j1
280  ENDIF
281 *
282  v( vpos ) = one
283  DO 30 i = 1, lm-1
284  v( vpos+i ) =
285  $ ( a( dpos-nb-i, j1+i ) )
286  a( dpos-nb-i, j1+i ) = zero
287  30 CONTINUE
288  ctmp = ( a( dpos-nb, j1 ) )
289  CALL dlarfg( lm, ctmp, v( vpos+1 ), 1, tau( taupos ) )
290  a( dpos-nb, j1 ) = ctmp
291 *
292  CALL dlarfx( 'Right', ln-1, lm, v( vpos ),
293  $ tau( taupos ),
294  $ a( dpos-nb+1, j1 ), lda-1, work)
295  ENDIF
296  ENDIF
297 *
298 * Lower case
299 *
300  ELSE
301 *
302  IF( wantz ) THEN
303  vpos = mod( sweep-1, 2 ) * n + st
304  taupos = mod( sweep-1, 2 ) * n + st
305  ELSE
306  vpos = mod( sweep-1, 2 ) * n + st
307  taupos = mod( sweep-1, 2 ) * n + st
308  ENDIF
309 *
310  IF( ttype.EQ.1 ) THEN
311  lm = ed - st + 1
312 *
313  v( vpos ) = one
314  DO 20 i = 1, lm-1
315  v( vpos+i ) = a( ofdpos+i, st-1 )
316  a( ofdpos+i, st-1 ) = zero
317  20 CONTINUE
318  CALL dlarfg( lm, a( ofdpos, st-1 ), v( vpos+1 ), 1,
319  $ tau( taupos ) )
320 *
321  lm = ed - st + 1
322 *
323  CALL dlarfy( uplo, lm, v( vpos ), 1,
324  $ ( tau( taupos ) ),
325  $ a( dpos, st ), lda-1, work)
326 
327  ENDIF
328 *
329  IF( ttype.EQ.3 ) THEN
330  lm = ed - st + 1
331 *
332  CALL dlarfy( uplo, lm, v( vpos ), 1,
333  $ ( tau( taupos ) ),
334  $ a( dpos, st ), lda-1, work)
335 
336  ENDIF
337 *
338  IF( ttype.EQ.2 ) THEN
339  j1 = ed+1
340  j2 = min( ed+nb, n )
341  ln = ed-st+1
342  lm = j2-j1+1
343 *
344  IF( lm.GT.0) THEN
345  CALL dlarfx( 'Right', lm, ln, v( vpos ),
346  $ tau( taupos ), a( dpos+nb, st ),
347  $ lda-1, work)
348 *
349  IF( wantz ) THEN
350  vpos = mod( sweep-1, 2 ) * n + j1
351  taupos = mod( sweep-1, 2 ) * n + j1
352  ELSE
353  vpos = mod( sweep-1, 2 ) * n + j1
354  taupos = mod( sweep-1, 2 ) * n + j1
355  ENDIF
356 *
357  v( vpos ) = one
358  DO 40 i = 1, lm-1
359  v( vpos+i ) = a( dpos+nb+i, st )
360  a( dpos+nb+i, st ) = zero
361  40 CONTINUE
362  CALL dlarfg( lm, a( dpos+nb, st ), v( vpos+1 ), 1,
363  $ tau( taupos ) )
364 *
365  CALL dlarfx( 'Left', lm, ln-1, v( vpos ),
366  $ ( tau( taupos ) ),
367  $ a( dpos+nb-1, st+1 ), lda-1, work)
368 
369  ENDIF
370  ENDIF
371  ENDIF
372 *
373  RETURN
374 *
375 * END OF DSB2ST_KERNELS
376 *
377  END
subroutine dlarfg(N, ALPHA, X, INCX, TAU)
DLARFG generates an elementary reflector (Householder matrix).
Definition: dlarfg.f:108
subroutine dsb2st_kernels(UPLO, WANTZ, TTYPE, ST, ED, SWEEP, N, NB, IB, A, LDA, V, TAU, LDVT, WORK)
DSB2ST_KERNELS
subroutine dlarfy(UPLO, N, V, INCV, TAU, C, LDC, WORK)
DLARFY
Definition: dlarfy.f:110
subroutine dlarfx(SIDE, M, N, V, TAU, C, LDC, WORK)
DLARFX applies an elementary reflector to a general rectangular matrix, with loop unrolling when the ...
Definition: dlarfx.f:122