LAPACK  3.10.1
LAPACK: Linear Algebra PACKage
ssb2st_kernels.f
Go to the documentation of this file.
1 *> \brief \b SSB2ST_KERNELS
2 *
3 * @generated from zhb2st_kernels.f, fortran z -> s, Wed Dec 7 08:22:40 2016
4 *
5 * =========== DOCUMENTATION ===========
6 *
7 * Online html documentation available at
8 * http://www.netlib.org/lapack/explore-html/
9 *
10 *> \htmlonly
11 *> Download SSB2ST_KERNELS + dependencies
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ssb2st_kernels.f">
13 *> [TGZ]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ssb2st_kernels.f">
15 *> [ZIP]</a>
16 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ssb2st_kernels.f">
17 *> [TXT]</a>
18 *> \endhtmlonly
19 *
20 * Definition:
21 * ===========
22 *
23 * SUBROUTINE SSB2ST_KERNELS( UPLO, WANTZ, TTYPE,
24 * ST, ED, SWEEP, N, NB, IB,
25 * A, LDA, V, TAU, LDVT, WORK)
26 *
27 * IMPLICIT NONE
28 *
29 * .. Scalar Arguments ..
30 * CHARACTER UPLO
31 * LOGICAL WANTZ
32 * INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
33 * ..
34 * .. Array Arguments ..
35 * REAL A( LDA, * ), V( * ),
36 * TAU( * ), WORK( * )
37 *
38 *> \par Purpose:
39 * =============
40 *>
41 *> \verbatim
42 *>
43 *> SSB2ST_KERNELS is an internal routine used by the SSYTRD_SB2ST
44 *> subroutine.
45 *> \endverbatim
46 *
47 * Arguments:
48 * ==========
49 *
50 *> \param[in] UPLO
51 *> \verbatim
52 *> UPLO is CHARACTER*1
53 *> \endverbatim
54 *>
55 *> \param[in] WANTZ
56 *> \verbatim
57 *> WANTZ is LOGICAL which indicate if Eigenvalue are requested or both
58 *> Eigenvalue/Eigenvectors.
59 *> \endverbatim
60 *>
61 *> \param[in] TTYPE
62 *> \verbatim
63 *> TTYPE is INTEGER
64 *> \endverbatim
65 *>
66 *> \param[in] ST
67 *> \verbatim
68 *> ST is INTEGER
69 *> internal parameter for indices.
70 *> \endverbatim
71 *>
72 *> \param[in] ED
73 *> \verbatim
74 *> ED is INTEGER
75 *> internal parameter for indices.
76 *> \endverbatim
77 *>
78 *> \param[in] SWEEP
79 *> \verbatim
80 *> SWEEP is INTEGER
81 *> internal parameter for indices.
82 *> \endverbatim
83 *>
84 *> \param[in] N
85 *> \verbatim
86 *> N is INTEGER. The order of the matrix A.
87 *> \endverbatim
88 *>
89 *> \param[in] NB
90 *> \verbatim
91 *> NB is INTEGER. The size of the band.
92 *> \endverbatim
93 *>
94 *> \param[in] IB
95 *> \verbatim
96 *> IB is INTEGER.
97 *> \endverbatim
98 *>
99 *> \param[in, out] A
100 *> \verbatim
101 *> A is REAL array. A pointer to the matrix A.
102 *> \endverbatim
103 *>
104 *> \param[in] LDA
105 *> \verbatim
106 *> LDA is INTEGER. The leading dimension of the matrix A.
107 *> \endverbatim
108 *>
109 *> \param[out] V
110 *> \verbatim
111 *> V is REAL array, dimension 2*n if eigenvalues only are
112 *> requested or to be queried for vectors.
113 *> \endverbatim
114 *>
115 *> \param[out] TAU
116 *> \verbatim
117 *> TAU is REAL array, dimension (2*n).
118 *> The scalar factors of the Householder reflectors are stored
119 *> in this array.
120 *> \endverbatim
121 *>
122 *> \param[in] LDVT
123 *> \verbatim
124 *> LDVT is INTEGER.
125 *> \endverbatim
126 *>
127 *> \param[out] WORK
128 *> \verbatim
129 *> WORK is REAL array. Workspace of size nb.
130 *> \endverbatim
131 *>
132 *>
133 *> \par Further Details:
134 * =====================
135 *>
136 *> \verbatim
137 *>
138 *> Implemented by Azzam Haidar.
139 *>
140 *> All details are available on technical report, SC11, SC13 papers.
141 *>
142 *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
143 *> Parallel reduction to condensed forms for symmetric eigenvalue problems
144 *> using aggregated fine-grained and memory-aware kernels. In Proceedings
145 *> of 2011 International Conference for High Performance Computing,
146 *> Networking, Storage and Analysis (SC '11), New York, NY, USA,
147 *> Article 8 , 11 pages.
148 *> http://doi.acm.org/10.1145/2063384.2063394
149 *>
150 *> A. Haidar, J. Kurzak, P. Luszczek, 2013.
151 *> An improved parallel singular value algorithm and its implementation
152 *> for multicore hardware, In Proceedings of 2013 International Conference
153 *> for High Performance Computing, Networking, Storage and Analysis (SC '13).
154 *> Denver, Colorado, USA, 2013.
155 *> Article 90, 12 pages.
156 *> http://doi.acm.org/10.1145/2503210.2503292
157 *>
158 *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
159 *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
160 *> calculations based on fine-grained memory aware tasks.
161 *> International Journal of High Performance Computing Applications.
162 *> Volume 28 Issue 2, Pages 196-209, May 2014.
163 *> http://hpc.sagepub.com/content/28/2/196
164 *>
165 *> \endverbatim
166 *>
167 * =====================================================================
168  SUBROUTINE ssb2st_kernels( UPLO, WANTZ, TTYPE,
169  $ ST, ED, SWEEP, N, NB, IB,
170  $ A, LDA, V, TAU, LDVT, WORK)
171 *
172  IMPLICIT NONE
173 *
174 * -- LAPACK computational routine --
175 * -- LAPACK is a software package provided by Univ. of Tennessee, --
176 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
177 *
178 * .. Scalar Arguments ..
179  CHARACTER UPLO
180  LOGICAL WANTZ
181  INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
182 * ..
183 * .. Array Arguments ..
184  REAL A( LDA, * ), V( * ),
185  $ TAU( * ), WORK( * )
186 * ..
187 *
188 * =====================================================================
189 *
190 * .. Parameters ..
191  REAL ZERO, ONE
192  PARAMETER ( ZERO = 0.0e+0,
193  $ one = 1.0e+0 )
194 * ..
195 * .. Local Scalars ..
196  LOGICAL UPPER
197  INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS,
198  $ dpos, ofdpos, ajeter
199  REAL CTMP
200 * ..
201 * .. External Subroutines ..
202  EXTERNAL slarfg, slarfx, slarfy
203 * ..
204 * .. Intrinsic Functions ..
205  INTRINSIC mod
206 * .. External Functions ..
207  LOGICAL LSAME
208  EXTERNAL LSAME
209 * ..
210 * ..
211 * .. Executable Statements ..
212 *
213  ajeter = ib + ldvt
214  upper = lsame( uplo, 'U' )
215 
216  IF( upper ) THEN
217  dpos = 2 * nb + 1
218  ofdpos = 2 * nb
219  ELSE
220  dpos = 1
221  ofdpos = 2
222  ENDIF
223 
224 *
225 * Upper case
226 *
227  IF( upper ) THEN
228 *
229  IF( wantz ) THEN
230  vpos = mod( sweep-1, 2 ) * n + st
231  taupos = mod( sweep-1, 2 ) * n + st
232  ELSE
233  vpos = mod( sweep-1, 2 ) * n + st
234  taupos = mod( sweep-1, 2 ) * n + st
235  ENDIF
236 *
237  IF( ttype.EQ.1 ) THEN
238  lm = ed - st + 1
239 *
240  v( vpos ) = one
241  DO 10 i = 1, lm-1
242  v( vpos+i ) = ( a( ofdpos-i, st+i ) )
243  a( ofdpos-i, st+i ) = zero
244  10 CONTINUE
245  ctmp = ( a( ofdpos, st ) )
246  CALL slarfg( lm, ctmp, v( vpos+1 ), 1,
247  $ tau( taupos ) )
248  a( ofdpos, st ) = ctmp
249 *
250  lm = ed - st + 1
251  CALL slarfy( uplo, lm, v( vpos ), 1,
252  $ ( tau( taupos ) ),
253  $ a( dpos, st ), lda-1, work)
254  ENDIF
255 *
256  IF( ttype.EQ.3 ) THEN
257 *
258  lm = ed - st + 1
259  CALL slarfy( uplo, lm, v( vpos ), 1,
260  $ ( tau( taupos ) ),
261  $ a( dpos, st ), lda-1, work)
262  ENDIF
263 *
264  IF( ttype.EQ.2 ) THEN
265  j1 = ed+1
266  j2 = min( ed+nb, n )
267  ln = ed-st+1
268  lm = j2-j1+1
269  IF( lm.GT.0) THEN
270  CALL slarfx( 'Left', ln, lm, v( vpos ),
271  $ ( tau( taupos ) ),
272  $ a( dpos-nb, j1 ), lda-1, work)
273 *
274  IF( wantz ) THEN
275  vpos = mod( sweep-1, 2 ) * n + j1
276  taupos = mod( sweep-1, 2 ) * n + j1
277  ELSE
278  vpos = mod( sweep-1, 2 ) * n + j1
279  taupos = mod( sweep-1, 2 ) * n + j1
280  ENDIF
281 *
282  v( vpos ) = one
283  DO 30 i = 1, lm-1
284  v( vpos+i ) =
285  $ ( a( dpos-nb-i, j1+i ) )
286  a( dpos-nb-i, j1+i ) = zero
287  30 CONTINUE
288  ctmp = ( a( dpos-nb, j1 ) )
289  CALL slarfg( lm, ctmp, v( vpos+1 ), 1, tau( taupos ) )
290  a( dpos-nb, j1 ) = ctmp
291 *
292  CALL slarfx( 'Right', ln-1, lm, v( vpos ),
293  $ tau( taupos ),
294  $ a( dpos-nb+1, j1 ), lda-1, work)
295  ENDIF
296  ENDIF
297 *
298 * Lower case
299 *
300  ELSE
301 *
302  IF( wantz ) THEN
303  vpos = mod( sweep-1, 2 ) * n + st
304  taupos = mod( sweep-1, 2 ) * n + st
305  ELSE
306  vpos = mod( sweep-1, 2 ) * n + st
307  taupos = mod( sweep-1, 2 ) * n + st
308  ENDIF
309 *
310  IF( ttype.EQ.1 ) THEN
311  lm = ed - st + 1
312 *
313  v( vpos ) = one
314  DO 20 i = 1, lm-1
315  v( vpos+i ) = a( ofdpos+i, st-1 )
316  a( ofdpos+i, st-1 ) = zero
317  20 CONTINUE
318  CALL slarfg( lm, a( ofdpos, st-1 ), v( vpos+1 ), 1,
319  $ tau( taupos ) )
320 *
321  lm = ed - st + 1
322 *
323  CALL slarfy( uplo, lm, v( vpos ), 1,
324  $ ( tau( taupos ) ),
325  $ a( dpos, st ), lda-1, work)
326 
327  ENDIF
328 *
329  IF( ttype.EQ.3 ) THEN
330  lm = ed - st + 1
331 *
332  CALL slarfy( uplo, lm, v( vpos ), 1,
333  $ ( tau( taupos ) ),
334  $ a( dpos, st ), lda-1, work)
335 
336  ENDIF
337 *
338  IF( ttype.EQ.2 ) THEN
339  j1 = ed+1
340  j2 = min( ed+nb, n )
341  ln = ed-st+1
342  lm = j2-j1+1
343 *
344  IF( lm.GT.0) THEN
345  CALL slarfx( 'Right', lm, ln, v( vpos ),
346  $ tau( taupos ), a( dpos+nb, st ),
347  $ lda-1, work)
348 *
349  IF( wantz ) THEN
350  vpos = mod( sweep-1, 2 ) * n + j1
351  taupos = mod( sweep-1, 2 ) * n + j1
352  ELSE
353  vpos = mod( sweep-1, 2 ) * n + j1
354  taupos = mod( sweep-1, 2 ) * n + j1
355  ENDIF
356 *
357  v( vpos ) = one
358  DO 40 i = 1, lm-1
359  v( vpos+i ) = a( dpos+nb+i, st )
360  a( dpos+nb+i, st ) = zero
361  40 CONTINUE
362  CALL slarfg( lm, a( dpos+nb, st ), v( vpos+1 ), 1,
363  $ tau( taupos ) )
364 *
365  CALL slarfx( 'Left', lm, ln-1, v( vpos ),
366  $ ( tau( taupos ) ),
367  $ a( dpos+nb-1, st+1 ), lda-1, work)
368 
369  ENDIF
370  ENDIF
371  ENDIF
372 *
373  RETURN
374 *
375 * End of SSB2ST_KERNELS
376 *
377  END
subroutine slarfx(SIDE, M, N, V, TAU, C, LDC, WORK)
SLARFX applies an elementary reflector to a general rectangular matrix, with loop unrolling when the ...
Definition: slarfx.f:120
subroutine slarfg(N, ALPHA, X, INCX, TAU)
SLARFG generates an elementary reflector (Householder matrix).
Definition: slarfg.f:106
subroutine slarfy(UPLO, N, V, INCV, TAU, C, LDC, WORK)
SLARFY
Definition: slarfy.f:108
subroutine ssb2st_kernels(UPLO, WANTZ, TTYPE, ST, ED, SWEEP, N, NB, IB, A, LDA, V, TAU, LDVT, WORK)
SSB2ST_KERNELS