LAPACK 3.12.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
ssb2st_kernels.f
Go to the documentation of this file.
1*> \brief \b SSB2ST_KERNELS
2*
3* @generated from zhb2st_kernels.f, fortran z -> s, Wed Dec 7 08:22:40 2016
4*
5* =========== DOCUMENTATION ===========
6*
7* Online html documentation available at
8* http://www.netlib.org/lapack/explore-html/
9*
10*> \htmlonly
11*> Download SSB2ST_KERNELS + dependencies
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ssb2st_kernels.f">
13*> [TGZ]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ssb2st_kernels.f">
15*> [ZIP]</a>
16*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ssb2st_kernels.f">
17*> [TXT]</a>
18*> \endhtmlonly
19*
20* Definition:
21* ===========
22*
23* SUBROUTINE SSB2ST_KERNELS( UPLO, WANTZ, TTYPE,
24* ST, ED, SWEEP, N, NB, IB,
25* A, LDA, V, TAU, LDVT, WORK)
26*
27* IMPLICIT NONE
28*
29* .. Scalar Arguments ..
30* CHARACTER UPLO
31* LOGICAL WANTZ
32* INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
33* ..
34* .. Array Arguments ..
35* REAL A( LDA, * ), V( * ),
36* TAU( * ), WORK( * )
37*
38*> \par Purpose:
39* =============
40*>
41*> \verbatim
42*>
43*> SSB2ST_KERNELS is an internal routine used by the SSYTRD_SB2ST
44*> subroutine.
45*> \endverbatim
46*
47* Arguments:
48* ==========
49*
50*> \param[in] UPLO
51*> \verbatim
52*> UPLO is CHARACTER*1
53*> \endverbatim
54*>
55*> \param[in] WANTZ
56*> \verbatim
57*> WANTZ is LOGICAL which indicate if Eigenvalue are requested or both
58*> Eigenvalue/Eigenvectors.
59*> \endverbatim
60*>
61*> \param[in] TTYPE
62*> \verbatim
63*> TTYPE is INTEGER
64*> \endverbatim
65*>
66*> \param[in] ST
67*> \verbatim
68*> ST is INTEGER
69*> internal parameter for indices.
70*> \endverbatim
71*>
72*> \param[in] ED
73*> \verbatim
74*> ED is INTEGER
75*> internal parameter for indices.
76*> \endverbatim
77*>
78*> \param[in] SWEEP
79*> \verbatim
80*> SWEEP is INTEGER
81*> internal parameter for indices.
82*> \endverbatim
83*>
84*> \param[in] N
85*> \verbatim
86*> N is INTEGER. The order of the matrix A.
87*> \endverbatim
88*>
89*> \param[in] NB
90*> \verbatim
91*> NB is INTEGER. The size of the band.
92*> \endverbatim
93*>
94*> \param[in] IB
95*> \verbatim
96*> IB is INTEGER.
97*> \endverbatim
98*>
99*> \param[in, out] A
100*> \verbatim
101*> A is REAL array. A pointer to the matrix A.
102*> \endverbatim
103*>
104*> \param[in] LDA
105*> \verbatim
106*> LDA is INTEGER. The leading dimension of the matrix A.
107*> \endverbatim
108*>
109*> \param[out] V
110*> \verbatim
111*> V is REAL array, dimension 2*n if eigenvalues only are
112*> requested or to be queried for vectors.
113*> \endverbatim
114*>
115*> \param[out] TAU
116*> \verbatim
117*> TAU is REAL array, dimension (2*n).
118*> The scalar factors of the Householder reflectors are stored
119*> in this array.
120*> \endverbatim
121*>
122*> \param[in] LDVT
123*> \verbatim
124*> LDVT is INTEGER.
125*> \endverbatim
126*>
127*> \param[out] WORK
128*> \verbatim
129*> WORK is REAL array. Workspace of size nb.
130*> \endverbatim
131*>
132*>
133*> \par Further Details:
134* =====================
135*>
136*> \verbatim
137*>
138*> Implemented by Azzam Haidar.
139*>
140*> All details are available on technical report, SC11, SC13 papers.
141*>
142*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
143*> Parallel reduction to condensed forms for symmetric eigenvalue problems
144*> using aggregated fine-grained and memory-aware kernels. In Proceedings
145*> of 2011 International Conference for High Performance Computing,
146*> Networking, Storage and Analysis (SC '11), New York, NY, USA,
147*> Article 8 , 11 pages.
148*> http://doi.acm.org/10.1145/2063384.2063394
149*>
150*> A. Haidar, J. Kurzak, P. Luszczek, 2013.
151*> An improved parallel singular value algorithm and its implementation
152*> for multicore hardware, In Proceedings of 2013 International Conference
153*> for High Performance Computing, Networking, Storage and Analysis (SC '13).
154*> Denver, Colorado, USA, 2013.
155*> Article 90, 12 pages.
156*> http://doi.acm.org/10.1145/2503210.2503292
157*>
158*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
159*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
160*> calculations based on fine-grained memory aware tasks.
161*> International Journal of High Performance Computing Applications.
162*> Volume 28 Issue 2, Pages 196-209, May 2014.
163*> http://hpc.sagepub.com/content/28/2/196
164*>
165*> \endverbatim
166*>
167*> \ingroup hb2st_kernels
168*>
169* =====================================================================
170 SUBROUTINE ssb2st_kernels( UPLO, WANTZ, TTYPE,
171 $ ST, ED, SWEEP, N, NB, IB,
172 $ A, LDA, V, TAU, LDVT, WORK)
173*
174 IMPLICIT NONE
175*
176* -- LAPACK computational routine --
177* -- LAPACK is a software package provided by Univ. of Tennessee, --
178* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
179*
180* .. Scalar Arguments ..
181 CHARACTER UPLO
182 LOGICAL WANTZ
183 INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
184* ..
185* .. Array Arguments ..
186 REAL A( LDA, * ), V( * ),
187 $ TAU( * ), WORK( * )
188* ..
189*
190* =====================================================================
191*
192* .. Parameters ..
193 REAL ZERO, ONE
194 PARAMETER ( ZERO = 0.0e+0,
195 $ one = 1.0e+0 )
196* ..
197* .. Local Scalars ..
198 LOGICAL UPPER
199 INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS,
200 $ dpos, ofdpos, ajeter
201 REAL CTMP
202* ..
203* .. External Subroutines ..
204 EXTERNAL slarfg, slarfx, slarfy
205* ..
206* .. Intrinsic Functions ..
207 INTRINSIC mod
208* .. External Functions ..
209 LOGICAL LSAME
210 EXTERNAL LSAME
211* ..
212* ..
213* .. Executable Statements ..
214*
215 ajeter = ib + ldvt
216 upper = lsame( uplo, 'U' )
217
218 IF( upper ) THEN
219 dpos = 2 * nb + 1
220 ofdpos = 2 * nb
221 ELSE
222 dpos = 1
223 ofdpos = 2
224 ENDIF
225
226*
227* Upper case
228*
229 IF( upper ) THEN
230*
231 IF( wantz ) THEN
232 vpos = mod( sweep-1, 2 ) * n + st
233 taupos = mod( sweep-1, 2 ) * n + st
234 ELSE
235 vpos = mod( sweep-1, 2 ) * n + st
236 taupos = mod( sweep-1, 2 ) * n + st
237 ENDIF
238*
239 IF( ttype.EQ.1 ) THEN
240 lm = ed - st + 1
241*
242 v( vpos ) = one
243 DO 10 i = 1, lm-1
244 v( vpos+i ) = ( a( ofdpos-i, st+i ) )
245 a( ofdpos-i, st+i ) = zero
246 10 CONTINUE
247 ctmp = ( a( ofdpos, st ) )
248 CALL slarfg( lm, ctmp, v( vpos+1 ), 1,
249 $ tau( taupos ) )
250 a( ofdpos, st ) = ctmp
251*
252 lm = ed - st + 1
253 CALL slarfy( uplo, lm, v( vpos ), 1,
254 $ ( tau( taupos ) ),
255 $ a( dpos, st ), lda-1, work)
256 ENDIF
257*
258 IF( ttype.EQ.3 ) THEN
259*
260 lm = ed - st + 1
261 CALL slarfy( uplo, lm, v( vpos ), 1,
262 $ ( tau( taupos ) ),
263 $ a( dpos, st ), lda-1, work)
264 ENDIF
265*
266 IF( ttype.EQ.2 ) THEN
267 j1 = ed+1
268 j2 = min( ed+nb, n )
269 ln = ed-st+1
270 lm = j2-j1+1
271 IF( lm.GT.0) THEN
272 CALL slarfx( 'Left', ln, lm, v( vpos ),
273 $ ( tau( taupos ) ),
274 $ a( dpos-nb, j1 ), lda-1, work)
275*
276 IF( wantz ) THEN
277 vpos = mod( sweep-1, 2 ) * n + j1
278 taupos = mod( sweep-1, 2 ) * n + j1
279 ELSE
280 vpos = mod( sweep-1, 2 ) * n + j1
281 taupos = mod( sweep-1, 2 ) * n + j1
282 ENDIF
283*
284 v( vpos ) = one
285 DO 30 i = 1, lm-1
286 v( vpos+i ) =
287 $ ( a( dpos-nb-i, j1+i ) )
288 a( dpos-nb-i, j1+i ) = zero
289 30 CONTINUE
290 ctmp = ( a( dpos-nb, j1 ) )
291 CALL slarfg( lm, ctmp, v( vpos+1 ), 1, tau( taupos ) )
292 a( dpos-nb, j1 ) = ctmp
293*
294 CALL slarfx( 'Right', ln-1, lm, v( vpos ),
295 $ tau( taupos ),
296 $ a( dpos-nb+1, j1 ), lda-1, work)
297 ENDIF
298 ENDIF
299*
300* Lower case
301*
302 ELSE
303*
304 IF( wantz ) THEN
305 vpos = mod( sweep-1, 2 ) * n + st
306 taupos = mod( sweep-1, 2 ) * n + st
307 ELSE
308 vpos = mod( sweep-1, 2 ) * n + st
309 taupos = mod( sweep-1, 2 ) * n + st
310 ENDIF
311*
312 IF( ttype.EQ.1 ) THEN
313 lm = ed - st + 1
314*
315 v( vpos ) = one
316 DO 20 i = 1, lm-1
317 v( vpos+i ) = a( ofdpos+i, st-1 )
318 a( ofdpos+i, st-1 ) = zero
319 20 CONTINUE
320 CALL slarfg( lm, a( ofdpos, st-1 ), v( vpos+1 ), 1,
321 $ tau( taupos ) )
322*
323 lm = ed - st + 1
324*
325 CALL slarfy( uplo, lm, v( vpos ), 1,
326 $ ( tau( taupos ) ),
327 $ a( dpos, st ), lda-1, work)
328
329 ENDIF
330*
331 IF( ttype.EQ.3 ) THEN
332 lm = ed - st + 1
333*
334 CALL slarfy( uplo, lm, v( vpos ), 1,
335 $ ( tau( taupos ) ),
336 $ a( dpos, st ), lda-1, work)
337
338 ENDIF
339*
340 IF( ttype.EQ.2 ) THEN
341 j1 = ed+1
342 j2 = min( ed+nb, n )
343 ln = ed-st+1
344 lm = j2-j1+1
345*
346 IF( lm.GT.0) THEN
347 CALL slarfx( 'Right', lm, ln, v( vpos ),
348 $ tau( taupos ), a( dpos+nb, st ),
349 $ lda-1, work)
350*
351 IF( wantz ) THEN
352 vpos = mod( sweep-1, 2 ) * n + j1
353 taupos = mod( sweep-1, 2 ) * n + j1
354 ELSE
355 vpos = mod( sweep-1, 2 ) * n + j1
356 taupos = mod( sweep-1, 2 ) * n + j1
357 ENDIF
358*
359 v( vpos ) = one
360 DO 40 i = 1, lm-1
361 v( vpos+i ) = a( dpos+nb+i, st )
362 a( dpos+nb+i, st ) = zero
363 40 CONTINUE
364 CALL slarfg( lm, a( dpos+nb, st ), v( vpos+1 ), 1,
365 $ tau( taupos ) )
366*
367 CALL slarfx( 'Left', lm, ln-1, v( vpos ),
368 $ ( tau( taupos ) ),
369 $ a( dpos+nb-1, st+1 ), lda-1, work)
370
371 ENDIF
372 ENDIF
373 ENDIF
374*
375 RETURN
376*
377* End of SSB2ST_KERNELS
378*
379 END
subroutine ssb2st_kernels(uplo, wantz, ttype, st, ed, sweep, n, nb, ib, a, lda, v, tau, ldvt, work)
SSB2ST_KERNELS
subroutine slarfg(n, alpha, x, incx, tau)
SLARFG generates an elementary reflector (Householder matrix).
Definition slarfg.f:106
subroutine slarfx(side, m, n, v, tau, c, ldc, work)
SLARFX applies an elementary reflector to a general rectangular matrix, with loop unrolling when the ...
Definition slarfx.f:120
subroutine slarfy(uplo, n, v, incv, tau, c, ldc, work)
SLARFY
Definition slarfy.f:108