LAPACK 3.11.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ ssb2st_kernels()

subroutine ssb2st_kernels ( character  UPLO,
logical  WANTZ,
integer  TTYPE,
integer  ST,
integer  ED,
integer  SWEEP,
integer  N,
integer  NB,
integer  IB,
real, dimension( lda, * )  A,
integer  LDA,
real, dimension( * )  V,
real, dimension( * )  TAU,
integer  LDVT,
real, dimension( * )  WORK 
)

SSB2ST_KERNELS

Download SSB2ST_KERNELS + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 SSB2ST_KERNELS is an internal routine used by the SSYTRD_SB2ST
 subroutine.
Parameters
[in]UPLO
          UPLO is CHARACTER*1
[in]WANTZ
          WANTZ is LOGICAL which indicate if Eigenvalue are requested or both
          Eigenvalue/Eigenvectors.
[in]TTYPE
          TTYPE is INTEGER
[in]ST
          ST is INTEGER
          internal parameter for indices.
[in]ED
          ED is INTEGER
          internal parameter for indices.
[in]SWEEP
          SWEEP is INTEGER
          internal parameter for indices.
[in]N
          N is INTEGER. The order of the matrix A.
[in]NB
          NB is INTEGER. The size of the band.
[in]IB
          IB is INTEGER.
[in,out]A
          A is REAL array. A pointer to the matrix A.
[in]LDA
          LDA is INTEGER. The leading dimension of the matrix A.
[out]V
          V is REAL array, dimension 2*n if eigenvalues only are
          requested or to be queried for vectors.
[out]TAU
          TAU is REAL array, dimension (2*n).
          The scalar factors of the Householder reflectors are stored
          in this array.
[in]LDVT
          LDVT is INTEGER.
[out]WORK
          WORK is REAL array. Workspace of size nb.
Further Details:
  Implemented by Azzam Haidar.

  All details are available on technical report, SC11, SC13 papers.

  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  Parallel reduction to condensed forms for symmetric eigenvalue problems
  using aggregated fine-grained and memory-aware kernels. In Proceedings
  of 2011 International Conference for High Performance Computing,
  Networking, Storage and Analysis (SC '11), New York, NY, USA,
  Article 8 , 11 pages.
  http://doi.acm.org/10.1145/2063384.2063394

  A. Haidar, J. Kurzak, P. Luszczek, 2013.
  An improved parallel singular value algorithm and its implementation
  for multicore hardware, In Proceedings of 2013 International Conference
  for High Performance Computing, Networking, Storage and Analysis (SC '13).
  Denver, Colorado, USA, 2013.
  Article 90, 12 pages.
  http://doi.acm.org/10.1145/2503210.2503292

  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  A novel hybrid CPU-GPU generalized eigensolver for electronic structure
  calculations based on fine-grained memory aware tasks.
  International Journal of High Performance Computing Applications.
  Volume 28 Issue 2, Pages 196-209, May 2014.
  http://hpc.sagepub.com/content/28/2/196

Definition at line 168 of file ssb2st_kernels.f.

171*
172 IMPLICIT NONE
173*
174* -- LAPACK computational routine --
175* -- LAPACK is a software package provided by Univ. of Tennessee, --
176* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
177*
178* .. Scalar Arguments ..
179 CHARACTER UPLO
180 LOGICAL WANTZ
181 INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
182* ..
183* .. Array Arguments ..
184 REAL A( LDA, * ), V( * ),
185 $ TAU( * ), WORK( * )
186* ..
187*
188* =====================================================================
189*
190* .. Parameters ..
191 REAL ZERO, ONE
192 parameter( zero = 0.0e+0,
193 $ one = 1.0e+0 )
194* ..
195* .. Local Scalars ..
196 LOGICAL UPPER
197 INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS,
198 $ DPOS, OFDPOS, AJETER
199 REAL CTMP
200* ..
201* .. External Subroutines ..
202 EXTERNAL slarfg, slarfx, slarfy
203* ..
204* .. Intrinsic Functions ..
205 INTRINSIC mod
206* .. External Functions ..
207 LOGICAL LSAME
208 EXTERNAL lsame
209* ..
210* ..
211* .. Executable Statements ..
212*
213 ajeter = ib + ldvt
214 upper = lsame( uplo, 'U' )
215
216 IF( upper ) THEN
217 dpos = 2 * nb + 1
218 ofdpos = 2 * nb
219 ELSE
220 dpos = 1
221 ofdpos = 2
222 ENDIF
223
224*
225* Upper case
226*
227 IF( upper ) THEN
228*
229 IF( wantz ) THEN
230 vpos = mod( sweep-1, 2 ) * n + st
231 taupos = mod( sweep-1, 2 ) * n + st
232 ELSE
233 vpos = mod( sweep-1, 2 ) * n + st
234 taupos = mod( sweep-1, 2 ) * n + st
235 ENDIF
236*
237 IF( ttype.EQ.1 ) THEN
238 lm = ed - st + 1
239*
240 v( vpos ) = one
241 DO 10 i = 1, lm-1
242 v( vpos+i ) = ( a( ofdpos-i, st+i ) )
243 a( ofdpos-i, st+i ) = zero
244 10 CONTINUE
245 ctmp = ( a( ofdpos, st ) )
246 CALL slarfg( lm, ctmp, v( vpos+1 ), 1,
247 $ tau( taupos ) )
248 a( ofdpos, st ) = ctmp
249*
250 lm = ed - st + 1
251 CALL slarfy( uplo, lm, v( vpos ), 1,
252 $ ( tau( taupos ) ),
253 $ a( dpos, st ), lda-1, work)
254 ENDIF
255*
256 IF( ttype.EQ.3 ) THEN
257*
258 lm = ed - st + 1
259 CALL slarfy( uplo, lm, v( vpos ), 1,
260 $ ( tau( taupos ) ),
261 $ a( dpos, st ), lda-1, work)
262 ENDIF
263*
264 IF( ttype.EQ.2 ) THEN
265 j1 = ed+1
266 j2 = min( ed+nb, n )
267 ln = ed-st+1
268 lm = j2-j1+1
269 IF( lm.GT.0) THEN
270 CALL slarfx( 'Left', ln, lm, v( vpos ),
271 $ ( tau( taupos ) ),
272 $ a( dpos-nb, j1 ), lda-1, work)
273*
274 IF( wantz ) THEN
275 vpos = mod( sweep-1, 2 ) * n + j1
276 taupos = mod( sweep-1, 2 ) * n + j1
277 ELSE
278 vpos = mod( sweep-1, 2 ) * n + j1
279 taupos = mod( sweep-1, 2 ) * n + j1
280 ENDIF
281*
282 v( vpos ) = one
283 DO 30 i = 1, lm-1
284 v( vpos+i ) =
285 $ ( a( dpos-nb-i, j1+i ) )
286 a( dpos-nb-i, j1+i ) = zero
287 30 CONTINUE
288 ctmp = ( a( dpos-nb, j1 ) )
289 CALL slarfg( lm, ctmp, v( vpos+1 ), 1, tau( taupos ) )
290 a( dpos-nb, j1 ) = ctmp
291*
292 CALL slarfx( 'Right', ln-1, lm, v( vpos ),
293 $ tau( taupos ),
294 $ a( dpos-nb+1, j1 ), lda-1, work)
295 ENDIF
296 ENDIF
297*
298* Lower case
299*
300 ELSE
301*
302 IF( wantz ) THEN
303 vpos = mod( sweep-1, 2 ) * n + st
304 taupos = mod( sweep-1, 2 ) * n + st
305 ELSE
306 vpos = mod( sweep-1, 2 ) * n + st
307 taupos = mod( sweep-1, 2 ) * n + st
308 ENDIF
309*
310 IF( ttype.EQ.1 ) THEN
311 lm = ed - st + 1
312*
313 v( vpos ) = one
314 DO 20 i = 1, lm-1
315 v( vpos+i ) = a( ofdpos+i, st-1 )
316 a( ofdpos+i, st-1 ) = zero
317 20 CONTINUE
318 CALL slarfg( lm, a( ofdpos, st-1 ), v( vpos+1 ), 1,
319 $ tau( taupos ) )
320*
321 lm = ed - st + 1
322*
323 CALL slarfy( uplo, lm, v( vpos ), 1,
324 $ ( tau( taupos ) ),
325 $ a( dpos, st ), lda-1, work)
326
327 ENDIF
328*
329 IF( ttype.EQ.3 ) THEN
330 lm = ed - st + 1
331*
332 CALL slarfy( uplo, lm, v( vpos ), 1,
333 $ ( tau( taupos ) ),
334 $ a( dpos, st ), lda-1, work)
335
336 ENDIF
337*
338 IF( ttype.EQ.2 ) THEN
339 j1 = ed+1
340 j2 = min( ed+nb, n )
341 ln = ed-st+1
342 lm = j2-j1+1
343*
344 IF( lm.GT.0) THEN
345 CALL slarfx( 'Right', lm, ln, v( vpos ),
346 $ tau( taupos ), a( dpos+nb, st ),
347 $ lda-1, work)
348*
349 IF( wantz ) THEN
350 vpos = mod( sweep-1, 2 ) * n + j1
351 taupos = mod( sweep-1, 2 ) * n + j1
352 ELSE
353 vpos = mod( sweep-1, 2 ) * n + j1
354 taupos = mod( sweep-1, 2 ) * n + j1
355 ENDIF
356*
357 v( vpos ) = one
358 DO 40 i = 1, lm-1
359 v( vpos+i ) = a( dpos+nb+i, st )
360 a( dpos+nb+i, st ) = zero
361 40 CONTINUE
362 CALL slarfg( lm, a( dpos+nb, st ), v( vpos+1 ), 1,
363 $ tau( taupos ) )
364*
365 CALL slarfx( 'Left', lm, ln-1, v( vpos ),
366 $ ( tau( taupos ) ),
367 $ a( dpos+nb-1, st+1 ), lda-1, work)
368
369 ENDIF
370 ENDIF
371 ENDIF
372*
373 RETURN
374*
375* End of SSB2ST_KERNELS
376*
logical function lsame(CA, CB)
LSAME
Definition: lsame.f:53
subroutine slarfx(SIDE, M, N, V, TAU, C, LDC, WORK)
SLARFX applies an elementary reflector to a general rectangular matrix, with loop unrolling when the ...
Definition: slarfx.f:120
subroutine slarfg(N, ALPHA, X, INCX, TAU)
SLARFG generates an elementary reflector (Householder matrix).
Definition: slarfg.f:106
subroutine slarfy(UPLO, N, V, INCV, TAU, C, LDC, WORK)
SLARFY
Definition: slarfy.f:108
Here is the call graph for this function:
Here is the caller graph for this function: