LAPACK 3.12.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ ssb2st_kernels()

subroutine ssb2st_kernels ( character  uplo,
logical  wantz,
integer  ttype,
integer  st,
integer  ed,
integer  sweep,
integer  n,
integer  nb,
integer  ib,
real, dimension( lda, * )  a,
integer  lda,
real, dimension( * )  v,
real, dimension( * )  tau,
integer  ldvt,
real, dimension( * )  work 
)

SSB2ST_KERNELS

Download SSB2ST_KERNELS + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 SSB2ST_KERNELS is an internal routine used by the SSYTRD_SB2ST
 subroutine.
Parameters
[in]UPLO
          UPLO is CHARACTER*1
[in]WANTZ
          WANTZ is LOGICAL which indicate if Eigenvalue are requested or both
          Eigenvalue/Eigenvectors.
[in]TTYPE
          TTYPE is INTEGER
[in]ST
          ST is INTEGER
          internal parameter for indices.
[in]ED
          ED is INTEGER
          internal parameter for indices.
[in]SWEEP
          SWEEP is INTEGER
          internal parameter for indices.
[in]N
          N is INTEGER. The order of the matrix A.
[in]NB
          NB is INTEGER. The size of the band.
[in]IB
          IB is INTEGER.
[in,out]A
          A is REAL array. A pointer to the matrix A.
[in]LDA
          LDA is INTEGER. The leading dimension of the matrix A.
[out]V
          V is REAL array, dimension 2*n if eigenvalues only are
          requested or to be queried for vectors.
[out]TAU
          TAU is REAL array, dimension (2*n).
          The scalar factors of the Householder reflectors are stored
          in this array.
[in]LDVT
          LDVT is INTEGER.
[out]WORK
          WORK is REAL array. Workspace of size nb.
Further Details:
  Implemented by Azzam Haidar.

  All details are available on technical report, SC11, SC13 papers.

  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  Parallel reduction to condensed forms for symmetric eigenvalue problems
  using aggregated fine-grained and memory-aware kernels. In Proceedings
  of 2011 International Conference for High Performance Computing,
  Networking, Storage and Analysis (SC '11), New York, NY, USA,
  Article 8 , 11 pages.
  http://doi.acm.org/10.1145/2063384.2063394

  A. Haidar, J. Kurzak, P. Luszczek, 2013.
  An improved parallel singular value algorithm and its implementation
  for multicore hardware, In Proceedings of 2013 International Conference
  for High Performance Computing, Networking, Storage and Analysis (SC '13).
  Denver, Colorado, USA, 2013.
  Article 90, 12 pages.
  http://doi.acm.org/10.1145/2503210.2503292

  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  A novel hybrid CPU-GPU generalized eigensolver for electronic structure
  calculations based on fine-grained memory aware tasks.
  International Journal of High Performance Computing Applications.
  Volume 28 Issue 2, Pages 196-209, May 2014.
  http://hpc.sagepub.com/content/28/2/196

Definition at line 170 of file ssb2st_kernels.f.

173*
174 IMPLICIT NONE
175*
176* -- LAPACK computational routine --
177* -- LAPACK is a software package provided by Univ. of Tennessee, --
178* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
179*
180* .. Scalar Arguments ..
181 CHARACTER UPLO
182 LOGICAL WANTZ
183 INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
184* ..
185* .. Array Arguments ..
186 REAL A( LDA, * ), V( * ),
187 $ TAU( * ), WORK( * )
188* ..
189*
190* =====================================================================
191*
192* .. Parameters ..
193 REAL ZERO, ONE
194 parameter( zero = 0.0e+0,
195 $ one = 1.0e+0 )
196* ..
197* .. Local Scalars ..
198 LOGICAL UPPER
199 INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS,
200 $ DPOS, OFDPOS, AJETER
201 REAL CTMP
202* ..
203* .. External Subroutines ..
204 EXTERNAL slarfg, slarfx, slarfy
205* ..
206* .. Intrinsic Functions ..
207 INTRINSIC mod
208* .. External Functions ..
209 LOGICAL LSAME
210 EXTERNAL lsame
211* ..
212* ..
213* .. Executable Statements ..
214*
215 ajeter = ib + ldvt
216 upper = lsame( uplo, 'U' )
217
218 IF( upper ) THEN
219 dpos = 2 * nb + 1
220 ofdpos = 2 * nb
221 ELSE
222 dpos = 1
223 ofdpos = 2
224 ENDIF
225
226*
227* Upper case
228*
229 IF( upper ) THEN
230*
231 IF( wantz ) THEN
232 vpos = mod( sweep-1, 2 ) * n + st
233 taupos = mod( sweep-1, 2 ) * n + st
234 ELSE
235 vpos = mod( sweep-1, 2 ) * n + st
236 taupos = mod( sweep-1, 2 ) * n + st
237 ENDIF
238*
239 IF( ttype.EQ.1 ) THEN
240 lm = ed - st + 1
241*
242 v( vpos ) = one
243 DO 10 i = 1, lm-1
244 v( vpos+i ) = ( a( ofdpos-i, st+i ) )
245 a( ofdpos-i, st+i ) = zero
246 10 CONTINUE
247 ctmp = ( a( ofdpos, st ) )
248 CALL slarfg( lm, ctmp, v( vpos+1 ), 1,
249 $ tau( taupos ) )
250 a( ofdpos, st ) = ctmp
251*
252 lm = ed - st + 1
253 CALL slarfy( uplo, lm, v( vpos ), 1,
254 $ ( tau( taupos ) ),
255 $ a( dpos, st ), lda-1, work)
256 ENDIF
257*
258 IF( ttype.EQ.3 ) THEN
259*
260 lm = ed - st + 1
261 CALL slarfy( uplo, lm, v( vpos ), 1,
262 $ ( tau( taupos ) ),
263 $ a( dpos, st ), lda-1, work)
264 ENDIF
265*
266 IF( ttype.EQ.2 ) THEN
267 j1 = ed+1
268 j2 = min( ed+nb, n )
269 ln = ed-st+1
270 lm = j2-j1+1
271 IF( lm.GT.0) THEN
272 CALL slarfx( 'Left', ln, lm, v( vpos ),
273 $ ( tau( taupos ) ),
274 $ a( dpos-nb, j1 ), lda-1, work)
275*
276 IF( wantz ) THEN
277 vpos = mod( sweep-1, 2 ) * n + j1
278 taupos = mod( sweep-1, 2 ) * n + j1
279 ELSE
280 vpos = mod( sweep-1, 2 ) * n + j1
281 taupos = mod( sweep-1, 2 ) * n + j1
282 ENDIF
283*
284 v( vpos ) = one
285 DO 30 i = 1, lm-1
286 v( vpos+i ) =
287 $ ( a( dpos-nb-i, j1+i ) )
288 a( dpos-nb-i, j1+i ) = zero
289 30 CONTINUE
290 ctmp = ( a( dpos-nb, j1 ) )
291 CALL slarfg( lm, ctmp, v( vpos+1 ), 1, tau( taupos ) )
292 a( dpos-nb, j1 ) = ctmp
293*
294 CALL slarfx( 'Right', ln-1, lm, v( vpos ),
295 $ tau( taupos ),
296 $ a( dpos-nb+1, j1 ), lda-1, work)
297 ENDIF
298 ENDIF
299*
300* Lower case
301*
302 ELSE
303*
304 IF( wantz ) THEN
305 vpos = mod( sweep-1, 2 ) * n + st
306 taupos = mod( sweep-1, 2 ) * n + st
307 ELSE
308 vpos = mod( sweep-1, 2 ) * n + st
309 taupos = mod( sweep-1, 2 ) * n + st
310 ENDIF
311*
312 IF( ttype.EQ.1 ) THEN
313 lm = ed - st + 1
314*
315 v( vpos ) = one
316 DO 20 i = 1, lm-1
317 v( vpos+i ) = a( ofdpos+i, st-1 )
318 a( ofdpos+i, st-1 ) = zero
319 20 CONTINUE
320 CALL slarfg( lm, a( ofdpos, st-1 ), v( vpos+1 ), 1,
321 $ tau( taupos ) )
322*
323 lm = ed - st + 1
324*
325 CALL slarfy( uplo, lm, v( vpos ), 1,
326 $ ( tau( taupos ) ),
327 $ a( dpos, st ), lda-1, work)
328
329 ENDIF
330*
331 IF( ttype.EQ.3 ) THEN
332 lm = ed - st + 1
333*
334 CALL slarfy( uplo, lm, v( vpos ), 1,
335 $ ( tau( taupos ) ),
336 $ a( dpos, st ), lda-1, work)
337
338 ENDIF
339*
340 IF( ttype.EQ.2 ) THEN
341 j1 = ed+1
342 j2 = min( ed+nb, n )
343 ln = ed-st+1
344 lm = j2-j1+1
345*
346 IF( lm.GT.0) THEN
347 CALL slarfx( 'Right', lm, ln, v( vpos ),
348 $ tau( taupos ), a( dpos+nb, st ),
349 $ lda-1, work)
350*
351 IF( wantz ) THEN
352 vpos = mod( sweep-1, 2 ) * n + j1
353 taupos = mod( sweep-1, 2 ) * n + j1
354 ELSE
355 vpos = mod( sweep-1, 2 ) * n + j1
356 taupos = mod( sweep-1, 2 ) * n + j1
357 ENDIF
358*
359 v( vpos ) = one
360 DO 40 i = 1, lm-1
361 v( vpos+i ) = a( dpos+nb+i, st )
362 a( dpos+nb+i, st ) = zero
363 40 CONTINUE
364 CALL slarfg( lm, a( dpos+nb, st ), v( vpos+1 ), 1,
365 $ tau( taupos ) )
366*
367 CALL slarfx( 'Left', lm, ln-1, v( vpos ),
368 $ ( tau( taupos ) ),
369 $ a( dpos+nb-1, st+1 ), lda-1, work)
370
371 ENDIF
372 ENDIF
373 ENDIF
374*
375 RETURN
376*
377* End of SSB2ST_KERNELS
378*
subroutine slarfg(n, alpha, x, incx, tau)
SLARFG generates an elementary reflector (Householder matrix).
Definition slarfg.f:106
subroutine slarfx(side, m, n, v, tau, c, ldc, work)
SLARFX applies an elementary reflector to a general rectangular matrix, with loop unrolling when the ...
Definition slarfx.f:120
subroutine slarfy(uplo, n, v, incv, tau, c, ldc, work)
SLARFY
Definition slarfy.f:108
logical function lsame(ca, cb)
LSAME
Definition lsame.f:48
Here is the call graph for this function:
Here is the caller graph for this function: