LAPACK 3.12.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ dsb2st_kernels()

subroutine dsb2st_kernels ( character  uplo,
logical  wantz,
integer  ttype,
integer  st,
integer  ed,
integer  sweep,
integer  n,
integer  nb,
integer  ib,
double precision, dimension( lda, * )  a,
integer  lda,
double precision, dimension( * )  v,
double precision, dimension( * )  tau,
integer  ldvt,
double precision, dimension( * )  work 
)

DSB2ST_KERNELS

Download DSB2ST_KERNELS + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 DSB2ST_KERNELS is an internal routine used by the DSYTRD_SB2ST
 subroutine.
Parameters
[in]UPLO
          UPLO is CHARACTER*1
[in]WANTZ
          WANTZ is LOGICAL which indicate if Eigenvalue are requested or both
          Eigenvalue/Eigenvectors.
[in]TTYPE
          TTYPE is INTEGER
[in]ST
          ST is INTEGER
          internal parameter for indices.
[in]ED
          ED is INTEGER
          internal parameter for indices.
[in]SWEEP
          SWEEP is INTEGER
          internal parameter for indices.
[in]N
          N is INTEGER. The order of the matrix A.
[in]NB
          NB is INTEGER. The size of the band.
[in]IB
          IB is INTEGER.
[in,out]A
          A is DOUBLE PRECISION array. A pointer to the matrix A.
[in]LDA
          LDA is INTEGER. The leading dimension of the matrix A.
[out]V
          V is DOUBLE PRECISION array, dimension 2*n if eigenvalues only are
          requested or to be queried for vectors.
[out]TAU
          TAU is DOUBLE PRECISION array, dimension (2*n).
          The scalar factors of the Householder reflectors are stored
          in this array.
[in]LDVT
          LDVT is INTEGER.
[out]WORK
          WORK is DOUBLE PRECISION array. Workspace of size nb.
Further Details:
  Implemented by Azzam Haidar.

  All details are available on technical report, SC11, SC13 papers.

  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  Parallel reduction to condensed forms for symmetric eigenvalue problems
  using aggregated fine-grained and memory-aware kernels. In Proceedings
  of 2011 International Conference for High Performance Computing,
  Networking, Storage and Analysis (SC '11), New York, NY, USA,
  Article 8 , 11 pages.
  http://doi.acm.org/10.1145/2063384.2063394

  A. Haidar, J. Kurzak, P. Luszczek, 2013.
  An improved parallel singular value algorithm and its implementation
  for multicore hardware, In Proceedings of 2013 International Conference
  for High Performance Computing, Networking, Storage and Analysis (SC '13).
  Denver, Colorado, USA, 2013.
  Article 90, 12 pages.
  http://doi.acm.org/10.1145/2503210.2503292

  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  A novel hybrid CPU-GPU generalized eigensolver for electronic structure
  calculations based on fine-grained memory aware tasks.
  International Journal of High Performance Computing Applications.
  Volume 28 Issue 2, Pages 196-209, May 2014.
  http://hpc.sagepub.com/content/28/2/196

Definition at line 169 of file dsb2st_kernels.f.

172*
173 IMPLICIT NONE
174*
175* -- LAPACK computational routine --
176* -- LAPACK is a software package provided by Univ. of Tennessee, --
177* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
178*
179* .. Scalar Arguments ..
180 CHARACTER UPLO
181 LOGICAL WANTZ
182 INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
183* ..
184* .. Array Arguments ..
185 DOUBLE PRECISION A( LDA, * ), V( * ),
186 $ TAU( * ), WORK( * )
187* ..
188*
189* =====================================================================
190*
191* .. Parameters ..
192 DOUBLE PRECISION ZERO, ONE
193 parameter( zero = 0.0d+0,
194 $ one = 1.0d+0 )
195* ..
196* .. Local Scalars ..
197 LOGICAL UPPER
198 INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS,
199 $ DPOS, OFDPOS, AJETER
200 DOUBLE PRECISION CTMP
201* ..
202* .. External Subroutines ..
203 EXTERNAL dlarfg, dlarfx, dlarfy
204* ..
205* .. Intrinsic Functions ..
206 INTRINSIC mod
207* .. External Functions ..
208 LOGICAL LSAME
209 EXTERNAL lsame
210* ..
211* ..
212* .. Executable Statements ..
213*
214 ajeter = ib + ldvt
215 upper = lsame( uplo, 'U' )
216
217 IF( upper ) THEN
218 dpos = 2 * nb + 1
219 ofdpos = 2 * nb
220 ELSE
221 dpos = 1
222 ofdpos = 2
223 ENDIF
224
225*
226* Upper case
227*
228 IF( upper ) THEN
229*
230 IF( wantz ) THEN
231 vpos = mod( sweep-1, 2 ) * n + st
232 taupos = mod( sweep-1, 2 ) * n + st
233 ELSE
234 vpos = mod( sweep-1, 2 ) * n + st
235 taupos = mod( sweep-1, 2 ) * n + st
236 ENDIF
237*
238 IF( ttype.EQ.1 ) THEN
239 lm = ed - st + 1
240*
241 v( vpos ) = one
242 DO 10 i = 1, lm-1
243 v( vpos+i ) = ( a( ofdpos-i, st+i ) )
244 a( ofdpos-i, st+i ) = zero
245 10 CONTINUE
246 ctmp = ( a( ofdpos, st ) )
247 CALL dlarfg( lm, ctmp, v( vpos+1 ), 1,
248 $ tau( taupos ) )
249 a( ofdpos, st ) = ctmp
250*
251 lm = ed - st + 1
252 CALL dlarfy( uplo, lm, v( vpos ), 1,
253 $ ( tau( taupos ) ),
254 $ a( dpos, st ), lda-1, work)
255 ENDIF
256*
257 IF( ttype.EQ.3 ) THEN
258*
259 lm = ed - st + 1
260 CALL dlarfy( uplo, lm, v( vpos ), 1,
261 $ ( tau( taupos ) ),
262 $ a( dpos, st ), lda-1, work)
263 ENDIF
264*
265 IF( ttype.EQ.2 ) THEN
266 j1 = ed+1
267 j2 = min( ed+nb, n )
268 ln = ed-st+1
269 lm = j2-j1+1
270 IF( lm.GT.0) THEN
271 CALL dlarfx( 'Left', ln, lm, v( vpos ),
272 $ ( tau( taupos ) ),
273 $ a( dpos-nb, j1 ), lda-1, work)
274*
275 IF( wantz ) THEN
276 vpos = mod( sweep-1, 2 ) * n + j1
277 taupos = mod( sweep-1, 2 ) * n + j1
278 ELSE
279 vpos = mod( sweep-1, 2 ) * n + j1
280 taupos = mod( sweep-1, 2 ) * n + j1
281 ENDIF
282*
283 v( vpos ) = one
284 DO 30 i = 1, lm-1
285 v( vpos+i ) =
286 $ ( a( dpos-nb-i, j1+i ) )
287 a( dpos-nb-i, j1+i ) = zero
288 30 CONTINUE
289 ctmp = ( a( dpos-nb, j1 ) )
290 CALL dlarfg( lm, ctmp, v( vpos+1 ), 1, tau( taupos ) )
291 a( dpos-nb, j1 ) = ctmp
292*
293 CALL dlarfx( 'Right', ln-1, lm, v( vpos ),
294 $ tau( taupos ),
295 $ a( dpos-nb+1, j1 ), lda-1, work)
296 ENDIF
297 ENDIF
298*
299* Lower case
300*
301 ELSE
302*
303 IF( wantz ) THEN
304 vpos = mod( sweep-1, 2 ) * n + st
305 taupos = mod( sweep-1, 2 ) * n + st
306 ELSE
307 vpos = mod( sweep-1, 2 ) * n + st
308 taupos = mod( sweep-1, 2 ) * n + st
309 ENDIF
310*
311 IF( ttype.EQ.1 ) THEN
312 lm = ed - st + 1
313*
314 v( vpos ) = one
315 DO 20 i = 1, lm-1
316 v( vpos+i ) = a( ofdpos+i, st-1 )
317 a( ofdpos+i, st-1 ) = zero
318 20 CONTINUE
319 CALL dlarfg( lm, a( ofdpos, st-1 ), v( vpos+1 ), 1,
320 $ tau( taupos ) )
321*
322 lm = ed - st + 1
323*
324 CALL dlarfy( uplo, lm, v( vpos ), 1,
325 $ ( tau( taupos ) ),
326 $ a( dpos, st ), lda-1, work)
327
328 ENDIF
329*
330 IF( ttype.EQ.3 ) THEN
331 lm = ed - st + 1
332*
333 CALL dlarfy( uplo, lm, v( vpos ), 1,
334 $ ( tau( taupos ) ),
335 $ a( dpos, st ), lda-1, work)
336
337 ENDIF
338*
339 IF( ttype.EQ.2 ) THEN
340 j1 = ed+1
341 j2 = min( ed+nb, n )
342 ln = ed-st+1
343 lm = j2-j1+1
344*
345 IF( lm.GT.0) THEN
346 CALL dlarfx( 'Right', lm, ln, v( vpos ),
347 $ tau( taupos ), a( dpos+nb, st ),
348 $ lda-1, work)
349*
350 IF( wantz ) THEN
351 vpos = mod( sweep-1, 2 ) * n + j1
352 taupos = mod( sweep-1, 2 ) * n + j1
353 ELSE
354 vpos = mod( sweep-1, 2 ) * n + j1
355 taupos = mod( sweep-1, 2 ) * n + j1
356 ENDIF
357*
358 v( vpos ) = one
359 DO 40 i = 1, lm-1
360 v( vpos+i ) = a( dpos+nb+i, st )
361 a( dpos+nb+i, st ) = zero
362 40 CONTINUE
363 CALL dlarfg( lm, a( dpos+nb, st ), v( vpos+1 ), 1,
364 $ tau( taupos ) )
365*
366 CALL dlarfx( 'Left', lm, ln-1, v( vpos ),
367 $ ( tau( taupos ) ),
368 $ a( dpos+nb-1, st+1 ), lda-1, work)
369
370 ENDIF
371 ENDIF
372 ENDIF
373*
374 RETURN
375*
376* End of DSB2ST_KERNELS
377*
subroutine dlarfg(n, alpha, x, incx, tau)
DLARFG generates an elementary reflector (Householder matrix).
Definition dlarfg.f:106
subroutine dlarfx(side, m, n, v, tau, c, ldc, work)
DLARFX applies an elementary reflector to a general rectangular matrix, with loop unrolling when the ...
Definition dlarfx.f:120
subroutine dlarfy(uplo, n, v, incv, tau, c, ldc, work)
DLARFY
Definition dlarfy.f:108
logical function lsame(ca, cb)
LSAME
Definition lsame.f:48
Here is the call graph for this function:
Here is the caller graph for this function: