LAPACK  3.10.0
LAPACK: Linear Algebra PACKage

◆ zhb2st_kernels()

subroutine zhb2st_kernels ( character  UPLO,
logical  WANTZ,
integer  TTYPE,
integer  ST,
integer  ED,
integer  SWEEP,
integer  N,
integer  NB,
integer  IB,
complex*16, dimension( lda, * )  A,
integer  LDA,
complex*16, dimension( * )  V,
complex*16, dimension( * )  TAU,
integer  LDVT,
complex*16, dimension( * )  WORK 
)

ZHB2ST_KERNELS

Download ZHB2ST_KERNELS + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 ZHB2ST_KERNELS is an internal routine used by the ZHETRD_HB2ST
 subroutine.
Parameters
[in]UPLO
          UPLO is CHARACTER*1
[in]WANTZ
          WANTZ is LOGICAL which indicate if Eigenvalue are requested or both
          Eigenvalue/Eigenvectors.
[in]TTYPE
          TTYPE is INTEGER
[in]ST
          ST is INTEGER
          internal parameter for indices.
[in]ED
          ED is INTEGER
          internal parameter for indices.
[in]SWEEP
          SWEEP is INTEGER
          internal parameter for indices.
[in]N
          N is INTEGER. The order of the matrix A.
[in]NB
          NB is INTEGER. The size of the band.
[in]IB
          IB is INTEGER.
[in,out]A
          A is COMPLEX*16 array. A pointer to the matrix A.
[in]LDA
          LDA is INTEGER. The leading dimension of the matrix A.
[out]V
          V is COMPLEX*16 array, dimension 2*n if eigenvalues only are
          requested or to be queried for vectors.
[out]TAU
          TAU is COMPLEX*16 array, dimension (2*n).
          The scalar factors of the Householder reflectors are stored
          in this array.
[in]LDVT
          LDVT is INTEGER.
[out]WORK
          WORK is COMPLEX*16 array. Workspace of size nb.
Further Details:
  Implemented by Azzam Haidar.

  All details are available on technical report, SC11, SC13 papers.

  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  Parallel reduction to condensed forms for symmetric eigenvalue problems
  using aggregated fine-grained and memory-aware kernels. In Proceedings
  of 2011 International Conference for High Performance Computing,
  Networking, Storage and Analysis (SC '11), New York, NY, USA,
  Article 8 , 11 pages.
  http://doi.acm.org/10.1145/2063384.2063394

  A. Haidar, J. Kurzak, P. Luszczek, 2013.
  An improved parallel singular value algorithm and its implementation
  for multicore hardware, In Proceedings of 2013 International Conference
  for High Performance Computing, Networking, Storage and Analysis (SC '13).
  Denver, Colorado, USA, 2013.
  Article 90, 12 pages.
  http://doi.acm.org/10.1145/2503210.2503292

  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  A novel hybrid CPU-GPU generalized eigensolver for electronic structure
  calculations based on fine-grained memory aware tasks.
  International Journal of High Performance Computing Applications.
  Volume 28 Issue 2, Pages 196-209, May 2014.
  http://hpc.sagepub.com/content/28/2/196

Definition at line 167 of file zhb2st_kernels.f.

170 *
171  IMPLICIT NONE
172 *
173 * -- LAPACK computational routine --
174 * -- LAPACK is a software package provided by Univ. of Tennessee, --
175 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
176 *
177 * .. Scalar Arguments ..
178  CHARACTER UPLO
179  LOGICAL WANTZ
180  INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
181 * ..
182 * .. Array Arguments ..
183  COMPLEX*16 A( LDA, * ), V( * ),
184  $ TAU( * ), WORK( * )
185 * ..
186 *
187 * =====================================================================
188 *
189 * .. Parameters ..
190  COMPLEX*16 ZERO, ONE
191  parameter( zero = ( 0.0d+0, 0.0d+0 ),
192  $ one = ( 1.0d+0, 0.0d+0 ) )
193 * ..
194 * .. Local Scalars ..
195  LOGICAL UPPER
196  INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS,
197  $ DPOS, OFDPOS, AJETER
198  COMPLEX*16 CTMP
199 * ..
200 * .. External Subroutines ..
201  EXTERNAL zlarfg, zlarfx, zlarfy
202 * ..
203 * .. Intrinsic Functions ..
204  INTRINSIC dconjg, mod
205 * .. External Functions ..
206  LOGICAL LSAME
207  EXTERNAL lsame
208 * ..
209 * ..
210 * .. Executable Statements ..
211 *
212  ajeter = ib + ldvt
213  upper = lsame( uplo, 'U' )
214 
215  IF( upper ) THEN
216  dpos = 2 * nb + 1
217  ofdpos = 2 * nb
218  ELSE
219  dpos = 1
220  ofdpos = 2
221  ENDIF
222 
223 *
224 * Upper case
225 *
226  IF( upper ) THEN
227 *
228  IF( wantz ) THEN
229  vpos = mod( sweep-1, 2 ) * n + st
230  taupos = mod( sweep-1, 2 ) * n + st
231  ELSE
232  vpos = mod( sweep-1, 2 ) * n + st
233  taupos = mod( sweep-1, 2 ) * n + st
234  ENDIF
235 *
236  IF( ttype.EQ.1 ) THEN
237  lm = ed - st + 1
238 *
239  v( vpos ) = one
240  DO 10 i = 1, lm-1
241  v( vpos+i ) = dconjg( a( ofdpos-i, st+i ) )
242  a( ofdpos-i, st+i ) = zero
243  10 CONTINUE
244  ctmp = dconjg( a( ofdpos, st ) )
245  CALL zlarfg( lm, ctmp, v( vpos+1 ), 1,
246  $ tau( taupos ) )
247  a( ofdpos, st ) = ctmp
248 *
249  lm = ed - st + 1
250  CALL zlarfy( uplo, lm, v( vpos ), 1,
251  $ dconjg( tau( taupos ) ),
252  $ a( dpos, st ), lda-1, work)
253  ENDIF
254 *
255  IF( ttype.EQ.3 ) THEN
256 *
257  lm = ed - st + 1
258  CALL zlarfy( uplo, lm, v( vpos ), 1,
259  $ dconjg( tau( taupos ) ),
260  $ a( dpos, st ), lda-1, work)
261  ENDIF
262 *
263  IF( ttype.EQ.2 ) THEN
264  j1 = ed+1
265  j2 = min( ed+nb, n )
266  ln = ed-st+1
267  lm = j2-j1+1
268  IF( lm.GT.0) THEN
269  CALL zlarfx( 'Left', ln, lm, v( vpos ),
270  $ dconjg( tau( taupos ) ),
271  $ a( dpos-nb, j1 ), lda-1, work)
272 *
273  IF( wantz ) THEN
274  vpos = mod( sweep-1, 2 ) * n + j1
275  taupos = mod( sweep-1, 2 ) * n + j1
276  ELSE
277  vpos = mod( sweep-1, 2 ) * n + j1
278  taupos = mod( sweep-1, 2 ) * n + j1
279  ENDIF
280 *
281  v( vpos ) = one
282  DO 30 i = 1, lm-1
283  v( vpos+i ) =
284  $ dconjg( a( dpos-nb-i, j1+i ) )
285  a( dpos-nb-i, j1+i ) = zero
286  30 CONTINUE
287  ctmp = dconjg( a( dpos-nb, j1 ) )
288  CALL zlarfg( lm, ctmp, v( vpos+1 ), 1, tau( taupos ) )
289  a( dpos-nb, j1 ) = ctmp
290 *
291  CALL zlarfx( 'Right', ln-1, lm, v( vpos ),
292  $ tau( taupos ),
293  $ a( dpos-nb+1, j1 ), lda-1, work)
294  ENDIF
295  ENDIF
296 *
297 * Lower case
298 *
299  ELSE
300 *
301  IF( wantz ) THEN
302  vpos = mod( sweep-1, 2 ) * n + st
303  taupos = mod( sweep-1, 2 ) * n + st
304  ELSE
305  vpos = mod( sweep-1, 2 ) * n + st
306  taupos = mod( sweep-1, 2 ) * n + st
307  ENDIF
308 *
309  IF( ttype.EQ.1 ) THEN
310  lm = ed - st + 1
311 *
312  v( vpos ) = one
313  DO 20 i = 1, lm-1
314  v( vpos+i ) = a( ofdpos+i, st-1 )
315  a( ofdpos+i, st-1 ) = zero
316  20 CONTINUE
317  CALL zlarfg( lm, a( ofdpos, st-1 ), v( vpos+1 ), 1,
318  $ tau( taupos ) )
319 *
320  lm = ed - st + 1
321 *
322  CALL zlarfy( uplo, lm, v( vpos ), 1,
323  $ dconjg( tau( taupos ) ),
324  $ a( dpos, st ), lda-1, work)
325 
326  ENDIF
327 *
328  IF( ttype.EQ.3 ) THEN
329  lm = ed - st + 1
330 *
331  CALL zlarfy( uplo, lm, v( vpos ), 1,
332  $ dconjg( tau( taupos ) ),
333  $ a( dpos, st ), lda-1, work)
334 
335  ENDIF
336 *
337  IF( ttype.EQ.2 ) THEN
338  j1 = ed+1
339  j2 = min( ed+nb, n )
340  ln = ed-st+1
341  lm = j2-j1+1
342 *
343  IF( lm.GT.0) THEN
344  CALL zlarfx( 'Right', lm, ln, v( vpos ),
345  $ tau( taupos ), a( dpos+nb, st ),
346  $ lda-1, work)
347 *
348  IF( wantz ) THEN
349  vpos = mod( sweep-1, 2 ) * n + j1
350  taupos = mod( sweep-1, 2 ) * n + j1
351  ELSE
352  vpos = mod( sweep-1, 2 ) * n + j1
353  taupos = mod( sweep-1, 2 ) * n + j1
354  ENDIF
355 *
356  v( vpos ) = one
357  DO 40 i = 1, lm-1
358  v( vpos+i ) = a( dpos+nb+i, st )
359  a( dpos+nb+i, st ) = zero
360  40 CONTINUE
361  CALL zlarfg( lm, a( dpos+nb, st ), v( vpos+1 ), 1,
362  $ tau( taupos ) )
363 *
364  CALL zlarfx( 'Left', lm, ln-1, v( vpos ),
365  $ dconjg( tau( taupos ) ),
366  $ a( dpos+nb-1, st+1 ), lda-1, work)
367 
368  ENDIF
369  ENDIF
370  ENDIF
371 *
372  RETURN
373 *
374 * End of ZHB2ST_KERNELS
375 *
logical function lsame(CA, CB)
LSAME
Definition: lsame.f:53
subroutine zlarfx(SIDE, M, N, V, TAU, C, LDC, WORK)
ZLARFX applies an elementary reflector to a general rectangular matrix, with loop unrolling when the ...
Definition: zlarfx.f:119
subroutine zlarfy(UPLO, N, V, INCV, TAU, C, LDC, WORK)
ZLARFY
Definition: zlarfy.f:108
subroutine zlarfg(N, ALPHA, X, INCX, TAU)
ZLARFG generates an elementary reflector (Householder matrix).
Definition: zlarfg.f:106
Here is the call graph for this function:
Here is the caller graph for this function: