LAPACK  3.8.0
LAPACK: Linear Algebra PACKage

◆ chb2st_kernels()

subroutine chb2st_kernels ( character  UPLO,
logical  WANTZ,
integer  TTYPE,
integer  ST,
integer  ED,
integer  SWEEP,
integer  N,
integer  NB,
integer  IB,
complex, dimension( lda, * )  A,
integer  LDA,
complex, dimension( * )  V,
complex, dimension( * )  TAU,
integer  LDVT,
complex, dimension( * )  WORK 
)

CHB2ST_KERNELS

Download CHB2ST_KERNELS + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 CHB2ST_KERNELS is an internal routine used by the CHETRD_HB2ST
 subroutine.
Parameters
[in]UPLO
          UPLO is CHARACTER*1
[in]WANTZ
          WANTZ is LOGICAL which indicates whether only eigenvalues are
          requested or both eigenvalues and eigenvectors.
[in]TTYPE
          TTYPE is INTEGER
[in]ST
          ST is INTEGER
          internal parameter for indices.
[in]ED
          ED is INTEGER
          internal parameter for indices.
[in]SWEEP
          SWEEP is INTEGER
          internal parameter for indices.
[in]N
          N is INTEGER. The order of the matrix A.
[in]NB
          NB is INTEGER. The size of the band.
[in]IB
          IB is INTEGER.
[in,out]A
          A is COMPLEX array. A pointer to the matrix A.
[in]LDA
          LDA is INTEGER. The leading dimension of the matrix A.
[out]V
          V is COMPLEX array, dimension 2*n if only eigenvalues are
          requested; if vectors are requested, the dimension is to be queried.
[out]TAU
          TAU is COMPLEX array, dimension (2*n).
          The scalar factors of the Householder reflectors are stored
          in this array.
[in]LDVT
          LDVT is INTEGER.
[in]WORK
          WORK is COMPLEX array. Workspace of size nb.
Further Details:
  Implemented by Azzam Haidar.

  All details are available in the technical report and in the SC11 and SC13 papers.

  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  Parallel reduction to condensed forms for symmetric eigenvalue problems
  using aggregated fine-grained and memory-aware kernels. In Proceedings
  of 2011 International Conference for High Performance Computing,
  Networking, Storage and Analysis (SC '11), New York, NY, USA,
  Article 8, 11 pages.
  http://doi.acm.org/10.1145/2063384.2063394

  A. Haidar, J. Kurzak, P. Luszczek, 2013.
  An improved parallel singular value algorithm and its implementation 
  for multicore hardware, In Proceedings of 2013 International Conference
  for High Performance Computing, Networking, Storage and Analysis (SC '13).
  Denver, Colorado, USA, 2013.
  Article 90, 12 pages.
  http://doi.acm.org/10.1145/2503210.2503292

  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  A novel hybrid CPU-GPU generalized eigensolver for electronic structure 
  calculations based on fine-grained memory aware tasks.
  International Journal of High Performance Computing Applications.
  Volume 28 Issue 2, Pages 196-209, May 2014.
  http://hpc.sagepub.com/content/28/2/196 

Definition at line 170 of file chb2st_kernels.f.

170 *
171  IMPLICIT NONE
172 *
173 * -- LAPACK computational routine (version 3.7.1) --
174 * -- LAPACK is a software package provided by Univ. of Tennessee, --
175 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
176 * June 2017
177 *
178 * .. Scalar Arguments ..
179  CHARACTER uplo
180  LOGICAL wantz
181  INTEGER ttype, st, ed, sweep, n, nb, ib, lda, ldvt
182 * ..
183 * .. Array Arguments ..
184  COMPLEX a( lda, * ), v( * ),
185  $ tau( * ), work( * )
186 * ..
187 *
188 * =====================================================================
189 *
190 * .. Parameters ..
191  COMPLEX zero, one
192  parameter( zero = ( 0.0e+0, 0.0e+0 ),
193  $ one = ( 1.0e+0, 0.0e+0 ) )
194 * ..
195 * .. Local Scalars ..
196  LOGICAL upper
197  INTEGER i, j1, j2, lm, ln, vpos, taupos,
198  $ dpos, ofdpos, ajeter
199  COMPLEX ctmp
200 * ..
201 * .. External Subroutines ..
202  EXTERNAL clarfg, clarfx, clarfy
203 * ..
204 * .. Intrinsic Functions ..
205  INTRINSIC conjg, mod
206 * .. External Functions ..
207  LOGICAL lsame
208  EXTERNAL lsame
209 * ..
210 * ..
211 * .. Executable Statements ..
212 *
213  ajeter = ib + ldvt
214  upper = lsame( uplo, 'U' )
215 
216  IF( upper ) THEN
217  dpos = 2 * nb + 1
218  ofdpos = 2 * nb
219  ELSE
220  dpos = 1
221  ofdpos = 2
222  ENDIF
223 
224 *
225 * Upper case
226 *
227  IF( upper ) THEN
228 *
229  IF( wantz ) THEN
230  vpos = mod( sweep-1, 2 ) * n + st
231  taupos = mod( sweep-1, 2 ) * n + st
232  ELSE
233  vpos = mod( sweep-1, 2 ) * n + st
234  taupos = mod( sweep-1, 2 ) * n + st
235  ENDIF
236 *
237  IF( ttype.EQ.1 ) THEN
238  lm = ed - st + 1
239 *
240  v( vpos ) = one
241  DO 10 i = 1, lm-1
242  v( vpos+i ) = conjg( a( ofdpos-i, st+i ) )
243  a( ofdpos-i, st+i ) = zero
244  10 CONTINUE
245  ctmp = conjg( a( ofdpos, st ) )
246  CALL clarfg( lm, ctmp, v( vpos+1 ), 1,
247  $ tau( taupos ) )
248  a( ofdpos, st ) = ctmp
249 *
250  lm = ed - st + 1
251  CALL clarfy( uplo, lm, v( vpos ), 1,
252  $ conjg( tau( taupos ) ),
253  $ a( dpos, st ), lda-1, work)
254  ENDIF
255 *
256  IF( ttype.EQ.3 ) THEN
257 *
258  lm = ed - st + 1
259  CALL clarfy( uplo, lm, v( vpos ), 1,
260  $ conjg( tau( taupos ) ),
261  $ a( dpos, st ), lda-1, work)
262  ENDIF
263 *
264  IF( ttype.EQ.2 ) THEN
265  j1 = ed+1
266  j2 = min( ed+nb, n )
267  ln = ed-st+1
268  lm = j2-j1+1
269  IF( lm.GT.0) THEN
270  CALL clarfx( 'Left', ln, lm, v( vpos ),
271  $ conjg( tau( taupos ) ),
272  $ a( dpos-nb, j1 ), lda-1, work)
273 *
274  IF( wantz ) THEN
275  vpos = mod( sweep-1, 2 ) * n + j1
276  taupos = mod( sweep-1, 2 ) * n + j1
277  ELSE
278  vpos = mod( sweep-1, 2 ) * n + j1
279  taupos = mod( sweep-1, 2 ) * n + j1
280  ENDIF
281 *
282  v( vpos ) = one
283  DO 30 i = 1, lm-1
284  v( vpos+i ) =
285  $ conjg( a( dpos-nb-i, j1+i ) )
286  a( dpos-nb-i, j1+i ) = zero
287  30 CONTINUE
288  ctmp = conjg( a( dpos-nb, j1 ) )
289  CALL clarfg( lm, ctmp, v( vpos+1 ), 1, tau( taupos ) )
290  a( dpos-nb, j1 ) = ctmp
291 *
292  CALL clarfx( 'Right', ln-1, lm, v( vpos ),
293  $ tau( taupos ),
294  $ a( dpos-nb+1, j1 ), lda-1, work)
295  ENDIF
296  ENDIF
297 *
298 * Lower case
299 *
300  ELSE
301 *
302  IF( wantz ) THEN
303  vpos = mod( sweep-1, 2 ) * n + st
304  taupos = mod( sweep-1, 2 ) * n + st
305  ELSE
306  vpos = mod( sweep-1, 2 ) * n + st
307  taupos = mod( sweep-1, 2 ) * n + st
308  ENDIF
309 *
310  IF( ttype.EQ.1 ) THEN
311  lm = ed - st + 1
312 *
313  v( vpos ) = one
314  DO 20 i = 1, lm-1
315  v( vpos+i ) = a( ofdpos+i, st-1 )
316  a( ofdpos+i, st-1 ) = zero
317  20 CONTINUE
318  CALL clarfg( lm, a( ofdpos, st-1 ), v( vpos+1 ), 1,
319  $ tau( taupos ) )
320 *
321  lm = ed - st + 1
322 *
323  CALL clarfy( uplo, lm, v( vpos ), 1,
324  $ conjg( tau( taupos ) ),
325  $ a( dpos, st ), lda-1, work)
326 
327  ENDIF
328 *
329  IF( ttype.EQ.3 ) THEN
330  lm = ed - st + 1
331 *
332  CALL clarfy( uplo, lm, v( vpos ), 1,
333  $ conjg( tau( taupos ) ),
334  $ a( dpos, st ), lda-1, work)
335 
336  ENDIF
337 *
338  IF( ttype.EQ.2 ) THEN
339  j1 = ed+1
340  j2 = min( ed+nb, n )
341  ln = ed-st+1
342  lm = j2-j1+1
343 *
344  IF( lm.GT.0) THEN
345  CALL clarfx( 'Right', lm, ln, v( vpos ),
346  $ tau( taupos ), a( dpos+nb, st ),
347  $ lda-1, work)
348 *
349  IF( wantz ) THEN
350  vpos = mod( sweep-1, 2 ) * n + j1
351  taupos = mod( sweep-1, 2 ) * n + j1
352  ELSE
353  vpos = mod( sweep-1, 2 ) * n + j1
354  taupos = mod( sweep-1, 2 ) * n + j1
355  ENDIF
356 *
357  v( vpos ) = one
358  DO 40 i = 1, lm-1
359  v( vpos+i ) = a( dpos+nb+i, st )
360  a( dpos+nb+i, st ) = zero
361  40 CONTINUE
362  CALL clarfg( lm, a( dpos+nb, st ), v( vpos+1 ), 1,
363  $ tau( taupos ) )
364 *
365  CALL clarfx( 'Left', lm, ln-1, v( vpos ),
366  $ conjg( tau( taupos ) ),
367  $ a( dpos+nb-1, st+1 ), lda-1, work)
368 
369  ENDIF
370  ENDIF
371  ENDIF
372 *
373  RETURN
374 *
375 * END OF CHB2ST_KERNELS
376 *
subroutine clarfx(SIDE, M, N, V, TAU, C, LDC, WORK)
CLARFX applies an elementary reflector to a general rectangular matrix, with loop unrolling when the ...
Definition: clarfx.f:121
subroutine clarfg(N, ALPHA, X, INCX, TAU)
CLARFG generates an elementary reflector (Householder matrix).
Definition: clarfg.f:108
logical function lsame(CA, CB)
LSAME
Definition: lsame.f:55
subroutine clarfy(UPLO, N, V, INCV, TAU, C, LDC, WORK)
CLARFY
Definition: clarfy.f:110
Here is the call graph for this function: