LAPACK  3.10.0
LAPACK: Linear Algebra PACKage

◆ zhetrd_hb2st()

subroutine zhetrd_hb2st ( character  STAGE1,
character  VECT,
character  UPLO,
integer  N,
integer  KD,
complex*16, dimension( ldab, * )  AB,
integer  LDAB,
double precision, dimension( * )  D,
double precision, dimension( * )  E,
complex*16, dimension( * )  HOUS,
integer  LHOUS,
complex*16, dimension( * )  WORK,
integer  LWORK,
integer  INFO 
)

ZHETRD_HB2ST reduces a complex Hermitian band matrix A to real symmetric tridiagonal form T

Download ZHETRD_HB2ST + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 ZHETRD_HB2ST reduces a complex Hermitian band matrix A to real symmetric
 tridiagonal form T by a unitary similarity transformation:
 Q**H * A * Q = T.
Parameters
[in]STAGE1
          STAGE1 is CHARACTER*1
          = 'N':  "No": to mention that the stage 1 of the reduction  
                  from dense to band using the zhetrd_he2hb routine
                  was not called before this routine to reproduce AB. 
                  In other term this routine is called as standalone. 
          = 'Y':  "Yes": to mention that the stage 1 of the 
                  reduction from dense to band using the zhetrd_he2hb 
                  routine has been called to produce AB (e.g., AB is
                  the output of zhetrd_he2hb.
[in]VECT
          VECT is CHARACTER*1
          = 'N':  No need for the Housholder representation, 
                  and thus LHOUS is of size max(1, 4*N);
          = 'V':  the Householder representation is needed to 
                  either generate or to apply Q later on, 
                  then LHOUS is to be queried and computed.
                  (NOT AVAILABLE IN THIS RELEASE).
[in]UPLO
          UPLO is CHARACTER*1
          = 'U':  Upper triangle of A is stored;
          = 'L':  Lower triangle of A is stored.
[in]N
          N is INTEGER
          The order of the matrix A.  N >= 0.
[in]KD
          KD is INTEGER
          The number of superdiagonals of the matrix A if UPLO = 'U',
          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
[in,out]AB
          AB is COMPLEX*16 array, dimension (LDAB,N)
          On entry, the upper or lower triangle of the Hermitian band
          matrix A, stored in the first KD+1 rows of the array.  The
          j-th column of A is stored in the j-th column of the array AB
          as follows:
          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
          On exit, the diagonal elements of AB are overwritten by the
          diagonal elements of the tridiagonal matrix T; if KD > 0, the
          elements on the first superdiagonal (if UPLO = 'U') or the
          first subdiagonal (if UPLO = 'L') are overwritten by the
          off-diagonal elements of T; the rest of AB is overwritten by
          values generated during the reduction.
[in]LDAB
          LDAB is INTEGER
          The leading dimension of the array AB.  LDAB >= KD+1.
[out]D
          D is DOUBLE PRECISION array, dimension (N)
          The diagonal elements of the tridiagonal matrix T.
[out]E
          E is DOUBLE PRECISION array, dimension (N-1)
          The off-diagonal elements of the tridiagonal matrix T:
          E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'.
[out]HOUS
          HOUS is COMPLEX*16 array, dimension LHOUS, that
          store the Householder representation.
[in]LHOUS
          LHOUS is INTEGER
          The dimension of the array HOUS. LHOUS = MAX(1, dimension)
          If LWORK = -1, or LHOUS=-1,
          then a query is assumed; the routine
          only calculates the optimal size of the HOUS array, returns
          this value as the first entry of the HOUS array, and no error
          message related to LHOUS is issued by XERBLA.
          LHOUS = MAX(1, dimension) where
          dimension = 4*N if VECT='N'
          not available now if VECT='H'     
[out]WORK
          WORK is COMPLEX*16 array, dimension LWORK.
[in]LWORK
          LWORK is INTEGER
          The dimension of the array WORK. LWORK = MAX(1, dimension)
          If LWORK = -1, or LHOUS=-1,
          then a workspace query is assumed; the routine
          only calculates the optimal size of the WORK array, returns
          this value as the first entry of the WORK array, and no error
          message related to LWORK is issued by XERBLA.
          LWORK = MAX(1, dimension) where
          dimension   = (2KD+1)*N + KD*NTHREADS
          where KD is the blocking size of the reduction,
          FACTOPTNB is the blocking used by the QR or LQ
          algorithm, usually FACTOPTNB=128 is a good choice
          NTHREADS is the number of threads used when
          openMP compilation is enabled, otherwise =1.
[out]INFO
          INFO is INTEGER
          = 0:  successful exit
          < 0:  if INFO = -i, the i-th argument had an illegal value
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Further Details:
  Implemented by Azzam Haidar.

  All details are available on technical report, SC11, SC13 papers.

  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  Parallel reduction to condensed forms for symmetric eigenvalue problems
  using aggregated fine-grained and memory-aware kernels. In Proceedings
  of 2011 International Conference for High Performance Computing,
  Networking, Storage and Analysis (SC '11), New York, NY, USA,
  Article 8 , 11 pages.
  http://doi.acm.org/10.1145/2063384.2063394

  A. Haidar, J. Kurzak, P. Luszczek, 2013.
  An improved parallel singular value algorithm and its implementation 
  for multicore hardware, In Proceedings of 2013 International Conference
  for High Performance Computing, Networking, Storage and Analysis (SC '13).
  Denver, Colorado, USA, 2013.
  Article 90, 12 pages.
  http://doi.acm.org/10.1145/2503210.2503292

  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  A novel hybrid CPU-GPU generalized eigensolver for electronic structure 
  calculations based on fine-grained memory aware tasks.
  International Journal of High Performance Computing Applications.
  Volume 28 Issue 2, Pages 196-209, May 2014.
  http://hpc.sagepub.com/content/28/2/196 

Definition at line 228 of file zhetrd_hb2st.F.

230 *
231 *
232 #if defined(_OPENMP)
233  use omp_lib
234 #endif
235 *
236  IMPLICIT NONE
237 *
238 * -- LAPACK computational routine --
239 * -- LAPACK is a software package provided by Univ. of Tennessee, --
240 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
241 *
242 * .. Scalar Arguments ..
243  CHARACTER STAGE1, UPLO, VECT
244  INTEGER N, KD, LDAB, LHOUS, LWORK, INFO
245 * ..
246 * .. Array Arguments ..
247  DOUBLE PRECISION D( * ), E( * )
248  COMPLEX*16 AB( LDAB, * ), HOUS( * ), WORK( * )
249 * ..
250 *
251 * =====================================================================
252 *
253 * .. Parameters ..
254  DOUBLE PRECISION RZERO
255  COMPLEX*16 ZERO, ONE
256  parameter( rzero = 0.0d+0,
257  $ zero = ( 0.0d+0, 0.0d+0 ),
258  $ one = ( 1.0d+0, 0.0d+0 ) )
259 * ..
260 * .. Local Scalars ..
261  LOGICAL LQUERY, WANTQ, UPPER, AFTERS1
262  INTEGER I, M, K, IB, SWEEPID, MYID, SHIFT, STT, ST,
263  $ ED, STIND, EDIND, BLKLASTIND, COLPT, THED,
264  $ STEPERCOL, GRSIZ, THGRSIZ, THGRNB, THGRID,
265  $ NBTILES, TTYPE, TID, NTHREADS, DEBUG,
266  $ ABDPOS, ABOFDPOS, DPOS, OFDPOS, AWPOS,
267  $ INDA, INDW, APOS, SIZEA, LDA, INDV, INDTAU,
268  $ SIZEV, SIZETAU, LDV, LHMIN, LWMIN
269  DOUBLE PRECISION ABSTMP
270  COMPLEX*16 TMP
271 * ..
272 * .. External Subroutines ..
273  EXTERNAL zhb2st_kernels, zlacpy, zlaset, xerbla
274 * ..
275 * .. Intrinsic Functions ..
276  INTRINSIC min, max, ceiling, dble, real
277 * ..
278 * .. External Functions ..
279  LOGICAL LSAME
280  INTEGER ILAENV2STAGE
281  EXTERNAL lsame, ilaenv2stage
282 * ..
283 * .. Executable Statements ..
284 *
285 * Determine the minimal workspace size required.
286 * Test the input parameters
287 *
288  debug = 0
289  info = 0
290  afters1 = lsame( stage1, 'Y' )
291  wantq = lsame( vect, 'V' )
292  upper = lsame( uplo, 'U' )
293  lquery = ( lwork.EQ.-1 ) .OR. ( lhous.EQ.-1 )
294 *
295 * Determine the block size, the workspace size and the hous size.
296 *
297  ib = ilaenv2stage( 2, 'ZHETRD_HB2ST', vect, n, kd, -1, -1 )
298  lhmin = ilaenv2stage( 3, 'ZHETRD_HB2ST', vect, n, kd, ib, -1 )
299  lwmin = ilaenv2stage( 4, 'ZHETRD_HB2ST', vect, n, kd, ib, -1 )
300 *
301  IF( .NOT.afters1 .AND. .NOT.lsame( stage1, 'N' ) ) THEN
302  info = -1
303  ELSE IF( .NOT.lsame( vect, 'N' ) ) THEN
304  info = -2
305  ELSE IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
306  info = -3
307  ELSE IF( n.LT.0 ) THEN
308  info = -4
309  ELSE IF( kd.LT.0 ) THEN
310  info = -5
311  ELSE IF( ldab.LT.(kd+1) ) THEN
312  info = -7
313  ELSE IF( lhous.LT.lhmin .AND. .NOT.lquery ) THEN
314  info = -11
315  ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
316  info = -13
317  END IF
318 *
319  IF( info.EQ.0 ) THEN
320  hous( 1 ) = lhmin
321  work( 1 ) = lwmin
322  END IF
323 *
324  IF( info.NE.0 ) THEN
325  CALL xerbla( 'ZHETRD_HB2ST', -info )
326  RETURN
327  ELSE IF( lquery ) THEN
328  RETURN
329  END IF
330 *
331 * Quick return if possible
332 *
333  IF( n.EQ.0 ) THEN
334  hous( 1 ) = 1
335  work( 1 ) = 1
336  RETURN
337  END IF
338 *
339 * Determine pointer position
340 *
341  ldv = kd + ib
342  sizetau = 2 * n
343  sizev = 2 * n
344  indtau = 1
345  indv = indtau + sizetau
346  lda = 2 * kd + 1
347  sizea = lda * n
348  inda = 1
349  indw = inda + sizea
350  nthreads = 1
351  tid = 0
352 *
353  IF( upper ) THEN
354  apos = inda + kd
355  awpos = inda
356  dpos = apos + kd
357  ofdpos = dpos - 1
358  abdpos = kd + 1
359  abofdpos = kd
360  ELSE
361  apos = inda
362  awpos = inda + kd + 1
363  dpos = apos
364  ofdpos = dpos + 1
365  abdpos = 1
366  abofdpos = 2
367 
368  ENDIF
369 *
370 * Case KD=0:
371 * The matrix is diagonal. We just copy it (convert to "real" for
372 * complex because D is double and the imaginary part should be 0)
373 * and store it in D. A sequential code here is better or
374 * in a parallel environment it might need two cores for D and E
375 *
376  IF( kd.EQ.0 ) THEN
377  DO 30 i = 1, n
378  d( i ) = dble( ab( abdpos, i ) )
379  30 CONTINUE
380  DO 40 i = 1, n-1
381  e( i ) = rzero
382  40 CONTINUE
383 *
384  hous( 1 ) = 1
385  work( 1 ) = 1
386  RETURN
387  END IF
388 *
389 * Case KD=1:
390 * The matrix is already Tridiagonal. We have to make diagonal
391 * and offdiagonal elements real, and store them in D and E.
392 * For that, for real precision just copy the diag and offdiag
393 * to D and E while for the COMPLEX case the bulge chasing is
394 * performed to convert the hermetian tridiagonal to symmetric
395 * tridiagonal. A simpler conversion formula might be used, but then
396 * updating the Q matrix will be required and based if Q is generated
397 * or not this might complicate the story.
398 *
399  IF( kd.EQ.1 ) THEN
400  DO 50 i = 1, n
401  d( i ) = dble( ab( abdpos, i ) )
402  50 CONTINUE
403 *
404 * make off-diagonal elements real and copy them to E
405 *
406  IF( upper ) THEN
407  DO 60 i = 1, n - 1
408  tmp = ab( abofdpos, i+1 )
409  abstmp = abs( tmp )
410  ab( abofdpos, i+1 ) = abstmp
411  e( i ) = abstmp
412  IF( abstmp.NE.rzero ) THEN
413  tmp = tmp / abstmp
414  ELSE
415  tmp = one
416  END IF
417  IF( i.LT.n-1 )
418  $ ab( abofdpos, i+2 ) = ab( abofdpos, i+2 )*tmp
419 C IF( WANTZ ) THEN
420 C CALL ZSCAL( N, DCONJG( TMP ), Q( 1, I+1 ), 1 )
421 C END IF
422  60 CONTINUE
423  ELSE
424  DO 70 i = 1, n - 1
425  tmp = ab( abofdpos, i )
426  abstmp = abs( tmp )
427  ab( abofdpos, i ) = abstmp
428  e( i ) = abstmp
429  IF( abstmp.NE.rzero ) THEN
430  tmp = tmp / abstmp
431  ELSE
432  tmp = one
433  END IF
434  IF( i.LT.n-1 )
435  $ ab( abofdpos, i+1 ) = ab( abofdpos, i+1 )*tmp
436 C IF( WANTQ ) THEN
437 C CALL ZSCAL( N, TMP, Q( 1, I+1 ), 1 )
438 C END IF
439  70 CONTINUE
440  ENDIF
441 *
442  hous( 1 ) = 1
443  work( 1 ) = 1
444  RETURN
445  END IF
446 *
447 * Main code start here.
448 * Reduce the hermitian band of A to a tridiagonal matrix.
449 *
450  thgrsiz = n
451  grsiz = 1
452  shift = 3
453  nbtiles = ceiling( real(n)/real(kd) )
454  stepercol = ceiling( real(shift)/real(grsiz) )
455  thgrnb = ceiling( real(n-1)/real(thgrsiz) )
456 *
457  CALL zlacpy( "A", kd+1, n, ab, ldab, work( apos ), lda )
458  CALL zlaset( "A", kd, n, zero, zero, work( awpos ), lda )
459 *
460 *
461 * openMP parallelisation start here
462 *
463 #if defined(_OPENMP)
464 !$OMP PARALLEL PRIVATE( TID, THGRID, BLKLASTIND )
465 !$OMP$ PRIVATE( THED, I, M, K, ST, ED, STT, SWEEPID )
466 !$OMP$ PRIVATE( MYID, TTYPE, COLPT, STIND, EDIND )
467 !$OMP$ SHARED ( UPLO, WANTQ, INDV, INDTAU, HOUS, WORK)
468 !$OMP$ SHARED ( N, KD, IB, NBTILES, LDA, LDV, INDA )
469 !$OMP$ SHARED ( STEPERCOL, THGRNB, THGRSIZ, GRSIZ, SHIFT )
470 !$OMP MASTER
471 #endif
472 *
473 * main bulge chasing loop
474 *
475  DO 100 thgrid = 1, thgrnb
476  stt = (thgrid-1)*thgrsiz+1
477  thed = min( (stt + thgrsiz -1), (n-1))
478  DO 110 i = stt, n-1
479  ed = min( i, thed )
480  IF( stt.GT.ed ) EXIT
481  DO 120 m = 1, stepercol
482  st = stt
483  DO 130 sweepid = st, ed
484  DO 140 k = 1, grsiz
485  myid = (i-sweepid)*(stepercol*grsiz)
486  $ + (m-1)*grsiz + k
487  IF ( myid.EQ.1 ) THEN
488  ttype = 1
489  ELSE
490  ttype = mod( myid, 2 ) + 2
491  ENDIF
492 
493  IF( ttype.EQ.2 ) THEN
494  colpt = (myid/2)*kd + sweepid
495  stind = colpt-kd+1
496  edind = min(colpt,n)
497  blklastind = colpt
498  ELSE
499  colpt = ((myid+1)/2)*kd + sweepid
500  stind = colpt-kd+1
501  edind = min(colpt,n)
502  IF( ( stind.GE.edind-1 ).AND.
503  $ ( edind.EQ.n ) ) THEN
504  blklastind = n
505  ELSE
506  blklastind = 0
507  ENDIF
508  ENDIF
509 *
510 * Call the kernel
511 *
512 #if defined(_OPENMP) && _OPENMP >= 201307
513 
514  IF( ttype.NE.1 ) THEN
515 !$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1))
516 !$OMP$ DEPEND(in:WORK(MYID-1))
517 !$OMP$ DEPEND(out:WORK(MYID))
518  tid = omp_get_thread_num()
519  CALL zhb2st_kernels( uplo, wantq, ttype,
520  $ stind, edind, sweepid, n, kd, ib,
521  $ work( inda ), lda,
522  $ hous( indv ), hous( indtau ), ldv,
523  $ work( indw + tid*kd ) )
524 !$OMP END TASK
525  ELSE
526 !$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1))
527 !$OMP$ DEPEND(out:WORK(MYID))
528  tid = omp_get_thread_num()
529  CALL zhb2st_kernels( uplo, wantq, ttype,
530  $ stind, edind, sweepid, n, kd, ib,
531  $ work( inda ), lda,
532  $ hous( indv ), hous( indtau ), ldv,
533  $ work( indw + tid*kd ) )
534 !$OMP END TASK
535  ENDIF
536 #else
537  CALL zhb2st_kernels( uplo, wantq, ttype,
538  $ stind, edind, sweepid, n, kd, ib,
539  $ work( inda ), lda,
540  $ hous( indv ), hous( indtau ), ldv,
541  $ work( indw + tid*kd ) )
542 #endif
543  IF ( blklastind.GE.(n-1) ) THEN
544  stt = stt + 1
545  EXIT
546  ENDIF
547  140 CONTINUE
548  130 CONTINUE
549  120 CONTINUE
550  110 CONTINUE
551  100 CONTINUE
552 *
553 #if defined(_OPENMP)
554 !$OMP END MASTER
555 !$OMP END PARALLEL
556 #endif
557 *
558 * Copy the diagonal from A to D. Note that D is REAL thus only
559 * the Real part is needed, the imaginary part should be zero.
560 *
561  DO 150 i = 1, n
562  d( i ) = dble( work( dpos+(i-1)*lda ) )
563  150 CONTINUE
564 *
565 * Copy the off diagonal from A to E. Note that E is REAL thus only
566 * the Real part is needed, the imaginary part should be zero.
567 *
568  IF( upper ) THEN
569  DO 160 i = 1, n-1
570  e( i ) = dble( work( ofdpos+i*lda ) )
571  160 CONTINUE
572  ELSE
573  DO 170 i = 1, n-1
574  e( i ) = dble( work( ofdpos+(i-1)*lda ) )
575  170 CONTINUE
576  ENDIF
577 *
578  hous( 1 ) = lhmin
579  work( 1 ) = lwmin
580  RETURN
581 *
582 * End of ZHETRD_HB2ST
583 *
integer function ilaenv2stage(ISPEC, NAME, OPTS, N1, N2, N3, N4)
ILAENV2STAGE
Definition: ilaenv2stage.f:149
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:60
logical function lsame(CA, CB)
LSAME
Definition: lsame.f:53
subroutine zlacpy(UPLO, M, N, A, LDA, B, LDB)
ZLACPY copies all or part of one two-dimensional array to another.
Definition: zlacpy.f:103
subroutine zlaset(UPLO, M, N, ALPHA, BETA, A, LDA)
ZLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition: zlaset.f:106
subroutine zhb2st_kernels(UPLO, WANTZ, TTYPE, ST, ED, SWEEP, N, NB, IB, A, LDA, V, TAU, LDVT, WORK)
ZHB2ST_KERNELS
Here is the call graph for this function:
Here is the caller graph for this function: