LAPACK 3.12.1
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ zhetrd_hb2st()

subroutine zhetrd_hb2st ( character stage1,
character vect,
character uplo,
integer n,
integer kd,
complex*16, dimension( ldab, * ) ab,
integer ldab,
double precision, dimension( * ) d,
double precision, dimension( * ) e,
complex*16, dimension( * ) hous,
integer lhous,
complex*16, dimension( * ) work,
integer lwork,
integer info )

ZHETRD_HB2ST reduces a complex Hermitian band matrix A to real symmetric tridiagonal form T

Download ZHETRD_HB2ST + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!> ZHETRD_HB2ST reduces a complex Hermitian band matrix A to real symmetric
!> tridiagonal form T by a unitary similarity transformation:
!> Q**H * A * Q = T.
!> 
Parameters
[in]STAGE1
!>          STAGE1 is CHARACTER*1
!>          = 'N':  : to mention that the stage 1 of the reduction
!>                  from dense to band using the zhetrd_he2hb routine
!>                  was not called before this routine to reproduce AB.
!>                  In other term this routine is called as standalone.
!>          = 'Y':  : to mention that the stage 1 of the
!>                  reduction from dense to band using the zhetrd_he2hb
!>                  routine has been called to produce AB (e.g., AB is
!>                  the output of zhetrd_he2hb.
!> 
[in]VECT
!>          VECT is CHARACTER*1
!>          = 'N':  No need for the Housholder representation,
!>                  and thus LHOUS is of size max(1, 4*N);
!>          = 'V':  the Householder representation is needed to
!>                  either generate or to apply Q later on,
!>                  then LHOUS is to be queried and computed.
!>                  (NOT AVAILABLE IN THIS RELEASE).
!> 
[in]UPLO
!>          UPLO is CHARACTER*1
!>          = 'U':  Upper triangle of A is stored;
!>          = 'L':  Lower triangle of A is stored.
!> 
[in]N
!>          N is INTEGER
!>          The order of the matrix A.  N >= 0.
!> 
[in]KD
!>          KD is INTEGER
!>          The number of superdiagonals of the matrix A if UPLO = 'U',
!>          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
!> 
[in,out]AB
!>          AB is COMPLEX*16 array, dimension (LDAB,N)
!>          On entry, the upper or lower triangle of the Hermitian band
!>          matrix A, stored in the first KD+1 rows of the array.  The
!>          j-th column of A is stored in the j-th column of the array AB
!>          as follows:
!>          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
!>          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
!>          On exit, the diagonal elements of AB are overwritten by the
!>          diagonal elements of the tridiagonal matrix T; if KD > 0, the
!>          elements on the first superdiagonal (if UPLO = 'U') or the
!>          first subdiagonal (if UPLO = 'L') are overwritten by the
!>          off-diagonal elements of T; the rest of AB is overwritten by
!>          values generated during the reduction.
!> 
[in]LDAB
!>          LDAB is INTEGER
!>          The leading dimension of the array AB.  LDAB >= KD+1.
!> 
[out]D
!>          D is DOUBLE PRECISION array, dimension (N)
!>          The diagonal elements of the tridiagonal matrix T.
!> 
[out]E
!>          E is DOUBLE PRECISION array, dimension (N-1)
!>          The off-diagonal elements of the tridiagonal matrix T:
!>          E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'.
!> 
[out]HOUS
!>          HOUS is COMPLEX*16 array, dimension (MAX(1,LHOUS))
!>          Stores the Householder representation.
!> 
[in]LHOUS
!>          LHOUS is INTEGER
!>          The dimension of the array HOUS.
!>          If N = 0 or KD <= 1, LHOUS >= 1, else LHOUS = MAX(1, dimension).
!>
!>          If LWORK = -1, or LHOUS = -1,
!>          then a query is assumed; the routine
!>          only calculates the optimal size of the HOUS array, returns
!>          this value as the first entry of the HOUS array, and no error
!>          message related to LHOUS is issued by XERBLA.
!>          LHOUS = MAX(1, dimension) where
!>          dimension = 4*N if VECT='N'
!>          not available now if VECT='H'
!> 
[out]WORK
!>          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)).
!>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
!> 
[in]LWORK
!>          LWORK is INTEGER
!>          The dimension of the array WORK.
!>          If N = 0 or KD <= 1, LWORK >= 1, else LWORK = MAX(1, dimension).
!>
!>          If LWORK = -1, or LHOUS = -1,
!>          then a workspace query is assumed; the routine
!>          only calculates the optimal size of the WORK array, returns
!>          this value as the first entry of the WORK array, and no error
!>          message related to LWORK is issued by XERBLA.
!>          LWORK = MAX(1, dimension) where
!>          dimension   = (2KD+1)*N + KD*NTHREADS
!>          where KD is the blocking size of the reduction,
!>          FACTOPTNB is the blocking used by the QR or LQ
!>          algorithm, usually FACTOPTNB=128 is a good choice
!>          NTHREADS is the number of threads used when
!>          openMP compilation is enabled, otherwise =1.
!> 
[out]INFO
!>          INFO is INTEGER
!>          = 0:  successful exit
!>          < 0:  if INFO = -i, the i-th argument had an illegal value
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Further Details:
!>
!>  Implemented by Azzam Haidar.
!>
!>  All details are available on technical report, SC11, SC13 papers.
!>
!>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
!>  Parallel reduction to condensed forms for symmetric eigenvalue problems
!>  using aggregated fine-grained and memory-aware kernels. In Proceedings
!>  of 2011 International Conference for High Performance Computing,
!>  Networking, Storage and Analysis (SC '11), New York, NY, USA,
!>  Article 8 , 11 pages.
!>  http://doi.acm.org/10.1145/2063384.2063394
!>
!>  A. Haidar, J. Kurzak, P. Luszczek, 2013.
!>  An improved parallel singular value algorithm and its implementation
!>  for multicore hardware, In Proceedings of 2013 International Conference
!>  for High Performance Computing, Networking, Storage and Analysis (SC '13).
!>  Denver, Colorado, USA, 2013.
!>  Article 90, 12 pages.
!>  http://doi.acm.org/10.1145/2503210.2503292
!>
!>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
!>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure
!>  calculations based on fine-grained memory aware tasks.
!>  International Journal of High Performance Computing Applications.
!>  Volume 28 Issue 2, Pages 196-209, May 2014.
!>  http://hpc.sagepub.com/content/28/2/196
!>
!> 

Definition at line 231 of file zhetrd_hb2st.F.

233*
234*
235#if defined(_OPENMP)
236 use omp_lib
237#endif
238*
239 IMPLICIT NONE
240*
241* -- LAPACK computational routine --
242* -- LAPACK is a software package provided by Univ. of Tennessee, --
243* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
244*
245* .. Scalar Arguments ..
246 CHARACTER STAGE1, UPLO, VECT
247 INTEGER N, KD, LDAB, LHOUS, LWORK, INFO
248* ..
249* .. Array Arguments ..
250 DOUBLE PRECISION D( * ), E( * )
251 COMPLEX*16 AB( LDAB, * ), HOUS( * ), WORK( * )
252* ..
253*
254* =====================================================================
255*
256* .. Parameters ..
257 DOUBLE PRECISION RZERO
258 COMPLEX*16 ZERO, ONE
259 parameter( rzero = 0.0d+0,
260 $ zero = ( 0.0d+0, 0.0d+0 ),
261 $ one = ( 1.0d+0, 0.0d+0 ) )
262* ..
263* .. Local Scalars ..
264 LOGICAL LQUERY, WANTQ, UPPER, AFTERS1
265 INTEGER I, M, K, IB, SWEEPID, MYID, SHIFT, STT, ST,
266 $ ED, STIND, EDIND, BLKLASTIND, COLPT, THED,
267 $ STEPERCOL, GRSIZ, THGRSIZ, THGRNB, THGRID,
268 $ NBTILES, TTYPE, TID, NTHREADS,
269 $ ABDPOS, ABOFDPOS, DPOS, OFDPOS, AWPOS,
270 $ INDA, INDW, APOS, SIZEA, LDA, INDV, INDTAU,
271 $ SIZEV, SIZETAU, LDV, LHMIN, LWMIN
272 DOUBLE PRECISION ABSTMP
273 COMPLEX*16 TMP
274* ..
275* .. External Subroutines ..
276 EXTERNAL zhb2st_kernels, zlacpy,
277 $ zlaset, xerbla
278* ..
279* .. Intrinsic Functions ..
280 INTRINSIC min, max, ceiling, dble, real
281* ..
282* .. External Functions ..
283 LOGICAL LSAME
284 INTEGER ILAENV2STAGE
285 EXTERNAL lsame, ilaenv2stage
286* ..
287* .. Executable Statements ..
288*
289* Determine the minimal workspace size required.
290* Test the input parameters
291*
292 info = 0
293 afters1 = lsame( stage1, 'Y' )
294 wantq = lsame( vect, 'V' )
295 upper = lsame( uplo, 'U' )
296 lquery = ( lwork.EQ.-1 ) .OR. ( lhous.EQ.-1 )
297*
298* Determine the block size, the workspace size and the hous size.
299*
300 ib = ilaenv2stage( 2, 'ZHETRD_HB2ST', vect, n, kd,
301 $ -1, -1 )
302 IF( n.EQ.0 .OR. kd.LE.1 ) THEN
303 lhmin = 1
304 lwmin = 1
305 ELSE
306 lhmin = ilaenv2stage( 3, 'ZHETRD_HB2ST', vect, n, kd, ib,
307 $ -1 )
308 lwmin = ilaenv2stage( 4, 'ZHETRD_HB2ST', vect, n, kd, ib,
309 $ -1 )
310 END IF
311*
312 IF( .NOT.afters1 .AND. .NOT.lsame( stage1, 'N' ) ) THEN
313 info = -1
314 ELSE IF( .NOT.lsame( vect, 'N' ) ) THEN
315 info = -2
316 ELSE IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
317 info = -3
318 ELSE IF( n.LT.0 ) THEN
319 info = -4
320 ELSE IF( kd.LT.0 ) THEN
321 info = -5
322 ELSE IF( ldab.LT.(kd+1) ) THEN
323 info = -7
324 ELSE IF( lhous.LT.lhmin .AND. .NOT.lquery ) THEN
325 info = -11
326 ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
327 info = -13
328 END IF
329*
330 IF( info.EQ.0 ) THEN
331 hous( 1 ) = lhmin
332 work( 1 ) = lwmin
333 END IF
334*
335 IF( info.NE.0 ) THEN
336 CALL xerbla( 'ZHETRD_HB2ST', -info )
337 RETURN
338 ELSE IF( lquery ) THEN
339 RETURN
340 END IF
341*
342* Quick return if possible
343*
344 IF( n.EQ.0 ) THEN
345 hous( 1 ) = 1
346 work( 1 ) = 1
347 RETURN
348 END IF
349*
350* Determine pointer position
351*
352 ldv = kd + ib
353 sizetau = 2 * n
354 sizev = 2 * n
355 indtau = 1
356 indv = indtau + sizetau
357 lda = 2 * kd + 1
358 sizea = lda * n
359 inda = 1
360 indw = inda + sizea
361 nthreads = 1
362 tid = 0
363*
364 IF( upper ) THEN
365 apos = inda + kd
366 awpos = inda
367 dpos = apos + kd
368 ofdpos = dpos - 1
369 abdpos = kd + 1
370 abofdpos = kd
371 ELSE
372 apos = inda
373 awpos = inda + kd + 1
374 dpos = apos
375 ofdpos = dpos + 1
376 abdpos = 1
377 abofdpos = 2
378
379 ENDIF
380*
381* Case KD=0:
382* The matrix is diagonal. We just copy it (convert to "real" for
383* complex because D is double and the imaginary part should be 0)
384* and store it in D. A sequential code here is better or
385* in a parallel environment it might need two cores for D and E
386*
387 IF( kd.EQ.0 ) THEN
388 DO 30 i = 1, n
389 d( i ) = dble( ab( abdpos, i ) )
390 30 CONTINUE
391 DO 40 i = 1, n-1
392 e( i ) = rzero
393 40 CONTINUE
394*
395 hous( 1 ) = 1
396 work( 1 ) = 1
397 RETURN
398 END IF
399*
400* Case KD=1:
401* The matrix is already Tridiagonal. We have to make diagonal
402* and offdiagonal elements real, and store them in D and E.
403* For that, for real precision just copy the diag and offdiag
404* to D and E while for the COMPLEX case the bulge chasing is
405* performed to convert the hermetian tridiagonal to symmetric
406* tridiagonal. A simpler conversion formula might be used, but then
407* updating the Q matrix will be required and based if Q is generated
408* or not this might complicate the story.
409*
410 IF( kd.EQ.1 ) THEN
411 DO 50 i = 1, n
412 d( i ) = dble( ab( abdpos, i ) )
413 50 CONTINUE
414*
415* make off-diagonal elements real and copy them to E
416*
417 IF( upper ) THEN
418 DO 60 i = 1, n - 1
419 tmp = ab( abofdpos, i+1 )
420 abstmp = abs( tmp )
421 ab( abofdpos, i+1 ) = abstmp
422 e( i ) = abstmp
423 IF( abstmp.NE.rzero ) THEN
424 tmp = tmp / abstmp
425 ELSE
426 tmp = one
427 END IF
428 IF( i.LT.n-1 )
429 $ ab( abofdpos, i+2 ) = ab( abofdpos, i+2 )*tmp
430C IF( WANTZ ) THEN
431C CALL ZSCAL( N, DCONJG( TMP ), Q( 1, I+1 ), 1 )
432C END IF
433 60 CONTINUE
434 ELSE
435 DO 70 i = 1, n - 1
436 tmp = ab( abofdpos, i )
437 abstmp = abs( tmp )
438 ab( abofdpos, i ) = abstmp
439 e( i ) = abstmp
440 IF( abstmp.NE.rzero ) THEN
441 tmp = tmp / abstmp
442 ELSE
443 tmp = one
444 END IF
445 IF( i.LT.n-1 )
446 $ ab( abofdpos, i+1 ) = ab( abofdpos, i+1 )*tmp
447C IF( WANTQ ) THEN
448C CALL ZSCAL( N, TMP, Q( 1, I+1 ), 1 )
449C END IF
450 70 CONTINUE
451 ENDIF
452*
453 hous( 1 ) = 1
454 work( 1 ) = 1
455 RETURN
456 END IF
457*
458* Main code start here.
459* Reduce the hermitian band of A to a tridiagonal matrix.
460*
461 thgrsiz = n
462 grsiz = 1
463 shift = 3
464 nbtiles = ceiling( real(n)/real(kd) )
465 stepercol = ceiling( real(shift)/real(grsiz) )
466 thgrnb = ceiling( real(n-1)/real(thgrsiz) )
467*
468 CALL zlacpy( "A", kd+1, n, ab, ldab, work( apos ), lda )
469 CALL zlaset( "A", kd, n, zero, zero, work( awpos ), lda )
470*
471*
472* openMP parallelisation start here
473*
474#if defined(_OPENMP)
475!$OMP PARALLEL PRIVATE( TID, THGRID, BLKLASTIND )
476!$OMP$ PRIVATE( THED, I, M, K, ST, ED, STT, SWEEPID )
477!$OMP$ PRIVATE( MYID, TTYPE, COLPT, STIND, EDIND )
478!$OMP$ SHARED ( UPLO, WANTQ, INDV, INDTAU, HOUS, WORK)
479!$OMP$ SHARED ( N, KD, IB, NBTILES, LDA, LDV, INDA )
480!$OMP$ SHARED ( STEPERCOL, THGRNB, THGRSIZ, GRSIZ, SHIFT )
481!$OMP MASTER
482#endif
483*
484* main bulge chasing loop
485*
486 DO 100 thgrid = 1, thgrnb
487 stt = (thgrid-1)*thgrsiz+1
488 thed = min( (stt + thgrsiz -1), (n-1))
489 DO 110 i = stt, n-1
490 ed = min( i, thed )
491 IF( stt.GT.ed ) EXIT
492 DO 120 m = 1, stepercol
493 st = stt
494 DO 130 sweepid = st, ed
495 DO 140 k = 1, grsiz
496 myid = (i-sweepid)*(stepercol*grsiz)
497 $ + (m-1)*grsiz + k
498 IF ( myid.EQ.1 ) THEN
499 ttype = 1
500 ELSE
501 ttype = mod( myid, 2 ) + 2
502 ENDIF
503
504 IF( ttype.EQ.2 ) THEN
505 colpt = (myid/2)*kd + sweepid
506 stind = colpt-kd+1
507 edind = min(colpt,n)
508 blklastind = colpt
509 ELSE
510 colpt = ((myid+1)/2)*kd + sweepid
511 stind = colpt-kd+1
512 edind = min(colpt,n)
513 IF( ( stind.GE.edind-1 ).AND.
514 $ ( edind.EQ.n ) ) THEN
515 blklastind = n
516 ELSE
517 blklastind = 0
518 ENDIF
519 ENDIF
520*
521* Call the kernel
522*
523#if defined(_OPENMP) && _OPENMP >= 201307
524
525 IF( ttype.NE.1 ) THEN
526!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1))
527!$OMP$ DEPEND(in:WORK(MYID-1))
528!$OMP$ DEPEND(out:WORK(MYID))
529 tid = omp_get_thread_num()
530 CALL zhb2st_kernels(
531 $ uplo, wantq, ttype,
532 $ stind, edind, sweepid, n, kd, ib,
533 $ work( inda ), lda,
534 $ hous( indv ), hous( indtau ), ldv,
535 $ work( indw + tid*kd ) )
536!$OMP END TASK
537 ELSE
538!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1))
539!$OMP$ DEPEND(out:WORK(MYID))
540 tid = omp_get_thread_num()
541 CALL zhb2st_kernels(
542 $ uplo, wantq, ttype,
543 $ stind, edind, sweepid, n, kd, ib,
544 $ work( inda ), lda,
545 $ hous( indv ), hous( indtau ), ldv,
546 $ work( indw + tid*kd ) )
547!$OMP END TASK
548 ENDIF
549#else
550 CALL zhb2st_kernels(
551 $ uplo, wantq, ttype,
552 $ stind, edind, sweepid, n, kd, ib,
553 $ work( inda ), lda,
554 $ hous( indv ), hous( indtau ), ldv,
555 $ work( indw ) )
556#endif
557 IF ( blklastind.GE.(n-1) ) THEN
558 stt = stt + 1
559 EXIT
560 ENDIF
561 140 CONTINUE
562 130 CONTINUE
563 120 CONTINUE
564 110 CONTINUE
565 100 CONTINUE
566*
567#if defined(_OPENMP)
568!$OMP END MASTER
569!$OMP END PARALLEL
570#endif
571*
572* Copy the diagonal from A to D. Note that D is REAL thus only
573* the Real part is needed, the imaginary part should be zero.
574*
575 DO 150 i = 1, n
576 d( i ) = dble( work( dpos+(i-1)*lda ) )
577 150 CONTINUE
578*
579* Copy the off diagonal from A to E. Note that E is REAL thus only
580* the Real part is needed, the imaginary part should be zero.
581*
582 IF( upper ) THEN
583 DO 160 i = 1, n-1
584 e( i ) = dble( work( ofdpos+i*lda ) )
585 160 CONTINUE
586 ELSE
587 DO 170 i = 1, n-1
588 e( i ) = dble( work( ofdpos+(i-1)*lda ) )
589 170 CONTINUE
590 ENDIF
591*
592 work( 1 ) = lwmin
593 RETURN
594*
595* End of ZHETRD_HB2ST
596*
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine zhb2st_kernels(uplo, wantz, ttype, st, ed, sweep, n, nb, ib, a, lda, v, tau, ldvt, work)
ZHB2ST_KERNELS
integer function ilaenv2stage(ispec, name, opts, n1, n2, n3, n4)
ILAENV2STAGE
subroutine zlacpy(uplo, m, n, a, lda, b, ldb)
ZLACPY copies all or part of one two-dimensional array to another.
Definition zlacpy.f:101
subroutine zlaset(uplo, m, n, alpha, beta, a, lda)
ZLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition zlaset.f:104
logical function lsame(ca, cb)
LSAME
Definition lsame.f:48
Here is the call graph for this function:
Here is the caller graph for this function: