LAPACK 3.12.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ ssytrd_sb2st()

subroutine ssytrd_sb2st ( character  stage1,
character  vect,
character  uplo,
integer  n,
integer  kd,
real, dimension( ldab, * )  ab,
integer  ldab,
real, dimension( * )  d,
real, dimension( * )  e,
real, dimension( * )  hous,
integer  lhous,
real, dimension( * )  work,
integer  lwork,
integer  info 
)

SSYTRD_SB2ST reduces a real symmetric band matrix A to real symmetric tridiagonal form T

Download SSYTRD_SB2ST + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 SSYTRD_SB2ST reduces a real symmetric band matrix A to real symmetric
 tridiagonal form T by a orthogonal similarity transformation:
 Q**T * A * Q = T.
Parameters
[in]STAGE1
          STAGE1 is CHARACTER*1
          = 'N':  "No": to mention that the stage 1 of the reduction
                  from dense to band using the ssytrd_sy2sb routine
                  was not called before this routine to reproduce AB.
                  In other term this routine is called as standalone.
          = 'Y':  "Yes": to mention that the stage 1 of the
                  reduction from dense to band using the ssytrd_sy2sb
                  routine has been called to produce AB (e.g., AB is
                  the output of ssytrd_sy2sb.
[in]VECT
          VECT is CHARACTER*1
          = 'N':  No need for the Housholder representation,
                  and thus LHOUS is of size max(1, 4*N);
          = 'V':  the Householder representation is needed to
                  either generate or to apply Q later on,
                  then LHOUS is to be queried and computed.
                  (NOT AVAILABLE IN THIS RELEASE).
[in]UPLO
          UPLO is CHARACTER*1
          = 'U':  Upper triangle of A is stored;
          = 'L':  Lower triangle of A is stored.
[in]N
          N is INTEGER
          The order of the matrix A.  N >= 0.
[in]KD
          KD is INTEGER
          The number of superdiagonals of the matrix A if UPLO = 'U',
          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
[in,out]AB
          AB is REAL array, dimension (LDAB,N)
          On entry, the upper or lower triangle of the symmetric band
          matrix A, stored in the first KD+1 rows of the array.  The
          j-th column of A is stored in the j-th column of the array AB
          as follows:
          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
          On exit, the diagonal elements of AB are overwritten by the
          diagonal elements of the tridiagonal matrix T; if KD > 0, the
          elements on the first superdiagonal (if UPLO = 'U') or the
          first subdiagonal (if UPLO = 'L') are overwritten by the
          off-diagonal elements of T; the rest of AB is overwritten by
          values generated during the reduction.
[in]LDAB
          LDAB is INTEGER
          The leading dimension of the array AB.  LDAB >= KD+1.
[out]D
          D is REAL array, dimension (N)
          The diagonal elements of the tridiagonal matrix T.
[out]E
          E is REAL array, dimension (N-1)
          The off-diagonal elements of the tridiagonal matrix T:
          E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'.
[out]HOUS
          HOUS is REAL array, dimension LHOUS, that
          store the Householder representation.
[in]LHOUS
          LHOUS is INTEGER
          The dimension of the array HOUS. LHOUS = MAX(1, dimension)
          If LWORK = -1, or LHOUS=-1,
          then a query is assumed; the routine
          only calculates the optimal size of the HOUS array, returns
          this value as the first entry of the HOUS array, and no error
          message related to LHOUS is issued by XERBLA.
          LHOUS = MAX(1, dimension) where
          dimension = 4*N if VECT='N'
          not available now if VECT='H'
[out]WORK
          WORK is REAL array, dimension LWORK.
[in]LWORK
          LWORK is INTEGER
          The dimension of the array WORK. LWORK = MAX(1, dimension)
          If LWORK = -1, or LHOUS=-1,
          then a workspace query is assumed; the routine
          only calculates the optimal size of the WORK array, returns
          this value as the first entry of the WORK array, and no error
          message related to LWORK is issued by XERBLA.
          LWORK = MAX(1, dimension) where
          dimension   = (2KD+1)*N + KD*NTHREADS
          where KD is the blocking size of the reduction,
          FACTOPTNB is the blocking used by the QR or LQ
          algorithm, usually FACTOPTNB=128 is a good choice
          NTHREADS is the number of threads used when
          openMP compilation is enabled, otherwise =1.
[out]INFO
          INFO is INTEGER
          = 0:  successful exit
          < 0:  if INFO = -i, the i-th argument had an illegal value
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Further Details:
  Implemented by Azzam Haidar.

  All details are available on technical report, SC11, SC13 papers.

  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  Parallel reduction to condensed forms for symmetric eigenvalue problems
  using aggregated fine-grained and memory-aware kernels. In Proceedings
  of 2011 International Conference for High Performance Computing,
  Networking, Storage and Analysis (SC '11), New York, NY, USA,
  Article 8 , 11 pages.
  http://doi.acm.org/10.1145/2063384.2063394

  A. Haidar, J. Kurzak, P. Luszczek, 2013.
  An improved parallel singular value algorithm and its implementation
  for multicore hardware, In Proceedings of 2013 International Conference
  for High Performance Computing, Networking, Storage and Analysis (SC '13).
  Denver, Colorado, USA, 2013.
  Article 90, 12 pages.
  http://doi.acm.org/10.1145/2503210.2503292

  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  A novel hybrid CPU-GPU generalized eigensolver for electronic structure
  calculations based on fine-grained memory aware tasks.
  International Journal of High Performance Computing Applications.
  Volume 28 Issue 2, Pages 196-209, May 2014.
  http://hpc.sagepub.com/content/28/2/196

Definition at line 228 of file ssytrd_sb2st.F.

230*
231#if defined(_OPENMP)
232 use omp_lib
233#endif
234*
235 IMPLICIT NONE
236*
237* -- LAPACK computational routine --
238* -- LAPACK is a software package provided by Univ. of Tennessee, --
239* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
240*
241* .. Scalar Arguments ..
242 CHARACTER STAGE1, UPLO, VECT
243 INTEGER N, KD, LDAB, LHOUS, LWORK, INFO
244* ..
245* .. Array Arguments ..
246 REAL D( * ), E( * )
247 REAL AB( LDAB, * ), HOUS( * ), WORK( * )
248* ..
249*
250* =====================================================================
251*
252* .. Parameters ..
253 REAL RZERO
254 REAL ZERO, ONE
255 parameter( rzero = 0.0e+0,
256 $ zero = 0.0e+0,
257 $ one = 1.0e+0 )
258* ..
259* .. Local Scalars ..
260 LOGICAL LQUERY, WANTQ, UPPER, AFTERS1
261 INTEGER I, M, K, IB, SWEEPID, MYID, SHIFT, STT, ST,
262 $ ED, STIND, EDIND, BLKLASTIND, COLPT, THED,
263 $ STEPERCOL, GRSIZ, THGRSIZ, THGRNB, THGRID,
264 $ NBTILES, TTYPE, TID, NTHREADS, DEBUG,
265 $ ABDPOS, ABOFDPOS, DPOS, OFDPOS, AWPOS,
266 $ INDA, INDW, APOS, SIZEA, LDA, INDV, INDTAU,
267 $ SISEV, SIZETAU, LDV, LHMIN, LWMIN
268* ..
269* .. External Subroutines ..
271* ..
272* .. Intrinsic Functions ..
273 INTRINSIC min, max, ceiling, real
274* ..
275* .. External Functions ..
276 LOGICAL LSAME
277 INTEGER ILAENV2STAGE
278 REAL SROUNDUP_LWORK
280* ..
281* .. Executable Statements ..
282*
283* Determine the minimal workspace size required.
284* Test the input parameters
285*
286 debug = 0
287 info = 0
288 afters1 = lsame( stage1, 'Y' )
289 wantq = lsame( vect, 'V' )
290 upper = lsame( uplo, 'U' )
291 lquery = ( lwork.EQ.-1 ) .OR. ( lhous.EQ.-1 )
292*
293* Determine the block size, the workspace size and the hous size.
294*
295 ib = ilaenv2stage( 2, 'SSYTRD_SB2ST', vect, n, kd, -1, -1 )
296 lhmin = ilaenv2stage( 3, 'SSYTRD_SB2ST', vect, n, kd, ib, -1 )
297 lwmin = ilaenv2stage( 4, 'SSYTRD_SB2ST', vect, n, kd, ib, -1 )
298*
299 IF( .NOT.afters1 .AND. .NOT.lsame( stage1, 'N' ) ) THEN
300 info = -1
301 ELSE IF( .NOT.lsame( vect, 'N' ) ) THEN
302 info = -2
303 ELSE IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
304 info = -3
305 ELSE IF( n.LT.0 ) THEN
306 info = -4
307 ELSE IF( kd.LT.0 ) THEN
308 info = -5
309 ELSE IF( ldab.LT.(kd+1) ) THEN
310 info = -7
311 ELSE IF( lhous.LT.lhmin .AND. .NOT.lquery ) THEN
312 info = -11
313 ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
314 info = -13
315 END IF
316*
317 IF( info.EQ.0 ) THEN
318 hous( 1 ) = lhmin
319 work( 1 ) = sroundup_lwork(lwmin)
320 END IF
321*
322 IF( info.NE.0 ) THEN
323 CALL xerbla( 'SSYTRD_SB2ST', -info )
324 RETURN
325 ELSE IF( lquery ) THEN
326 RETURN
327 END IF
328*
329* Quick return if possible
330*
331 IF( n.EQ.0 ) THEN
332 hous( 1 ) = 1
333 work( 1 ) = 1
334 RETURN
335 END IF
336*
337* Determine pointer position
338*
339 ldv = kd + ib
340 sizetau = 2 * n
341 sisev = 2 * n
342 indtau = 1
343 indv = indtau + sizetau
344 lda = 2 * kd + 1
345 sizea = lda * n
346 inda = 1
347 indw = inda + sizea
348 nthreads = 1
349 tid = 0
350*
351 IF( upper ) THEN
352 apos = inda + kd
353 awpos = inda
354 dpos = apos + kd
355 ofdpos = dpos - 1
356 abdpos = kd + 1
357 abofdpos = kd
358 ELSE
359 apos = inda
360 awpos = inda + kd + 1
361 dpos = apos
362 ofdpos = dpos + 1
363 abdpos = 1
364 abofdpos = 2
365
366 ENDIF
367*
368* Case KD=0:
369* The matrix is diagonal. We just copy it (convert to "real" for
370* real because D is double and the imaginary part should be 0)
371* and store it in D. A sequential code here is better or
372* in a parallel environment it might need two cores for D and E
373*
374 IF( kd.EQ.0 ) THEN
375 DO 30 i = 1, n
376 d( i ) = ( ab( abdpos, i ) )
377 30 CONTINUE
378 DO 40 i = 1, n-1
379 e( i ) = rzero
380 40 CONTINUE
381*
382 hous( 1 ) = 1
383 work( 1 ) = 1
384 RETURN
385 END IF
386*
387* Case KD=1:
388* The matrix is already Tridiagonal. We have to make diagonal
389* and offdiagonal elements real, and store them in D and E.
390* For that, for real precision just copy the diag and offdiag
391* to D and E while for the COMPLEX case the bulge chasing is
392* performed to convert the hermetian tridiagonal to symmetric
393* tridiagonal. A simpler conversion formula might be used, but then
394* updating the Q matrix will be required and based if Q is generated
395* or not this might complicate the story.
396*
397 IF( kd.EQ.1 ) THEN
398 DO 50 i = 1, n
399 d( i ) = ( ab( abdpos, i ) )
400 50 CONTINUE
401*
402 IF( upper ) THEN
403 DO 60 i = 1, n-1
404 e( i ) = ( ab( abofdpos, i+1 ) )
405 60 CONTINUE
406 ELSE
407 DO 70 i = 1, n-1
408 e( i ) = ( ab( abofdpos, i ) )
409 70 CONTINUE
410 ENDIF
411*
412 hous( 1 ) = 1
413 work( 1 ) = 1
414 RETURN
415 END IF
416*
417* Main code start here.
418* Reduce the symmetric band of A to a tridiagonal matrix.
419*
420 thgrsiz = n
421 grsiz = 1
422 shift = 3
423 nbtiles = ceiling( real(n)/real(kd) )
424 stepercol = ceiling( real(shift)/real(grsiz) )
425 thgrnb = ceiling( real(n-1)/real(thgrsiz) )
426*
427 CALL slacpy( "A", kd+1, n, ab, ldab, work( apos ), lda )
428 CALL slaset( "A", kd, n, zero, zero, work( awpos ), lda )
429*
430*
431* openMP parallelisation start here
432*
433#if defined(_OPENMP)
434!$OMP PARALLEL PRIVATE( TID, THGRID, BLKLASTIND )
435!$OMP$ PRIVATE( THED, I, M, K, ST, ED, STT, SWEEPID )
436!$OMP$ PRIVATE( MYID, TTYPE, COLPT, STIND, EDIND )
437!$OMP$ SHARED ( UPLO, WANTQ, INDV, INDTAU, HOUS, WORK)
438!$OMP$ SHARED ( N, KD, IB, NBTILES, LDA, LDV, INDA )
439!$OMP$ SHARED ( STEPERCOL, THGRNB, THGRSIZ, GRSIZ, SHIFT )
440!$OMP MASTER
441#endif
442*
443* main bulge chasing loop
444*
445 DO 100 thgrid = 1, thgrnb
446 stt = (thgrid-1)*thgrsiz+1
447 thed = min( (stt + thgrsiz -1), (n-1))
448 DO 110 i = stt, n-1
449 ed = min( i, thed )
450 IF( stt.GT.ed ) EXIT
451 DO 120 m = 1, stepercol
452 st = stt
453 DO 130 sweepid = st, ed
454 DO 140 k = 1, grsiz
455 myid = (i-sweepid)*(stepercol*grsiz)
456 $ + (m-1)*grsiz + k
457 IF ( myid.EQ.1 ) THEN
458 ttype = 1
459 ELSE
460 ttype = mod( myid, 2 ) + 2
461 ENDIF
462
463 IF( ttype.EQ.2 ) THEN
464 colpt = (myid/2)*kd + sweepid
465 stind = colpt-kd+1
466 edind = min(colpt,n)
467 blklastind = colpt
468 ELSE
469 colpt = ((myid+1)/2)*kd + sweepid
470 stind = colpt-kd+1
471 edind = min(colpt,n)
472 IF( ( stind.GE.edind-1 ).AND.
473 $ ( edind.EQ.n ) ) THEN
474 blklastind = n
475 ELSE
476 blklastind = 0
477 ENDIF
478 ENDIF
479*
480* Call the kernel
481*
482#if defined(_OPENMP) && _OPENMP >= 201307
483 IF( ttype.NE.1 ) THEN
484!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1))
485!$OMP$ DEPEND(in:WORK(MYID-1))
486!$OMP$ DEPEND(out:WORK(MYID))
487 tid = omp_get_thread_num()
488 CALL ssb2st_kernels( uplo, wantq, ttype,
489 $ stind, edind, sweepid, n, kd, ib,
490 $ work( inda ), lda,
491 $ hous( indv ), hous( indtau ), ldv,
492 $ work( indw + tid*kd ) )
493!$OMP END TASK
494 ELSE
495!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1))
496!$OMP$ DEPEND(out:WORK(MYID))
497 tid = omp_get_thread_num()
498 CALL ssb2st_kernels( uplo, wantq, ttype,
499 $ stind, edind, sweepid, n, kd, ib,
500 $ work( inda ), lda,
501 $ hous( indv ), hous( indtau ), ldv,
502 $ work( indw + tid*kd ) )
503!$OMP END TASK
504 ENDIF
505#else
506 CALL ssb2st_kernels( uplo, wantq, ttype,
507 $ stind, edind, sweepid, n, kd, ib,
508 $ work( inda ), lda,
509 $ hous( indv ), hous( indtau ), ldv,
510 $ work( indw ) )
511#endif
512 IF ( blklastind.GE.(n-1) ) THEN
513 stt = stt + 1
514 EXIT
515 ENDIF
516 140 CONTINUE
517 130 CONTINUE
518 120 CONTINUE
519 110 CONTINUE
520 100 CONTINUE
521*
522#if defined(_OPENMP)
523!$OMP END MASTER
524!$OMP END PARALLEL
525#endif
526*
527* Copy the diagonal from A to D. Note that D is REAL thus only
528* the Real part is needed, the imaginary part should be zero.
529*
530 DO 150 i = 1, n
531 d( i ) = ( work( dpos+(i-1)*lda ) )
532 150 CONTINUE
533*
534* Copy the off diagonal from A to E. Note that E is REAL thus only
535* the Real part is needed, the imaginary part should be zero.
536*
537 IF( upper ) THEN
538 DO 160 i = 1, n-1
539 e( i ) = ( work( ofdpos+i*lda ) )
540 160 CONTINUE
541 ELSE
542 DO 170 i = 1, n-1
543 e( i ) = ( work( ofdpos+(i-1)*lda ) )
544 170 CONTINUE
545 ENDIF
546*
547 hous( 1 ) = lhmin
548 work( 1 ) = sroundup_lwork(lwmin)
549 RETURN
550*
551* End of SSYTRD_SB2ST
552*
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine ssb2st_kernels(uplo, wantz, ttype, st, ed, sweep, n, nb, ib, a, lda, v, tau, ldvt, work)
SSB2ST_KERNELS
integer function ilaenv2stage(ispec, name, opts, n1, n2, n3, n4)
ILAENV2STAGE
subroutine slacpy(uplo, m, n, a, lda, b, ldb)
SLACPY copies all or part of one two-dimensional array to another.
Definition slacpy.f:103
subroutine slaset(uplo, m, n, alpha, beta, a, lda)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition slaset.f:110
logical function lsame(ca, cb)
LSAME
Definition lsame.f:48
real function sroundup_lwork(lwork)
SROUNDUP_LWORK
Here is the call graph for this function:
Here is the caller graph for this function: