*  Origin: d8/d37/ctrsyl3_8f_source.html

*> \brief \b CTRSYL3

*

* Definition:

* ===========

*

*

*>  \par Purpose

*  =============

*>

*> \verbatim

*>

*>  CTRSYL3 solves the complex Sylvester matrix equation:

*>

*>     op(A)*X + X*op(B) = scale*C or

*>     op(A)*X - X*op(B) = scale*C,

*>

*>  where op(A) = A or A**H, and  A and B are both upper triangular. A is

*>  M-by-M and B is N-by-N; the right hand side C and the solution X are

*>  M-by-N; and scale is an output scale factor, set <= 1 to avoid

*>  overflow in X.

*>

*>  This is the block version of the algorithm.

*> \endverbatim

*

*  Arguments

*  =========

*

*> \param[in] TRANA

*> \verbatim

*>          TRANA is CHARACTER*1

*>          Specifies the option op(A):

*>          = 'N': op(A) = A    (No transpose)

*>          = 'C': op(A) = A**H (Conjugate transpose)

*> \endverbatim

*>

*> \param[in] TRANB

*> \verbatim

*>          TRANB is CHARACTER*1

*>          Specifies the option op(B):

*>          = 'N': op(B) = B    (No transpose)

*>          = 'C': op(B) = B**H (Conjugate transpose)

*> \endverbatim

*>

*> \param[in] ISGN

*> \verbatim

*>          ISGN is INTEGER

*>          Specifies the sign in the equation:

*>          = +1: solve op(A)*X + X*op(B) = scale*C

*>          = -1: solve op(A)*X - X*op(B) = scale*C

*> \endverbatim

*>

*> \param[in] M

*> \verbatim

*>          M is INTEGER

*>          The order of the matrix A, and the number of rows in the

*>          matrices X and C. M >= 0.

*> \endverbatim

*>

*> \param[in] N

*> \verbatim

*>          N is INTEGER

*>          The order of the matrix B, and the number of columns in the

*>          matrices X and C. N >= 0.

*> \endverbatim

*>

*> \param[in] A

*> \verbatim

*>          A is COMPLEX array, dimension (LDA,M)

*>          The upper triangular matrix A.

*> \endverbatim

*>

*> \param[in] LDA

*> \verbatim

*>          LDA is INTEGER

*>          The leading dimension of the array A. LDA >= max(1,M).

*> \endverbatim

*>

*> \param[in] B

*> \verbatim

*>          B is COMPLEX array, dimension (LDB,N)

*>          The upper triangular matrix B.

*> \endverbatim

*>

*> \param[in] LDB

*> \verbatim

*>          LDB is INTEGER

*>          The leading dimension of the array B. LDB >= max(1,N).

*> \endverbatim

*>

*> \param[in,out] C

*> \verbatim

*>          C is COMPLEX array, dimension (LDC,N)

*>          On entry, the M-by-N right hand side matrix C.

*>          On exit, C is overwritten by the solution matrix X.

*> \endverbatim

*>

*> \param[in] LDC

*> \verbatim

*>          LDC is INTEGER

*>          The leading dimension of the array C. LDC >= max(1,M).

*> \endverbatim

*>

*> \param[out] SCALE

*> \verbatim

*>          SCALE is REAL

*>          The scale factor, scale, set <= 1 to avoid overflow in X.

*> \endverbatim

*>

*> \param[out] SWORK

*> \verbatim

*>          SWORK is REAL array, dimension (MAX(2, ROWS), MAX(1,COLS)).

*>          On exit, if INFO = 0, SWORK(1) returns the optimal value ROWS

*>          and SWORK(2) returns the optimal COLS.

*> \endverbatim

*>

*> \param[in] LDSWORK

*> \verbatim

*>          LDSWORK is INTEGER

*>          LDSWORK >= MAX(2,ROWS), where ROWS = ((M + NB - 1) / NB + 1)

*>          and NB is the optimal block size.

*>

*>          If LDSWORK = -1, then a workspace query is assumed; the routine

*>          only calculates the optimal dimensions of the SWORK matrix,

*>          returns these values as the first and second entry of the SWORK

*>          matrix, and no error message related to LDSWORK is issued by XERBLA.

*> \endverbatim

*>

*> \param[out] INFO

*> \verbatim

*>          INFO is INTEGER

*>          = 0: successful exit

*>          < 0: if INFO = -i, the i-th argument had an illegal value

*>          = 1: A and B have common or very close eigenvalues; perturbed

*>               values were used to solve the equation (but the matrices

*>               A and B are unchanged).

*> \endverbatim

*

*> \ingroup trsyl3

*

*  =====================================================================

*  References:

*   E. S. Quintana-Orti and R. A. Van De Geijn (2003). Formal derivation of

*   algorithms: The triangular Sylvester equation, ACM Transactions

*   on Mathematical Software (TOMS), volume 29, pages 218--243.

*

*   A. Schwarz and C. C. Kjelgaard Mikkelsen (2020). Robust Task-Parallel

*   Solution of the Triangular Sylvester Equation. Lecture Notes in

*   Computer Science, vol 12043, pages 82--92, Springer.

*

*  Contributor:

*   Angelika Schwarz, Umea University, Sweden.

*

*  =====================================================================

      SUBROUTINE ctrsyl3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C,

     $                    LDC, SCALE, SWORK, LDSWORK, INFO )

      IMPLICIT NONE

*

*     .. Scalar Arguments ..

      CHARACTER          TRANA, TRANB

      INTEGER            INFO, ISGN, LDA, LDB, LDC, LDSWORK, M, N

      REAL               SCALE

*     ..

*     .. Array Arguments ..

      COMPLEX            A( LDA, * ), B( LDB, * ), C( LDC, * )

      REAL               SWORK( LDSWORK, * )

*     ..

*     .. Parameters ..

      REAL               ZERO, ONE

      parameter( zero = 0.0e+0, one = 1.0e+0 )

      COMPLEX            CONE

      parameter( cone = ( 1.0e+0, 0.0e+0 ) )

*     ..

*     .. Local Scalars ..

      LOGICAL            NOTRNA, NOTRNB, LQUERY

      INTEGER            AWRK, BWRK, I, I1, I2, IINFO, J, J1, J2, JJ,

     $                   k, k1, k2, l, l1, l2, ll, nba, nb, nbb

      REAL               ANRM, BIGNUM, BNRM, CNRM, SCAL, SCALOC,

     $                   scamin, sgn, xnrm, buf, smlnum

      COMPLEX            CSGN

*     ..

*     .. Local Arrays ..

      REAL               WNRM( MAX( M, N ) )

*     ..

*     .. External Functions ..

      LOGICAL            LSAME

      INTEGER            ILAENV

      REAL               CLANGE, SLAMCH, SLARMM

      EXTERNAL           clange, ilaenv, lsame, slamch, slarmm

*     ..

*     .. External Subroutines ..

      EXTERNAL           csscal, cgemm, clascl, ctrsyl, xerbla

*     ..

*     .. Intrinsic Functions ..

      INTRINSIC          abs, aimag, exponent, max, min, real

*     ..

*     .. Executable Statements ..

*

*     Decode and Test input parameters

*

      notrna = lsame( trana, 'N' )

      notrnb = lsame( tranb, 'N' )

*

*     Use the same block size for all matrices.

*

      nb = max( 8, ilaenv( 1, 'CTRSYL', '', m, n, -1, -1) )

*

*     Compute number of blocks in A and B

*

      nba = max( 1, (m + nb - 1) / nb )

      nbb = max( 1, (n + nb - 1) / nb )

*

*     Compute workspace

*

      info = 0

      lquery = ( ldswork.EQ.-1 )

      IF( lquery ) THEN

         ldswork = 2

         swork(1,1) = max( nba, nbb )

         swork(2,1) = 2 * nbb + nba

      END IF

*

*     Test the input arguments

*

      IF( .NOT.notrna .AND. .NOT. lsame( trana, 'C' ) ) THEN

         info = -1

      ELSE IF( .NOT.notrnb .AND. .NOT. lsame( tranb, 'C' ) ) THEN

         info = -2

      ELSE IF( isgn.NE.1 .AND. isgn.NE.-1 ) THEN

         info = -3

      ELSE IF( m.LT.0 ) THEN

         info = -4

      ELSE IF( n.LT.0 ) THEN

         info = -5

      ELSE IF( lda.LT.max( 1, m ) ) THEN

         info = -7

      ELSE IF( ldb.LT.max( 1, n ) ) THEN

         info = -9

      ELSE IF( ldc.LT.max( 1, m ) ) THEN

         info = -11

      END IF

      IF( info.NE.0 ) THEN

         CALL xerbla( 'CTRSYL3', -info )

         RETURN

      ELSE IF( lquery ) THEN

         RETURN

      END IF

*

*     Quick return if possible

*

      scale = one

      IF( m.EQ.0 .OR. n.EQ.0 )

     $   RETURN

*

*     Use unblocked code for small problems or if insufficient

*     workspace is provided

*

      IF( min( nba, nbb ).EQ.1 .OR. ldswork.LT.max( nba, nbb ) ) THEN

        CALL ctrsyl( trana, tranb, isgn, m, n, a, lda, b, ldb,

     $               c, ldc, scale, info )

        RETURN

      END IF

*

*     Set constants to control overflow

*

      smlnum = slamch( 'S' )

      bignum = one / smlnum

*

*     Set local scaling factors.

*

      DO l = 1, nbb

         DO k = 1, nba

            swork( k, l ) = one

         END DO

      END DO

*

*     Fallback scaling factor to prevent flushing of SWORK( K, L ) to zero.

*     This scaling is to ensure compatibility with TRSYL and may get flushed.

*

      buf = one

*

*      Compute upper bounds of blocks of A and B

*

      awrk = nbb

      DO k = 1, nba

         k1 = (k - 1) * nb + 1

         k2 = min( k * nb, m ) + 1

         DO l = k, nba

            l1 = (l - 1) * nb + 1

            l2 = min( l * nb, m ) + 1

            IF( notrna ) THEN

               swork( k, awrk + l ) = clange( 'I', k2-k1, l2-l1,

     $                                        a( k1, l1 ), lda, wnrm )

            ELSE

               swork( l, awrk + k ) = clange( '1', k2-k1, l2-l1,

     $                                        a( k1, l1 ), lda, wnrm )

            END IF

         END DO

      END DO

      bwrk = nbb + nba

      DO k = 1, nbb

         k1 = (k - 1) * nb + 1

         k2 = min( k * nb, n ) + 1

         DO l = k, nbb

            l1 = (l - 1) * nb + 1

            l2 = min( l * nb, n ) + 1

            IF( notrnb ) THEN

               swork( k, bwrk + l ) = clange( 'I', k2-k1, l2-l1,

     $                                        b( k1, l1 ), ldb, wnrm )

            ELSE

               swork( l, bwrk + k ) = clange( '1', k2-k1, l2-l1,

     $                                        b( k1, l1 ), ldb, wnrm )

            END IF

         END DO

      END DO

*

      sgn = real( isgn )

      csgn = cmplx( sgn, zero )

*

      IF( notrna .AND. notrnb ) THEN

*

*        Solve    A*X + ISGN*X*B = scale*C.

*

*        The (K,L)th block of X is determined starting from

*        bottom-left corner column by column by

*

*         A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L)

*

*        Where

*                  M                         L-1

*        R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)].

*                I=K+1                       J=1

*

*        Start loop over block rows (index = K) and block columns (index = L)

*

         DO k = nba, 1, -1

*

*           K1: row index of the first row in X( K, L )

*           K2: row index of the first row in X( K+1, L )

*           so that K2 - K1 is the row count of the block X( K, L )

*

            k1 = (k - 1) * nb + 1

            k2 = min( k * nb, m ) + 1

            DO l = 1, nbb

*

*              L1: column index of the first column in X( K, L )

*              L2: column index of the first column in X( K, L + 1)

*              so that L2 - L1 is the column count of the block X( K, L )

*

               l1 = (l - 1) * nb + 1

               l2 = min( l * nb, n ) + 1

*

               CALL ctrsyl( trana, tranb, isgn, k2-k1, l2-l1,

     $                      a( k1, k1 ), lda,

     $                      b( l1, l1 ), ldb,

     $                      c( k1, l1 ), ldc, scaloc, iinfo )

               info = max( info, iinfo )

*

               IF( scaloc * swork( k, l ) .EQ. zero ) THEN

                  IF( scaloc .EQ. zero ) THEN

*                    The magnitude of the largest entry of X(K1:K2-1, L1:L2-1)

*                    is larger than the product of BIGNUM**2 and cannot be

*                    represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1).

*                    Mark the computation as pointless.

                     buf = zero

                  ELSE

*                    Use second scaling factor to prevent flushing to zero.

                     buf = buf*2.e0**exponent( scaloc )

                  END IF

                  DO jj = 1, nbb

                     DO ll = 1, nba

*                       Bound by BIGNUM to not introduce Inf. The value

*                       is irrelevant; corresponding entries of the

*                       solution will be flushed in consistency scaling.

                        swork( ll, jj ) = min( bignum,

     $                     swork( ll, jj ) / 2.e0**exponent( scaloc ) )

                     END DO

                  END DO

               END IF

               swork( k, l ) = scaloc * swork( k, l )

               xnrm = clange( 'I', k2-k1, l2-l1, c( k1, l1 ), ldc,

     $                        wnrm )

*

               DO i = k - 1, 1, -1

*

*                 C( I, L ) := C( I, L ) - A( I, K ) * C( K, L )

*

                  i1 = (i - 1) * nb + 1

                  i2 = min( i * nb, m ) + 1

*

*                 Compute scaling factor to survive the linear update

*                 simulating consistent scaling.

*

                  cnrm = clange( 'I', i2-i1, l2-l1, c( i1, l1 ),

     $                           ldc, wnrm )

                  scamin = min( swork( i, l ), swork( k, l ) )

                  cnrm = cnrm * ( scamin / swork( i, l ) )

                  xnrm = xnrm * ( scamin / swork( k, l ) )

                  anrm = swork( i, awrk + k )

                  scaloc = slarmm( anrm, xnrm, cnrm )

                  IF( scaloc * scamin .EQ. zero ) THEN

*                    Use second scaling factor to prevent flushing to zero.

                     buf = buf*2.e0**exponent( scaloc )

                     DO jj = 1, nbb

                        DO ll = 1, nba

                        swork( ll, jj ) = min( bignum,

     $                     swork( ll, jj ) / 2.e0**exponent( scaloc ) )

                        END DO

                     END DO

                     scamin = scamin / 2.e0**exponent( scaloc )

                     scaloc = scaloc / 2.e0**exponent( scaloc )

                  END IF

                  cnrm = cnrm * scaloc

                  xnrm = xnrm * scaloc

*

*                 Simultaneously apply the robust update factor and the

*                 consistency scaling factor to C( I, L ) and C( K, L ).

*

                  scal = ( scamin / swork( k, l ) ) * scaloc

                  IF( scal.NE.one ) THEN

                      DO jj = l1, l2-1

                         CALL csscal( k2-k1, scal, c( k1, jj ), 1)

                      END DO

                  ENDIF

*

                  scal = ( scamin / swork( i, l ) ) * scaloc

                  IF( scal.NE.one ) THEN

                      DO ll = l1, l2-1

                         CALL csscal( i2-i1, scal, c( i1, ll ), 1)

                      END DO

                  ENDIF

*

*                 Record current scaling factor

*

                  swork( k, l ) = scamin * scaloc

                  swork( i, l ) = scamin * scaloc

*

                  CALL cgemm( 'N', 'N', i2-i1, l2-l1, k2-k1, -cone,

     $                        a( i1, k1 ), lda, c( k1, l1 ), ldc,

     $                        cone, c( i1, l1 ), ldc )

*

               END DO

*

               DO j = l + 1, nbb

*

*                 C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J )

*

                  j1 = (j - 1) * nb + 1

                  j2 = min( j * nb, n ) + 1

*

*                 Compute scaling factor to survive the linear update

*                 simulating consistent scaling.

*

                  cnrm = clange( 'I', k2-k1, j2-j1, c( k1, j1 ),

     $                           ldc, wnrm )

                  scamin = min( swork( k, j ), swork( k, l ) )

                  cnrm = cnrm * ( scamin / swork( k, j ) )

                  xnrm = xnrm * ( scamin / swork( k, l ) )

                  bnrm = swork(l, bwrk + j)

                  scaloc = slarmm( bnrm, xnrm, cnrm )

                  IF( scaloc * scamin .EQ. zero ) THEN

*                    Use second scaling factor to prevent flushing to zero.

                     buf = buf*2.e0**exponent( scaloc )

                     DO jj = 1, nbb

                        DO ll = 1, nba

                        swork( ll, jj ) = min( bignum,

     $                     swork( ll, jj ) / 2.e0**exponent( scaloc ) )

                        END DO

                     END DO

                     scamin = scamin / 2.e0**exponent( scaloc )

                     scaloc = scaloc / 2.e0**exponent( scaloc )

                  END IF

                  cnrm = cnrm * scaloc

                  xnrm = xnrm * scaloc

*

*                 Simultaneously apply the robust update factor and the

*                 consistency scaling factor to C( K, J ) and C( K, L).

*

                  scal = ( scamin / swork( k, l ) ) * scaloc

                  IF( scal .NE. one ) THEN

                     DO ll = l1, l2-1

                        CALL csscal( k2-k1, scal, c( k1, ll ), 1 )

                     END DO

                  ENDIF

*

                  scal = ( scamin / swork( k, j ) ) * scaloc

                  IF( scal .NE. one ) THEN

                      DO jj = j1, j2-1

                         CALL csscal( k2-k1, scal, c( k1, jj ), 1 )

                      END DO

                  ENDIF

*

*                 Record current scaling factor

*

                  swork( k, l ) = scamin * scaloc

                  swork( k, j ) = scamin * scaloc

*

                  CALL cgemm( 'N', 'N', k2-k1, j2-j1, l2-l1, -csgn,

     $                        c( k1, l1 ), ldc, b( l1, j1 ), ldb,

     $                        cone, c( k1, j1 ), ldc )

               END DO

            END DO

         END DO

      ELSE IF( .NOT.notrna .AND. notrnb ) THEN

*

*        Solve    A**H *X + ISGN*X*B = scale*C.

*

*        The (K,L)th block of X is determined starting from

*        upper-left corner column by column by

*

*          A(K,K)**H*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L)

*

*        Where

*                   K-1                        L-1

*          R(K,L) = SUM [A(I,K)**H*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)]

*                   I=1                        J=1

*

*        Start loop over block rows (index = K) and block columns (index = L)

*

         DO k = 1, nba

*

*           K1: row index of the first row in X( K, L )

*           K2: row index of the first row in X( K+1, L )

*           so that K2 - K1 is the row count of the block X( K, L )

*

            k1 = (k - 1) * nb + 1

            k2 = min( k * nb, m ) + 1

            DO l = 1, nbb

*

*              L1: column index of the first column in X( K, L )

*              L2: column index of the first column in X( K, L + 1)

*              so that L2 - L1 is the column count of the block X( K, L )

*

               l1 = (l - 1) * nb + 1

               l2 = min( l * nb, n ) + 1

*

               CALL ctrsyl( trana, tranb, isgn, k2-k1, l2-l1,

     $                      a( k1, k1 ), lda,

     $                      b( l1, l1 ), ldb,

     $                      c( k1, l1 ), ldc, scaloc, iinfo )

               info = max( info, iinfo )

*

               IF( scaloc * swork( k, l ) .EQ. zero ) THEN

                  IF( scaloc .EQ. zero ) THEN

*                    The magnitude of the largest entry of X(K1:K2-1, L1:L2-1)

*                    is larger than the product of BIGNUM**2 and cannot be

*                    represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1).

*                    Mark the computation as pointless.

                     buf = zero

                  ELSE

*                    Use second scaling factor to prevent flushing to zero.

                     buf = buf*2.e0**exponent( scaloc )

                  END IF

                  DO jj = 1, nbb

                     DO ll = 1, nba

*                       Bound by BIGNUM to not introduce Inf. The value

*                       is irrelevant; corresponding entries of the

*                       solution will be flushed in consistency scaling.

                        swork( ll, jj ) = min( bignum,

     $                     swork( ll, jj ) / 2.e0**exponent( scaloc ) )

                     END DO

                  END DO

               END IF

               swork( k, l ) = scaloc * swork( k, l )

               xnrm = clange( 'I', k2-k1, l2-l1, c( k1, l1 ), ldc,

     $                        wnrm )

*

               DO i = k + 1, nba

*

*                 C( I, L ) := C( I, L ) - A( K, I )**H * C( K, L )

*

                  i1 = (i - 1) * nb + 1

                  i2 = min( i * nb, m ) + 1

*

*                 Compute scaling factor to survive the linear update

*                 simulating consistent scaling.

*

                  cnrm = clange( 'I', i2-i1, l2-l1, c( i1, l1 ),

     $                           ldc, wnrm )

                  scamin = min( swork( i, l ), swork( k, l ) )

                  cnrm = cnrm * ( scamin / swork( i, l ) )

                  xnrm = xnrm * ( scamin / swork( k, l ) )

                  anrm = swork( i, awrk + k )

                  scaloc = slarmm( anrm, xnrm, cnrm )

                  IF( scaloc * scamin .EQ. zero ) THEN

*                    Use second scaling factor to prevent flushing to zero.

                     buf = buf*2.e0**exponent( scaloc )

                     DO jj = 1, nbb

                        DO ll = 1, nba

                        swork( ll, jj ) = min( bignum,

     $                     swork( ll, jj ) / 2.e0**exponent( scaloc ) )

                        END DO

                     END DO

                     scamin = scamin / 2.e0**exponent( scaloc )

                     scaloc = scaloc / 2.e0**exponent( scaloc )

                  END IF

                  cnrm = cnrm * scaloc

                  xnrm = xnrm * scaloc

*

*                 Simultaneously apply the robust update factor and the

*                 consistency scaling factor to C( I, L ) and C( K, L).

*

                  scal = ( scamin / swork( k, l ) ) * scaloc

                  IF( scal .NE. one ) THEN

                     DO ll = l1, l2-1

                        CALL csscal( k2-k1, scal, c( k1, ll ), 1 )

                     END DO

                  ENDIF

*

                  scal = ( scamin / swork( i, l ) ) * scaloc

                  IF( scal .NE. one ) THEN

                     DO ll = l1, l2-1

                        CALL csscal( i2-i1, scal, c( i1, ll ), 1 )

                     END DO

                  ENDIF

*

*                 Record current scaling factor

*

                  swork( k, l ) = scamin * scaloc

                  swork( i, l ) = scamin * scaloc

*

                  CALL cgemm( 'C', 'N', i2-i1, l2-l1, k2-k1, -cone,

     $                        a( k1, i1 ), lda, c( k1, l1 ), ldc,

     $                        cone, c( i1, l1 ), ldc )

               END DO

*

               DO j = l + 1, nbb

*

*                 C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J )

*

                  j1 = (j - 1) * nb + 1

                  j2 = min( j * nb, n ) + 1

*

*                 Compute scaling factor to survive the linear update

*                 simulating consistent scaling.

*

                  cnrm = clange( 'I', k2-k1, j2-j1, c( k1, j1 ),

     $                           ldc, wnrm )

                  scamin = min( swork( k, j ), swork( k, l ) )

                  cnrm = cnrm * ( scamin / swork( k, j ) )

                  xnrm = xnrm * ( scamin / swork( k, l ) )

                  bnrm = swork( l, bwrk + j )

                  scaloc = slarmm( bnrm, xnrm, cnrm )

                  IF( scaloc * scamin .EQ. zero ) THEN

*                    Use second scaling factor to prevent flushing to zero.

                     buf = buf*2.e0**exponent( scaloc )

                     DO jj = 1, nbb

                        DO ll = 1, nba

                        swork( ll, jj ) = min( bignum,

     $                     swork( ll, jj ) / 2.e0**exponent( scaloc ) )

                        END DO

                     END DO

                     scamin = scamin / 2.e0**exponent( scaloc )

                     scaloc = scaloc / 2.e0**exponent( scaloc )

                  END IF

                  cnrm = cnrm * scaloc

                  xnrm = xnrm * scaloc

*

*                 Simultaneously apply the robust update factor and the

*                 consistency scaling factor to C( K, J ) and C( K, L).

*

                  scal = ( scamin / swork( k, l ) ) * scaloc

                  IF( scal .NE. one ) THEN

                      DO ll = l1, l2-1

                         CALL csscal( k2-k1, scal, c( k1, ll ), 1 )

                      END DO

                  ENDIF

*

                  scal = ( scamin / swork( k, j ) ) * scaloc

                  IF( scal .NE. one ) THEN

                     DO jj = j1, j2-1

                        CALL csscal( k2-k1, scal, c( k1, jj ), 1 )

                     END DO

                  ENDIF

*

*                 Record current scaling factor

*

                  swork( k, l ) = scamin * scaloc

                  swork( k, j ) = scamin * scaloc

*

                  CALL cgemm( 'N', 'N', k2-k1, j2-j1, l2-l1, -csgn,

     $                        c( k1, l1 ), ldc, b( l1, j1 ), ldb,

     $                        cone, c( k1, j1 ), ldc )

               END DO

            END DO

         END DO

      ELSE IF( .NOT.notrna .AND. .NOT.notrnb ) THEN

*

*        Solve    A**H *X + ISGN*X*B**H = scale*C.

*

*        The (K,L)th block of X is determined starting from

*        top-right corner column by column by

*

*           A(K,K)**H*X(K,L) + ISGN*X(K,L)*B(L,L)**H = C(K,L) - R(K,L)

*

*        Where

*                     K-1                          N

*            R(K,L) = SUM [A(I,K)**H*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**H].

*                     I=1                        J=L+1

*

*        Start loop over block rows (index = K) and block columns (index = L)

*

         DO k = 1, nba

*

*           K1: row index of the first row in X( K, L )

*           K2: row index of the first row in X( K+1, L )

*           so that K2 - K1 is the row count of the block X( K, L )

*

            k1 = (k - 1) * nb + 1

            k2 = min( k * nb, m ) + 1

            DO l = nbb, 1, -1

*

*              L1: column index of the first column in X( K, L )

*              L2: column index of the first column in X( K, L + 1)

*              so that L2 - L1 is the column count of the block X( K, L )

*

               l1 = (l - 1) * nb + 1

               l2 = min( l * nb, n ) + 1

*

               CALL ctrsyl( trana, tranb, isgn, k2-k1, l2-l1,

     $                      a( k1, k1 ), lda,

     $                      b( l1, l1 ), ldb,

     $                      c( k1, l1 ), ldc, scaloc, iinfo )

               info = max( info, iinfo )

*

               IF( scaloc * swork( k, l ) .EQ. zero ) THEN

                  IF( scaloc .EQ. zero ) THEN

*                    The magnitude of the largest entry of X(K1:K2-1, L1:L2-1)

*                    is larger than the product of BIGNUM**2 and cannot be

*                    represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1).

*                    Mark the computation as pointless.

                     buf = zero

                  ELSE

*                    Use second scaling factor to prevent flushing to zero.

                     buf = buf*2.e0**exponent( scaloc )

                  END IF

                  DO jj = 1, nbb

                     DO ll = 1, nba

*                       Bound by BIGNUM to not introduce Inf. The value

*                       is irrelevant; corresponding entries of the

*                       solution will be flushed in consistency scaling.

                        swork( ll, jj ) = min( bignum,

     $                     swork( ll, jj ) / 2.e0**exponent( scaloc ) )

                     END DO

                  END DO

               END IF

               swork( k, l ) = scaloc * swork( k, l )

               xnrm = clange( 'I', k2-k1, l2-l1, c( k1, l1 ), ldc,

     $                        wnrm )

*

               DO i = k + 1, nba

*

*                 C( I, L ) := C( I, L ) - A( K, I )**H * C( K, L )

*

                  i1 = (i - 1) * nb + 1

                  i2 = min( i * nb, m ) + 1

*

*                 Compute scaling factor to survive the linear update

*                 simulating consistent scaling.

*

                  cnrm = clange( 'I', i2-i1, l2-l1, c( i1, l1 ),

     $                           ldc, wnrm )

                  scamin = min( swork( i, l ), swork( k, l ) )

                  cnrm = cnrm * ( scamin / swork( i, l ) )

                  xnrm = xnrm * ( scamin / swork( k, l ) )

                  anrm = swork( i, awrk + k )

                  scaloc = slarmm( anrm, xnrm, cnrm )

                  IF( scaloc * scamin .EQ. zero ) THEN

*                    Use second scaling factor to prevent flushing to zero.

                     buf = buf*2.e0**exponent( scaloc )

                     DO jj = 1, nbb

                        DO ll = 1, nba

                        swork( ll, jj ) = min( bignum,

     $                     swork( ll, jj ) / 2.e0**exponent( scaloc ) )

                        END DO

                     END DO

                     scamin = scamin / 2.e0**exponent( scaloc )

                     scaloc = scaloc / 2.e0**exponent( scaloc )

                  END IF

                  cnrm = cnrm * scaloc

                  xnrm = xnrm * scaloc

*

*                 Simultaneously apply the robust update factor and the

*                 consistency scaling factor to C( I, L ) and C( K, L).

*

                  scal = ( scamin / swork( k, l ) ) * scaloc

                  IF( scal .NE. one ) THEN

                     DO ll = l1, l2-1

                        CALL csscal( k2-k1, scal, c( k1, ll ), 1 )

                     END DO

                  ENDIF

*

                  scal = ( scamin / swork( i, l ) ) * scaloc

                  IF( scal .NE. one ) THEN

                     DO ll = l1, l2-1

                        CALL csscal( i2-i1, scal, c( i1, ll ), 1 )

                     END DO

                  ENDIF

*

*                 Record current scaling factor

*

                  swork( k, l ) = scamin * scaloc

                  swork( i, l ) = scamin * scaloc

*

                  CALL cgemm( 'C', 'N', i2-i1, l2-l1, k2-k1, -cone,

     $                        a( k1, i1 ), lda, c( k1, l1 ), ldc,

     $                        cone, c( i1, l1 ), ldc )

               END DO

*

               DO j = 1, l - 1

*

*                 C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**H

*

                  j1 = (j - 1) * nb + 1

                  j2 = min( j * nb, n ) + 1

*

*                 Compute scaling factor to survive the linear update

*                 simulating consistent scaling.

*

                  cnrm = clange( 'I', k2-k1, j2-j1, c( k1, j1 ),

     $                           ldc, wnrm )

                  scamin = min( swork( k, j ), swork( k, l ) )

                  cnrm = cnrm * ( scamin / swork( k, j ) )

                  xnrm = xnrm * ( scamin / swork( k, l ) )

                  bnrm = swork( l, bwrk + j )

                  scaloc = slarmm( bnrm, xnrm, cnrm )

                  IF( scaloc * scamin .EQ. zero ) THEN

*                    Use second scaling factor to prevent flushing to zero.

                     buf = buf*2.e0**exponent( scaloc )

                     DO jj = 1, nbb

                        DO ll = 1, nba

                        swork( ll, jj ) = min( bignum,

     $                     swork( ll, jj ) / 2.e0**exponent( scaloc ) )

                        END DO

                     END DO

                     scamin = scamin / 2.e0**exponent( scaloc )

                     scaloc = scaloc / 2.e0**exponent( scaloc )

                  END IF

                  cnrm = cnrm * scaloc

                  xnrm = xnrm * scaloc

*

*                 Simultaneously apply the robust update factor and the

*                 consistency scaling factor to C( K, J ) and C( K, L).

*

                  scal = ( scamin / swork( k, l ) ) * scaloc

                  IF( scal .NE. one ) THEN

                     DO ll = l1, l2-1

                        CALL csscal( k2-k1, scal, c( k1, ll ), 1)

                     END DO

                  ENDIF

*

                  scal = ( scamin / swork( k, j ) ) * scaloc

                  IF( scal .NE. one ) THEN

                     DO jj = j1, j2-1

                        CALL csscal( k2-k1, scal, c( k1, jj ), 1 )

                     END DO

                  ENDIF

*

*                 Record current scaling factor

*

                  swork( k, l ) = scamin * scaloc

                  swork( k, j ) = scamin * scaloc

*

                  CALL cgemm( 'N', 'C', k2-k1, j2-j1, l2-l1, -csgn,

     $                        c( k1, l1 ), ldc, b( j1, l1 ), ldb,

     $                        cone, c( k1, j1 ), ldc )

               END DO

            END DO

         END DO

      ELSE IF( notrna .AND. .NOT.notrnb ) THEN

*

*        Solve    A*X + ISGN*X*B**H = scale*C.

*

*        The (K,L)th block of X is determined starting from

*        bottom-right corner column by column by

*

*            A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)**H = C(K,L) - R(K,L)

*

*        Where

*                      M                          N

*            R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**H].

*                    I=K+1                      J=L+1

*

*        Start loop over block rows (index = K) and block columns (index = L)

*

         DO k = nba, 1, -1

*

*           K1: row index of the first row in X( K, L )

*           K2: row index of the first row in X( K+1, L )

*           so that K2 - K1 is the row count of the block X( K, L )

*

            k1 = (k - 1) * nb + 1

            k2 = min( k * nb, m ) + 1

            DO l = nbb, 1, -1

*

*              L1: column index of the first column in X( K, L )

*              L2: column index of the first column in X( K, L + 1)

*              so that L2 - L1 is the column count of the block X( K, L )

*

               l1 = (l - 1) * nb + 1

               l2 = min( l * nb, n ) + 1

*

               CALL ctrsyl( trana, tranb, isgn, k2-k1, l2-l1,

     $                      a( k1, k1 ), lda,

     $                      b( l1, l1 ), ldb,

     $                      c( k1, l1 ), ldc, scaloc, iinfo )

               info = max( info, iinfo )

*

               IF( scaloc * swork( k, l ) .EQ. zero ) THEN

                  IF( scaloc .EQ. zero ) THEN

*                    The magnitude of the largest entry of X(K1:K2-1, L1:L2-1)

*                    is larger than BIGNUM**2 and cannot be

*                    represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1).

*                    Mark the computation as pointless.

                     buf = zero

                  ELSE

*                    Use second scaling factor to prevent flushing to zero.

                     buf = buf*2.e0**exponent( scaloc )

                  END IF

                  DO jj = 1, nbb

                     DO ll = 1, nba

*                       Bound by BIGNUM to not introduce Inf. The value

*                       is irrelevant; corresponding entries of the

*                       solution will be flushed in consistency scaling.

                        swork( ll, jj ) = min( bignum,

     $                     swork( ll, jj ) / 2.e0**exponent( scaloc ) )

                     END DO

                  END DO

               END IF

               swork( k, l ) = scaloc * swork( k, l )

               xnrm = clange( 'I', k2-k1, l2-l1, c( k1, l1 ), ldc,

     $                        wnrm )

*

               DO i = 1, k - 1

*

*                 C( I, L ) := C( I, L ) - A( I, K ) * C( K, L )

*

                  i1 = (i - 1) * nb + 1

                  i2 = min( i * nb, m ) + 1

*

*                 Compute scaling factor to survive the linear update

*                 simulating consistent scaling.

*

                  cnrm = clange( 'I', i2-i1, l2-l1, c( i1, l1 ),

     $                           ldc, wnrm )

                  scamin = min( swork( i, l ), swork( k, l ) )

                  cnrm = cnrm * ( scamin / swork( i, l ) )

                  xnrm = xnrm * ( scamin / swork( k, l ) )

                  anrm = swork( i, awrk + k )

                  scaloc = slarmm( anrm, xnrm, cnrm )

                  IF( scaloc * scamin .EQ. zero ) THEN

*                    Use second scaling factor to prevent flushing to zero.

                     buf = buf*2.e0**exponent( scaloc )

                     DO jj = 1, nbb

                        DO ll = 1, nba

                        swork( ll, jj ) = min( bignum,

     $                     swork( ll, jj ) / 2.e0**exponent( scaloc ) )

                        END DO

                     END DO

                     scamin = scamin / 2.e0**exponent( scaloc )

                     scaloc = scaloc / 2.e0**exponent( scaloc )

                  END IF

                  cnrm = cnrm * scaloc

                  xnrm = xnrm * scaloc

*

*                 Simultaneously apply the robust update factor and the

*                 consistency scaling factor to C( I, L ) and C( K, L).

*

                  scal = ( scamin / swork( k, l ) ) * scaloc

                  IF( scal .NE. one ) THEN

                     DO ll = l1, l2-1

                        CALL csscal( k2-k1, scal, c( k1, ll ), 1 )

                     END DO

                  ENDIF

*

                  scal = ( scamin / swork( i, l ) ) * scaloc

                  IF( scal .NE. one ) THEN

                     DO ll = l1, l2-1

                        CALL csscal( i2-i1, scal, c( i1, ll ), 1 )

                     END DO

                  ENDIF

*

*                 Record current scaling factor

*

                  swork( k, l ) = scamin * scaloc

                  swork( i, l ) = scamin * scaloc

*

                  CALL cgemm( 'N', 'N', i2-i1, l2-l1, k2-k1, -cone,

     $                        a( i1, k1 ), lda, c( k1, l1 ), ldc,

     $                        cone, c( i1, l1 ), ldc )

*

               END DO

*

               DO j = 1, l - 1

*

*                 C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**H

*

                  j1 = (j - 1) * nb + 1

                  j2 = min( j * nb, n ) + 1

*

*                 Compute scaling factor to survive the linear update

*                 simulating consistent scaling.

*

                  cnrm = clange( 'I', k2-k1, j2-j1, c( k1, j1 ),

     $                           ldc, wnrm )

                  scamin = min( swork( k, j ), swork( k, l ) )

                  cnrm = cnrm * ( scamin / swork( k, j ) )

                  xnrm = xnrm * ( scamin / swork( k, l ) )

                  bnrm = swork( l, bwrk + j )

                  scaloc = slarmm( bnrm, xnrm, cnrm )

                  IF( scaloc * scamin .EQ. zero ) THEN

*                    Use second scaling factor to prevent flushing to zero.

                     buf = buf*2.e0**exponent( scaloc )

                     DO jj = 1, nbb

                        DO ll = 1, nba

                        swork( ll, jj ) = min( bignum,

     $                     swork( ll, jj ) / 2.e0**exponent( scaloc ) )

                        END DO

                     END DO

                     scamin = scamin / 2.e0**exponent( scaloc )

                     scaloc = scaloc / 2.e0**exponent( scaloc )

                  END IF

                  cnrm = cnrm * scaloc

                  xnrm = xnrm * scaloc

*

*                 Simultaneously apply the robust update factor and the

*                 consistency scaling factor to C( K, J ) and C( K, L).

*

                  scal = ( scamin / swork( k, l ) ) * scaloc

                  IF( scal .NE. one ) THEN

                     DO jj = l1, l2-1

                        CALL csscal( k2-k1, scal, c( k1, jj ), 1 )

                     END DO

                  ENDIF

*

                  scal = ( scamin / swork( k, j ) ) * scaloc

                  IF( scal .NE. one ) THEN

                     DO jj = j1, j2-1

                        CALL csscal( k2-k1, scal, c( k1, jj ), 1 )

                     END DO

                  ENDIF

*

*                 Record current scaling factor

*

                  swork( k, l ) = scamin * scaloc

                  swork( k, j ) = scamin * scaloc

*

                  CALL cgemm( 'N', 'C', k2-k1, j2-j1, l2-l1, -csgn,

     $                        c( k1, l1 ), ldc, b( j1, l1 ), ldb,

     $                        cone, c( k1, j1 ), ldc )

               END DO

            END DO

         END DO

*

      END IF

*

*     Reduce local scaling factors

*

      scale = swork( 1, 1 )

      DO k = 1, nba

         DO l = 1, nbb

            scale = min( scale, swork( k, l ) )

         END DO

      END DO

      IF( scale .EQ. zero ) THEN

*

*        The magnitude of the largest entry of the solution is larger

*        than BIGNUM**2 and cannot be represented in the

*        form (1/SCALE)*X if SCALE is REAL. Set SCALE to

*        zero and give up.

*

         swork(1,1) = max( nba, nbb )

         swork(2,1) = 2 * nbb + nba

         RETURN

      END IF

*

*     Realize consistent scaling

*

      DO k = 1, nba

         k1 = (k - 1) * nb + 1

         k2 = min( k * nb, m ) + 1

         DO l = 1, nbb

            l1 = (l - 1) * nb + 1

            l2 = min( l * nb, n ) + 1

            scal = scale / swork( k, l )

            IF( scal .NE. one ) THEN

               DO ll = l1, l2-1

                  CALL csscal( k2-k1, scal, c( k1, ll ), 1 )

               END DO

            ENDIF

         END DO

      END DO

*

      IF( buf .NE. one .AND. buf.GT.zero ) THEN

*

*        Decrease SCALE as much as possible.

*

         scaloc = min( scale / smlnum, one / buf )

         buf = buf * scaloc

         scale = scale / scaloc

      END IF

*

      IF( buf.NE.one .AND. buf.GT.zero ) THEN

*

*        In case of overly aggressive scaling during the computation,

*        flushing of the global scale factor may be prevented by

*        undoing some of the scaling. This step is to ensure that

*        this routine flushes only scale factors that TRSYL also

*        flushes and remains usable as a drop-in replacement.

*

*        How much can the normwise largest entry be upscaled?

*

         scal = max( abs( real( c( 1, 1 ) ) ),

     $               abs( aimag( c( 1, 1 ) ) ) )

         DO k = 1, m

            DO l = 1, n

               scal = max( scal, abs( real( c( k, l ) ) ),

     $                     abs( aimag( c( k, l ) ) ) )

            END DO

         END DO

*

*        Increase BUF as close to 1 as possible and apply scaling.

*

         scaloc = min( bignum / scal, one / buf )

         buf = buf * scaloc

         CALL clascl( 'G', -1, -1, one, scaloc, m, n, c, ldc, iinfo )

      END IF

*

*     Combine with buffer scaling factor. SCALE will be flushed if

*     BUF is less than one here.

*

      scale = scale * buf

*

*     Restore workspace dimensions

*

      swork(1,1) = max( nba, nbb )

      swork(2,1) = 2 * nbb + nba

*

      RETURN

*

*     End of CTRSYL3

*

      END

xerbla
subroutine xerbla(srname, info)
Definition cblat2.f:3285

cgemm
subroutine cgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
CGEMM
Definition cgemm.f:188

clascl
subroutine clascl(type, kl, ku, cfrom, cto, m, n, a, lda, info)
CLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition clascl.f:143

csscal
subroutine csscal(n, sa, cx, incx)
CSSCAL
Definition csscal.f:78

ctrsyl3
subroutine ctrsyl3(trana, tranb, isgn, m, n, a, lda, b, ldb, c, ldc, scale, swork, ldswork, info)
CTRSYL3
Definition ctrsyl3.f:156

ctrsyl
subroutine ctrsyl(trana, tranb, isgn, m, n, a, lda, b, ldb, c, ldc, scale, info)
CTRSYL
Definition ctrsyl.f:157