*  Extracted from: da/ddd/pzlahrd_8f_source.html

      SUBROUTINE pzlahrd( N, K, NB, A, IA, JA, DESCA, TAU, T, Y, IY, JY,

     $                    DESCY, WORK )

*

*  -- ScaLAPACK auxiliary routine (version 1.7) --

*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,

*     and University of California, Berkeley.

*     May 1, 1997

*

*     .. Scalar Arguments ..

      INTEGER             IA, IY, JA, JY, K, N, NB

*     ..

*     .. Array Arguments ..

      INTEGER             DESCA( * ), DESCY( * )

      COMPLEX*16          A( * ), T( * ), TAU( * ), WORK( * ), Y( * )

*     ..

*

*  Purpose

*  =======

*

*  PZLAHRD reduces the first NB columns of a complex general

*  N-by-(N-K+1) distributed matrix A(IA:IA+N-1,JA:JA+N-K) so that

*  elements below the k-th subdiagonal are zero. The reduction is

*  performed by a unitary similarity transformation Q' * A * Q. The

*  routine returns the matrices V and T which determine Q as a block

*  reflector I - V*T*V', and also the matrix Y = A * V * T.

*

*  This is an auxiliary routine called by PZGEHRD. In the following

*  comments sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1).

*

*  Arguments

*  =========

*

*  N       (global input) INTEGER

*          The number of rows and columns to be operated on, i.e. the

*          order of the distributed submatrix sub( A ).

*          N >= 0. The routine returns immediately when N <= 1.

*

*  K       (global input) INTEGER

*          The offset for the reduction. Elements below the k-th

*          subdiagonal in the first NB columns are reduced to zero.

*

*  NB      (global input) INTEGER

*          The number of columns to be reduced.

*          NOTE(review): the local variable J is assigned only inside

*          the loop over 1..NB and is used after it, so NB >= 1

*          appears to be assumed -- confirm against the callers.

*

*  A       (local input/local output) COMPLEX*16 pointer into

*          the local memory to an array of dimension (LLD_A,

*          LOCc(JA+N-K)). On entry, this array contains the local

*          pieces of the N-by-(N-K+1) general distributed matrix

*          A(IA:IA+N-1,JA:JA+N-K). On exit, the elements on and above

*          the k-th subdiagonal in the first NB columns are overwritten

*          with the corresponding elements of the reduced distributed

*          matrix; the elements below the k-th subdiagonal, with the

*          array TAU, represent the matrix Q as a product of elementary

*          reflectors. The other columns of A(IA:IA+N-1,JA:JA+N-K) are

*          unchanged. See Further Details.

*

*  IA      (global input) INTEGER

*          The row index in the global array A indicating the first

*          row of sub( A ).

*

*  JA      (global input) INTEGER

*          The column index in the global array A indicating the

*          first column of sub( A ).

*

*  DESCA   (global and local input) INTEGER array of dimension DLEN_.

*          The array descriptor for the distributed matrix A.

*

*  TAU     (local output) COMPLEX*16 array, dimension LOCc(JA+N-2)

*          The scalar factors of the elementary reflectors (see Further

*          Details). TAU is tied to the distributed matrix A.

*

*  T       (local output) COMPLEX*16 array, dimension (NB_A,NB_A)

*          The upper triangular matrix T. It is addressed below with

*          leading dimension DESCA( NB_ ) and is only written on the

*          process for which IPROC is true.

*

*  Y       (local output) COMPLEX*16 pointer into the local memory

*          to an array of dimension (LLD_Y,NB_A). On exit, this array

*          contains the local pieces of the N-by-NB distributed

*          matrix Y. LLD_Y >= LOCr(IA+N-1).

*

*  IY      (global input) INTEGER

*          The row index in the global array Y indicating the first

*          row of sub( Y ).

*

*  JY      (global input) INTEGER

*          The column index in the global array Y indicating the

*          first column of sub( Y ).

*

*  DESCY   (global and local input) INTEGER array of dimension DLEN_.

*          The array descriptor for the distributed matrix Y.

*

*  WORK    (local workspace) COMPLEX*16 array, dimension (NB)

*          NOTE(review): the code addresses WORK starting at element

*          MOD( JA-1, NB_A ) + 1 and describes it (DESCW) as a

*          1-by-MB_A row vector, so the length actually required may

*          exceed NB when JA is not the first column of a block --

*          confirm against the callers.

*

*  Further Details

*  ===============

*

*  The matrix Q is represented as a product of nb elementary reflectors

*

*     Q = H(1) H(2) . . . H(nb).

*

*  Each H(i) has the form

*

*     H(i) = I - tau * v * v'

*

*  where tau is a complex scalar, and v is a complex vector with

*  v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in

*  A(ia+i+k:ia+n-1,ja+i-1), and tau in TAU(ja+i-1).

*

*  The elements of the vectors v together form the (n-k+1)-by-nb matrix

*  V which is needed, with T and Y, to apply the transformation to the

*  unreduced part of the matrix, using an update of the form:

*  A(ia:ia+n-1,ja:ja+n-k) := (I-V*T*V')*(A(ia:ia+n-1,ja:ja+n-k)-Y*V').

*

*  The contents of A(ia:ia+n-1,ja:ja+n-k) on exit are illustrated by the

*  following example with n = 7, k = 3 and nb = 2:

*

*     ( a   h   a   a   a )

*     ( a   h   a   a   a )

*     ( a   h   a   a   a )

*     ( h   h   a   a   a )

*     ( v1  h   a   a   a )

*     ( v1  v2  a   a   a )

*     ( v1  v2  a   a   a )

*

*  where a denotes an element of the original matrix

*  A(ia:ia+n-1,ja:ja+n-k), h denotes a modified element of the upper

*  Hessenberg matrix H, and vi denotes an element of the vector

*  defining H(i).

*

*  =====================================================================

*

*     .. Parameters ..

*     Positions of the individual entries inside an array descriptor

*     (used below to index DESCA and DESCW), and the complex constants

*     one and zero.

      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,

     $                   lld_, mb_, m_, nb_, n_, rsrc_

      parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,

     $                     ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,

     $                     rsrc_ = 7, csrc_ = 8, lld_ = 9 )

      COMPLEX*16         ONE, ZERO

      parameter( one = ( 1.0d+0, 0.0d+0 ),

     $                   zero = ( 0.0d+0, 0.0d+0 ) )

*     ..

*     .. Local Scalars ..

      LOGICAL            IPROC

      INTEGER            I, IACOL, IAROW, ICTXT, IOFF, II, J, JJ, JL,

     $                   jt, jw, l, myrow, mycol, npcol, nprow, nq

      COMPLEX*16         EI

*     ..

*     .. Local Arrays ..

*     DESCW is the descriptor built below for the row vector kept in

*     WORK.

      INTEGER            DESCW( DLEN_ )

*     ..

*     .. External Functions ..

      INTEGER            NUMROC

      EXTERNAL           numroc

*     ..

*     .. External Subroutines ..

      EXTERNAL           blacs_gridinfo, descset, infog2l, pzelset,

     $                   pzgemv, pzlacgv, pzlarfg, pzscal,

     $                   zaxpy, zcopy, zscal, ztrmv

*     ..

*     .. Intrinsic Functions ..

      INTRINSIC          min, mod

*     ..

*     .. Executable Statements ..

*

*     Quick return if possible (nothing is done when N <= 1)

*

      IF( n.LE.1 )

     $   RETURN

*

*     Take the BLACS context from the descriptor of A and query the

*     process grid for that context.

*

      ictxt = desca( ctxt_ )

      CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )

*

*     IOFF = MOD( JA-1, DESCA( NB_ ) ) is the offset of column JA

*     inside its column block. INFOG2L is queried for the global

*     entry (IA+K,JA): II and JJ are used below as local row/column

*     indices into A, and (IAROW,IACOL) as the coordinates of the

*     process that performs the purely local BLAS operations.

*

      ioff = mod( ja-1, desca( nb_ ) )

      CALL infog2l( ia+k, ja, desca, nprow, npcol, myrow, mycol, ii,

     $              jj, iarow, iacol )

*

*     IPROC is true only on the process at grid position

*     (IAROW,IACOL). NQ is the NUMROC count for N+JA-1 columns,

*     reduced by IOFF on process column IACOL.

*

      iproc = ( myrow.EQ.iarow .AND. mycol.EQ.iacol )

      nq = numroc( n+ja-1, desca( nb_ ), mycol, iacol, npcol )

      IF( mycol.EQ.iacol )

     $   nq = nq - ioff

*

*     EI carries the entry that is temporarily replaced by ONE in A

*     (see the PZELSET calls below). JW = IOFF+1 is the position in

*     WORK at which the vector w starts. DESCW describes WORK as a

*     1-by-DESCA( MB_ ) row vector made of a single block, with source

*     process (IAROW,IACOL) and local leading dimension 1.

*

      ei = zero


      jw = ioff + 1

      CALL descset( descw, 1, desca( mb_ ), 1, desca( mb_ ), iarow,

     $              iacol, ictxt, 1 )

*

*     Loop over the NB columns to be reduced. For the L-th column,

*     J = JA+L-1 is its global column index and I = IA+K+L-2 is the

*     global row just above row I+1, where the unit entry of the

*     L-th reflector vector is placed.

*

      DO 10 l = 1, nb

         i = ia + k + l - 2

         j = ja + l - 1

*

         IF( l.GT.1 ) THEN

*

*           Update A(ia:ia+n-1,j)

*

*           Compute i-th column of A - Y * V'

*

*           Row I of the first L-1 columns of A (a row of V, addressed

*           with increment DESCA( M_ )) is conjugated in place, used as

*           the vector in the product with Y, and conjugated back.

*

            CALL pzlacgv( l-1, a, i, ja, desca, desca( m_ ) )

            CALL pzgemv( 'No transpose', n, l-1, -one, y, iy, jy, descy,

     $                   a, i, ja, desca, desca( m_ ), one, a, ia, j,

     $                   desca, 1 )

            CALL pzlacgv( l-1, a, i, ja, desca, desca( m_ ) )

*

*           Apply I - V * T' * V' to this column (call it b) from the

*           left, using WORK( JW:JW+L-2 ) as workspace for the vector w

*

*           Let  V = ( V1 )   and   b = ( b1 )   (first L-1 rows)

*                    ( V2 )             ( b2 )

*

*           where V1 is unit lower triangular

*

*           w := V1' * b1

*

*           (local operation on the process holding V1; b1 is read

*           from local column JJ+L-1 of A starting at local row II)

*

            IF( iproc ) THEN

               CALL zcopy( l-1, a( (jj+l-2)*desca( lld_ )+ii ), 1,

     $                     work( jw ), 1 )

               CALL ztrmv( 'Lower', 'Conjugate transpose', 'Unit', l-1,

     $                     a( (jj-1)*desca( lld_ )+ii ), desca( lld_ ),

     $                     work( jw ), 1 )

            END IF

*

*           w := w + V2'*b2

*

            CALL pzgemv( 'Conjugate transpose', n-k-l+1, l-1, one, a,

     $                   i+1, ja, desca, a, i+1, j, desca, 1, one, work,

     $                   1, jw, descw, descw( m_ ) )

*

*           w := T'*w

*

            IF( iproc )

     $         CALL ztrmv( 'Upper', 'Conjugate transpose', 'Non-unit',

     $                     l-1, t, desca( nb_ ), work( jw ), 1 )

*

*           b2 := b2 - V2*w

*

            CALL pzgemv( 'No transpose', n-k-l+1, l-1, -one, a, i+1, ja,

     $                   desca, work, 1, jw, descw, descw( m_ ), one,

     $                   a, i+1, j, desca, 1 )

*

*           b1 := b1 - V1*w

*

*           (w is overwritten by V1*w before being subtracted from b1)

*

            IF( iproc ) THEN

               CALL ztrmv( 'Lower', 'No transpose', 'Unit', l-1,

     $                     a( (jj-1)*desca( lld_ )+ii ), desca( lld_ ),

     $                     work( jw ), 1 )

               CALL zaxpy( l-1, -one, work( jw ), 1,

     $                     a( ( jj+l-2 )*desca( lld_ )+ii ), 1 )

            END IF

*

*           Put EI back into A(i,j-1), the entry that was set to ONE

*           at the end of the previous iteration.

*

            CALL pzelset( a, i, j-1, desca, ei )

         END IF

*

*        Generate the elementary reflector H(l) to annihilate

*        A(ia+k+l:ia+n-1,j)

*

*        EI is the second argument of PZLARFG and is written back

*        into A later by PZELSET; meanwhile A(i+1,j) is set to ONE so

*        that column J of A can be used directly as the vector v.

*        MIN( I+2, N+IA-1 ) keeps the row index of the vector part

*        from going past the last row IA+N-1 of sub( A ).

*

         CALL pzlarfg( n-k-l+1, ei, i+1, j, a, min( i+2, n+ia-1 ), j,

     $                 desca, 1, tau )

         CALL pzelset( a, i+1, j, desca, one )

*

*        Compute  Y(iy:iy+n-1,jy+l-1)

*

*        Three steps: Y(:,l) := A(:,j+1:) * v, then

*        w := V(i+1:,1:l-1)' * v, then Y(:,l) := Y(:,l) - Y(:,1:l-1)*w,

*        followed by scaling with the entry TAU( JL ).

*

         CALL pzgemv( 'No transpose', n, n-k-l+1, one, a, ia, j+1,

     $                desca, a, i+1, j, desca, 1, zero, y, iy, jy+l-1,

     $                descy, 1 )

         CALL pzgemv( 'Conjugate transpose', n-k-l+1, l-1, one, a, i+1,

     $                ja, desca, a, i+1, j, desca, 1, zero, work, 1, jw,

     $                descw, descw( m_ ) )

         CALL pzgemv( 'No transpose', n, l-1, -one, y, iy, jy, descy,

     $                work, 1, jw, descw, descw( m_ ), one, y, iy,

     $                jy+l-1, descy, 1 )

*

*        JL is the index into TAU used for this column.

*        NOTE(review): the upper bound JA+NQ-1 combines the global

*        index JA with the local column count NQ, while JJ+L-1 is a

*        local index -- confirm that this clamp is what is intended.

*

         jl = min( jj+l-1, ja+nq-1 )

         CALL pzscal( n, tau( jl ), y, iy, jy+l-1, descy, 1 )

*

*        Compute T(1:l,l)

*

*        Done locally where IPROC is true: column L of T starts at

*        T( JT+1 ) with JT = (L-1)*DESCA( NB_ ). Its first L-1 entries

*        are T(1:l-1,1:l-1) * ( -tau * w ) and its diagonal entry is

*        tau.

*

         IF( iproc ) THEN

            jt = ( l-1 ) * desca( nb_ )

            CALL zscal( l-1, -tau( jl ), work( jw ), 1 )

            CALL zcopy( l-1, work( jw ), 1, t( jt+1 ), 1 )

            CALL ztrmv( 'Upper', 'No transpose', 'Non-unit', l-1, t,

     $                  desca( nb_ ), t( jt+1 ), 1 )

            t( jt+l ) = tau( jl )

         END IF

   10 CONTINUE

*

*     Put EI back into A(ia+k+nb-1,j) for the last reduced column.

*     J still holds the value assigned in the final loop iteration.

*     NOTE(review): J is never assigned when NB < 1, so this call

*     would then use an undefined column index -- confirm that callers

*     always pass NB >= 1.

*

      CALL pzelset( a, k+nb+ia-1, j, desca, ei )

*

      RETURN

*

*     End of PZLAHRD

*

      END