ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pzgehrd.f
Go to the documentation of this file.
00001       SUBROUTINE PZGEHRD( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK,
00002      $                    LWORK, INFO )
00003 *
00004 *  -- ScaLAPACK routine (version 1.7) --
00005 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
00006 *     and University of California, Berkeley.
00007 *     May 25, 2001
00008 *
00009 *     .. Scalar Arguments ..
00010       INTEGER             IA, IHI, ILO, INFO, JA, LWORK, N
00011 *     ..
00012 *     .. Array Arguments ..
00013       INTEGER             DESCA( * )
00014       COMPLEX*16          A( * ), TAU( * ), WORK( * )
00015 *     ..
00016 *
00017 *  Purpose
00018 *  =======
00019 *
00020 *  PZGEHRD reduces a complex general distributed matrix sub( A )
00021 *  to upper Hessenberg form H by an unitary similarity transformation:
00022 *  Q' * sub( A ) * Q = H, where
00023 *  sub( A ) = A(IA+N-1:IA+N-1,JA+N-1:JA+N-1).
00024 *
00025 *  Notes
00026 *  =====
00027 *
00028 *  Each global data object is described by an associated description
00029 *  vector.  This vector stores the information required to establish
00030 *  the mapping between an object element and its corresponding process
00031 *  and memory location.
00032 *
00033 *  Let A be a generic term for any 2D block cyclicly distributed array.
00034 *  Such a global array has an associated description vector DESCA.
00035 *  In the following comments, the character _ should be read as
00036 *  "of the global array".
00037 *
00038 *  NOTATION        STORED IN      EXPLANATION
00039 *  --------------- -------------- --------------------------------------
00040 *  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
00041 *                                 DTYPE_A = 1.
00042 *  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
00043 *                                 the BLACS process grid A is distribu-
00044 *                                 ted over. The context itself is glo-
00045 *                                 bal, but the handle (the integer
00046 *                                 value) may vary.
00047 *  M_A    (global) DESCA( M_ )    The number of rows in the global
00048 *                                 array A.
00049 *  N_A    (global) DESCA( N_ )    The number of columns in the global
00050 *                                 array A.
00051 *  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
00052 *                                 the rows of the array.
00053 *  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
00054 *                                 the columns of the array.
00055 *  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
00056 *                                 row of the array A is distributed.
00057 *  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
00058 *                                 first column of the array A is
00059 *                                 distributed.
00060 *  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
00061 *                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
00062 *
00063 *  Let K be the number of rows or columns of a distributed matrix,
00064 *  and assume that its process grid has dimension p x q.
00065 *  LOCr( K ) denotes the number of elements of K that a process
00066 *  would receive if K were distributed over the p processes of its
00067 *  process column.
00068 *  Similarly, LOCc( K ) denotes the number of elements of K that a
00069 *  process would receive if K were distributed over the q processes of
00070 *  its process row.
00071 *  The values of LOCr() and LOCc() may be determined via a call to the
00072 *  ScaLAPACK tool function, NUMROC:
00073 *          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
00074 *          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
00075 *  An upper bound for these quantities may be computed by:
00076 *          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
00077 *          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
00078 *
00079 *  Arguments
00080 *  =========
00081 *
00082 *  N       (global input) INTEGER
00083 *          The number of rows and columns to be operated on, i.e. the
00084 *          order of the distributed submatrix sub( A ). N >= 0.
00085 *
00086 *  ILO     (global input) INTEGER
00087 *  IHI     (global input) INTEGER
00088 *          It is assumed that sub( A ) is already upper triangular in
00089 *          rows IA:IA+ILO-2 and IA+IHI:IA+N-1 and columns JA:JA+ILO-2
00090 *          and JA+IHI:JA+N-1. See Further Details. If N > 0,
00091 *          1 <= ILO <= IHI <= N; otherwise set ILO = 1, IHI = N.
00092 *
00093 *  A       (local input/local output) COMPLEX*16 pointer into the
00094 *          local memory to an array of dimension (LLD_A,LOCc(JA+N-1)).
00095 *          On entry, this array contains the local pieces of the N-by-N
00096 *          general distributed matrix sub( A ) to be reduced. On exit,
00097 *          the upper triangle and the first subdiagonal of sub( A ) are
00098 *          overwritten with the upper Hessenberg matrix H, and the ele-
00099 *          ments below the first subdiagonal, with the array TAU, repre-
00100 *          sent the unitary matrix Q as a product of elementary
00101 *          reflectors. See Further Details.
00102 *
00103 *  IA      (global input) INTEGER
00104 *          The row index in the global array A indicating the first
00105 *          row of sub( A ).
00106 *
00107 *  JA      (global input) INTEGER
00108 *          The column index in the global array A indicating the
00109 *          first column of sub( A ).
00110 *
00111 *  DESCA   (global and local input) INTEGER array of dimension DLEN_.
00112 *          The array descriptor for the distributed matrix A.
00113 *
00114 *  TAU     (local output) COMPLEX*16 array, dimension LOCc(JA+N-2)
00115 *          The scalar factors of the elementary reflectors (see Further
00116 *          Details). Elements JA:JA+ILO-2 and JA+IHI:JA+N-2 of TAU are
00117 *          set to zero. TAU is tied to the distributed matrix A.
00118 *
00119 *  WORK    (local workspace/local output) COMPLEX*16 array,
00120 *                                                    dimension (LWORK)
00121 *          On exit, WORK( 1 ) returns the minimal and optimal LWORK.
00122 *
00123 *  LWORK   (local or global input) INTEGER
00124 *          The dimension of the array WORK.
00125 *          LWORK is local input and must be at least
00126 *          LWORK >= NB*NB + NB*MAX( IHIP+1, IHLP+INLQ )
00127 *
00128 *          where NB = MB_A = NB_A, IROFFA = MOD( IA-1, NB ),
00129 *          ICOFFA = MOD( JA-1, NB ), IOFF = MOD( IA+ILO-2, NB ),
00130 *          IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ),
00131 *          IHIP = NUMROC( IHI+IROFFA, NB, MYROW, IAROW, NPROW ),
00132 *          ILROW = INDXG2P( IA+ILO-1, NB, MYROW, RSRC_A, NPROW ),
00133 *          IHLP = NUMROC( IHI-ILO+IOFF+1, NB, MYROW, ILROW, NPROW ),
00134 *          ILCOL = INDXG2P( JA+ILO-1, NB, MYCOL, CSRC_A, NPCOL ),
00135 *          INLQ = NUMROC( N-ILO+IOFF+1, NB, MYCOL, ILCOL, NPCOL ),
00136 *
00137 *          INDXG2P and NUMROC are ScaLAPACK tool functions;
00138 *          MYROW, MYCOL, NPROW and NPCOL can be determined by calling
00139 *          the subroutine BLACS_GRIDINFO.
00140 *
00141 *          If LWORK = -1, then LWORK is global input and a workspace
00142 *          query is assumed; the routine only calculates the minimum
00143 *          and optimal size for all work arrays. Each of these
00144 *          values is returned in the first entry of the corresponding
00145 *          work array, and no error message is issued by PXERBLA.
00146 *
00147 *  INFO    (global output) INTEGER
00148 *          = 0:  successful exit
00149 *          < 0:  If the i-th argument is an array and the j-entry had
00150 *                an illegal value, then INFO = -(i*100+j), if the i-th
00151 *                argument is a scalar and had an illegal value, then
00152 *                INFO = -i.
00153 *
00154 *  Further Details
00155 *  ===============
00156 *
00157 *  The matrix Q is represented as a product of (ihi-ilo) elementary
00158 *  reflectors
00159 *
00160 *     Q = H(ilo) H(ilo+1) . . . H(ihi-1).
00161 *
00162 *  Each H(i) has the form
00163 *
00164 *     H(i) = I - tau * v * v'
00165 *
00166 *  where tau is a complex scalar, and v is a complex vector with
00167 *  v(1:I) = 0, v(I+1) = 1 and v(IHI+1:N) = 0; v(I+2:IHI) is stored on
00168 *  exit in A(IA+ILO+I:IA+IHI-1,JA+ILO+I-2), and tau in TAU(JA+ILO+I-2).
00169 *
00170 *  The contents of A(IA:IA+N-1,JA:JA+N-1) are illustrated by the follow-
00171 *  ing example, with N = 7, ILO = 2 and IHI = 6:
00172 *
00173 *  on entry                         on exit
00174 *
00175 *  ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a )
00176 *  (     a   a   a   a   a   a )    (      a   h   h   h   h   a )
00177 *  (     a   a   a   a   a   a )    (      h   h   h   h   h   h )
00178 *  (     a   a   a   a   a   a )    (      v2  h   h   h   h   h )
00179 *  (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h )
00180 *  (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h )
00181 *  (                         a )    (                          a )
00182 *
00183 *  where a denotes an element of the original matrix sub( A ), H denotes
00184 *  a modified element of the upper Hessenberg matrix H, and vi denotes
00185 *  an element of the vector defining H(JA+ILO+I-2).
00186 *
00187 *  Alignment requirements
00188 *  ======================
00189 *
00190 *  The distributed submatrix sub( A ) must verify some alignment proper-
00191 *  ties, namely the following expression should be true:
00192 *  ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA )
00193 *
00194 *  =====================================================================
00195 *
00196 *     .. Parameters ..
00197       INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
00198      $                   LLD_, MB_, M_, NB_, N_, RSRC_
00199       PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
00200      $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
00201      $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
00202       COMPLEX*16         ONE, ZERO
00203       PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ),
00204      $                   ZERO = ( 0.0D+0, 0.0D+0 ) )
00205 *     ..
00206 *     .. Local Scalars ..
00207       LOGICAL            LQUERY
00208       CHARACTER          COLCTOP, ROWCTOP
00209       INTEGER            I, IACOL, IAROW, IB, ICOFFA, ICTXT, IHIP,
00210      $                   IHLP, IIA, IINFO, ILCOL, ILROW, IMCOL, INLQ,
00211      $                   IOFF, IPT, IPW, IPY, IROFFA, J, JJ, JJA, JY,
00212      $                   K, L, LWMIN, MYCOL, MYROW, NB, NPCOL, NPROW,
00213      $                   NQ
00214       COMPLEX*16         EI
00215 *     ..
00216 *     .. Local Arrays ..
00217       INTEGER            DESCY( DLEN_ ), IDUM1( 3 ), IDUM2( 3 )
00218 *     ..
00219 *     .. External Subroutines ..
00220       EXTERNAL           BLACS_GRIDINFO, CHK1MAT, DESCSET, INFOG1L,
00221      $                   INFOG2L, PCHK1MAT, PB_TOPGET, PB_TOPSET,
00222      $                   PXERBLA, PZGEMM, PZGEHD2, PZLAHRD, PZLARFB
00223 *     ..
00224 *     .. External Functions ..
00225       INTEGER            INDXG2P, NUMROC
00226       EXTERNAL           INDXG2P, NUMROC
00227 *     ..
00228 *     .. Intrinsic Functions ..
00229       INTRINSIC          DBLE, DCMPLX, MAX, MIN, MOD
00230 *     ..
00231 *     .. Executable Statements ..
00232 *
00233 *     Get grid parameters
00234 *
00235       ICTXT = DESCA( CTXT_ )
00236       CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
00237 *
00238 *     Test the input parameters
00239 *
00240       INFO = 0
00241       IF( NPROW.EQ.-1 ) THEN
00242          INFO = -(700+CTXT_)
00243       ELSE
00244          CALL CHK1MAT( N, 1, N, 1, IA, JA, DESCA, 7, INFO )
00245          IF( INFO.EQ.0 ) THEN
00246             NB = DESCA( NB_ )
00247             IROFFA = MOD( IA-1, NB )
00248             ICOFFA = MOD( JA-1, NB )
00249             CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL,
00250      $                    IIA, JJA, IAROW, IACOL )
00251             IHIP = NUMROC( IHI+IROFFA, NB, MYROW, IAROW, NPROW )
00252             IOFF = MOD( IA+ILO-2, NB )
00253             ILROW = INDXG2P( IA+ILO-1, NB, MYROW, DESCA( RSRC_ ),
00254      $                       NPROW )
00255             IHLP = NUMROC( IHI-ILO+IOFF+1, NB, MYROW, ILROW, NPROW )
00256             ILCOL = INDXG2P( JA+ILO-1, NB, MYCOL, DESCA( CSRC_ ),
00257      $                       NPCOL )
00258             INLQ = NUMROC( N-ILO+IOFF+1, NB, MYCOL, ILCOL, NPCOL )
00259             LWMIN = NB*( NB + MAX( IHIP+1, IHLP+INLQ ) )
00260 *
00261             WORK( 1 ) = DCMPLX( DBLE( LWMIN ) )
00262             LQUERY = ( LWORK.EQ.-1 )
00263             IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN
00264                INFO = -2
00265             ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN
00266                INFO = -3
00267             ELSE IF( IROFFA.NE.ICOFFA .OR. IROFFA.NE.0 ) THEN
00268                INFO = -6
00269             ELSE IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN
00270                INFO = -(700+NB_)
00271             ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
00272                INFO = -10
00273             END IF
00274          END IF
00275          IDUM1( 1 ) = ILO
00276          IDUM2( 1 ) = 2
00277          IDUM1( 2 ) = IHI
00278          IDUM2( 2 ) = 3
00279          IF( LWORK.EQ.-1 ) THEN
00280             IDUM1( 3 ) = -1
00281          ELSE
00282             IDUM1( 3 ) = 1
00283          END IF
00284          IDUM2( 3 ) = 10
00285          CALL PCHK1MAT( N, 1, N, 1, IA, JA, DESCA, 7, 3, IDUM1, IDUM2,
00286      $                  INFO )
00287       END IF
00288 *
00289       IF( INFO.NE.0 ) THEN
00290          CALL PXERBLA( ICTXT, 'PZGEHRD', -INFO )
00291          RETURN
00292       ELSE IF( LQUERY ) THEN
00293          RETURN
00294       END IF
00295 *
00296 *     Set elements JA:JA+ILO-2 and JA+JHI-1:JA+N-2 of TAU to zero.
00297 *
00298       NQ = NUMROC( JA+N-2, NB, MYCOL, DESCA( CSRC_ ), NPCOL )
00299       CALL INFOG1L( JA+ILO-2, NB, NPCOL, MYCOL, DESCA( CSRC_ ), JJ,
00300      $              IMCOL )
00301       DO 10 J = JJA, MIN( JJ, NQ )
00302          TAU( J ) = ZERO
00303    10 CONTINUE
00304 *
00305       CALL INFOG1L( JA+IHI-1, NB, NPCOL, MYCOL, DESCA( CSRC_ ), JJ,
00306      $              IMCOL )
00307       DO 20 J = JJ, NQ
00308          TAU( J ) = ZERO
00309    20 CONTINUE
00310 *
00311 *     Quick return if possible
00312 *
00313       IF( IHI-ILO.LE.0 )
00314      $   RETURN
00315 *
00316       CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP )
00317       CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise',    ROWCTOP )
00318       CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', '1-tree' )
00319       CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise',    '1-tree' )
00320 *
00321       IPT = 1
00322       IPY = IPT + NB * NB
00323       IPW = IPY + IHIP * NB
00324       CALL DESCSET( DESCY, IHI+IROFFA, NB, NB, NB, IAROW, ILCOL, ICTXT,
00325      $              MAX( 1, IHIP ) )
00326 *
00327       K = ILO
00328       IB = NB - IOFF
00329       JY = IOFF + 1
00330 *
00331 *     Loop over remaining block of columns
00332 *
00333       DO 30 L = 1, IHI-ILO+IOFF-NB, NB
00334          I = IA + K - 1
00335          J = JA + K - 1
00336 *
00337 *        Reduce columns j:j+ib-1 to Hessenberg form, returning the
00338 *        matrices V and T of the block reflector H = I - V*T*V'
00339 *        which performs the reduction, and also the matrix Y = A*V*T
00340 *
00341          CALL PZLAHRD( IHI, K, IB, A, IA, J, DESCA, TAU, WORK( IPT ),
00342      $                 WORK( IPY ), 1, JY, DESCY, WORK( IPW ) )
00343 *
00344 *        Apply the block reflector H to A(ia:ia+ihi-1,j+ib:ja+ihi-1)
00345 *        from the right, computing  A := A - Y * V'.
00346 *        V(i+ib,ib-1) must be set to 1.
00347 *
00348          CALL PZELSET2( EI, A, I+IB, J+IB-1, DESCA, ONE )
00349          CALL PZGEMM( 'No transpose', 'Conjugate transpose', IHI,
00350      $                IHI-K-IB+1, IB, -ONE, WORK( IPY ), 1, JY, DESCY,
00351      $                A, I+IB, J, DESCA, ONE, A, IA, J+IB, DESCA )
00352          CALL PZELSET( A, I+IB, J+IB-1, DESCA, EI )
00353 *
00354 *        Apply the block reflector H to A(i+1:ia+ihi-1,j+ib:ja+n-1) from
00355 *        the left
00356 *
00357          CALL PZLARFB( 'Left', 'Conjugate transpose', 'Forward',
00358      $                 'Columnwise', IHI-K, N-K-IB+1, IB, A, I+1, J,
00359      $                 DESCA, WORK( IPT ), A, I+1, J+IB, DESCA,
00360      $                 WORK( IPY ) )
00361 *
00362          K = K + IB
00363          IB = NB
00364          JY = 1
00365          DESCY( CSRC_ ) = MOD( DESCY( CSRC_ ) + 1, NPCOL )
00366 *
00367    30 CONTINUE
00368 *
00369 *     Use unblocked code to reduce the rest of the matrix
00370 *
00371       CALL PZGEHD2( N, K, IHI, A, IA, JA, DESCA, TAU, WORK, LWORK,
00372      $              IINFO )
00373 *
00374       CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP )
00375       CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise',    ROWCTOP )
00376 *
00377       WORK( 1 ) = DCMPLX( DBLE( LWMIN ) )
00378 *
00379       RETURN
00380 *
00381 *     End of PZGEHRD
00382 *
00383       END