ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pzggqrf.f
Go to the documentation of this file.
00001       SUBROUTINE PZGGQRF( N, M, P, A, IA, JA, DESCA, TAUA, B, IB, JB,
00002      $                    DESCB, TAUB, WORK, LWORK, INFO )
00003 *
00004 *  -- ScaLAPACK routine (version 1.7) --
00005 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
00006 *     and University of California, Berkeley.
00007 *     May 1, 1997
00008 *
00009 *     .. Scalar Arguments ..
00010       INTEGER            IA, IB, INFO, JA, JB, LWORK, M, N, P
00011 *     ..
00012 *     .. Array Arguments ..
00013       INTEGER            DESCA( * ), DESCB( * )
00014       COMPLEX*16         A( * ), B( * ), TAUA( * ), TAUB( * ), WORK( * )
00015 *     ..
00016 *
00017 *  Purpose
00018 *  =======
00019 *
00020 *  PZGGQRF computes a generalized QR factorization of
00021 *  an N-by-M matrix sub( A ) = A(IA:IA+N-1,JA:JA+M-1) and
00022 *  an N-by-P matrix sub( B ) = B(IB:IB+N-1,JB:JB+P-1):
00023 *
00024 *              sub( A ) = Q*R,        sub( B ) = Q*T*Z,
00025 *
00026 *  where Q is an N-by-N unitary matrix, Z is a P-by-P unitary matrix,
00027 *  and R and T assume one of the forms:
00028 *
00029 *  if N >= M,  R = ( R11 ) M  ,   or if N < M,  R = ( R11  R12 ) N,
00030 *                  (  0  ) N-M                         N   M-N
00031 *                     M
00032 *
00033 *  where R11 is upper triangular, and
00034 *
00035 *  if N <= P,  T = ( 0  T12 ) N,   or if N > P,  T = ( T11 ) N-P,
00036 *                   P-N  N                           ( T21 ) P
00037 *                                                       P
00038 *
00039 *  where T12 or T21 is upper triangular.
00040 *
00041 *  In particular, if sub( B ) is square and nonsingular, the GQR
00042 *  factorization of sub( A ) and sub( B ) implicitly gives the QR
00043 *  factorization of inv( sub( B ) )* sub( A ):
00044 *
00045 *               inv( sub( B ) )*sub( A )= Z'*(inv(T)*R)
00046 *
00047 *  where inv( sub( B ) ) denotes the inverse of the matrix sub( B ),
00048 *  and Z' denotes the conjugate transpose of matrix Z.
00049 *
00050 *  Notes
00051 *  =====
00052 *
00053 *  Each global data object is described by an associated description
00054 *  vector.  This vector stores the information required to establish
00055 *  the mapping between an object element and its corresponding process
00056 *  and memory location.
00057 *
00058 *  Let A be a generic term for any 2D block cyclicly distributed array.
00059 *  Such a global array has an associated description vector DESCA.
00060 *  In the following comments, the character _ should be read as
00061 *  "of the global array".
00062 *
00063 *  NOTATION        STORED IN      EXPLANATION
00064 *  --------------- -------------- --------------------------------------
00065 *  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
00066 *                                 DTYPE_A = 1.
00067 *  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
00068 *                                 the BLACS process grid A is distribu-
00069 *                                 ted over. The context itself is glo-
00070 *                                 bal, but the handle (the integer
00071 *                                 value) may vary.
00072 *  M_A    (global) DESCA( M_ )    The number of rows in the global
00073 *                                 array A.
00074 *  N_A    (global) DESCA( N_ )    The number of columns in the global
00075 *                                 array A.
00076 *  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
00077 *                                 the rows of the array.
00078 *  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
00079 *                                 the columns of the array.
00080 *  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
00081 *                                 row of the array A is distributed.
00082 *  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
00083 *                                 first column of the array A is
00084 *                                 distributed.
00085 *  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
00086 *                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
00087 *
00088 *  Let K be the number of rows or columns of a distributed matrix,
00089 *  and assume that its process grid has dimension p x q.
00090 *  LOCr( K ) denotes the number of elements of K that a process
00091 *  would receive if K were distributed over the p processes of its
00092 *  process column.
00093 *  Similarly, LOCc( K ) denotes the number of elements of K that a
00094 *  process would receive if K were distributed over the q processes of
00095 *  its process row.
00096 *  The values of LOCr() and LOCc() may be determined via a call to the
00097 *  ScaLAPACK tool function, NUMROC:
00098 *          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
00099 *          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
00100 *  An upper bound for these quantities may be computed by:
00101 *          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
00102 *          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
00103 *
00104 *  Arguments
00105 *  =========
00106 *
00107 *  N       (global input) INTEGER
00108 *          The number of rows to be operated on i.e the number of rows
00109 *          of the distributed submatrices sub( A ) and sub( B ). N >= 0.
00110 *
00111 *  M       (global input) INTEGER
00112 *          The number of columns to be operated on i.e the number of
00113 *          columns of the distributed submatrix sub( A ).  M >= 0.
00114 *
00115 *  P       (global input) INTEGER
00116 *          The number of columns to be operated on i.e the number of
00117 *          columns of the distributed submatrix sub( B ).  P >= 0.
00118 *
00119 *  A       (local input/local output) COMPLEX*16 pointer into the
00120 *          local memory to an array of dimension (LLD_A, LOCc(JA+M-1)).
00121 *          On entry, the local pieces of the N-by-M distributed matrix
00122 *          sub( A ) which is to be factored.  On exit, the elements on
00123 *          and above the diagonal of sub( A ) contain the min(N,M) by M
00124 *          upper trapezoidal matrix R (R is upper triangular if N >= M);
00125 *          the elements below the diagonal, with the array TAUA,
00126 *          represent the unitary matrix Q as a product of min(N,M)
00127 *          elementary reflectors (see Further Details).
00128 *
00129 *  IA      (global input) INTEGER
00130 *          The row index in the global array A indicating the first
00131 *          row of sub( A ).
00132 *
00133 *  JA      (global input) INTEGER
00134 *          The column index in the global array A indicating the
00135 *          first column of sub( A ).
00136 *
00137 *  DESCA   (global and local input) INTEGER array of dimension DLEN_.
00138 *          The array descriptor for the distributed matrix A.
00139 *
00140 *  TAUA    (local output) COMPLEX*16, array, dimension
00141 *          LOCc(JA+MIN(N,M)-1). This array contains the scalar factors
00142 *          TAUA of the elementary reflectors which represent the unitary
00143 *          matrix Q. TAUA is tied to the distributed matrix A. (see
00144 *          Further Details).
00145 *
00146 *  B       (local input/local output) COMPLEX*16 pointer into the
00147 *          local memory to an array of dimension (LLD_B, LOCc(JB+P-1)).
00148 *          On entry, the local pieces of the N-by-P distributed matrix
00149 *          sub( B ) which is to be factored. On exit, if N <= P, the
00150 *          upper triangle of B(IB:IB+N-1,JB+P-N:JB+P-1) contains the
00151 *          N by N upper triangular matrix T; if N > P, the elements on
00152 *          and above the (N-P)-th subdiagonal contain the N by P upper
00153 *          trapezoidal matrix T; the remaining elements, with the array
00154 *          TAUB, represent the unitary matrix Z as a product of
00155 *          elementary reflectors (see Further Details).
00156 *
00157 *  IB      (global input) INTEGER
00158 *          The row index in the global array B indicating the first
00159 *          row of sub( B ).
00160 *
00161 *  JB      (global input) INTEGER
00162 *          The column index in the global array B indicating the
00163 *          first column of sub( B ).
00164 *
00165 *  DESCB   (global and local input) INTEGER array of dimension DLEN_.
00166 *          The array descriptor for the distributed matrix B.
00167 *
00168 *  TAUB    (local output) COMPLEX*16, array, dimension LOCr(IB+N-1)
00169 *          This array contains the scalar factors of the elementary
00170 *          reflectors which represent the unitary matrix Z. TAUB is
00171 *          tied to the distributed matrix B (see Further Details).
00172 *
00173 *  WORK    (local workspace/local output) COMPLEX*16 array,
00174 *                                                  dimension (LWORK)
00175 *          On exit, WORK(1) returns the minimal and optimal LWORK.
00176 *
00177 *  LWORK   (local or global input) INTEGER
00178 *          The dimension of the array WORK.
00179 *          LWORK is local input and must be at least
00180 *          LWORK >= MAX( NB_A * ( NpA0 + MqA0 + NB_A ),
00181 *                        MAX( (NB_A*(NB_A-1))/2, (PqB0 + NpB0)*NB_A ) +
00182 *                             NB_A * NB_A,
00183 *                        MB_B * ( NpB0 + PqB0 + MB_B ) ), where
00184 *
00185 *          IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ),
00186 *          IAROW  = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ),
00187 *          IACOL  = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ),
00188 *          NpA0   = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ),
00189 *          MqA0   = NUMROC( M+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ),
00190 *
00191 *          IROFFB = MOD( IB-1, MB_B ), ICOFFB = MOD( JB-1, NB_B ),
00192 *          IBROW  = INDXG2P( IB, MB_B, MYROW, RSRC_B, NPROW ),
00193 *          IBCOL  = INDXG2P( JB, NB_B, MYCOL, CSRC_B, NPCOL ),
00194 *          NpB0   = NUMROC( N+IROFFB, MB_B, MYROW, IBROW, NPROW ),
00195 *          PqB0   = NUMROC( P+ICOFFB, NB_B, MYCOL, IBCOL, NPCOL ),
00196 *
00197 *          and NUMROC, INDXG2P are ScaLAPACK tool functions;
00198 *          MYROW, MYCOL, NPROW and NPCOL can be determined by calling
00199 *          the subroutine BLACS_GRIDINFO.
00200 *
00201 *          If LWORK = -1, then LWORK is global input and a workspace
00202 *          query is assumed; the routine only calculates the minimum
00203 *          and optimal size for all work arrays. Each of these
00204 *          values is returned in the first entry of the corresponding
00205 *          work array, and no error message is issued by PXERBLA.
00206 *
00207 *  INFO    (global output) INTEGER
00208 *          = 0:  successful exit
00209 *          < 0:  If the i-th argument is an array and the j-entry had
00210 *                an illegal value, then INFO = -(i*100+j), if the i-th
00211 *                argument is a scalar and had an illegal value, then
00212 *                INFO = -i.
00213 *
00214 *  Further Details
00215 *  ===============
00216 *
00217 *  The matrix Q is represented as a product of elementary reflectors
00218 *
00219 *     Q = H(ja) H(ja+1) . . . H(ja+k-1), where k = min(n,m).
00220 *
00221 *  Each H(i) has the form
00222 *
00223 *     H(i) = I - taua * v * v'
00224 *
00225 *  where taua is a complex scalar, and v is a complex vector with
00226 *  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in
00227 *  A(ia+i:ia+n-1,ja+i-1), and taua in TAUA(ja+i-1).
00228 *  To form Q explicitly, use ScaLAPACK subroutine PZUNGQR.
00229 *  To use Q to update another matrix, use ScaLAPACK subroutine PZUNMQR.
00230 *
00231 *  The matrix Z is represented as a product of elementary reflectors
00232 *
00233 *     Z = H(ib)' H(ib+1)' . . . H(ib+k-1)', where k = min(n,p).
00234 *
00235 *  Each H(i) has the form
00236 *
00237 *     H(i) = I - taub * v * v'
00238 *
00239 *  where taub is a complex scalar, and v is a complex vector with
00240 *  v(p-k+i+1:p) = 0 and v(p-k+i) = 1; conjg(v(1:p-k+i-1)) is stored on
00241 *  exit in B(ib+n-k+i-1,jb:jb+p-k+i-2), and taub in TAUB(ib+n-k+i-1).
00242 *  To form Z explicitly, use ScaLAPACK subroutine PZUNGRQ.
00243 *  To use Z to update another matrix, use ScaLAPACK subroutine PZUNMRQ.
00244 *
00245 *  Alignment requirements
00246 *  ======================
00247 *
00248 *  The distributed submatrices sub( A ) and sub( B ) must verify some
00249 *  alignment properties, namely the following expression should be true:
00250 *
00251 *  ( MB_A.EQ.MB_B .AND. IROFFA.EQ.IROFFB .AND. IAROW.EQ.IBROW )
00252 *
00253 *  =====================================================================
00254 *
00255 *     .. Parameters ..
00256       INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
00257      $                   LLD_, MB_, M_, NB_, N_, RSRC_
00258       PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
00259      $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
00260      $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
00261 *     ..
00262 *     .. Local Scalars ..
00263       LOGICAL            LQUERY
00264       INTEGER            IACOL, IAROW, IBCOL, IBROW, ICOFFA, ICOFFB,
00265      $                   ICTXT, IROFFA, IROFFB, LWMIN, MQA0, MYCOL,
00266      $                   MYROW, NPA0, NPB0, NPCOL, NPROW, PQB0
00267 *     ..
00268 *     .. External Subroutines ..
00269       EXTERNAL           BLACS_GRIDINFO, CHK1MAT, PCHK2MAT, PXERBLA,
00270      $                   PZGEQRF, PZGERQF, PZUNMQR
00271 *     ..
00272 *     .. Local Arrays ..
00273       INTEGER            IDUM1( 1 ), IDUM2( 1 )
00274 *     ..
00275 *     .. External Functions ..
00276       INTEGER            INDXG2P, NUMROC
00277       EXTERNAL           INDXG2P, NUMROC
00278 *     ..
00279 *     .. Intrinsic Functions ..
00280       INTRINSIC          DBLE, DCMPLX, INT, MAX, MIN, MOD
00281 *     ..
00282 *     .. Executable Statements ..
00283 *
00284 *     Get grid parameters
00285 *
00286       ICTXT = DESCA( CTXT_ )
00287       CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
00288 *
00289 *     Test the input parameters
00290 *
00291       INFO = 0
00292       IF( NPROW.EQ.-1 ) THEN
00293          INFO = -707
00294       ELSE
00295          CALL CHK1MAT( N, 1, M, 2, IA, JA, DESCA, 7, INFO )
00296          CALL CHK1MAT( N, 1, P, 3, IB, JB, DESCB, 12, INFO )
00297          IF( INFO.EQ.0 ) THEN
00298             IROFFA = MOD( IA-1, DESCA( MB_ ) )
00299             ICOFFA = MOD( JA-1, DESCA( NB_ ) )
00300             IROFFB = MOD( IB-1, DESCB( MB_ ) )
00301             ICOFFB = MOD( JB-1, DESCB( NB_ ) )
00302             IAROW = INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ),
00303      $                       NPROW )
00304             IACOL = INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ),
00305      $                       NPCOL )
00306             IBROW = INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ),
00307      $                       NPROW )
00308             IBCOL = INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ),
00309      $                       NPCOL )
00310             NPA0 = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW )
00311             MQA0 = NUMROC( M+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL )
00312             NPB0 = NUMROC( N+IROFFB, DESCB( MB_ ), MYROW, IBROW, NPROW )
00313             PQB0 = NUMROC( P+ICOFFB, DESCB( NB_ ), MYCOL, IBCOL, NPCOL )
00314             LWMIN = MAX( DESCA( NB_ ) * ( NPA0 + MQA0 + DESCA( NB_ ) ),
00315      $        MAX( MAX( ( DESCA( NB_ )*( DESCA( NB_ ) - 1 ) ) / 2,
00316      $         ( PQB0 + NPB0 ) * DESCA( NB_ ) ) +
00317      $           DESCA( NB_ ) * DESCA( NB_ ),
00318      $         DESCB( MB_ ) * ( NPB0 + PQB0 + DESCB( MB_ ) ) ) )
00319 *
00320             WORK( 1 ) = DCMPLX( DBLE( LWMIN ) )
00321             LQUERY = ( LWORK.EQ.-1 )
00322             IF( IAROW.NE.IBROW .OR. IROFFA.NE.IROFFB ) THEN
00323                INFO = -10
00324             ELSE IF( DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN
00325                INFO = -1203
00326             ELSE IF( ICTXT.NE.DESCB( CTXT_ ) ) THEN
00327                INFO = -1207
00328             ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
00329                INFO = -15
00330             END IF
00331          END IF
00332          IF( LQUERY ) THEN
00333             IDUM1( 1 ) = -1
00334          ELSE
00335             IDUM1( 1 ) = 1
00336          END IF
00337          IDUM2( 1 ) = 15
00338          CALL PCHK2MAT( N, 1, M, 2, IA, JA, DESCA, 7, N, 1, P, 3, IB,
00339      $                  JB, DESCB, 12, 1, IDUM1, IDUM2, INFO )
00340       END IF
00341 *
00342       IF( INFO.NE.0 ) THEN
00343          CALL PXERBLA( ICTXT, 'PZGGQRF', -INFO )
00344          RETURN
00345       ELSE IF( LQUERY ) THEN
00346          RETURN
00347       END IF
00348 *
00349 *     QR factorization of N-by-M matrix sub( A ): sub( A ) = Q*R
00350 *
00351       CALL PZGEQRF( N, M, A, IA, JA, DESCA, TAUA, WORK, LWORK, INFO )
00352       LWMIN = INT( WORK( 1 ) )
00353 *
00354 *     Update sub( B ) := Q'*sub( B ).
00355 *
00356       CALL PZUNMQR( 'Left', 'Conjugate Transpose', N, P, MIN( N, M ), A,
00357      $              IA, JA, DESCA, TAUA, B, IB, JB, DESCB, WORK, LWORK,
00358      $              INFO )
00359       LWMIN = MIN( LWMIN, INT( WORK( 1 ) ) )
00360 *
00361 *     RQ factorization of N-by-P matrix sub( B ): sub( B ) = T*Z.
00362 *
00363       CALL PZGERQF( N, P, B, IB, JB, DESCB, TAUB, WORK, LWORK, INFO )
00364       WORK( 1 ) = DCMPLX( DBLE( MAX( LWMIN, INT( WORK( 1 ) ) ) ) )
00365 *
00366       RETURN
00367 *
00368 *     End of PZGGQRF
00369 *
00370       END