ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pzqrt14.f
Go to the documentation of this file.
00001       DOUBLE PRECISION FUNCTION PZQRT14( TRANS, M, N, NRHS, A, IA, JA,
00002      $                                   DESCA, X, IX, JX, DESCX, WORK )
00003 *
00004 *  -- ScaLAPACK routine (version 1.7) --
00005 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
00006 *     and University of California, Berkeley.
00007 *     May 1, 1997
00008 *
00009 *     .. Scalar Arguments ..
00010       CHARACTER          TRANS
00011       INTEGER            IA, IX, JA, JX, M, N, NRHS
00012 *     ..
00013 *     .. Array Arguments ..
00014       INTEGER            DESCA( * ), DESCX( * )
00015       COMPLEX*16         A( * ), WORK( * ), X( * )
00016 *     ..
00017 *
00018 *  Purpose
00019 *  =======
00020 *
00021 *  PZQRT14 checks whether sub( X ) is in the row space of sub( A ) or
00022 *  sub( A )', where sub( A ) denotes A( IA:IA+M-1, JA:JA+N-1 ) and
00023 *  sub( X ) denotes X( IX:IX+N-1, JX:JX+NRHS-1 ) if TRANS = 'N', and
00024 *  X( IX:IX+N-1, JX:JX+NRHS-1 ) otherwise.  It does so by scaling both
00025 *  sub( X ) and sub( A ) such that their norms are in the range
00026 *  [sqrt(eps), 1/sqrt(eps)], then computing an LQ factorization of
00027 *  [sub( A )',sub( X )]' (if TRANS = 'N') or a QR factorization of
00028 *  [sub( A ),sub( X )] otherwise, and returning the norm of the trailing
00029 *  triangle, scaled by MAX(M,N,NRHS)*eps.
00030 *
00031 *  Notes
00032 *  =====
00033 *
00034 *  Each global data object is described by an associated description
00035 *  vector.  This vector stores the information required to establish
00036 *  the mapping between an object element and its corresponding process
00037 *  and memory location.
00038 *
00039 *  Let A be a generic term for any 2D block cyclicly distributed array.
00040 *  Such a global array has an associated description vector DESCA.
00041 *  In the following comments, the character _ should be read as
00042 *  "of the global array".
00043 *
00044 *  NOTATION        STORED IN      EXPLANATION
00045 *  --------------- -------------- --------------------------------------
00046 *  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
00047 *                                 DTYPE_A = 1.
00048 *  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
00049 *                                 the BLACS process grid A is distribu-
00050 *                                 ted over. The context itself is glo-
00051 *                                 bal, but the handle (the integer
00052 *                                 value) may vary.
00053 *  M_A    (global) DESCA( M_ )    The number of rows in the global
00054 *                                 array A.
00055 *  N_A    (global) DESCA( N_ )    The number of columns in the global
00056 *                                 array A.
00057 *  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
00058 *                                 the rows of the array.
00059 *  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
00060 *                                 the columns of the array.
00061 *  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
00062 *                                 row of the array A is distributed.
00063 *  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
00064 *                                 first column of the array A is
00065 *                                 distributed.
00066 *  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
00067 *                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
00068 *
00069 *  Let K be the number of rows or columns of a distributed matrix,
00070 *  and assume that its process grid has dimension p x q.
00071 *  LOCr( K ) denotes the number of elements of K that a process
00072 *  would receive if K were distributed over the p processes of its
00073 *  process column.
00074 *  Similarly, LOCc( K ) denotes the number of elements of K that a
00075 *  process would receive if K were distributed over the q processes of
00076 *  its process row.
00077 *  The values of LOCr() and LOCc() may be determined via a call to the
00078 *  ScaLAPACK tool function, NUMROC:
00079 *          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
00080 *          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
00081 *  An upper bound for these quantities may be computed by:
00082 *          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
00083 *          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
00084 *
00085 *  Arguments
00086 *  =========
00087 *
00088 *  TRANS   (global input) CHARACTER*1
00089 *          = 'N':  No transpose, check for sub( X ) in the row space of
00090 *                  sub( A ),
00091 *          = 'C':  Conjugate transpose, check for sub( X ) in row space
00092 *                  of sub( A )'.
00093 *
00094 *  M       (global input) INTEGER
00095 *          The number of rows to be operated on, i.e. the number of rows
00096 *          of the distributed submatrix sub( A ). M >= 0.
00097 *
00098 *  N       (global input) INTEGER
00099 *          The number of columns to be operated on, i.e. the number of
00100 *          columns of the distributed submatrix sub( A ). N >= 0.
00101 *
00102 *  NRHS    (global input) INTEGER
00103 *          The number of right hand sides, i.e., the number of columns
00104 *          of the distributed submatrix sub( X ). NRHS >= 0.
00105 *
00106 *  A       (local input) COMPLEX*16 pointer into the local memory
00107 *          to an array of dimension (LLD_A, LOCc(JA+N-1)). This array
00108 *          contains the local pieces of the M-by-N distributed matrix
00109 *          sub( A ).
00110 *
00111 *  IA      (global input) INTEGER
00112 *          The row index in the global array A indicating the first
00113 *          row of sub( A ).
00114 *
00115 *  JA      (global input) INTEGER
00116 *          The column index in the global array A indicating the
00117 *          first column of sub( A ).
00118 *
00119 *  DESCA   (global and local input) INTEGER array of dimension DLEN_.
00120 *          The array descriptor for the distributed matrix A.
00121 *
00122 *  X       (local input) COMPLEX*16 pointer into the local
00123 *          memory to an array of dimension (LLD_X,LOCc(JX+NRHS-1)).
00124 *          On entry, this array contains the local pieces of the
00125 *          N-by-NRHS distributed submatrix sub( X ) if TRANS = 'N',
00126 *          and the M-by-NRHS distributed submatrix sub( X ) otherwise.
00127 *
00128 *  IX      (global input) INTEGER
00129 *          The row index in the global array X indicating the first
00130 *          row of sub( X ).
00131 *
00132 *  JX      (global input) INTEGER
00133 *          The column index in the global array X indicating the
00134 *          first column of sub( X ).
00135 *
00136 *  DESCX   (global and local input) INTEGER array of dimension DLEN_.
00137 *          The array descriptor for the distributed matrix X.
00138 *
00139 *  WORK    (local workspace) COMPLEX*16 array dimension (LWORK)
00140 *          If TRANS='N', LWORK >= MNRHSP * NQ + LTAU + LWF and
00141 *          LWORK >= MP * NNRHSQ + LTAU + LWF otherwise, where
00142 *
00143 *          IF TRANS='N', (LQ fact)
00144 *            MNRHSP = NUMROC( M+NRHS+IROFFA, MB_A, MYROW, IAROW,
00145 *                             NPROW )
00146 *            LTAU   = NUMROC( IA+MIN( M+NRHS, N )-1, MB_A, MYROW,
00147 *                             RSRC_A, NPROW )
00148 *            LWF    = MB_A * ( MB_A + MNRHSP + NQ0 )
00149 *          ELSE         (QR fact)
00150 *            NNRHSQ = NUMROC( N+NRHS+ICOFFA, NB_A, MYCOL, IACOL,
00151 *                             NPCOL )
00152 *            LTAU   = NUMROC( JA+MIN( M, N+NRHS )-1, NB_A, MYCOL,
00153 *                             CSRC_A, NPCOL )
00154 *            LWF    = NB_A * ( NB_A + MP0 + NNRHSQ )
00155 *          END IF
00156 *
00157 *          and,
00158 *
00159 *          IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ),
00160 *          IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ),
00161 *          IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ),
00162 *          MP0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ),
00163 *          NQ0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ).
00164 *
00165 *          INDXG2P and NUMROC are ScaLAPACK tool functions;
00166 *          MYROW, MYCOL, NPROW and NPCOL can be determined by calling
00167 *          the subroutine BLACS_GRIDINFO.
00168 *
00169 *
00170 *  =====================================================================
00171 *
00172 *     .. Parameters ..
00173       INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
00174      $                   LLD_, MB_, M_, NB_, N_, RSRC_
00175       PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
00176      $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
00177      $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
00178       DOUBLE PRECISION   ONE, ZERO
00179       PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
00180 *     ..
00181 *     .. Local Scalars ..
00182       LOGICAL            TPSD
00183       INTEGER            IACOL, IAROW, ICOFFA, ICTXT, IDUM, IIA, INFO,
00184      $                   IPTAU, IPW, IPWA, IROFFA, IWA, IWX, J, JJA,
00185      $                   JWA, JWX, LDW, LWORK, MPWA, MPW, MQW, MYCOL,
00186      $                   MYROW, NPCOL, NPROW, NPW, NQWA, NQW
00187       DOUBLE PRECISION   ANRM, ERR, XNRM
00188       COMPLEX*16         AMAX
00189 *     ..
00190 *     .. Local Arrays ..
00191       INTEGER            DESCW( DLEN_ ), IDUM1( 1 ), IDUM2( 1 )
00192       DOUBLE PRECISION   RWORK( 1 )
00193 *     ..
00194 *     .. External Functions ..
00195       LOGICAL            LSAME
00196       INTEGER            NUMROC
00197       DOUBLE PRECISION   PDLAMCH, PZLANGE
00198       EXTERNAL           LSAME, NUMROC, PDLAMCH, PZLANGE
00199 *     ..
00200 *     .. External Subroutines ..
00201       EXTERNAL           BLACS_GRIDINFO, DESCSET, DGAMX2D, INFOG2L,
00202      $                   PXERBLA, PZMAX1, PZCOPY, PZGELQF,
00203      $                   PZGEQRF, PZLACGV, PZLACPY, PZLASCL
00204 *     ..
00205 *     .. Intrinsic Functions ..
00206       INTRINSIC          ABS, DBLE, MAX, MIN, MOD
00207 *     ..
00208 *     .. Executable Statements ..
00209 *
00210 *     Get grid parameters
00211 *
00212       ICTXT = DESCA( CTXT_ )
00213       CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
00214 *
00215       PZQRT14 = ZERO
00216 *
00217       IPWA = 1
00218       IROFFA = MOD( IA-1, DESCA( MB_ ) )
00219       ICOFFA = MOD( JA-1, DESCA( NB_ ) )
00220       IWA = IROFFA + 1
00221       JWA = ICOFFA + 1
00222       CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA,
00223      $              JJA, IAROW, IACOL )
00224       MPWA = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW )
00225       NQWA = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL )
00226 *
00227       INFO = 0
00228       IF( LSAME( TRANS, 'N' ) ) THEN
00229          IF( N.LE.0 .OR. NRHS.LE.0 )
00230      $      RETURN
00231          TPSD = .FALSE.
00232          MPW = NUMROC( M+NRHS+IROFFA, DESCA( MB_ ), MYROW, IAROW,
00233      $                 NPROW )
00234          NQW = NQWA
00235 *
00236 *        Assign descriptor DESCW for workspace WORK and pointers to
00237 *        matrices sub( A ) and sub( X ) in workspace
00238 *
00239          IWX = IWA + M
00240          JWX = JWA
00241          LDW = MAX( 1, MPW )
00242          CALL DESCSET( DESCW, M+NRHS+IROFFA, N+ICOFFA, DESCA( MB_ ),
00243      $                 DESCA( NB_ ), IAROW, IACOL, ICTXT, LDW )
00244 *
00245       ELSE IF( LSAME( TRANS, 'C' ) ) THEN
00246          IF( M.LE.0 .OR. NRHS.LE.0 )
00247      $      RETURN
00248          TPSD = .TRUE.
00249          MPW = MPWA
00250          NQW = NUMROC( N+NRHS+ICOFFA, DESCA( NB_ ), MYCOL, IACOL,
00251      $                 NPCOL )
00252 *
00253 *        Assign descriptor DESCW for workspace WORK and pointers to
00254 *        matrices sub( A ) and sub( X ) in workspace
00255 *
00256          IWX = IWA
00257          JWX = JWA + N
00258          LDW = MAX( 1, MPW )
00259          CALL DESCSET( DESCW, M+IROFFA, N+NRHS+ICOFFA, DESCA( MB_ ),
00260      $                 DESCA( NB_ ), IAROW, IACOL, ICTXT, LDW )
00261       ELSE
00262          CALL PXERBLA( ICTXT, 'PZQRT14', -1 )
00263          RETURN
00264       END IF
00265 *
00266 *     Copy and scale sub( A )
00267 *
00268       IPTAU = IPWA + MPW*NQW
00269       CALL PZLACPY( 'All', M, N, A, IA, JA, DESCA, WORK( IPWA ), IWA,
00270      $              JWA, DESCW )
00271       RWORK( 1 ) = ZERO
00272       ANRM = PZLANGE( 'M', M, N, WORK( IPWA ), IWA, JWA, DESCW, RWORK )
00273       IF( ANRM.NE.ZERO )
00274      $   CALL PZLASCL( 'G', ANRM, ONE, M, N, WORK( IPWA ), IWA,
00275      $                 JWA, DESCW, INFO )
00276 *
00277 *     Copy sub( X ) or sub( X )' into the right place and scale it
00278 *
00279       IF( TPSD ) THEN
00280 *
00281 *        Copy sub( X ) into columns jwa+n:jwa+n+nrhs-1 of work
00282 *
00283          DO 10 J = 1, NRHS
00284             CALL PZCOPY( M, X, IX, JX+J-1, DESCX, 1, WORK( IPWA ), IWX,
00285      $                   JWX+J-1, DESCW, 1 )
00286    10    CONTINUE
00287          XNRM = PZLANGE( 'M', M, NRHS, WORK( IPWA ), IWX, JWX, DESCW,
00288      $                   RWORK )
00289          IF( XNRM.NE.ZERO )
00290      $      CALL PZLASCL( 'G', XNRM, ONE, M, NRHS, WORK( IPWA ), IWX,
00291      $                    JWX, DESCW, INFO )
00292 *
00293 *        Compute QR factorization of work(iwa:iwa+m-1,jwa:jwa+n+nrhs-1)
00294 *
00295          MQW = NUMROC( M+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL )
00296          IPW = IPTAU + MIN( MQW, NQW )
00297          LWORK = DESCW( NB_ ) * ( MPW + NQW + DESCW( NB_ ) )
00298          CALL PZGEQRF( M, N+NRHS, WORK( IPWA ), IWA, JWA, DESCW,
00299      $                WORK( IPTAU ), WORK( IPW ), LWORK, INFO )
00300 *
00301 *        Compute largest entry in upper triangle of
00302 *        work(iwa+n:iwa+m-1,jwa+n:jwa+n+nrhs-1)
00303 *
00304          ERR = ZERO
00305          IF( N.LT.M ) THEN
00306             DO 20 J = JWX, JWA+N+NRHS-1
00307                CALL PZMAX1( MIN(M-N,J-JWX+1), AMAX, IDUM, WORK( IPWA ),
00308      $                      IWA+N, J, DESCW, 1 )
00309                ERR = MAX( ERR, ABS( AMAX ) )
00310    20       CONTINUE
00311          END IF
00312          CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, IDUM1, IDUM2,
00313      $                 -1, -1, 0 )
00314 *
00315       ELSE
00316 *
00317 *        Copy sub( X )' into rows iwa+m:iwa+m+nrhs-1 of work
00318 *
00319          DO 30 J = 1, NRHS
00320             CALL PZCOPY( N, X, IX, JX+J-1, DESCX, 1, WORK( IPWA ),
00321      $                   IWX+J-1, JWX, DESCW, DESCW( M_ ) )
00322             CALL PZLACGV( N, WORK( IPWA ), IWX+J-1, JWX, DESCW,
00323      $                    DESCW( M_ ) )
00324    30    CONTINUE
00325 *
00326          XNRM = PZLANGE( 'M', NRHS, N, WORK( IPWA ), IWX, JWX, DESCW,
00327      $                   RWORK )
00328          IF( XNRM.NE.ZERO )
00329      $      CALL PZLASCL( 'G', XNRM, ONE, NRHS, N, WORK( IPWA ), IWX,
00330      $                    JWX, DESCW, INFO )
00331 *
00332 *        Compute LQ factorization of work(iwa:iwa+m+nrhs-1,jwa:jwa+n-1)
00333 *
00334          NPW = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW )
00335          IPW = IPTAU + MIN( MPW, NPW )
00336          LWORK = DESCW( MB_ ) * ( MPW + NQW + DESCW( MB_ ) )
00337          CALL PZGELQF( M+NRHS, N, WORK( IPWA ), IWA, JWA, DESCW,
00338      $                 WORK( IPTAU ), WORK( IPW ), LWORK, INFO )
00339 *
00340 *        Compute largest entry in lower triangle in
00341 *        work(iwa+m:iwa+m+nrhs-1,jwa+m:jwa+n-1)
00342 *
00343          ERR = ZERO
00344          DO 40 J = JWA+M, MIN( JWA+N-1, JWA+M+NRHS-1 )
00345             CALL PZMAX1( JWA+M+NRHS-J, AMAX, IDUM, WORK( IPWA ),
00346      $                   IWX+J-JWA-M, J, DESCW, 1 )
00347             ERR = MAX( ERR, ABS( AMAX ) )
00348    40    CONTINUE
00349          CALL DGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, IDUM1, IDUM2,
00350      $                 -1, -1, 0 )
00351 *
00352       END IF
00353 *
00354       PZQRT14 = ERR / ( DBLE( MAX( M, N, NRHS ) ) *
00355      $          PDLAMCH( ICTXT, 'Epsilon' ) )
00356 *
00357       RETURN
00358 *
00359 *     End of PZQRT14
00360 *
00361       END