ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
psqrt14.f
Go to the documentation of this file.
00001       REAL             FUNCTION PSQRT14( TRANS, M, N, NRHS, A, IA, JA,
00002      $                                   DESCA, X, IX, JX, DESCX, WORK )
00003 *
00004 *  -- ScaLAPACK routine (version 1.7) --
00005 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
00006 *     and University of California, Berkeley.
00007 *     May 1, 1997
00008 *
00009 *     .. Scalar Arguments ..
00010       CHARACTER          TRANS
00011       INTEGER            IA, IX, JA, JX, M, N, NRHS
00012 *     ..
00013 *     .. Array Arguments ..
00014       INTEGER            DESCA( * ), DESCX( * )
00015       REAL               A( * ), WORK( * ), X( * )
00016 *     ..
00017 *
00018 *  Purpose
00019 *  =======
00020 *
00021 *  PSQRT14 checks whether sub( X ) is in the row space of sub( A ) or
00022 *  sub( A )', where sub( A ) denotes A( IA:IA+M-1, JA:JA+N-1 ) and
00023 *  sub( X ) denotes X( IX:IX+N-1, JX:JX+NRHS-1 ) if TRANS = 'N', and
00024 *  X( IX:IX+N-1, JX:JX+NRHS-1 ) otherwise.  It does so by scaling both
00025 *  sub( X ) and sub( A ) such that their norms are in the range
00026 *  [sqrt(eps), 1/sqrt(eps)], then computing an LQ factorization of
00027 *  [sub( A )',sub( X )]' (if TRANS = 'N') or a QR factorization of
00028 *  [sub( A ),sub( X )] otherwise, and returning the norm of the trailing
00029 *  triangle, scaled by MAX(M,N,NRHS)*eps.
00030 *
00031 *  Notes
00032 *  =====
00033 *
00034 *  Each global data object is described by an associated description
00035 *  vector.  This vector stores the information required to establish
00036 *  the mapping between an object element and its corresponding process
00037 *  and memory location.
00038 *
00039 *  Let A be a generic term for any 2D block cyclicly distributed array.
00040 *  Such a global array has an associated description vector DESCA.
00041 *  In the following comments, the character _ should be read as
00042 *  "of the global array".
00043 *
00044 *  NOTATION        STORED IN      EXPLANATION
00045 *  --------------- -------------- --------------------------------------
00046 *  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
00047 *                                 DTYPE_A = 1.
00048 *  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
00049 *                                 the BLACS process grid A is distribu-
00050 *                                 ted over. The context itself is glo-
00051 *                                 bal, but the handle (the integer
00052 *                                 value) may vary.
00053 *  M_A    (global) DESCA( M_ )    The number of rows in the global
00054 *                                 array A.
00055 *  N_A    (global) DESCA( N_ )    The number of columns in the global
00056 *                                 array A.
00057 *  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
00058 *                                 the rows of the array.
00059 *  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
00060 *                                 the columns of the array.
00061 *  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
00062 *                                 row of the array A is distributed.
00063 *  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
00064 *                                 first column of the array A is
00065 *                                 distributed.
00066 *  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
00067 *                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
00068 *
00069 *  Let K be the number of rows or columns of a distributed matrix,
00070 *  and assume that its process grid has dimension p x q.
00071 *  LOCr( K ) denotes the number of elements of K that a process
00072 *  would receive if K were distributed over the p processes of its
00073 *  process column.
00074 *  Similarly, LOCc( K ) denotes the number of elements of K that a
00075 *  process would receive if K were distributed over the q processes of
00076 *  its process row.
00077 *  The values of LOCr() and LOCc() may be determined via a call to the
00078 *  ScaLAPACK tool function, NUMROC:
00079 *          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
00080 *          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
00081 *  An upper bound for these quantities may be computed by:
00082 *          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
00083 *          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
00084 *
00085 *  Arguments
00086 *  =========
00087 *
00088 *  TRANS   (global input) CHARACTER*1
00089 *          = 'N':  No transpose, check for sub( X ) in the row space of
00090 *                  sub( A ),
00091 *          = 'T':  Transpose, check for sub( X ) in row space of
00092 *                  sub( A )'.
00093 *
00094 *  M       (global input) INTEGER
00095 *          The number of rows to be operated on, i.e. the number of rows
00096 *          of the distributed submatrix sub( A ). M >= 0.
00097 *
00098 *  N       (global input) INTEGER
00099 *          The number of columns to be operated on, i.e. the number of
00100 *          columns of the distributed submatrix sub( A ). N >= 0.
00101 *
00102 *  NRHS    (global input) INTEGER
00103 *          The number of right hand sides, i.e., the number of columns
00104 *          of the distributed submatrix sub( X ). NRHS >= 0.
00105 *
00106 *  A       (local input) REAL pointer into the local memory
00107 *          to an array of dimension (LLD_A, LOCc(JA+N-1)). This array
00108 *          contains the local pieces of the M-by-N distributed matrix
00109 *          sub( A ).
00110 *
00111 *  IA      (global input) INTEGER
00112 *          The row index in the global array A indicating the first
00113 *          row of sub( A ).
00114 *
00115 *  JA      (global input) INTEGER
00116 *          The column index in the global array A indicating the
00117 *          first column of sub( A ).
00118 *
00119 *  DESCA   (global and local input) INTEGER array of dimension DLEN_.
00120 *          The array descriptor for the distributed matrix A.
00121 *
00122 *  X       (local input) REAL pointer into the local
00123 *          memory to an array of dimension (LLD_X,LOCc(JX+NRHS-1)).
00124 *          On entry, this array contains the local pieces of the
00125 *          N-by-NRHS distributed submatrix sub( X ) if TRANS = 'N',
00126 *          and the M-by-NRHS distributed submatrix sub( X ) otherwise.
00127 *
00128 *  IX      (global input) INTEGER
00129 *          The row index in the global array X indicating the first
00130 *          row of sub( X ).
00131 *
00132 *  JX      (global input) INTEGER
00133 *          The column index in the global array X indicating the
00134 *          first column of sub( X ).
00135 *
00136 *  DESCX   (global and local input) INTEGER array of dimension DLEN_.
00137 *          The array descriptor for the distributed matrix X.
00138 *
00139 *  WORK    (local workspace) REAL array dimension (LWORK)
00140 *          If TRANS='N', LWORK >= MNRHSP * NQ + LTAU + LWF and
00141 *          LWORK >= MP * NNRHSQ + LTAU + LWF otherwise, where
00142 *
00143 *          IF TRANS='N', (LQ fact)
00144 *            MNRHSP = NUMROC( M+NRHS+IROFFA, MB_A, MYROW, IAROW,
00145 *                             NPROW )
00146 *            LTAU   = NUMROC( IA+MIN( M+NRHS, N )-1, MB_A, MYROW,
00147 *                             RSRC_A, NPROW )
00148 *            LWF    = MB_A * ( MB_A + MNRHSP + NQ0 )
00149 *          ELSE         (QR fact)
00150 *            NNRHSQ = NUMROC( N+NRHS+ICOFFA, NB_A, MYCOL, IACOL,
00151 *                             NPCOL )
00152 *            LTAU   = NUMROC( JA+MIN( M, N+NRHS )-1, NB_A, MYCOL,
00153 *                             CSRC_A, NPCOL )
00154 *            LWF    = NB_A * ( NB_A + MP0 + NNRHSQ )
00155 *          END IF
00156 *
00157 *          and,
00158 *
00159 *          IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ),
00160 *          IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ),
00161 *          IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ),
00162 *          MP0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ),
00163 *          NQ0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ).
00164 *
00165 *          INDXG2P and NUMROC are ScaLAPACK tool functions;
00166 *          MYROW, MYCOL, NPROW and NPCOL can be determined by calling
00167 *          the subroutine BLACS_GRIDINFO.
00168 *
00169 *
00170 *  =====================================================================
00171 *
00172 *     .. Parameters ..
00173       INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
00174      $                   LLD_, MB_, M_, NB_, N_, RSRC_
00175       PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
00176      $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
00177      $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
00178       REAL               ONE, ZERO
00179       PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
00180 *     ..
00181 *     .. Local Scalars ..
00182       LOGICAL            TPSD
00183       INTEGER            IACOL, IAROW, ICOFFA, ICTXT, IDUM, IIA, INFO,
00184      $                   IPTAU, IPW, IPWA, IROFFA, IWA, IWX, J, JJA,
00185      $                   JWA, JWX, LDW, LWORK, MPWA, MPW, MQW, MYCOL,
00186      $                   MYROW, NPCOL, NPROW, NPW, NQWA, NQW
00187       REAL               AMAX, ANRM, ERR, XNRM
00188 *     ..
00189 *     .. Local Arrays ..
00190       INTEGER            DESCW( DLEN_ ), IDUM1( 1 ), IDUM2( 1 )
00191       REAL               RWORK( 1 )
00192 *     ..
00193 *     .. External Functions ..
00194       LOGICAL            LSAME
00195       INTEGER            NUMROC
00196       REAL               PSLANGE, PSLAMCH
00197       EXTERNAL           LSAME, NUMROC, PSLANGE, PSLAMCH
00198 *     ..
00199 *     .. External Subroutines ..
00200       EXTERNAL           BLACS_GRIDINFO, DESCSET, INFOG2L, PSAMAX,
00201      $                   PSCOPY, PSGELQF, PSGEQRF, PSLACPY,
00202      $                   PSLASCL, PXERBLA, SGAMX2D
00203 *     ..
00204 *     .. Intrinsic Functions ..
00205       INTRINSIC          ABS, MAX, MIN, MOD, REAL
00206 *     ..
00207 *     .. Executable Statements ..
00208 *
00209 *     Get grid parameters
00210 *
00211       ICTXT = DESCA( CTXT_ )
00212       CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
00213 *
00214       PSQRT14 = ZERO
00215 *
00216       IPWA = 1
00217       IROFFA = MOD( IA-1, DESCA( MB_ ) )
00218       ICOFFA = MOD( JA-1, DESCA( NB_ ) )
00219       IWA = IROFFA + 1
00220       JWA = ICOFFA + 1
00221       CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA,
00222      $              JJA, IAROW, IACOL )
00223       MPWA = NUMROC( M+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW )
00224       NQWA = NUMROC( N+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL )
00225 *
00226       INFO = 0
00227       IF( LSAME( TRANS, 'N' ) ) THEN
00228          IF( N.LE.0 .OR. NRHS.LE.0 )
00229      $      RETURN
00230          TPSD = .FALSE.
00231          MPW = NUMROC( M+NRHS+IROFFA, DESCA( MB_ ), MYROW, IAROW,
00232      $                 NPROW )
00233          NQW = NQWA
00234 *
00235 *        Assign descriptor DESCW for workspace WORK and pointers to
00236 *        matrices sub( A ) and sub( X ) in workspace
00237 *
00238          IWX = IWA + M
00239          JWX = JWA
00240          LDW = MAX( 1, MPW )
00241          CALL DESCSET( DESCW, M+NRHS+IROFFA, N+ICOFFA, DESCA( MB_ ),
00242      $                 DESCA( NB_ ), IAROW, IACOL, ICTXT, LDW )
00243 *
00244       ELSE IF( LSAME( TRANS, 'T' ) ) THEN
00245          IF( M.LE.0 .OR. NRHS.LE.0 )
00246      $      RETURN
00247          TPSD = .TRUE.
00248          MPW = MPWA
00249          NQW = NUMROC( N+NRHS+ICOFFA, DESCA( NB_ ), MYCOL, IACOL,
00250      $                 NPCOL )
00251 *
00252 *        Assign descriptor DESCW for workspace WORK and pointers to
00253 *        matrices sub( A ) and sub( X ) in workspace
00254 *
00255          IWX = IWA
00256          JWX = JWA + N
00257          LDW = MAX( 1, MPW )
00258          CALL DESCSET( DESCW, M+IROFFA, N+NRHS+ICOFFA, DESCA( MB_ ),
00259      $                 DESCA( NB_ ), IAROW, IACOL, ICTXT, LDW )
00260       ELSE
00261          CALL PXERBLA( ICTXT, 'PSQRT14', -1 )
00262          RETURN
00263       END IF
00264 *
00265 *     Copy and scale sub( A )
00266 *
00267       IPTAU = IPWA + MPW*NQW
00268       CALL PSLACPY( 'All', M, N, A, IA, JA, DESCA, WORK( IPWA ), IWA,
00269      $              JWA, DESCW )
00270       RWORK( 1 ) = ZERO
00271       ANRM = PSLANGE( 'M', M, N, WORK( IPWA ), IWA, JWA, DESCW, RWORK )
00272       IF( ANRM.NE.ZERO )
00273      $   CALL PSLASCL( 'G', ANRM, ONE, M, N, WORK( IPWA ), IWA,
00274      $                 JWA, DESCW, INFO )
00275 *
00276 *     Copy sub( X ) or sub( X )' into the right place and scale it
00277 *
00278       IF( TPSD ) THEN
00279 *
00280 *        Copy sub( X ) into columns jwa+n:jwa+n+nrhs-1 of work
00281 *
00282          DO 10 J = 1, NRHS
00283             CALL PSCOPY( M, X, IX, JX+J-1, DESCX, 1, WORK( IPWA ), IWX,
00284      $                   JWX+J-1, DESCW, 1 )
00285    10    CONTINUE
00286          XNRM = PSLANGE( 'M', M, NRHS, WORK( IPWA ), IWX, JWX, DESCW,
00287      $                   RWORK )
00288          IF( XNRM.NE.ZERO )
00289      $      CALL PSLASCL( 'G', XNRM, ONE, M, NRHS, WORK( IPWA ), IWX,
00290      $                    JWX, DESCW, INFO )
00291 *
00292 *        Compute QR factorization of work(iwa:iwa+m-1,jwa:jwa+n+nrhs-1)
00293 *
00294          MQW = NUMROC( M+ICOFFA, DESCA( NB_ ), MYCOL, IACOL, NPCOL )
00295          IPW = IPTAU + MIN( MQW, NQW )
00296          LWORK = DESCW( NB_ ) * ( MPW + NQW + DESCW( NB_ ) )
00297          CALL PSGEQRF( M, N+NRHS, WORK( IPWA ), IWA, JWA, DESCW,
00298      $                WORK( IPTAU ), WORK( IPW ), LWORK, INFO )
00299 *
00300 *        Compute largest entry in upper triangle of
00301 *        work(iwa+n:iwa+m-1,jwa+n:jwa+n+nrhs-1)
00302 *
00303          ERR = ZERO
00304          IF( N.LT.M ) THEN
00305             DO 20 J = JWX, JWA+N+NRHS-1
00306                CALL PSAMAX( MIN(M-N,J-JWX+1), AMAX, IDUM, WORK( IPWA ),
00307      $                      IWA+N, J, DESCW, 1 )
00308                ERR = MAX( ERR, ABS( AMAX ) )
00309    20       CONTINUE
00310          END IF
00311          CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, IDUM1, IDUM2,
00312      $                 -1, -1, 0 )
00313 *
00314       ELSE
00315 *
00316 *        Copy sub( X )' into rows iwa+m:iwa+m+nrhs-1 of work
00317 *
00318          DO 30 J = 1, NRHS
00319             CALL PSCOPY( N, X, IX, JX+J-1, DESCX, 1, WORK( IPWA ),
00320      $                   IWX+J-1, JWX, DESCW, DESCW( M_ ) )
00321    30    CONTINUE
00322 *
00323          XNRM = PSLANGE( 'M', NRHS, N, WORK( IPWA ), IWX, JWX, DESCW,
00324      $                   RWORK )
00325          IF( XNRM.NE.ZERO )
00326      $      CALL PSLASCL( 'G', XNRM, ONE, NRHS, N, WORK( IPWA ), IWX,
00327      $                    JWX, DESCW, INFO )
00328 *
00329 *        Compute LQ factorization of work(iwa:iwa+m+nrhs-1,jwa:jwa+n-1)
00330 *
00331          NPW = NUMROC( N+IROFFA, DESCA( MB_ ), MYROW, IAROW, NPROW )
00332          IPW = IPTAU + MIN( MPW, NPW )
00333          LWORK = DESCW( MB_ ) * ( MPW + NQW + DESCW( MB_ ) )
00334          CALL PSGELQF( M+NRHS, N, WORK( IPWA ), IWA, JWA, DESCW,
00335      $                 WORK( IPTAU ), WORK( IPW ), LWORK, INFO )
00336 *
00337 *        Compute largest entry in lower triangle in
00338 *        work(iwa+m:iwa+m+nrhs-1,jwa+m:jwa+n-1)
00339 *
00340          ERR = ZERO
00341          DO 40 J = JWA+M, MIN( JWA+N-1, JWA+M+NRHS-1 )
00342             CALL PSAMAX( JWA+M+NRHS-J, AMAX, IDUM, WORK( IPWA ),
00343      $                   IWX+J-JWA-M, J, DESCW, 1 )
00344             ERR = MAX( ERR, ABS( AMAX ) )
00345    40    CONTINUE
00346          CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, IDUM1, IDUM2,
00347      $                 -1, -1, 0 )
00348 *
00349       END IF
00350 *
00351       PSQRT14 = ERR / ( REAL( MAX( M, N, NRHS ) ) *
00352      $          PSLAMCH( ICTXT, 'Epsilon' ) )
00353 *
00354       RETURN
00355 *
00356 *     End of PSQRT14
00357 *
00358       END