ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
psgeequ.f
Go to the documentation of this file.
00001       SUBROUTINE PSGEEQU( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND,
00002      $                    AMAX, INFO )
00003 *
00004 *  -- ScaLAPACK routine (version 1.7) --
00005 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
00006 *     and University of California, Berkeley.
00007 *     May 1, 1997
00008 *
00009 *     .. Scalar Arguments ..
00010       INTEGER            IA, INFO, JA, M, N
00011       REAL               AMAX, COLCND, ROWCND
00012 *     ..
00013 *     .. Array Arguments ..
00014       INTEGER            DESCA( * )
00015       REAL               A( * ), C( * ), R( * )
00016 *     ..
00017 *
00018 *  Purpose
00019 *  =======
00020 *
00021 *  PSGEEQU computes row and column scalings intended to equilibrate an
00022 *  M-by-N distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA:JA+N-1) and
00023 *  reduce its condition number.  R returns the row scale factors and C
00024 *  the column scale factors, chosen to try to make the largest entry in
00025 *  each row and column of the distributed matrix B with elements
00026 *  B(i,j) = R(i) * A(i,j) * C(j) have absolute value 1.
00027 *
00028 *  R(i) and C(j) are restricted to be between SMLNUM = smallest safe
00029 *  number and BIGNUM = largest safe number.  Use of these scaling
00030 *  factors is not guaranteed to reduce the condition number of
00031 *  sub( A ) but works well in practice.
00032 *
00033 *  Notes
00034 *  =====
00035 *
00036 *  Each global data object is described by an associated description
00037 *  vector.  This vector stores the information required to establish
00038 *  the mapping between an object element and its corresponding process
00039 *  and memory location.
00040 *
00041 *  Let A be a generic term for any 2D block cyclicly distributed array.
00042 *  Such a global array has an associated description vector DESCA.
00043 *  In the following comments, the character _ should be read as
00044 *  "of the global array".
00045 *
00046 *  NOTATION        STORED IN      EXPLANATION
00047 *  --------------- -------------- --------------------------------------
00048 *  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
00049 *                                 DTYPE_A = 1.
00050 *  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
00051 *                                 the BLACS process grid A is distribu-
00052 *                                 ted over. The context itself is glo-
00053 *                                 bal, but the handle (the integer
00054 *                                 value) may vary.
00055 *  M_A    (global) DESCA( M_ )    The number of rows in the global
00056 *                                 array A.
00057 *  N_A    (global) DESCA( N_ )    The number of columns in the global
00058 *                                 array A.
00059 *  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
00060 *                                 the rows of the array.
00061 *  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
00062 *                                 the columns of the array.
00063 *  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
00064 *                                 row of the array A is distributed.
00065 *  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
00066 *                                 first column of the array A is
00067 *                                 distributed.
00068 *  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
00069 *                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
00070 *
00071 *  Let K be the number of rows or columns of a distributed matrix,
00072 *  and assume that its process grid has dimension p x q.
00073 *  LOCr( K ) denotes the number of elements of K that a process
00074 *  would receive if K were distributed over the p processes of its
00075 *  process column.
00076 *  Similarly, LOCc( K ) denotes the number of elements of K that a
00077 *  process would receive if K were distributed over the q processes of
00078 *  its process row.
00079 *  The values of LOCr() and LOCc() may be determined via a call to the
00080 *  ScaLAPACK tool function, NUMROC:
00081 *          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
00082 *          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
00083 *  An upper bound for these quantities may be computed by:
00084 *          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
00085 *          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
00086 *
00087 *  Arguments
00088 *  =========
00089 *
00090 *  M       (global input) INTEGER
00091 *          The number of rows to be operated on i.e the number of rows
00092 *          of the distributed submatrix sub( A ). M >= 0.
00093 *
00094 *  N       (global input) INTEGER
00095 *          The number of columns to be operated on i.e the number of
00096 *          columns of the distributed submatrix sub( A ). N >= 0.
00097 *
00098 *  A       (local input) REAL pointer into the local memory
00099 *          to an array of dimension ( LLD_A, LOCc(JA+N-1) ), the
00100 *          local pieces of the M-by-N distributed matrix whose
00101 *          equilibration factors are to be computed.
00102 *
00103 *  IA      (global input) INTEGER
00104 *          The row index in the global array A indicating the first
00105 *          row of sub( A ).
00106 *
00107 *  JA      (global input) INTEGER
00108 *          The column index in the global array A indicating the
00109 *          first column of sub( A ).
00110 *
00111 *  DESCA   (global and local input) INTEGER array of dimension DLEN_.
00112 *          The array descriptor for the distributed matrix A.
00113 *
00114 *  R       (local output) REAL array, dimension LOCr(M_A)
00115 *          If INFO = 0 or INFO > IA+M-1, R(IA:IA+M-1) contains the row
00116 *          scale factors for sub( A ). R is aligned with the distributed
00117 *          matrix A, and replicated across every process column. R is
00118 *          tied to the distributed matrix A.
00119 *
00120 *  C       (local output) REAL array, dimension LOCc(N_A)
00121 *          If INFO = 0,  C(JA:JA+N-1) contains the column scale factors
00122 *          for sub( A ). C is aligned with the distributed matrix A, and
00123 *          replicated down every process row. C is tied to the distri-
00124 *          buted matrix A.
00125 *
00126 *  ROWCND  (global output) REAL
00127 *          If INFO = 0 or INFO > IA+M-1, ROWCND contains the ratio of
00128 *          the smallest R(i) to the largest R(i) (IA <= i <= IA+M-1).
00129 *          If ROWCND >= 0.1 and AMAX is neither too large nor too small,
00130 *          it is not worth scaling by R(IA:IA+M-1).
00131 *
00132 *  COLCND  (global output) REAL
00133 *          If INFO = 0, COLCND contains the ratio of the smallest C(j)
00134 *          to the largest C(j) (JA <= j <= JA+N-1). If COLCND >= 0.1, it
00135 *          is not worth scaling by C(JA:JA+N-1).
00136 *
00137 *  AMAX    (global output) REAL
00138 *          Absolute value of largest distributed matrix element.  If
00139 *          AMAX is very close to overflow or very close to underflow,
00140 *          the matrix should be scaled.
00141 *
00142 *  INFO    (global output) INTEGER
00143 *          = 0:  successful exit
00144 *          < 0:  If the i-th argument is an array and the j-entry had
00145 *                an illegal value, then INFO = -(i*100+j), if the i-th
00146 *                argument is a scalar and had an illegal value, then
00147 *                INFO = -i.
00148 *          > 0:  If INFO = i,  and i is
00149 *                <= M:  the i-th row of the distributed matrix sub( A )
00150 *                       is exactly zero,
00151 *                >  M:  the (i-M)-th column of the distributed
00152 *                       matrix sub( A ) is exactly zero.
00153 *
00154 *  =====================================================================
00155 *
00156 *     .. Parameters ..
00157       INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
00158      $                   LLD_, MB_, M_, NB_, N_, RSRC_
00159       PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
00160      $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
00161      $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
00162       REAL               ONE, ZERO
00163       PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
00164 *     ..
00165 *     .. Local Scalars ..
00166       CHARACTER          COLCTOP, ROWCTOP
00167       INTEGER            I, IACOL, IAROW, ICOFF, ICTXT, IDUMM, IIA,
00168      $                   IOFFA, IROFF, J, JJA, LDA, MP, MYCOL, MYROW,
00169      $                   NPCOL, NPROW, NQ
00170       REAL               BIGNUM, RCMAX, RCMIN, SMLNUM
00171 *     ..
00172 *     .. Local Arrays ..
00173       INTEGER            DESCC( DLEN_ ), DESCR( DLEN_ )
00174 *     ..
00175 *     .. External Subroutines ..
00176       EXTERNAL           BLACS_GRIDINFO, CHK1MAT, DESCSET, IGAMX2D,
00177      $                   INFOG2L, PCHK1MAT, PB_TOPGET, PXERBLA, SGAMN2D,
00178      $                   SGAMX2D
00179 *     ..
00180 *     .. External Functions ..
00181       INTEGER            INDXL2G, NUMROC
00182       REAL               PSLAMCH
00183       EXTERNAL           INDXL2G, NUMROC, PSLAMCH
00184 *     ..
00185 *     .. Intrinsic Functions ..
00186       INTRINSIC          ABS, MAX, MIN, MOD
00187 *     ..
00188 *     .. Executable Statements ..
00189 *
00190 *     Get grid parameters
00191 *
00192       ICTXT = DESCA( CTXT_ )
00193       CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
00194 *
00195 *     Test the input parameters.
00196 *
00197       INFO = 0
00198       IF( NPROW.EQ.-1 ) THEN
00199          INFO = -(600+CTXT_)
00200       ELSE
00201          CALL CHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, INFO )
00202          CALL PCHK1MAT( M, 1, N, 2, IA, JA, DESCA, 6, 0, IDUMM, IDUMM,
00203      $                  INFO )
00204       END IF
00205 *
00206       IF( INFO.NE.0 ) THEN
00207          CALL PXERBLA( ICTXT, 'PSGEEQU', -INFO )
00208          RETURN
00209       END IF
00210 *
00211 *     Quick return if possible
00212 *
00213       IF( M.EQ.0 .OR. N.EQ.0 ) THEN
00214          ROWCND = ONE
00215          COLCND = ONE
00216          AMAX = ZERO
00217          RETURN
00218       END IF
00219 *
00220       CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP )
00221       CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP )
00222 *
00223 *     Get machine constants and local indexes.
00224 *
00225       SMLNUM = PSLAMCH( ICTXT, 'S' )
00226       BIGNUM = ONE / SMLNUM
00227       CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA,
00228      $              IAROW, IACOL )
00229       IROFF = MOD( IA-1, DESCA( MB_ ) )
00230       ICOFF = MOD( JA-1, DESCA( NB_ ) )
00231       MP = NUMROC( M+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW )
00232       NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL )
00233       IF( MYROW.EQ.IAROW )
00234      $   MP = MP - IROFF
00235       IF( MYCOL.EQ.IACOL )
00236      $   NQ = NQ - ICOFF
00237       LDA = DESCA( LLD_ )
00238 *
00239 *     Assign descriptors for R and C arrays
00240 *
00241       CALL DESCSET( DESCR, M, 1, DESCA( MB_ ), 1, 0, 0, ICTXT,
00242      $               MAX( 1, MP ) )
00243       CALL DESCSET( DESCC, 1, N, 1, DESCA( NB_ ), 0, 0, ICTXT, 1 )
00244 *
00245 *     Compute row scale factors.
00246 *
00247       DO 10 I = IIA, IIA+MP-1
00248          R( I ) = ZERO
00249    10 CONTINUE
00250 *
00251 *     Find the maximum element in each row.
00252 *
00253       IOFFA = (JJA-1)*LDA
00254       DO 30 J = JJA, JJA+NQ-1
00255          DO 20 I = IIA, IIA+MP-1
00256             R( I ) = MAX( R( I ), ABS( A( IOFFA + I ) ) )
00257    20    CONTINUE
00258          IOFFA = IOFFA + LDA
00259    30 CONTINUE
00260       CALL SGAMX2D( ICTXT, 'Rowwise', ROWCTOP, MP, 1, R( IIA ),
00261      $              MAX( 1, MP ), IDUMM, IDUMM, -1, -1, MYCOL )
00262 *
00263 *     Find the maximum and minimum scale factors.
00264 *
00265       RCMIN = BIGNUM
00266       RCMAX = ZERO
00267       DO 40 I = IIA, IIA+MP-1
00268          RCMAX = MAX( RCMAX, R( I ) )
00269          RCMIN = MIN( RCMIN, R( I ) )
00270    40 CONTINUE
00271       CALL SGAMX2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, RCMAX, 1, IDUMM,
00272      $              IDUMM, -1, -1, MYCOL )
00273       CALL SGAMN2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, RCMIN, 1, IDUMM,
00274      $              IDUMM, -1, -1, MYCOL )
00275       AMAX = RCMAX
00276 *
00277       IF( RCMIN.EQ.ZERO ) THEN
00278 *
00279 *        Find the first zero scale factor and return an error code.
00280 *
00281          DO 50 I = IIA, IIA+MP-1
00282             IF( R( I ).EQ.ZERO .AND. INFO.EQ.0 )
00283      $         INFO = INDXL2G( I, DESCA( MB_ ), MYROW, DESCA( RSRC_ ),
00284      $                NPROW ) - IA + 1
00285    50    CONTINUE
00286          CALL IGAMX2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, INFO, 1,
00287      $                 IDUMM, IDUMM, -1, -1, MYCOL )
00288          IF( INFO.NE.0 )
00289      $      RETURN
00290       ELSE
00291 *
00292 *        Invert the scale factors.
00293 *
00294          DO 60 I = IIA, IIA+MP-1
00295             R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM )
00296    60    CONTINUE
00297 *
00298 *        Compute ROWCND = min(R(I)) / max(R(I))
00299 *
00300          ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
00301 *
00302       END IF
00303 *
00304 *     Compute column scale factors
00305 *
00306       DO 70 J = JJA, JJA+NQ-1
00307          C( J ) = ZERO
00308    70 CONTINUE
00309 *
00310 *     Find the maximum element in each column,
00311 *     assuming the row scaling computed above.
00312 *
00313       IOFFA = (JJA-1)*LDA
00314       DO 90 J = JJA, JJA+NQ-1
00315          DO 80 I = IIA, IIA+MP-1
00316             C( J ) = MAX( C( J ), ABS( A( IOFFA + I ) )*R( I ) )
00317    80    CONTINUE
00318          IOFFA = IOFFA + LDA
00319    90 CONTINUE
00320       CALL SGAMX2D( ICTXT, 'Columnwise', COLCTOP, 1, NQ, C( JJA ),
00321      $              1, IDUMM, IDUMM, -1, -1, MYCOL )
00322 *
00323 *     Find the maximum and minimum scale factors.
00324 *
00325       RCMIN = BIGNUM
00326       RCMAX = ZERO
00327       DO 100 J = JJA, JJA+NQ-1
00328          RCMIN = MIN( RCMIN, C( J ) )
00329          RCMAX = MAX( RCMAX, C( J ) )
00330   100 CONTINUE
00331       CALL SGAMX2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, RCMAX, 1, IDUMM,
00332      $              IDUMM, -1, -1, MYCOL )
00333       CALL SGAMN2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, RCMIN, 1, IDUMM,
00334      $              IDUMM, -1, -1, MYCOL )
00335 *
00336       IF( RCMIN.EQ.ZERO ) THEN
00337 *
00338 *        Find the first zero scale factor and return an error code.
00339 *
00340          DO 110 J = JJA, JJA+NQ-1
00341             IF( C( J ).EQ.ZERO .AND. INFO.EQ.0 )
00342      $         INFO = M + INDXL2G( J, DESCA( NB_ ), MYCOL,
00343      $                DESCA( CSRC_ ), NPCOL ) - JA + 1
00344   110    CONTINUE
00345          CALL IGAMX2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, INFO, 1,
00346      $                 IDUMM, IDUMM, -1, -1, MYCOL )
00347          IF( INFO.NE.0 )
00348      $      RETURN
00349       ELSE
00350 *
00351 *        Invert the scale factors.
00352 *
00353          DO 120 J = JJA, JJA+NQ-1
00354             C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM )
00355   120    CONTINUE
00356 *
00357 *        Compute COLCND = min(C(J)) / max(C(J))
00358 *
00359          COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
00360 *
00361       END IF
00362 *
00363       RETURN
00364 *
00365 *     End of PSGEEQU
00366 *
00367       END