*  d9/db4/pclaschk_8f_source.html

      SUBROUTINE PCLASCHK( SYMM, DIAG, N, NRHS, X, IX, JX, DESCX,
     $                     IASEED, IA, JA, DESCA, IBSEED, ANORM, RESID,
     $                     WORK )
*
*  -- ScaLAPACK auxiliary routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     May 1, 1997
*
*     .. Scalar Arguments ..
      CHARACTER          DIAG, SYMM
      INTEGER            IA, IASEED, IBSEED, IX, JA, JX, N, NRHS
      REAL               ANORM, RESID
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * ), DESCX( * )
      COMPLEX            WORK( * ), X( * )
*     ..
*
*  Purpose
*  =======
*
*  PCLASCHK computes the residual
*  || sub( A )*sub( X ) - B || / (|| sub( A ) ||*|| sub( X ) ||*eps*N)
*  to check the accuracy of the factorization and solve steps in the
*  LU and Cholesky decompositions, where sub( A ) denotes
*  A(IA:IA+N-1,JA:JA+N-1), sub( X ) denotes X(IX:IX+N-1, JX:JX+NRHS-1).
*
*  sub( A ) and B are not passed in: they are regenerated on the fly
*  by PCMATGEN from the seeds IASEED and IBSEED.  The right-hand sides
*  are processed in panels of at most NB_X columns.  For every column
*  the largest-magnitude entry (located with ICAMAX) of the residual
*  and of sub( X ) is used as its norm, and RESID is the largest
*  quotient found over all columns.
*
*  The quotient is formed without any guard on its denominator, so
*  ANORM, N and every column of sub( X ) are expected to be nonzero.
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclicly distributed array.
*  Such a global array has an associated description vector DESCA.
*  In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
*                                 DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) DESCA( M_ )    The number of rows in the global
*                                 array A.
*  N_A    (global) DESCA( N_ )    The number of columns in the global
*                                 array A.
*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
*                                 the rows of the array.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
*                                 the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Arguments
*  =========
*
*  SYMM    (global input) CHARACTER
*          If SYMM = 'H', sub( A ) is a Hermitian distributed matrix,
*          otherwise sub( A ) is a general distributed matrix.
*          Passed unchanged to PCMATGEN when sub( A ) is regenerated.
*
*  DIAG    (global input) CHARACTER
*          If DIAG = 'D', sub( A ) is diagonally dominant.
*          Passed unchanged to PCMATGEN when sub( A ) is regenerated.
*
*  N       (global input) INTEGER
*          The number of columns to be operated on, i.e. the number of
*          columns of the distributed submatrix sub( A ). N >= 0.
*
*  NRHS    (global input) INTEGER
*          The number of right-hand-sides, i.e. the number of columns
*          of the distributed matrix sub( X ). NRHS >= 0.
*
*  X       (local input) COMPLEX pointer into the local memory
*          to an array of dimension (LLD_X,LOCc(JX+NRHS-1)). This array
*          contains the local pieces of the answer vector(s) sub( X ) of
*          sub( A ) sub( X ) - B, split up over a column of processes.
*
*  IX      (global input) INTEGER
*          The row index in the global array X indicating the first
*          row of sub( X ).
*
*  JX      (global input) INTEGER
*          The column index in the global array X indicating the
*          first column of sub( X ).
*
*  DESCX   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix X.
*
*  IASEED  (global input) INTEGER
*          The seed number to generate the original matrix Ao.
*
*  IA      (global input) INTEGER
*          The row index in the global array A indicating the first
*          row of sub( A ).
*
*  JA      (global input) INTEGER
*          The column index in the global array A indicating the
*          first column of sub( A ).
*
*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix A.
*          The BLACS context used throughout is DESCA( CTXT_ ).
*
*  IBSEED  (global input) INTEGER
*          The seed number to generate the original matrix B.
*
*  ANORM   (global input) REAL
*          The 1-norm or infinity norm of the distributed matrix
*          sub( A ).
*
*  RESID   (global output) REAL
*          The residual error:
*          ||sub( A )*sub( X )-B|| / (||sub( A )||*||sub( X )||*eps*N).
*          The value is gathered on process (0,0) and then broadcast
*          to every process of the grid before returning.
*
*  WORK    (local workspace) COMPLEX array, dimension (LWORK)
*          LWORK >= MAX(1,Np)*NB_X + Nq*NB_X + MAX( MAX(NQ*MB_A,2*NB_X),
*          NB_X * NUMROC( NUMROC(N,MB_X,0,0,NPCOL), MB_X, 0, 0, LCMQ ) )
*
*          Layout used below:
*            WORK( IPB ) : local panel of B / of the residual, LDX rows
*            WORK( IPX ) : transposed panel of X, JBRHS rows
*            WORK( IPA ) : regenerated rows of A, PBCTRAN work area,
*                          and later the per-column maxima.
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
     $                   LLD_, MB_, M_, NB_, N_, RSRC_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
      COMPLEX            ZERO, ONE
      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ),
     $                     ZERO = ( 0.0E+0, 0.0E+0 ) )
*     ..
*     .. Local Scalars ..
      INTEGER            IACOL, IAROW, IB, ICOFF, ICTXT, ICURCOL, IDUMM,
     $                   II, IIA, IIX, IOFFX, IPA, IPB, IPW, IPX, IROFF,
     $                   IXCOL, IXROW, J, JBRHS, JJ, JJA, JJX, LDX,
     $                   MYCOL, MYROW, NP, NPCOL, NPROW, NQ
      REAL               DIVISOR, EPS, RESID1
      COMPLEX            BETA
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, CGAMX2D, CGEMM, CGSUM2D,
     $                   CLASET, INFOG2L, PBCTRAN, PCMATGEN, SGEBR2D,
     $                   SGEBS2D, SGERV2D, SGESD2D
*     ..
*     .. External Functions ..
      INTEGER            ICAMAX, NUMROC
      REAL               PSLAMCH
      EXTERNAL           ICAMAX, NUMROC, PSLAMCH
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          ABS, MAX, MIN, MOD, REAL
*     ..
*     .. Executable Statements ..
*
*     Get needed initial parameters
*
      ICTXT = DESCA( CTXT_ )
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*
*     DIVISOR is the part of the denominator common to all columns.
*
      EPS = PSLAMCH( ICTXT, 'eps' )
      RESID = 0.0E+0
      DIVISOR = ANORM * EPS * REAL( N )
*
*     Local starting indices and owning process coordinates of
*     sub( A ) and sub( X ).
*
      CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA,
     $              IAROW, IACOL )
      CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX,
     $              IXROW, IXCOL )
      IROFF = MOD( IA-1, DESCA( MB_ ) )
      ICOFF = MOD( JA-1, DESCA( NB_ ) )
      NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW )
      NQ = NUMROC( N+ICOFF, DESCA( NB_ ), MYCOL, IACOL, NPCOL )
*
*     Partition WORK.  The offsets are computed from NP and NQ before
*     the block offsets IROFF and ICOFF are removed from them below.
*
      LDX = MAX( 1, NP )
      IPB = 1
      IPX = IPB + NP * DESCX( NB_ )
      IPA = IPX + NQ * DESCX( NB_ )
*
      IF( MYROW.EQ.IAROW )
     $   NP = NP - IROFF
      IF( MYCOL.EQ.IACOL )
     $   NQ = NQ - ICOFF
*
      ICURCOL = IXCOL
*
*     Loop over the rhs, one panel of at most NB_X columns at a time
*
      DO 40 J = 1, NRHS, DESCX( NB_ )
         JBRHS = MIN( DESCX( NB_ ), NRHS-J+1 )
*
*        Transpose x from ICURCOL to all rows; the result is stored
*        in WORK( IPX ) with leading dimension JBRHS.
*
         IOFFX = IIX + ( JJX - 1 ) * DESCX( LLD_ )
         CALL PBCTRAN( ICTXT, 'Column', 'Transpose', N, JBRHS,
     $              DESCX( MB_ ), X( IOFFX ), DESCX( LLD_ ), ZERO,
     $              WORK( IPX ), JBRHS, IXROW, ICURCOL, -1, IACOL,
     $              WORK( IPA ) )
*
*        Regenerate B in process column ICURCOL.  Only that column
*        starts from B (BETA = ONE); the others start from zero so
*        that the row-wise sum below yields B - A * X exactly once.
*
         IF( MYCOL.EQ.ICURCOL ) THEN
            CALL PCMATGEN( ICTXT, 'N', 'N', DESCX( M_ ), DESCX( N_ ),
     $                     DESCX( MB_ ), DESCX( NB_ ), WORK( IPB ), LDX,
     $                     IXROW, IXCOL, IBSEED, IIX-1, NP, JJX-1,
     $                     JBRHS, MYROW, MYCOL, NPROW, NPCOL )
            BETA = ONE
         ELSE
            BETA = ZERO
         END IF
*
         IF( NQ.GT.0 ) THEN
            DO 10 II = IIA, IIA+NP-1, DESCA( MB_ )
               IB = MIN( DESCA( MB_ ), IIA+NP-II )
*
*              Regenerate ib rows of the matrix A(IA:IA+N-1,JA:JA+N-1).
*
               CALL PCMATGEN( ICTXT, SYMM, DIAG, DESCA( M_ ),
     $                        DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ),
     $                        WORK( IPA ), IB, DESCA( RSRC_ ),
     $                        DESCA( CSRC_ ), IASEED, II-1, IB,
     $                        JJA-1, NQ, MYROW, MYCOL, NPROW, NPCOL )
*
*              Compute B <= B - A * X.
*
               CALL CGEMM( 'No transpose', 'Transpose', IB, JBRHS, NQ,
     $                     -ONE, WORK( IPA ), IB, WORK( IPX ), JBRHS,
     $                     BETA, WORK( IPB+II-IIA ), LDX )
*
   10       CONTINUE
*
         ELSE IF( MYCOL.NE.ICURCOL ) THEN
*
*           No local columns of A and no local B: contribute zeros
*           to the sum.
*
            CALL CLASET( 'All', NP, JBRHS, ZERO, ZERO, WORK( IPB ),
     $                   LDX )
*
         END IF
*
*        Add B rowwise to ICURCOL
*
         CALL CGSUM2D( ICTXT, 'Row', ' ', NP, JBRHS, WORK( IPB ), LDX,
     $                 MYROW, ICURCOL )
*
         IF( MYCOL.EQ.ICURCOL ) THEN
*
*           Figure || A * X - B || & || X ||
*
*           The A panel is no longer needed, so WORK( IPA ) is reused:
*           WORK( IPA+JJ ) receives the local maximum of column JJ of
*           the residual and WORK( IPW+JJ ) that of column JJ of X.
*
            IPW = IPA + JBRHS
            DO 20 JJ = 0, JBRHS - 1
               IF( NP.GT.0 ) THEN
                  II = ICAMAX( NP, WORK( IPB+JJ*LDX ), 1 )
                  WORK( IPA+JJ ) = ABS( WORK( IPB+II-1+JJ*LDX ) )
                  WORK( IPW+JJ ) = ABS( X( IOFFX + ICAMAX( NP,
     $            X( IOFFX + JJ*DESCX( LLD_ ) ), 1 )-1+JJ*
     $            DESCX( LLD_ ) ) )
               ELSE
                  WORK( IPA+JJ ) = ZERO
                  WORK( IPW+JJ ) = ZERO
               END IF
   20       CONTINUE
*
*           After CGAMX2D computation,
*              WORK(IPA:IPA+JBRHS-1) has the maximum of || Ax - b ||,
*              WORK(IPW:IPW+JBRHS-1) has the maximum of || X ||,
*           one entry per right-hand side.  Both vectors are combined
*           in a single call of length 2*JBRHS; the result is used on
*           process row 0 only.
*
            CALL CGAMX2D( ICTXT, 'Column', ' ', 1, 2*JBRHS,
     $                    WORK( IPA ), 1, IDUMM, IDUMM, -1, 0, ICURCOL )
*
*           Calculate residual = ||Ax-b|| / (||x||*||A||*eps*N)
*
            IF( MYROW.EQ.0 ) THEN
               DO 30 JJ = 0, JBRHS - 1
                  RESID1 = REAL( WORK( IPA+JJ ) ) /
     $                     ( REAL( WORK( IPW+JJ ) )*DIVISOR )
                  IF( RESID.LT.RESID1 )
     $               RESID = RESID1
   30          CONTINUE
*
*              Forward the running maximum to process (0,0) unless
*              this process already is (0,0).
*
               IF( MYCOL.NE.0 )
     $            CALL SGESD2D( ICTXT, 1, 1, RESID, 1, 0, 0 )
            END IF
*
         ELSE IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN
*
*           Process (0,0) collects the value sent by (0,ICURCOL).
*
            CALL SGERV2D( ICTXT, 1, 1, RESID1, 1, 0, ICURCOL )
            IF( RESID.LT.RESID1 )
     $         RESID = RESID1
*
         END IF
*
*        Advance to the next panel of X and to the process column
*        that owns it.
*
         IF( MYCOL.EQ.ICURCOL )
     $      JJX = JJX + JBRHS
         ICURCOL = MOD( ICURCOL+1, NPCOL )
*
   40 CONTINUE
*
*     Broadcast the final residual from process (0,0) to the grid.
*
      IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN
         CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, RESID, 1 )
      ELSE
         CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, RESID, 1, 0, 0 )
      END IF
*
      RETURN
*
*     End of PCLASCHK
*
      END