*  d9/d49/psqrt14_8f_source.html

      REAL FUNCTION PSQRT14( TRANS, M, N, NRHS, A, IA, JA, DESCA, X,
     $                       IX, JX, DESCX, WORK )
*
*  -- ScaLAPACK routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     May 1, 1997
*
      IMPLICIT NONE
*
*     .. Scalar Arguments ..
      CHARACTER          TRANS
      INTEGER            IA, IX, JA, JX, M, N, NRHS
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * ), DESCX( * )
      REAL               A( * ), WORK( * ), X( * )
*     ..
*
*  Purpose
*  =======
*
*  PSQRT14 checks whether sub( X ) is in the row space of sub( A )
*  (TRANS = 'N') or of sub( A )' (TRANS = 'T'), where
*
*     sub( A ) = A( IA:IA+M-1, JA:JA+N-1 ),
*     sub( X ) = X( IX:IX+N-1, JX:JX+NRHS-1 )   if TRANS = 'N',
*     sub( X ) = X( IX:IX+M-1, JX:JX+NRHS-1 )   if TRANS = 'T'.
*
*  Method
*  ------
*  1. sub( A ) is copied into WORK and, when its PSLANGE( 'M', ... )
*     norm is nonzero, rescaled from that norm to ONE with PSLASCL.
*  2. sub( X ) is copied next to it and rescaled the same way:
*       TRANS = 'N':  sub( X )' goes below sub( A ), forming
*                     [ sub( A )', sub( X ) ]'  ( (M+NRHS)-by-N ),
*       TRANS = 'T':  sub( X ) goes to the right of sub( A ), forming
*                     [ sub( A ), sub( X ) ]    ( M-by-(N+NRHS) ).
*  3. An LQ factorization (TRANS = 'N') or a QR factorization
*     (TRANS = 'T') of the stacked matrix is computed.
*  4. The largest magnitude found in the trailing triangle of the
*     factor is combined over all processes and returned divided by
*     MAX( M, N, NRHS )*eps, with eps = PSLAMCH( ICTXT, 'Epsilon' ).
*
*  The function returns ZERO without doing any work when NRHS <= 0,
*  when N <= 0 and TRANS = 'N', or when M <= 0 and TRANS = 'T'.  For
*  any other value of TRANS, PXERBLA is called with argument index -1
*  and ZERO is returned.
*
*  Notes
*  =====
*
*  Every global data object has an associated descriptor vector that
*  holds what is needed to map an element of the object to the
*  process that owns it and to its local memory location.
*
*  For a generic 2D block cyclicly distributed array A with descriptor
*  DESCA (read the suffix _A as "of the global array A"):
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- ------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ )Descriptor type; here DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ ) BLACS context handle of the process
*                                 grid A is distributed over.  The
*                                 context is global, but the handle
*                                 (the integer value) may vary.
*  M_A    (global) DESCA( M_ )    Number of rows of the global array.
*  N_A    (global) DESCA( N_ )    Number of columns of the global
*                                 array.
*  MB_A   (global) DESCA( MB_ )   Blocking factor used to distribute
*                                 the rows.
*  NB_A   (global) DESCA( NB_ )   Blocking factor used to distribute
*                                 the columns.
*  RSRC_A (global) DESCA( RSRC_ ) Process row holding the first row
*                                 of A.
*  CSRC_A (global) DESCA( CSRC_ ) Process column holding the first
*                                 column of A.
*  LLD_A  (local)  DESCA( LLD_ )  Leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  With K rows or columns distributed over a p x q process grid,
*  LOCr( K ) is the number of those K elements a process receives
*  when they are spread over the p processes of its process column,
*  and LOCc( K ) the number it receives when they are spread over the
*  q processes of its process row.  Both are given by the ScaLAPACK
*  tool function NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ),
*  and are bounded by
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Arguments
*  =========
*
*  TRANS   (global input) CHARACTER*1
*          = 'N':  test sub( X ) against the row space of sub( A ),
*          = 'T':  test sub( X ) against the row space of sub( A )'.
*
*  M       (global input) INTEGER
*          Number of rows of the distributed submatrix sub( A ).
*          M >= 0.
*
*  N       (global input) INTEGER
*          Number of columns of the distributed submatrix sub( A ).
*          N >= 0.
*
*  NRHS    (global input) INTEGER
*          Number of right hand sides, i.e. the number of columns of
*          the distributed submatrix sub( X ). NRHS >= 0.
*
*  A       (local input) REAL pointer into the local memory
*          to an array of dimension (LLD_A, LOCc(JA+N-1)) holding the
*          local pieces of the M-by-N distributed matrix sub( A ).
*
*  IA      (global input) INTEGER
*          Row index in the global array A of the first row of
*          sub( A ).
*
*  JA      (global input) INTEGER
*          Column index in the global array A of the first column of
*          sub( A ).
*
*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix A.
*
*  X       (local input) REAL pointer into the local memory
*          to an array of dimension (LLD_X,LOCc(JX+NRHS-1)) holding
*          the local pieces of sub( X ), which is N-by-NRHS if
*          TRANS = 'N' and M-by-NRHS otherwise.
*
*  IX      (global input) INTEGER
*          Row index in the global array X of the first row of
*          sub( X ).
*
*  JX      (global input) INTEGER
*          Column index in the global array X of the first column of
*          sub( X ).
*
*  DESCX   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix X.
*
*  WORK    (local workspace) REAL array dimension (LWORK)
*          If TRANS='N', LWORK >= MNRHSP * NQ + LTAU + LWF and
*          LWORK >= MP * NNRHSQ + LTAU + LWF otherwise, where
*
*          IF TRANS='N', (LQ fact)
*            MNRHSP = NUMROC( M+NRHS+IROFFA, MB_A, MYROW, IAROW,
*                             NPROW )
*            LTAU   = NUMROC( IA+MIN( M+NRHS, N )-1, MB_A, MYROW,
*                             RSRC_A, NPROW )
*            LWF    = MB_A * ( MB_A + MNRHSP + NQ0 )
*          ELSE         (QR fact)
*            NNRHSQ = NUMROC( N+NRHS+ICOFFA, NB_A, MYCOL, IACOL,
*                             NPCOL )
*            LTAU   = NUMROC( JA+MIN( M, N+NRHS )-1, NB_A, MYCOL,
*                             CSRC_A, NPCOL )
*            LWF    = NB_A * ( NB_A + MP0 + NNRHSQ )
*          END IF
*
*          and,
*
*          IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ),
*          IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ),
*          IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ),
*          MP0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ),
*          NQ0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ).
*
*          INDXG2P and NUMROC are ScaLAPACK tool functions;
*          MYROW, MYCOL, NPROW and NPCOL can be determined by calling
*          the subroutine BLACS_GRIDINFO.
*
*          Inside WORK the stacked matrix starts at WORK( 1 ), the
*          TAU vector follows its MPW*NQW local entries, and the
*          factorization workspace follows TAU.  The length of that
*          last part is not an argument; it is computed here from
*          the workspace descriptor and handed to PSGELQF/PSGEQRF.
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
     $                   LLD_, MB_, M_, NB_, N_, RSRC_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
      REAL               ONE, ZERO
      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
*     ..
*     .. Local Scalars ..
      LOGICAL            NOTRAN
      INTEGER            COLOFF, IACOL, IAROW, ICTXT, IDUM, IIA, INFO,
     $                   IPMAT, IPTAU, IPWF, IWA, IWX, JJA, JWA, JWX,
     $                   K, LDW, LWORK, MGLOB, MPW, MX, MYCOL, MYROW,
     $                   NGLOB, NPCOL, NPROW, NQW, NTAU, NX, ROWOFF
      REAL               AMAX, ANRM, ERR, XNRM
*     ..
*     .. Local Arrays ..
      INTEGER            DESCW( DLEN_ ), IDUM1( 1 ), IDUM2( 1 )
      REAL               RWORK( 1 )
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            NUMROC
      REAL               PSLANGE, PSLAMCH
      EXTERNAL           LSAME, NUMROC, PSLANGE, PSLAMCH
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, DESCSET, INFOG2L, PSAMAX,
     $                   PSCOPY, PSGELQF, PSGEQRF, PSLACPY,
     $                   PSLASCL, PXERBLA, SGAMX2D
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          ABS, MAX, MIN, MOD, REAL
*     ..
*     .. Executable Statements ..
*
*     Process grid of sub( A ); the result defaults to zero so that
*     every early exit below returns a defined value.
*
      ICTXT = DESCA( CTXT_ )
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
      PSQRT14 = ZERO
      INFO = 0
*
*     The workspace matrix keeps the same offsets inside its first
*     block as sub( A ) has inside A, and is rooted at the process
*     that owns A( IA, JA ).
*
      IPMAT = 1
      ROWOFF = MOD( IA-1, DESCA( MB_ ) )
      COLOFF = MOD( JA-1, DESCA( NB_ ) )
      IWA = ROWOFF + 1
      JWA = COLOFF + 1
      CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA,
     $              JJA, IAROW, IACOL )
*
*     Only TRANS = 'N' and TRANS = 'T' are accepted
*
      NOTRAN = LSAME( TRANS, 'N' )
      IF( .NOT.NOTRAN ) THEN
         IF( .NOT.LSAME( TRANS, 'T' ) ) THEN
            CALL PXERBLA( ICTXT, 'PSQRT14', -1 )
            RETURN
         END IF
      END IF
*
*     Global shape of the workspace matrix and the position
*     ( IWX, JWX ) at which the copy of sub( X ) starts in it
*
      IF( NOTRAN ) THEN
*
*        sub( X )' is stacked below sub( A )
*
         IF( N.LE.0 .OR. NRHS.LE.0 ) THEN
            RETURN
         END IF
         MGLOB = M + NRHS + ROWOFF
         NGLOB = N + COLOFF
         IWX = IWA + M
         JWX = JWA
      ELSE
*
*        sub( X ) is appended to the right of sub( A )
*
         IF( M.LE.0 .OR. NRHS.LE.0 ) THEN
            RETURN
         END IF
         MGLOB = M + ROWOFF
         NGLOB = N + NRHS + COLOFF
         IWX = IWA
         JWX = JWA + N
      END IF
*
*     Local extents of the workspace matrix and its descriptor DESCW
*
      MPW = NUMROC( MGLOB, DESCA( MB_ ), MYROW, IAROW, NPROW )
      NQW = NUMROC( NGLOB, DESCA( NB_ ), MYCOL, IACOL, NPCOL )
      LDW = MAX( 1, MPW )
      CALL DESCSET( DESCW, MGLOB, NGLOB, DESCA( MB_ ), DESCA( NB_ ),
     $              IAROW, IACOL, ICTXT, LDW )
*
*     TAU is stored right after the MPW*NQW local matrix entries
*
      IPTAU = IPMAT + MPW*NQW
*
*     Copy sub( A ) into the workspace and scale it
*
      CALL PSLACPY( 'All', M, N, A, IA, JA, DESCA, WORK( IPMAT ),
     $              IWA, JWA, DESCW )
      RWORK( 1 ) = ZERO
      ANRM = PSLANGE( 'M', M, N, WORK( IPMAT ), IWA, JWA, DESCW,
     $                RWORK )
      IF( ANRM.NE.ZERO ) THEN
         CALL PSLASCL( 'G', ANRM, ONE, M, N, WORK( IPMAT ), IWA,
     $                 JWA, DESCW, INFO )
      END IF
*
*     Copy sub( X ) column by column.  MX-by-NX is the shape its
*     copy occupies in the workspace matrix.
*
      IF( NOTRAN ) THEN
*
*        Column K of sub( X ) becomes row IWX+K; the increment
*        DESCW( M_ ) makes the destination a row vector
*
         MX = NRHS
         NX = N
         DO K = 0, NRHS - 1
            CALL PSCOPY( N, X, IX, JX+K, DESCX, 1, WORK( IPMAT ),
     $                   IWX+K, JWX, DESCW, DESCW( M_ ) )
         END DO
      ELSE
*
*        Column K of sub( X ) becomes column JWX+K
*
         MX = M
         NX = NRHS
         DO K = 0, NRHS - 1
            CALL PSCOPY( M, X, IX, JX+K, DESCX, 1, WORK( IPMAT ),
     $                   IWX, JWX+K, DESCW, 1 )
         END DO
      END IF
*
*     Scale the copy of sub( X )
*
      XNRM = PSLANGE( 'M', MX, NX, WORK( IPMAT ), IWX, JWX, DESCW,
     $                RWORK )
      IF( XNRM.NE.ZERO ) THEN
         CALL PSLASCL( 'G', XNRM, ONE, MX, NX, WORK( IPMAT ), IWX,
     $                 JWX, DESCW, INFO )
      END IF
*
*     Factor the stacked matrix and find the local maximum magnitude
*     in the trailing triangle of the factor
*
      ERR = ZERO
      IF( NOTRAN ) THEN
*
*        LQ factorization of work(iwa:iwa+m+nrhs-1,jwa:jwa+n-1)
*
         NTAU = MIN( MPW, NUMROC( N+ROWOFF, DESCA( MB_ ), MYROW,
     $               IAROW, NPROW ) )
         IPWF = IPTAU + NTAU
         LWORK = DESCW( MB_ )*( MPW + NQW + DESCW( MB_ ) )
         CALL PSGELQF( M+NRHS, N, WORK( IPMAT ), IWA, JWA, DESCW,
     $                 WORK( IPTAU ), WORK( IPWF ), LWORK, INFO )
*
*        Lower triangle in work(iwa+m:iwa+m+nrhs-1,jwa+m:jwa+n-1):
*        column JWA+M+K holds NRHS-K entries starting at row IWX+K
*
         DO K = 0, MIN( N-M, NRHS ) - 1
            CALL PSAMAX( NRHS-K, AMAX, IDUM, WORK( IPMAT ), IWX+K,
     $                   JWA+M+K, DESCW, 1 )
            ERR = MAX( ERR, ABS( AMAX ) )
         END DO
      ELSE
*
*        QR factorization of work(iwa:iwa+m-1,jwa:jwa+n+nrhs-1)
*
         NTAU = MIN( NUMROC( M+COLOFF, DESCA( NB_ ), MYCOL, IACOL,
     $               NPCOL ), NQW )
         IPWF = IPTAU + NTAU
         LWORK = DESCW( NB_ )*( MPW + NQW + DESCW( NB_ ) )
         CALL PSGEQRF( M, N+NRHS, WORK( IPMAT ), IWA, JWA, DESCW,
     $                 WORK( IPTAU ), WORK( IPWF ), LWORK, INFO )
*
*        Upper triangle of work(iwa+n:iwa+m-1,jwa+n:jwa+n+nrhs-1):
*        its K-th column holds MIN( M-N, K ) entries starting at
*        row IWA+N.  There is no such block unless N < M.
*
         IF( N.LT.M ) THEN
            DO K = 1, NRHS
               CALL PSAMAX( MIN( M-N, K ), AMAX, IDUM,
     $                      WORK( IPMAT ), IWA+N, JWX+K-1, DESCW, 1 )
               ERR = MAX( ERR, ABS( AMAX ) )
            END DO
         END IF
      END IF
*
*     Combine the local maxima over the whole process grid
*
      CALL SGAMX2D( ICTXT, 'All', ' ', 1, 1, ERR, 1, IDUM1, IDUM2,
     $              -1, -1, 0 )
*
      PSQRT14 = ERR / ( REAL( MAX( M, N, NRHS ) )*
     $          PSLAMCH( ICTXT, 'Epsilon' ) )
*
      RETURN
*
*     End of PSQRT14
*
      END