ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pdlauu2.f
Go to the documentation of this file.
00001       SUBROUTINE PDLAUU2( UPLO, N, A, IA, JA, DESCA )
00002 *
00003 *  -- ScaLAPACK auxiliary routine (version 1.7) --
00004 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
00005 *     and University of California, Berkeley.
00006 *     May 1, 1997
00007 *
00008 *     .. Scalar Arguments ..
00009       CHARACTER          UPLO
00010       INTEGER            IA, JA, N
00011 *     ..
00012 *     .. Array Arguments ..
00013       INTEGER            DESCA( * )
00014       DOUBLE PRECISION   A( * )
00015 *     ..
00016 *
00017 *  Purpose
00018 *  =======
00019 *
00020 *  PDLAUU2 computes the product U * U' or L' * L, where the triangular
00021 *  factor U or L is stored in the upper or lower triangular part of
00022 *  the matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1).
00023 *
00024 *  If UPLO = 'U' or 'u' then the upper triangle of the result is stored,
00025 *  overwriting the factor U in sub( A ).
00026 *  If UPLO = 'L' or 'l' then the lower triangle of the result is stored,
00027 *  overwriting the factor L in sub( A ).
00028 *
00029 *  This is the unblocked form of the algorithm, calling Level 2 BLAS.
00030 *  No communication is performed by this routine, the matrix to operate
00031 *  on should be strictly local to one process.
00032 *
00033 *  Notes
00034 *  =====
00035 *
00036 *  Each global data object is described by an associated description
00037 *  vector.  This vector stores the information required to establish
00038 *  the mapping between an object element and its corresponding process
00039 *  and memory location.
00040 *
00041 *  Let A be a generic term for any 2D block cyclicly distributed array.
00042 *  Such a global array has an associated description vector DESCA.
00043 *  In the following comments, the character _ should be read as
00044 *  "of the global array".
00045 *
00046 *  NOTATION        STORED IN      EXPLANATION
00047 *  --------------- -------------- --------------------------------------
00048 *  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
00049 *                                 DTYPE_A = 1.
00050 *  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
00051 *                                 the BLACS process grid A is distribu-
00052 *                                 ted over. The context itself is glo-
00053 *                                 bal, but the handle (the integer
00054 *                                 value) may vary.
00055 *  M_A    (global) DESCA( M_ )    The number of rows in the global
00056 *                                 array A.
00057 *  N_A    (global) DESCA( N_ )    The number of columns in the global
00058 *                                 array A.
00059 *  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
00060 *                                 the rows of the array.
00061 *  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
00062 *                                 the columns of the array.
00063 *  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
00064 *                                 row of the array A is distributed.
00065 *  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
00066 *                                 first column of the array A is
00067 *                                 distributed.
00068 *  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
00069 *                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
00070 *
00071 *  Let K be the number of rows or columns of a distributed matrix,
00072 *  and assume that its process grid has dimension p x q.
00073 *  LOCr( K ) denotes the number of elements of K that a process
00074 *  would receive if K were distributed over the p processes of its
00075 *  process column.
00076 *  Similarly, LOCc( K ) denotes the number of elements of K that a
00077 *  process would receive if K were distributed over the q processes of
00078 *  its process row.
00079 *  The values of LOCr() and LOCc() may be determined via a call to the
00080 *  ScaLAPACK tool function, NUMROC:
00081 *          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
00082 *          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
00083 *  An upper bound for these quantities may be computed by:
00084 *          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
00085 *          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
00086 *
00087 *  Arguments
00088 *  =========
00089 *
00090 *  UPLO    (global input) CHARACTER*1
00091 *          Specifies whether the triangular factor stored in the matrix
00092 *          sub( A ) is upper or lower triangular:
00093 *          = 'U':  Upper triangular,
00094 *          = 'L':  Lower triangular.
00095 *
00096 *  N       (global input) INTEGER
00097 *          The number of rows and columns to be operated on, i.e. the
00098 *          order of the order of the triangular factor U or L.  N >= 0.
00099 *
00100 *  A       (local input/local output) DOUBLE PRECISION pointer into the
00101 *          local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
00102 *          On entry, the local pieces of the triangular factor L or U.
00103 *          On exit, if UPLO = 'U', the upper triangle of the distributed
00104 *          matrix sub( A ) is overwritten with the upper triangle of the
00105 *          product U * U'; if UPLO = 'L', the lower triangle of sub( A )
00106 *          is overwritten with the lower triangle of the product L' * L.
00107 *
00108 *  IA      (global input) INTEGER
00109 *          The row index in the global array A indicating the first
00110 *          row of sub( A ).
00111 *
00112 *  JA      (global input) INTEGER
00113 *          The column index in the global array A indicating the
00114 *          first column of sub( A ).
00115 *
00116 *  DESCA   (global and local input) INTEGER array of dimension DLEN_.
00117 *          The array descriptor for the distributed matrix A.
00118 *
00119 *  =====================================================================
00120 *
00121 *     .. Parameters ..
00122       INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
00123      $                   LLD_, MB_, M_, NB_, N_, RSRC_
00124       PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
00125      $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
00126      $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
00127       DOUBLE PRECISION   ONE
00128       PARAMETER          ( ONE = 1.0D+0 )
00129 *     ..
00130 *     .. Local Scalars ..
00131       INTEGER            IACOL, IAROW, ICURR, IDIAG, IIA, IOFFA, JJA,
00132      $                   LDA, MYCOL, MYROW, NA, NPCOL, NPROW
00133       DOUBLE PRECISION   AII
00134 *     ..
00135 *     .. External Subroutines ..
00136       EXTERNAL           BLACS_GRIDINFO, INFOG2L, DGEMV, DSCAL
00137 *     ..
00138 *     .. External Functions ..
00139       LOGICAL            LSAME
00140       DOUBLE PRECISION   DDOT
00141       EXTERNAL           DDOT, LSAME
00142 *     ..
00143 *     .. Executable Statements ..
00144 *
00145 *     Quick return if possible
00146 *
00147       IF( N.EQ.0 )
00148      $   RETURN
00149 *
00150 *     Get grid parameters and compute local indexes
00151 *
00152       CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL )
00153       CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA,
00154      $              IAROW, IACOL )
00155 *
00156       IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.IACOL ) THEN
00157 *
00158          LDA = DESCA( LLD_ )
00159          IDIAG = IIA + ( JJA - 1 ) * LDA
00160          IOFFA = IDIAG
00161 *
00162          IF( LSAME( UPLO, 'U' ) ) THEN
00163 *
00164 *           Compute the product U * U'.
00165 *
00166             DO 10 NA = N-1, 1, -1
00167                AII = A( IDIAG )
00168                ICURR = IDIAG + LDA
00169                A( IDIAG ) = AII*AII + DDOT( NA, A( ICURR ), LDA,
00170      $                                    A( ICURR ), LDA )
00171                CALL DGEMV( 'No transpose', N-NA-1, NA, ONE,
00172      $                     A( IOFFA+LDA ), LDA, A( ICURR ), LDA, AII,
00173      $                     A( IOFFA ), 1 )
00174                IDIAG = IDIAG + LDA + 1
00175                IOFFA = IOFFA + LDA
00176    10       CONTINUE
00177             AII = A( IDIAG )
00178             CALL DSCAL( N, AII, A( IOFFA ), 1 )
00179 *
00180          ELSE
00181 *
00182 *           Compute the product L' * L.
00183 *
00184             DO 20 NA = 1, N-1
00185                AII = A( IDIAG )
00186                ICURR = IDIAG + 1
00187                A(IDIAG) = AII*AII + DDOT( N-NA, A( ICURR ), 1,
00188      $                                    A( ICURR ), 1 )
00189                CALL DGEMV( 'Transpose', N-NA, NA-1, ONE, A( IOFFA+1 ),
00190      $                     LDA, A( ICURR ), 1, AII, A( IOFFA ), LDA )
00191                IDIAG = IDIAG + LDA + 1
00192                IOFFA = IOFFA + 1
00193    20       CONTINUE
00194             AII = A( IDIAG )
00195             CALL DSCAL( N, AII, A( IOFFA ), LDA )
00196 *
00197          END IF
00198 *
00199       END IF
00200 *
00201       RETURN
00202 *
00203 *     End of PDLAUU2
00204 *
00205       END