*  ScaLAPACK 2.0.2
*  ScaLAPACK: Scalable Linear Algebra PACKage
*  Source file: pslauum.f
*  PSLAUUM: blocked product U * U' or L' * L of a distributed
*  triangular factor (single precision real).  See "Purpose" below.
      SUBROUTINE PSLAUUM( UPLO, N, A, IA, JA, DESCA )
*
*  -- ScaLAPACK auxiliary routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     May 1, 1997
*
*     .. Scalar Arguments ..
      CHARACTER          UPLO
      INTEGER            IA, JA, N
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * )
      REAL               A( * )
*     ..
*
*  Purpose
*  =======
*
*  PSLAUUM computes the product U * U' or L' * L, where the triangular
*  factor U or L is stored in the upper or lower triangular part of
*  the distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1).
*
*  If UPLO = 'U' or 'u' then the upper triangle of the result is stored,
*  overwriting the factor U in sub( A ).
*  If UPLO = 'L' or 'l' then the lower triangle of the result is stored,
*  overwriting the factor L in sub( A ).
*
*  This is the blocked form of the algorithm, calling Level 3 PBLAS.
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclically distributed
*  array.  Such a global array has an associated description vector
*  DESCA.  In the following comments, the character _ should be read
*  as "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
*                                 DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) DESCA( M_ )    The number of rows in the global
*                                 array A.
*  N_A    (global) DESCA( N_ )    The number of columns in the global
*                                 array A.
*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
*                                 the rows of the array.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
*                                 the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Arguments
*  =========
*
*  UPLO    (global input) CHARACTER*1
*          Specifies whether the triangular factor stored in the
*          distributed matrix sub( A ) is upper or lower triangular:
*          = 'U':  Upper triangular
*          = 'L':  Lower triangular
*          UPLO is not validated: any value that LSAME does not
*          match against 'U' is handled as the lower triangular case.
*
*  N       (global input) INTEGER
*          The number of rows and columns to be operated on, i.e. the
*          order of the triangular factor U or L. N >= 0.
*          The routine returns immediately when N = 0.  A negative N
*          is not checked for here and is passed on to the callees.
*
*  A       (local input/local output) REAL pointer into the
*          local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
*          On entry, the local pieces of the triangular factor L or U.
*          On exit, if UPLO = 'U', the upper triangle of the distributed
*          matrix sub( A ) is overwritten with the upper triangle of the
*          product U * U'; if UPLO = 'L', the lower triangle of sub( A )
*          is overwritten with the lower triangle of the product L' * L.
*
*  IA      (global input) INTEGER
*          The row index in the global array A indicating the first
*          row of sub( A ).
*
*  JA      (global input) INTEGER
*          The column index in the global array A indicating the
*          first column of sub( A ).
*
*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix A.
*          Only DESCA( NB_ ) is read directly by this routine; the
*          descriptor is otherwise passed unchanged to the callees.
*
*  Algorithm outline
*  =================
*
*  sub( A ) is traversed by block columns.  The first block ends at
*  global column JN, the last column of the NB_A-wide column block
*  that contains JA (or JA+N-1 if sub( A ) ends earlier), so that all
*  later blocks start on a column block boundary of A.  For the block
*  starting at global column J (row I = IA+J-JA) of width JB:
*    1. PSTRMM multiplies the already visited off-diagonal panel by
*       the transpose of the JB-by-JB diagonal triangular block,
*    2. PSLAUU2 replaces the diagonal block by its own product,
*    3. PSGEMM and PSSYRK add the contribution of the part of the
*       factor that lies beyond the current block.
*  Step 1 must come before step 2 because it reads the diagonal block
*  that step 2 overwrites.
*
*  =====================================================================
*
*     .. Parameters ..
*     Descriptor entry indices.  Only NB_ is referenced below.
      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
     $                   LLD_, MB_, M_, NB_, N_, RSRC_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
      REAL               ONE
      PARAMETER          ( ONE = 1.0E+0 )
*     ..
*     .. Local Scalars ..
*     I, J : global row/column index of the current diagonal block
*     JB   : order of the current diagonal block
*     JN   : last global column of the first (possibly partial) block
      INTEGER            I, J, JB, JN
*     ..
*     .. External Subroutines ..
      EXTERNAL           PSGEMM, PSLAUU2, PSTRMM, PSSYRK
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            ICEIL
      EXTERNAL           ICEIL, LSAME
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MIN
*     ..
*     .. Executable Statements ..
*
*     Quick return if possible
*
      IF( N.EQ.0 )
     $   RETURN
*
*     JN = end of the column block containing JA, clipped to the last
*     column of sub( A ).
*
      JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 )
      IF( LSAME( UPLO, 'U' ) ) THEN
*
*        Compute the product U * U'.
*
*        Handle first block separately
*
         JB = JN-JA+1
         CALL PSLAUU2( 'Upper', JB, A, IA, JA, DESCA )
         IF( JB.LE.N-1 ) THEN
*
*           Columns remain to the right of the first block: add the
*           product of the JB-by-(N-JB) panel A(IA,JA+JB) with its
*           transpose to the first diagonal block.
*
            CALL PSSYRK( 'Upper', 'No transpose', JB, N-JB, ONE, A, IA,
     $                   JA+JB, DESCA, ONE, A, IA, JA, DESCA )
         END IF
*
*        Loop over the remaining blocks of columns
*
         DO 10 J = JN+1, JA+N-1, DESCA( NB_ )
            JB = MIN( N-J+JA, DESCA( NB_ ) )
            I = IA + J - JA
*
*           Panel above the diagonal block, (J-JA)-by-JB at A(IA,J):
*           multiply from the right by the transpose of the upper
*           triangular diagonal block at A(I,J).
*
            CALL PSTRMM( 'Right', 'Upper', 'Transpose', 'Non-unit',
     $                   J-JA, JB, ONE, A, I, J, DESCA, A, IA, J,
     $                   DESCA )
*
*           Diagonal block: unblocked product, overwriting A(I,J).
*
            CALL PSLAUU2( 'Upper', JB, A, I, J, DESCA )
            IF( J+JB.LE.JA+N-1 ) THEN
*
*              N-J-JB+JA columns remain to the right of this block.
*              Add A(IA,J+JB) * A(I,J+JB)' to the panel above the
*              diagonal block ...
*
               CALL PSGEMM( 'No transpose', 'Transpose', J-JA, JB,
     $                      N-J-JB+JA, ONE, A, IA, J+JB, DESCA, A, I,
     $                      J+JB, DESCA, ONE, A, IA, J, DESCA )
*
*              ... and A(I,J+JB) * A(I,J+JB)' to the diagonal block.
*
               CALL PSSYRK( 'Upper', 'No transpose', JB, N-J-JB+JA, ONE,
     $                      A, I, J+JB, DESCA, ONE, A, I, J, DESCA )
            END IF
   10    CONTINUE
      ELSE
*
*        Compute the product L' * L.
*
*        Handle first block separately
*
         JB = JN-JA+1
         CALL PSLAUU2( 'Lower', JB, A, IA, JA, DESCA )
         IF( JB.LE.N-1 ) THEN
*
*           Rows remain below the first block: add the product of the
*           transpose of the (N-JB)-by-JB panel A(IA+JB,JA) with the
*           panel itself to the first diagonal block.
*
            CALL PSSYRK( 'Lower', 'Transpose', JB, N-JB, ONE, A, IA+JB,
     $                   JA, DESCA, ONE, A, IA, JA, DESCA )
         END IF
*
*        Loop over the remaining blocks of columns
*
         DO 20 J = JN+1, JA+N-1, DESCA( NB_ )
            JB = MIN( N-J+JA, DESCA( NB_ ) )
            I = IA + J - JA
*
*           Panel left of the diagonal block, JB-by-(J-JA) at A(I,JA):
*           multiply from the left by the transpose of the lower
*           triangular diagonal block at A(I,J).
*
            CALL PSTRMM( 'Left', 'Lower', 'Transpose', 'Non-unit', JB,
     $                   J-JA, ONE, A, I, J, DESCA, A, I, JA, DESCA )
*
*           Diagonal block: unblocked product, overwriting A(I,J).
*
            CALL PSLAUU2( 'Lower', JB, A, I, J, DESCA )
            IF( J+JB.LE.JA+N-1 ) THEN
*
*              N-J-JB+JA rows remain below this block.
*              Add A(I+JB,J)' * A(I+JB,JA) to the panel left of the
*              diagonal block ...
*
               CALL PSGEMM( 'Transpose', 'No transpose', JB, J-JA,
     $                      N-J-JB+JA, ONE, A, I+JB, J, DESCA, A, I+JB,
     $                      JA, DESCA, ONE, A, I, JA, DESCA )
*
*              ... and A(I+JB,J)' * A(I+JB,J) to the diagonal block.
*
               CALL PSSYRK( 'Lower', 'Transpose', JB, N-J-JB+JA, ONE,
     $                      A, I+JB, J, DESCA, ONE, A, I, J, DESCA )
            END IF
   20    CONTINUE
      END IF
*
      RETURN
*
*     End of PSLAUUM
*
      END