ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pslacp2.f
Go to the documentation of this file.
00001       SUBROUTINE PSLACP2( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB,
00002      $                     DESCB )
00003 *
00004 *  -- ScaLAPACK auxiliary routine (version 2.0.2) --
00005 *     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
00006 *     May 1 2012
00007 *
00008 *     .. Scalar Arguments ..
00009       CHARACTER          UPLO
00010       INTEGER            IA, IB, JA, JB, M, N
00011 *     ..
00012 *     .. Array Arguments ..
00013       INTEGER            DESCA( * ), DESCB( * )
00014       REAL               A( * ), B( * )
00015 *     ..
00016 *
00017 *  Purpose
00018 *  =======
00019 *
00020 *  PSLACP2 copies all or part of a distributed matrix A to another
00021 *  distributed matrix B.  No communication is performed, PSLACP2
00022 *  performs a local copy sub( A ) := sub( B ), where sub( A ) denotes
00023 *  A(IA:IA+M-1,JA:JA+N-1) and sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1).
00024 *  PSLACP2 requires that only dimension of the matrix operands is
00025 *  distributed.
00026 *
00027 *  Notes
00028 *  =====
00029 *
00030 *  Each global data object is described by an associated description
00031 *  vector.  This vector stores the information required to establish
00032 *  the mapping between an object element and its corresponding process
00033 *  and memory location.
00034 *
00035 *  Let A be a generic term for any 2D block cyclicly distributed array.
00036 *  Such a global array has an associated description vector DESCA.
00037 *  In the following comments, the character _ should be read as
00038 *  "of the global array".
00039 *
00040 *  NOTATION        STORED IN      EXPLANATION
00041 *  --------------- -------------- --------------------------------------
00042 *  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
00043 *                                 DTYPE_A = 1.
00044 *  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
00045 *                                 the BLACS process grid A is distribu-
00046 *                                 ted over. The context itself is glo-
00047 *                                 bal, but the handle (the integer
00048 *                                 value) may vary.
00049 *  M_A    (global) DESCA( M_ )    The number of rows in the global
00050 *                                 array A.
00051 *  N_A    (global) DESCA( N_ )    The number of columns in the global
00052 *                                 array A.
00053 *  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
00054 *                                 the rows of the array.
00055 *  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
00056 *                                 the columns of the array.
00057 *  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
00058 *                                 row of the array A is distributed.
00059 *  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
00060 *                                 first column of the array A is
00061 *                                 distributed.
00062 *  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
00063 *                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
00064 *
00065 *  Let K be the number of rows or columns of a distributed matrix,
00066 *  and assume that its process grid has dimension p x q.
00067 *  LOCr( K ) denotes the number of elements of K that a process
00068 *  would receive if K were distributed over the p processes of its
00069 *  process column.
00070 *  Similarly, LOCc( K ) denotes the number of elements of K that a
00071 *  process would receive if K were distributed over the q processes of
00072 *  its process row.
00073 *  The values of LOCr() and LOCc() may be determined via a call to the
00074 *  ScaLAPACK tool function, NUMROC:
00075 *          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
00076 *          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
00077 *  An upper bound for these quantities may be computed by:
00078 *          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
00079 *          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
00080 *
00081 *  Arguments
00082 *  =========
00083 *
00084 *  UPLO    (global input) CHARACTER
00085 *          Specifies the part of the distributed matrix sub( A ) to be
00086 *          copied:
00087 *          = 'U':   Upper triangular part is copied; the strictly
00088 *                   lower triangular part of sub( A ) is not referenced;
00089 *          = 'L':   Lower triangular part is copied; the strictly
00090 *                   upper triangular part of sub( A ) is not referenced;
00091 *          Otherwise:  All of the matrix sub( A ) is copied.
00092 *
00093 *  M       (global input) INTEGER
00094 *          The number of rows to be operated on i.e the number of rows
00095 *          of the distributed submatrix sub( A ). M >= 0.
00096 *
00097 *  N       (global input) INTEGER
00098 *          The number of columns to be operated on i.e the number of
00099 *          columns of the distributed submatrix sub( A ). N >= 0.
00100 *
00101 *  A       (local input) REAL pointer into the local memory
00102 *          to an array of dimension (LLD_A, LOCc(JA+N-1) ). This array
00103 *          contains the local pieces of the distributed matrix sub( A )
00104 *          to be copied from.
00105 *
00106 *  IA      (global input) INTEGER
00107 *          The row index in the global array A indicating the first
00108 *          row of sub( A ).
00109 *
00110 *  JA      (global input) INTEGER
00111 *          The column index in the global array A indicating the
00112 *          first column of sub( A ).
00113 *
00114 *  DESCA   (global and local input) INTEGER array of dimension DLEN_.
00115 *          The array descriptor for the distributed matrix A.
00116 *
00117 *  B       (local output) REAL pointer into the local memory
00118 *          to an array of dimension (LLD_B, LOCc(JB+N-1) ). This array
00119 *          contains on exit the local pieces of the distributed matrix
00120 *          sub( B ) set as follows:
00121 *
00122 *          if UPLO = 'U', B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1),
00123 *                         1<=i<=j, 1<=j<=N;
00124 *          if UPLO = 'L', B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1),
00125 *                         j<=i<=M, 1<=j<=N;
00126 *          otherwise,     B(IB+i-1,JB+j-1) = A(IA+i-1,JA+j-1),
00127 *                         1<=i<=M, 1<=j<=N.
00128 *
00129 *  IB      (global input) INTEGER
00130 *          The row index in the global array B indicating the first
00131 *          row of sub( B ).
00132 *
00133 *  JB      (global input) INTEGER
00134 *          The column index in the global array B indicating the
00135 *          first column of sub( B ).
00136 *
00137 *  DESCB   (global and local input) INTEGER array of dimension DLEN_.
00138 *          The array descriptor for the distributed matrix B.
00139 *
00140 *  =====================================================================
00141 *
00142 *     .. Parameters ..
00143       INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
00144      $                   LLD_, MB_, M_, NB_, N_, RSRC_
00145       PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
00146      $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
00147      $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
00148 *     ..
00149 *     .. Local Scalars ..
00150       INTEGER            HEIGHT, IACOL, IAROW, IBASE, IBCOL, IBROW,
00151      $                   ICOFFA, IIA, IIAA, IIB, IIBB, IIBEGA, IIBEGB,
00152      $                   IIENDA, IINXTA, IINXTB, ILEFT, IRIGHT, IROFFA,
00153      $                   ITOP, JJA, JJAA, JJB, JJBB, JJBEGA, JJBEGB,
00154      $                   JJENDA, JJNXTA, JJNXTB, LDA, LDB, MBA, MP,
00155      $                   MPAA, MYCOL, MYDIST, MYROW, NBA, NPCOL, NPROW,
00156      $                   NQ, NQAA, WIDE
00157 *     ..
00158 *     .. External Subroutines ..
00159       EXTERNAL           BLACS_GRIDINFO, INFOG2L, SLAMOV
00160 *     ..
00161 *     .. External Functions ..
00162       LOGICAL            LSAME
00163       INTEGER            ICEIL, NUMROC
00164       EXTERNAL           ICEIL, LSAME, NUMROC
00165 *     ..
00166 *     .. Intrinsic Functions ..
00167       INTRINSIC          MAX, MIN, MOD
00168 *     ..
00169 *     .. Executable Statements ..
00170 *
00171       IF( M.EQ.0 .OR. N.EQ.0 )
00172      $   RETURN
00173 *
00174 *     Get grid parameters
00175 *
00176       CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL )
00177 *
00178       CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA,
00179      $              IAROW, IACOL )
00180       CALL INFOG2L( IB, JB, DESCB, NPROW, NPCOL, MYROW, MYCOL, IIB, JJB,
00181      $              IBROW, IBCOL )
00182 *
00183       MBA    = DESCA( MB_ )
00184       NBA    = DESCA( NB_ )
00185       LDA    = DESCA( LLD_ )
00186       IROFFA = MOD( IA-1, MBA )
00187       ICOFFA = MOD( JA-1, NBA )
00188       LDB    = DESCB( LLD_ )
00189 *
00190       IF( N.LE.( NBA-ICOFFA ) ) THEN
00191 *
00192 *        It is assumed that the local columns JJA:JJA+N-1 of the matrix
00193 *        A are in the same process column (IACOL).
00194 *
00195 *                         N
00196 *                JJA             JJA+N-1
00197 *         /      ---------------------    \
00198 *   IROFFA|      |                   |    |
00199 *         \      |...................|    |          ( IAROW )
00200 *           IIA  |x                  |    |   MBA = DESCA( MB_ )
00201 *                | x                 |    |
00202 *                |--x----------------|    /
00203 *                |   x               |
00204 *                |    x              |        ITOP
00205 *                |     x             |          |
00206 *                |      x            |      /-------\
00207 *                |-------x-----------|      |-------x-----------|
00208 *                |        x          |      |        x          |
00209 *                |         x         |      |         x         |
00210 *                |          x        |      |          x        |
00211 *                |           x       |      |           x       |
00212 *                |------------x------|      |------------x------|
00213 *                |             x     |      \____________/
00214 *                |              x    |            |
00215 *                |               x   |          IBASE
00216 *                |                x  |
00217 *                |-----------------x-|          Local picture
00218 *                |                  x|
00219 *                |                   |
00220 *                |                   |
00221 *                |                   |
00222 *                |-------------------|
00223 *                |                   |
00224 *                .                   .
00225 *                .                   .
00226 *                .      (IACOL)      .
00227 *
00228          IF( MYCOL.EQ.IACOL ) THEN
00229 *
00230             MP = NUMROC( M+IROFFA, MBA, MYROW, IAROW, NPROW )
00231             IF( MP.LE.0 )
00232      $         RETURN
00233             IF( MYROW.EQ.IAROW )
00234      $         MP = MP - IROFFA
00235             MYDIST = MOD( MYROW-IAROW+NPROW, NPROW )
00236             ITOP   = MYDIST * MBA - IROFFA
00237 *
00238             IF( LSAME( UPLO, 'U' ) ) THEN
00239 *
00240                ITOP   = MAX( 0, ITOP )
00241                IIBEGA = IIA
00242                IIENDA = IIA + MP - 1
00243                IINXTA = MIN( ICEIL( IIBEGA, MBA ) * MBA, IIENDA )
00244                IIBEGB = IIB
00245                IINXTB = IIBEGB + IINXTA - IIBEGA
00246 *
00247    10          CONTINUE
00248                IF( ( N-ITOP ).GT.0 ) THEN
00249                   CALL SLAMOV( UPLO, IINXTA-IIBEGA+1, N-ITOP,
00250      $                         A( IIBEGA+(JJA+ITOP-1)*LDA ), LDA,
00251      $                         B( IIBEGB+(JJB+ITOP-1)*LDB ), LDB )
00252                   MYDIST = MYDIST + NPROW
00253                   ITOP   = MYDIST * MBA - IROFFA
00254                   IIBEGA = IINXTA + 1
00255                   IINXTA = MIN( IINXTA+MBA, IIENDA )
00256                   IIBEGB = IINXTB + 1
00257                   IINXTB = IIBEGB + IINXTA - IIBEGA
00258                   GO TO 10
00259                END IF
00260 *
00261             ELSE IF( LSAME( UPLO, 'L' ) ) THEN
00262 *
00263                MPAA  = MP
00264                IIAA  = IIA
00265                JJAA  = JJA
00266                IIBB  = IIB
00267                JJBB  = JJB
00268                IBASE = MIN( ITOP + MBA, N )
00269                ITOP  = MIN( MAX( 0, ITOP ), N )
00270 *
00271    20          CONTINUE
00272                IF( JJAA.LE.( JJA+N-1 ) ) THEN
00273                   HEIGHT = IBASE - ITOP
00274                   CALL SLAMOV( 'All', MPAA, ITOP-JJAA+JJA,
00275      $                         A( IIAA+(JJAA-1)*LDA ), LDA,
00276      $                         B( IIBB+(JJBB-1)*LDB ), LDB )
00277                   CALL SLAMOV( UPLO, MPAA, HEIGHT,
00278      $                         A( IIAA+(JJA+ITOP-1)*LDA ), LDA,
00279      $                         B( IIBB+(JJB+ITOP-1)*LDB ), LDB )
00280                   MPAA   = MAX( 0, MPAA - HEIGHT )
00281                   IIAA   = IIAA + HEIGHT
00282                   JJAA   = JJA  + IBASE
00283                   IIBB   = IIBB + HEIGHT
00284                   JJBB   = JJB  + IBASE
00285                   MYDIST = MYDIST + NPROW
00286                   ITOP   = MYDIST * MBA - IROFFA
00287                   IBASE  = MIN( ITOP + MBA, N )
00288                   ITOP   = MIN( ITOP, N )
00289                   GO TO 20
00290                END IF
00291 *
00292             ELSE
00293 *
00294                CALL SLAMOV( 'All', MP, N, A( IIA+(JJA-1)*LDA ),
00295      $                      LDA, B( IIB+(JJB-1)*LDB ), LDB )
00296 *
00297             END IF
00298 *
00299          END IF
00300 *
00301       ELSE IF( M.LE.( MBA-IROFFA ) ) THEN
00302 *
00303 *        It is assumed that the local rows IIA:IIA+M-1 of the matrix A
00304 *        are in the same process row (IAROW).
00305 *
00306 *            ICOFFA
00307 *             / \JJA
00308 *        IIA  ------------------ ....            --------
00309 *             | .x  |    |    |                 / |    | \
00310 *             | . x |    |    |            ILEFT| |    | |
00311 *             | .  x     |    |                 | |    | |
00312 *             | .   x    |    |                 \ x    | |
00313 *             | .   |x   |    |                   |x   | | IRIGHT
00314 *             | .   | x  |    |                   | x  | |
00315 *    (IAROW)  | .   |  x |    |                   |  x | |
00316 *             | .   |   x|    |                   |   x| |
00317 *             | .   |    x    |                   |    x /
00318 *             | .   |    |x   |                   |    |
00319 *             | .   |    | x  |                   |    |
00320 *             | .   |    |  x |                   |    |
00321 *             | .   |    |   x|                   |    |
00322 *    IIA+M-1  ------------------ ....            -------
00323 *              NB_A
00324 *             (IACOL)                          Local picture
00325 *
00326          IF( MYROW.EQ.IAROW ) THEN
00327 *
00328             NQ = NUMROC( N+ICOFFA, NBA, MYCOL, IACOL, NPCOL )
00329             IF( NQ.LE.0 )
00330      $         RETURN
00331             IF( MYCOL.EQ.IACOL )
00332      $         NQ = NQ - ICOFFA
00333             MYDIST = MOD( MYCOL-IACOL+NPCOL, NPCOL )
00334             ILEFT  = MYDIST * NBA - ICOFFA
00335 *
00336             IF( LSAME( UPLO, 'L' ) ) THEN
00337 *
00338                ILEFT  = MAX( 0, ILEFT )
00339                JJBEGA = JJA
00340                JJENDA = JJA + NQ - 1
00341                JJNXTA = MIN( ICEIL( JJBEGA, NBA ) * NBA, JJENDA )
00342                JJBEGB = JJB
00343                JJNXTB = JJBEGB + JJNXTA - JJBEGA
00344 *
00345    30          CONTINUE
00346                IF( ( M-ILEFT ).GT.0 ) THEN
00347                   CALL SLAMOV( UPLO, M-ILEFT, JJNXTA-JJBEGA+1,
00348      $                         A( IIA+ILEFT+(JJBEGA-1)*LDA ), LDA,
00349      $                         B( IIB+ILEFT+(JJBEGB-1)*LDB ), LDB )
00350                   MYDIST = MYDIST + NPCOL
00351                   ILEFT  = MYDIST * NBA - ICOFFA
00352                   JJBEGA = JJNXTA +1
00353                   JJNXTA = MIN( JJNXTA+NBA, JJENDA )
00354                   JJBEGB = JJNXTB +1
00355                   JJNXTB = JJBEGB + JJNXTA - JJBEGA
00356                   GO TO 30
00357                END IF
00358 *
00359             ELSE IF( LSAME( UPLO, 'U' ) ) THEN
00360 *
00361                NQAA   = NQ
00362                IIAA   = IIA
00363                JJAA   = JJA
00364                IIBB   = IIB
00365                JJBB   = JJB
00366                IRIGHT = MIN( ILEFT + NBA, M )
00367                ILEFT  = MIN( MAX( 0, ILEFT ), M )
00368 *
00369    40          CONTINUE
00370                IF( IIAA.LE.( IIA+M-1 ) ) THEN
00371                   WIDE = IRIGHT - ILEFT
00372                   CALL SLAMOV( 'All', ILEFT-IIAA+IIA, NQAA,
00373      $                         A( IIAA+(JJAA-1)*LDA ), LDA,
00374      $                         B( IIBB+(JJBB-1)*LDB ), LDB )
00375                   CALL SLAMOV( UPLO, WIDE, NQAA,
00376      $                         A( IIA+ILEFT+(JJAA-1)*LDA ), LDA,
00377      $                         B( IIB+ILEFT+(JJBB-1)*LDB ), LDB )
00378                   NQAA   = MAX( 0, NQAA - WIDE )
00379                   IIAA   = IIA  + IRIGHT
00380                   JJAA   = JJAA + WIDE
00381                   IIBB   = IIB  + IRIGHT
00382                   JJBB   = JJBB + WIDE
00383                   MYDIST = MYDIST + NPCOL
00384                   ILEFT  = MYDIST * NBA - ICOFFA
00385                   IRIGHT = MIN( ILEFT + NBA, M )
00386                   ILEFT  = MIN( ILEFT, M )
00387                   GO TO 40
00388                END IF
00389 *
00390             ELSE
00391 *
00392                CALL SLAMOV( 'All', M, NQ, A( IIA+(JJA-1)*LDA ),
00393      $                      LDA, B( IIB+(JJB-1)*LDB ), LDB )
00394 *
00395             END IF
00396 *
00397          END IF
00398 *
00399       END IF
00400 *
00401       RETURN
00402 *
00403 *     End of PSLACP2
00404 *
00405       END