ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
psgetrrv.f
Go to the documentation of this file.
1  SUBROUTINE psgetrrv( M, N, A, IA, JA, DESCA, IPIV, WORK )
2 *
3 * -- ScaLAPACK routine (version 1.7) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * May 28, 2001
7 *
8 * .. Scalar Arguments ..
9  INTEGER IA, JA, M, N
10 * ..
11 * .. Array Arguments ..
12  INTEGER DESCA( * ), IPIV( * )
13  REAL A( * ), WORK( * )
14 * ..
15 *
16 * Purpose
17 * =======
18 *
19 * PSGETRRV reforms sub( A ) = A(IA:IA+M-1,JA:JA+N-1) from the
20 * triangular matrices L and U returned by PSGETRF. It multiplies
21 * an upper triangular matrix stored in the upper triangle of sub( A )
22 * times the unit lower triangular matrix stored in the lower triangle.
23 * To accomplish this, the routine basically performs the PSGETRF
24 * routine in reverse.
25 *
26 * It computes L*U first, and then apply P: P*L*U => sub( A ). In the
27 * J-th loop, the block column (or column panel), which has the lower
28 * triangular unit matrix L is multiplied with the block row (or row
29 * panel), which contains the upper triangular matrix U.
30 *
31 * ( L1 ) ( 0 0 ) ( L1*U1 L1*U2 )
32 * A` = L * U + A` = ( ) * (U1 U2) + ( ) = ( )
33 * ( L2 ) ( 0 A`) ( L2*U1 L2*U2+A` )
34 *
35 * where L1 is a lower unit triangular matrix and U1 is an upper
36 * triangular matrix.
37 *
38 * Notes
39 * =====
40 *
41 * Each global data object is described by an associated description
42 * vector. This vector stores the information required to establish
43 * the mapping between an object element and its corresponding process
44 * and memory location.
45 *
46 * Let A be a generic term for any 2D block cyclicly distributed array.
47 * Such a global array has an associated description vector DESCA.
48 * In the following comments, the character _ should be read as
49 * "of the global array".
50 *
51 * NOTATION STORED IN EXPLANATION
52 * --------------- -------------- --------------------------------------
53 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
54 * DTYPE_A = 1.
55 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
56 * the BLACS process grid A is distribu-
57 * ted over. The context itself is glo-
58 * bal, but the handle (the integer
59 * value) may vary.
60 * M_A (global) DESCA( M_ ) The number of rows in the global
61 * array A.
62 * N_A (global) DESCA( N_ ) The number of columns in the global
63 * array A.
64 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
65 * the rows of the array.
66 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
67 * the columns of the array.
68 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
69 * row of the array A is distributed.
70 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
71 * first column of the array A is
72 * distributed.
73 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
74 * array. LLD_A >= MAX(1,LOCr(M_A)).
75 *
76 * Let K be the number of rows or columns of a distributed matrix,
77 * and assume that its process grid has dimension p x q.
78 * LOCr( K ) denotes the number of elements of K that a process
79 * would receive if K were distributed over the p processes of its
80 * process column.
81 * Similarly, LOCc( K ) denotes the number of elements of K that a
82 * process would receive if K were distributed over the q processes of
83 * its process row.
84 * The values of LOCr() and LOCc() may be determined via a call to the
85 * ScaLAPACK tool function, NUMROC:
86 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
87 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
88 * An upper bound for these quantities may be computed by:
89 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
90 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
91 *
92 * Arguments
93 * =========
94 *
95 * M (global input) INTEGER
96 * The number of rows to be operated on, i.e. the number of rows
97 * of the distributed submatrix sub( A ). M >= 0.
98 *
99 * N (global input) INTEGER
100 * The number of columns to be operated on, i.e. the number of
101 * columns of the distributed submatrix sub( A ). N >= 0.
102 *
103 * A (local input/local output) REAL pointer into the
104 * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
105 * On entry, the local pieces of the distributed matrix sub( A )
106 * contains the the factors L and U from the factorization
107 * sub( A ) = P*L*U; the unit diagonal elements of L are not
108 * stored. On exit, the original distributed matrix sub( A )
109 * is restored.
110 *
111 * IA (global input) INTEGER
112 * The row index in the global array A indicating the first
113 * row of sub( A ).
114 *
115 * JA (global input) INTEGER
116 * The column index in the global array A indicating the
117 * first column of sub( A ).
118 *
119 * DESCA (global and local input) INTEGER array of dimension DLEN_.
120 * The array descriptor for the distributed matrix A.
121 *
122 * IPIV (local input) INTEGER array, dimension ( LOCr(M_A)+MB_A )
123 * This array contains the pivoting information.
124 * IPIV(i) -> The global row local row i was swapped with.
125 * This array is tied to the distributed matrix A.
126 *
127 * WORK (local workspace) REAL array of dimension (LWORK)
128 * LWORK >= MpA0 * NB_A + NqA0 * MB_A, where
129 *
130 * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ),
131 * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ),
132 * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ),
133 * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ),
134 * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ),
135 *
136 * WORK is used to store a block of columns of L, and a block of
137 * rows of U. INDXG2P and NUMROC are ScaLAPACK tool functions;
138 * MYROW, MYCOL, NPROW and NPCOL can be determined by calling
139 * the subroutine BLACS_GRIDINFO.
140 *
141 * =====================================================================
142 *
143 * .. Parameters ..
144  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
145  $ LLD_, MB_, M_, NB_, N_, RSRC_
146  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
147  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
148  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
149  REAL ONE, ZERO
150  parameter( one = 1.0e+0, zero = 0.0e+0 )
151 * ..
152 * .. Local Scalars ..
153  CHARACTER COLBTOP, ROWBTOP
154  INTEGER IACOL, IAROW, ICTXT, IL, IPL, IPU, IROFF, J,
155  $ JB, JL, JN, MN, MP, MYCOL, MYROW, NPCOL, NPROW
156 * .. Local Arrays ..
157  INTEGER DESCIP( DLEN_ ), DESCL( DLEN_ ),
158  $ DESCU( DLEN_ ), IDUM( 1 )
159 * ..
160 * .. External Subroutines ..
161  EXTERNAL blacs_gridinfo, descset, psgemm, pslacpy,
162  $ pslapiv, pslaset, pb_topget, pb_topset
163 * ..
164 * .. External Functions ..
165  INTEGER ICEIL, INDXG2P, NUMROC
166  EXTERNAL iceil, indxg2p, numroc
167 * ..
168 * .. Intrinsic Functions ..
169  INTRINSIC max, min, mod
170 * ..
171 * .. Executable Statements ..
172 *
173 * Get grid parameters.
174 *
175  ictxt = desca( ctxt_ )
176  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
177 *
178  iroff = mod( ia-1, desca( mb_ ) )
179  iarow = indxg2p( ia, desca( mb_ ), myrow, desca( rsrc_ ), nprow )
180  mp = numroc( m+iroff, desca( mb_ ), myrow, iarow, nprow )
181  ipl = 1
182  ipu = ipl + mp * desca( nb_ )
183  CALL pb_topget( ictxt, 'Broadcast', 'Rowwise', rowbtop )
184  CALL pb_topget( ictxt, 'Broadcast', 'Columnwise', colbtop )
185  CALL pb_topset( ictxt, 'Broadcast', 'Rowwise', 'S-ring' )
186  CALL pb_topset( ictxt, 'Broadcast', 'Columnwise', ' ' )
187 *
188 * Define array descriptors for L and U
189 *
190  mn = min( m, n )
191  il = max( ( ( ia+mn-2 ) / desca( mb_ ) ) * desca( mb_ ) + 1, ia )
192  jl = max( ( ( ja+mn-2 ) / desca( nb_ ) ) * desca( nb_ ) + 1, ja )
193  jn = min( iceil( ja, desca( nb_ ) )*desca( nb_ ), ja+mn-1 )
194  iarow = indxg2p( il, desca( mb_ ), myrow, desca( rsrc_ ), nprow )
195  iacol = indxg2p( jl, desca( nb_ ), mycol, desca( csrc_ ), npcol )
196 *
197  CALL descset( descl, ia+m-il, desca( nb_ ), desca( mb_ ),
198  $ desca( nb_ ), iarow, iacol, ictxt, max( 1, mp ) )
199 *
200  CALL descset( descu, desca( mb_ ), ja+n-jl, desca( mb_ ),
201  $ desca( nb_ ), iarow, iacol, ictxt, desca( mb_ ) )
202 *
203  CALL descset( descip, desca( m_ ) + desca( mb_ )*nprow, 1,
204  $ desca( mb_ ), 1, desca( rsrc_ ), mycol, ictxt,
205  $ numroc( desca( m_ ), desca( mb_ ), myrow,
206  $ desca( rsrc_ ), nprow ) + desca( mb_ ) )
207 *
208 *
209  DO 10 j = jl, jn+1, -desca( nb_ )
210 *
211  jb = min( ja+mn-j, desca( nb_ ) )
212 *
213 * Copy unit lower triangular part of sub( A ) into WORK
214 *
215  CALL pslacpy( 'Lower', m-il+ia, jb, a, il, j, desca,
216  $ work( ipl ), 1, 1, descl )
217  CALL pslaset( 'Upper', m-il+ia, jb, zero, one, work( ipl ),
218  $ 1, 1, descl )
219 *
220 * Copy upper triangular part of sub( A ) into WORK(IPU)
221 *
222  CALL pslacpy( 'Upper', jb, ja+n-j, a, il, j, desca,
223  $ work( ipu ), 1, 1, descu )
224  CALL pslaset( 'Lower', jb-1, ja+n-j, zero, zero,
225  $ work( ipu ), 2, 1, descu )
226 *
227 * Zero the strict lower triangular piece of the current block.
228 *
229  CALL pslaset( 'Lower', ia+m-il-1, jb, zero, zero, a, il+1, j,
230  $ desca )
231 *
232 * Zero the upper triangular piece of the current block.
233 *
234  CALL pslaset( 'Upper', jb, ja+n-j, zero, zero, a, il, j,
235  $ desca )
236 *
237 * Update the matrix sub( A ).
238 *
239  CALL psgemm( 'No transpose', 'No transpose', ia+m-il,
240  $ ja+n-j, jb, one, work( ipl ), 1, 1, descl,
241  $ work( ipu ), 1, 1, descu, one, a, il, j, desca )
242 *
243  il = il - desca( mb_ )
244  descl( m_ ) = descl( m_ ) + descl( mb_ )
245  descl( rsrc_ ) = mod( descl( rsrc_ ) + nprow - 1, nprow )
246  descl( csrc_ ) = mod( descl( csrc_ ) + npcol - 1, npcol )
247  descu( n_ ) = descu( n_ ) + descu( nb_ )
248  descu( rsrc_ ) = descl( rsrc_ )
249  descu( csrc_ ) = descl( csrc_ )
250 *
251  10 CONTINUE
252 *
253 * Handle first block separately
254 *
255  jb = min( jn-ja+1, desca( nb_ ) )
256 *
257 * Copy unit lower triangular part of sub( A ) into WORK
258 *
259  CALL pslacpy( 'Lower', m, jb, a, ia, ja, desca, work( ipl ),
260  $ 1, 1, descl )
261  CALL pslaset( 'Upper', m, jb, zero, one, work( ipl ), 1, 1,
262  $ descl )
263 *
264 * Copy upper triangular part of sub( A ) into WORK(IPU)
265 *
266  CALL pslacpy( 'Upper', jb, n, a, ia, ja, desca, work( ipu ), 1,
267  $ 1, descu )
268  CALL pslaset( 'Lower', jb-1, n, zero, zero, work( ipu ), 2, 1,
269  $ descu )
270 *
271 * Zero the strict lower triangular piece of the current block.
272 *
273  CALL pslaset( 'Lower', m-1, jb, zero, zero, a, ia+1, ja, desca )
274 *
275 * Zero the upper triangular piece of the current block.
276 *
277  CALL pslaset( 'Upper', jb, n, zero, zero, a, ia, ja, desca )
278 *
279 * Update the matrix sub( A ).
280 *
281  CALL psgemm( 'No transpose', 'No transpose', m, n, jb, one,
282  $ work( ipl ), 1, 1, descl, work( ipu ), 1, 1,
283  $ descu, one, a, ia, ja, desca )
284 *
285 * Apply pivots so that sub( A ) = P*L*U
286 *
287  CALL pslapiv( 'Backward', 'Row', 'Col', min( m, n ), n, a, ia, ja,
288  $ desca, ipiv, ia, 1, descip, idum )
289 *
290  CALL pb_topset( ictxt, 'Broadcast', 'Rowwise', rowbtop )
291  CALL pb_topset( ictxt, 'Broadcast', 'Columnwise', colbtop )
292 *
293  RETURN
294 *
295 * End of PSGETRRV
296 *
297  END
max
#define max(A, B)
Definition: pcgemr.c:180
psgetrrv
subroutine psgetrrv(M, N, A, IA, JA, DESCA, IPIV, WORK)
Definition: psgetrrv.f:2
pslacpy
subroutine pslacpy(UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, DESCB)
Definition: pslacpy.f:3
pslapiv
subroutine pslapiv(DIREC, ROWCOL, PIVROC, M, N, A, IA, JA, DESCA, IPIV, IP, JP, DESCIP, IWORK)
Definition: pslapiv.f:3
descset
subroutine descset(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD)
Definition: descset.f:3
pslaset
subroutine pslaset(UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA)
Definition: psblastst.f:6863
min
#define min(A, B)
Definition: pcgemr.c:181