ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pcgeequ.f
Go to the documentation of this file.
1  SUBROUTINE pcgeequ( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND,
2  $ AMAX, INFO )
3 *
4 * -- ScaLAPACK routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 1, 1997
8 *
9 * .. Scalar Arguments ..
10  INTEGER IA, INFO, JA, M, N
11  REAL AMAX, COLCND, ROWCND
12 * ..
13 * .. Array Arguments ..
14  INTEGER DESCA( * )
15  REAL C( * ), R( * )
16  COMPLEX A( * )
17 * ..
18 *
19 * Purpose
20 * =======
21 *
22 * PCGEEQU computes row and column scalings intended to equilibrate an
23 * M-by-N distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA:JA+N-1) and
24 * reduce its condition number. R returns the row scale factors and C
25 * the column scale factors, chosen to try to make the largest entry in
26 * each row and column of the distributed matrix B with elements
27 * B(i,j) = R(i) * A(i,j) * C(j) have absolute value 1.
28 *
29 * R(i) and C(j) are restricted to be between SMLNUM = smallest safe
30 * number and BIGNUM = largest safe number. Use of these scaling
31 * factors is not guaranteed to reduce the condition number of
32 * sub( A ) but works well in practice.
33 *
34 * Notes
35 * =====
36 *
37 * Each global data object is described by an associated description
38 * vector. This vector stores the information required to establish
39 * the mapping between an object element and its corresponding process
40 * and memory location.
41 *
42 * Let A be a generic term for any 2D block cyclicly distributed array.
43 * Such a global array has an associated description vector DESCA.
44 * In the following comments, the character _ should be read as
45 * "of the global array".
46 *
47 * NOTATION STORED IN EXPLANATION
48 * --------------- -------------- --------------------------------------
49 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
50 * DTYPE_A = 1.
51 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
52 * the BLACS process grid A is distribu-
53 * ted over. The context itself is glo-
54 * bal, but the handle (the integer
55 * value) may vary.
56 * M_A (global) DESCA( M_ ) The number of rows in the global
57 * array A.
58 * N_A (global) DESCA( N_ ) The number of columns in the global
59 * array A.
60 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
61 * the rows of the array.
62 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
63 * the columns of the array.
64 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
65 * row of the array A is distributed.
66 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
67 * first column of the array A is
68 * distributed.
69 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
70 * array. LLD_A >= MAX(1,LOCr(M_A)).
71 *
72 * Let K be the number of rows or columns of a distributed matrix,
73 * and assume that its process grid has dimension p x q.
74 * LOCr( K ) denotes the number of elements of K that a process
75 * would receive if K were distributed over the p processes of its
76 * process column.
77 * Similarly, LOCc( K ) denotes the number of elements of K that a
78 * process would receive if K were distributed over the q processes of
79 * its process row.
80 * The values of LOCr() and LOCc() may be determined via a call to the
81 * ScaLAPACK tool function, NUMROC:
82 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
83 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
84 * An upper bound for these quantities may be computed by:
85 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
86 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
87 *
88 * Arguments
89 * =========
90 *
91 * M (global input) INTEGER
92 * The number of rows to be operated on i.e the number of rows
93 * of the distributed submatrix sub( A ). M >= 0.
94 *
95 * N (global input) INTEGER
96 * The number of columns to be operated on i.e the number of
97 * columns of the distributed submatrix sub( A ). N >= 0.
98 *
99 * A (local input) COMPLEX pointer into the local memory
100 * to an array of dimension ( LLD_A, LOCc(JA+N-1) ), the
101 * local pieces of the M-by-N distributed matrix whose
102 * equilibration factors are to be computed.
103 *
104 * IA (global input) INTEGER
105 * The row index in the global array A indicating the first
106 * row of sub( A ).
107 *
108 * JA (global input) INTEGER
109 * The column index in the global array A indicating the
110 * first column of sub( A ).
111 *
112 * DESCA (global and local input) INTEGER array of dimension DLEN_.
113 * The array descriptor for the distributed matrix A.
114 *
115 * R (local output) REAL array, dimension LOCr(M_A)
116 * If INFO = 0 or INFO > IA+M-1, R(IA:IA+M-1) contains the row
117 * scale factors for sub( A ). R is aligned with the distributed
118 * matrix A, and replicated across every process column. R is
119 * tied to the distributed matrix A.
120 *
121 * C (local output) REAL array, dimension LOCc(N_A)
122 * If INFO = 0, C(JA:JA+N-1) contains the column scale factors
123 * for sub( A ). C is aligned with the distributed matrix A, and
124 * replicated down every process row. C is tied to the distri-
125 * buted matrix A.
126 *
127 * ROWCND (global output) REAL
128 * If INFO = 0 or INFO > IA+M-1, ROWCND contains the ratio of
129 * the smallest R(i) to the largest R(i) (IA <= i <= IA+M-1).
130 * If ROWCND >= 0.1 and AMAX is neither too large nor too small,
131 * it is not worth scaling by R(IA:IA+M-1).
132 *
133 * COLCND (global output) REAL
134 * If INFO = 0, COLCND contains the ratio of the smallest C(j)
135 * to the largest C(j) (JA <= j <= JA+N-1). If COLCND >= 0.1, it
136 * is not worth scaling by C(JA:JA+N-1).
137 *
138 * AMAX (global output) REAL
139 * Absolute value of largest distributed matrix element. If
140 * AMAX is very close to overflow or very close to underflow,
141 * the matrix should be scaled.
142 *
143 * INFO (global output) INTEGER
144 * = 0: successful exit
145 * < 0: If the i-th argument is an array and the j-entry had
146 * an illegal value, then INFO = -(i*100+j), if the i-th
147 * argument is a scalar and had an illegal value, then
148 * INFO = -i.
149 * > 0: If INFO = i, and i is
150 * <= M: the i-th row of the distributed matrix sub( A )
151 * is exactly zero,
152 * > M: the (i-M)-th column of the distributed
153 * matrix sub( A ) is exactly zero.
154 *
155 * =====================================================================
156 *
157 * .. Parameters ..
158  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
159  $ lld_, mb_, m_, nb_, n_, rsrc_
160  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
161  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
162  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
163  REAL ONE, ZERO
164  parameter( one = 1.0e+0, zero = 0.0e+0 )
165 * ..
166 * .. Local Scalars ..
167  CHARACTER COLCTOP, ROWCTOP
168  INTEGER I, IACOL, IAROW, ICOFF, ICTXT, IDUMM, IIA,
169  $ ioffa, iroff, j, jja, lda, mp, mycol, myrow,
170  $ npcol, nprow, nq
171  REAL BIGNUM, RCMAX, RCMIN, SMLNUM
172  COMPLEX ZDUM
173 * ..
174 * .. Local Arrays ..
175  INTEGER DESCC( DLEN_ ), DESCR( DLEN_ )
176 * ..
177 * .. External Subroutines ..
178  EXTERNAL blacs_gridinfo, chk1mat, descset, igamx2d,
179  $ infog2l, pchk1mat, pb_topget, pxerbla, sgamn2d,
180  $ sgamx2d
181 * ..
182 * .. External Functions ..
183  INTEGER INDXL2G, NUMROC
184  REAL PSLAMCH
185  EXTERNAL indxl2g, numroc, pslamch
186 * ..
187 * .. Intrinsic Functions ..
188  INTRINSIC abs, aimag, max, min, mod, real
189 * ..
190 * .. Statement Functions ..
191  REAL CABS1
192 * ..
193 * .. Statement Function definitions ..
194  cabs1( zdum ) = abs( real( zdum ) ) + abs( aimag( zdum ) )
195 * ..
196 * .. Executable Statements ..
197 *
198 * Get grid parameters
199 *
200  ictxt = desca( ctxt_ )
201  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
202 *
203 * Test the input parameters.
204 *
205  info = 0
206  IF( nprow.EQ.-1 ) THEN
207  info = -(600+ctxt_)
208  ELSE
209  CALL chk1mat( m, 1, n, 2, ia, ja, desca, 6, info )
210  CALL pchk1mat( m, 1, n, 2, ia, ja, desca, 6, 0, idumm, idumm,
211  $ info )
212  END IF
213 *
214  IF( info.NE.0 ) THEN
215  CALL pxerbla( ictxt, 'PCGEEQU', -info )
216  RETURN
217  END IF
218 *
219 * Quick return if possible
220 *
221  IF( m.EQ.0 .OR. n.EQ.0 ) THEN
222  rowcnd = one
223  colcnd = one
224  amax = zero
225  RETURN
226  END IF
227 *
228  CALL pb_topget( ictxt, 'Combine', 'Rowwise', rowctop )
229  CALL pb_topget( ictxt, 'Combine', 'Columnwise', colctop )
230 *
231 * Get machine constants and local indexes.
232 *
233  smlnum = pslamch( ictxt, 'S' )
234  bignum = one / smlnum
235  CALL infog2l( ia, ja, desca, nprow, npcol, myrow, mycol, iia, jja,
236  $ iarow, iacol )
237  iroff = mod( ia-1, desca( mb_ ) )
238  icoff = mod( ja-1, desca( nb_ ) )
239  mp = numroc( m+iroff, desca( mb_ ), myrow, iarow, nprow )
240  nq = numroc( n+icoff, desca( nb_ ), mycol, iacol, npcol )
241  IF( myrow.EQ.iarow )
242  $ mp = mp - iroff
243  IF( mycol.EQ.iacol )
244  $ nq = nq - icoff
245  lda = desca( lld_ )
246 *
247 * Assign descriptors for R and C arrays
248 *
249  CALL descset( descr, m, 1, desca( mb_ ), 1, 0, 0, ictxt,
250  $ max( 1, mp ) )
251  CALL descset( descc, 1, n, 1, desca( nb_ ), 0, 0, ictxt, 1 )
252 *
253 * Compute row scale factors.
254 *
255  DO 10 i = iia, iia+mp-1
256  r( i ) = zero
257  10 CONTINUE
258 *
259 * Find the maximum element in each row.
260 *
261  ioffa = (jja-1)*lda
262  DO 30 j = jja, jja+nq-1
263  DO 20 i = iia, iia+mp-1
264  r( i ) = max( r( i ), cabs1( a( ioffa + i ) ) )
265  20 CONTINUE
266  ioffa = ioffa + lda
267  30 CONTINUE
268  CALL sgamx2d( ictxt, 'Rowwise', rowctop, mp, 1, r( iia ),
269  $ max( 1, mp ), idumm, idumm, -1, -1, mycol )
270 *
271 * Find the maximum and minimum scale factors.
272 *
273  rcmin = bignum
274  rcmax = zero
275  DO 40 i = iia, iia+mp-1
276  rcmax = max( rcmax, r( i ) )
277  rcmin = min( rcmin, r( i ) )
278  40 CONTINUE
279  CALL sgamx2d( ictxt, 'Columnwise', colctop, 1, 1, rcmax, 1, idumm,
280  $ idumm, -1, -1, mycol )
281  CALL sgamn2d( ictxt, 'Columnwise', colctop, 1, 1, rcmin, 1, idumm,
282  $ idumm, -1, -1, mycol )
283  amax = rcmax
284 *
285  IF( rcmin.EQ.zero ) THEN
286 *
287 * Find the first zero scale factor and return an error code.
288 *
289  DO 50 i = iia, iia+mp-1
290  IF( r( i ).EQ.zero .AND. info.EQ.0 )
291  $ info = indxl2g( i, desca( mb_ ), myrow, desca( rsrc_ ),
292  $ nprow ) - ia + 1
293  50 CONTINUE
294  CALL igamx2d( ictxt, 'Columnwise', colctop, 1, 1, info, 1,
295  $ idumm, idumm, -1, -1, mycol )
296  IF( info.NE.0 )
297  $ RETURN
298  ELSE
299 *
300 * Invert the scale factors.
301 *
302  DO 60 i = iia, iia+mp-1
303  r( i ) = one / min( max( r( i ), smlnum ), bignum )
304  60 CONTINUE
305 *
306 * Compute ROWCND = min(R(I)) / max(R(I))
307 *
308  rowcnd = max( rcmin, smlnum ) / min( rcmax, bignum )
309 *
310  END IF
311 *
312 * Compute column scale factors
313 *
314  DO 70 j = jja, jja+nq-1
315  c( j ) = zero
316  70 CONTINUE
317 *
318 * Find the maximum element in each column,
319 * assuming the row scaling computed above.
320 *
321  ioffa = (jja-1)*lda
322  DO 90 j = jja, jja+nq-1
323  DO 80 i = iia, iia+mp-1
324  c( j ) = max( c( j ), cabs1( a( ioffa + i ) )*r( i ) )
325  80 CONTINUE
326  ioffa = ioffa + lda
327  90 CONTINUE
328  CALL sgamx2d( ictxt, 'Columnwise', colctop, 1, nq, c( jja ),
329  $ 1, idumm, idumm, -1, -1, mycol )
330 *
331 * Find the maximum and minimum scale factors.
332 *
333  rcmin = bignum
334  rcmax = zero
335  DO 100 j = jja, jja+nq-1
336  rcmin = min( rcmin, c( j ) )
337  rcmax = max( rcmax, c( j ) )
338  100 CONTINUE
339  CALL sgamx2d( ictxt, 'Columnwise', colctop, 1, 1, rcmax, 1, idumm,
340  $ idumm, -1, -1, mycol )
341  CALL sgamn2d( ictxt, 'Columnwise', colctop, 1, 1, rcmin, 1, idumm,
342  $ idumm, -1, -1, mycol )
343 *
344  IF( rcmin.EQ.zero ) THEN
345 *
346 * Find the first zero scale factor and return an error code.
347 *
348  DO 110 j = jja, jja+nq-1
349  IF( c( j ).EQ.zero .AND. info.EQ.0 )
350  $ info = m + indxl2g( j, desca( nb_ ), mycol,
351  $ desca( csrc_ ), npcol ) - ja + 1
352  110 CONTINUE
353  CALL igamx2d( ictxt, 'Columnwise', colctop, 1, 1, info, 1,
354  $ idumm, idumm, -1, -1, mycol )
355  IF( info.NE.0 )
356  $ RETURN
357  ELSE
358 *
359 * Invert the scale factors.
360 *
361  DO 120 j = jja, jja+nq-1
362  c( j ) = one / min( max( c( j ), smlnum ), bignum )
363  120 CONTINUE
364 *
365 * Compute COLCND = min(C(J)) / max(C(J))
366 *
367  colcnd = max( rcmin, smlnum ) / min( rcmax, bignum )
368 *
369  END IF
370 *
371  RETURN
372 *
373 * End of PCGEEQU
374 *
375  END
max
#define max(A, B)
Definition: pcgemr.c:180
pcgeequ
subroutine pcgeequ(M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, AMAX, INFO)
Definition: pcgeequ.f:3
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pchk1mat
subroutine pchk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, NEXTRA, EX, EXPOS, INFO)
Definition: pchkxmat.f:3
descset
subroutine descset(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD)
Definition: descset.f:3
chk1mat
subroutine chk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, INFO)
Definition: chk1mat.f:3
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
min
#define min(A, B)
Definition: pcgemr.c:181