ScaLAPACK 2.1: Scalable Linear Algebra PACKage
pdlassq.f
1  SUBROUTINE pdlassq( N, X, IX, JX, DESCX, INCX, SCALE, SUMSQ )
2 *
3 * -- ScaLAPACK auxiliary routine (version 1.7) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * May 1, 1997
7 *
8 * .. Scalar Arguments ..
9  INTEGER IX, INCX, JX, N
10  DOUBLE PRECISION SCALE, SUMSQ
11 * ..
12 * .. Array Arguments ..
13  INTEGER DESCX( * )
14  DOUBLE PRECISION X( * )
15 * ..
16 *
17 * Purpose
18 * =======
19 *
20 * PDLASSQ returns the values scl and smsq such that
21 *
22 * ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq,
23 *
24 * where x( i ) = sub( X ) = X( IX+(JX-1)*DESCX(M_)+(i-1)*INCX ).
25 * The value of sumsq is assumed to be non-negative and scl returns the
26 * value
27 *
28 * scl = max( scale, abs( x( i ) ) ).
29 *
30 * scale and sumsq must be supplied in SCALE and SUMSQ respectively.
31 * SCALE and SUMSQ are overwritten by scl and smsq respectively.
32 *
33 * The routine makes only one pass through the vector sub( X ).
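*
*  Worked example (illustration only): for n = 2, x = ( 3, 4 ) and the
*  conventional starting values SCALE = 0, SUMSQ = 1, one pass gives
*  scl = 4 and smsq = 1 + ( 3/4 )**2 = 1.5625, so that
*  ( scl**2 )*smsq = 16 * 1.5625 = 25 = 3**2 + 4**2.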
34 *
35 * Notes
36 * =====
37 *
38 * Each global data object is described by an associated description
39 * vector. This vector stores the information required to establish
40 * the mapping between an object element and its corresponding process
41 * and memory location.
42 *
43 * Let A be a generic term for any 2D block cyclically distributed array.
44 * Such a global array has an associated description vector DESCA.
45 * In the following comments, the character _ should be read as
46 * "of the global array".
47 *
48 *  NOTATION          STORED IN        EXPLANATION
49 *  ----------------- ---------------- --------------------------------------
50 *  DTYPE_A (global)  DESCA( DTYPE_ )  The descriptor type.  In this case,
51 *                                     DTYPE_A = 1.
52 *  CTXT_A  (global)  DESCA( CTXT_ )   The BLACS context handle, indicating
53 *                                     the BLACS process grid A is distribu-
54 *                                     ted over.  The context itself is glo-
55 *                                     bal, but the handle (the integer
56 *                                     value) may vary.
57 *  M_A     (global)  DESCA( M_ )      The number of rows in the global
58 *                                     array A.
59 *  N_A     (global)  DESCA( N_ )      The number of columns in the global
60 *                                     array A.
61 *  MB_A    (global)  DESCA( MB_ )     The blocking factor used to distribute
62 *                                     the rows of the array.
63 *  NB_A    (global)  DESCA( NB_ )     The blocking factor used to distribute
64 *                                     the columns of the array.
65 *  RSRC_A  (global)  DESCA( RSRC_ )   The process row over which the first
66 *                                     row of the array A is distributed.
67 *  CSRC_A  (global)  DESCA( CSRC_ )   The process column over which the
68 *                                     first column of the array A is
69 *                                     distributed.
70 *  LLD_A   (local)   DESCA( LLD_ )    The leading dimension of the local
71 *                                     array.  LLD_A >= MAX(1,LOCr(M_A)).
72 *
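*  As an illustration, a 100 x 1 distributed column vector partitioned
*  in blocks of 32 rows, rooted at process ( 0, 0 ) of the grid, with
*  BLACS context handle ICTXT and local leading dimension LLD, has the
*  descriptor
*
*     DESCX = ( 1, ICTXT, 100, 1, 32, 32, 0, 0, LLD ).
*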
73 * Let K be the number of rows or columns of a distributed matrix,
74 * and assume that its process grid has dimension p x q.
75 * LOCr( K ) denotes the number of elements of K that a process
76 * would receive if K were distributed over the p processes of its
77 * process column.
78 * Similarly, LOCc( K ) denotes the number of elements of K that a
79 * process would receive if K were distributed over the q processes of
80 * its process row.
81 * The values of LOCr() and LOCc() may be determined via a call to the
82 * ScaLAPACK tool function, NUMROC:
83 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
84 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
85 * An upper bound for these quantities may be computed by:
86 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
87 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
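*
*  For example, with M = 9, MB_A = 2, RSRC_A = 0 and NPROW = 2, the two
*  process rows hold LOCr( 9 ) = 5 and 4 rows respectively, and the
*  bound gives LOCr( 9 ) <= ceil( ceil( 9/2 )/2 )*2 = 6.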
88 *
89 * Because vectors may be viewed as a subclass of matrices, a
90 * distributed vector is considered to be a distributed matrix.
91 *
92 * The results are only available in the scope of sub( X ), i.e. if
93 * sub( X ) is distributed along a process row, the correct results are
94 * only available in this process row of the grid. Similarly, if sub( X )
95 * is distributed along a process column, the correct results are only
96 * available in this process column of the grid.
97 *
98 * Arguments
99 * =========
100 *
101 * N (global input) INTEGER
102 * The length of the distributed vector sub( X ).
103 *
104 * X (input) DOUBLE PRECISION
105 * The vector for which a scaled sum of squares is computed.
106 * x( i ) = X(IX+(JX-1)*M_X +(i-1)*INCX ), 1 <= i <= n.
107 *
108 * IX (global input) INTEGER
109 * The row index in the global array X indicating the first
110 * row of sub( X ).
111 *
112 * JX (global input) INTEGER
113 * The column index in the global array X indicating the
114 * first column of sub( X ).
115 *
116 * DESCX (global and local input) INTEGER array of dimension DLEN_.
117 * The array descriptor for the distributed matrix X.
118 *
119 * INCX (global input) INTEGER
120 * The global increment for the elements of X. Only two values
121 * of INCX are supported in this version, namely 1 and M_X.
122 * INCX must not be zero.
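*  INCX = 1 corresponds to the distributed column
*  sub( X ) = X( IX:IX+N-1, JX ), while INCX = M_X corresponds to the
*  distributed row sub( X ) = X( IX, JX:JX+N-1 ).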
123 *
124 * SCALE (local input/local output) DOUBLE PRECISION
125 * On entry, the value scale in the equation above.
126 * On exit, SCALE is overwritten with scl, the scaling factor
127 * for the sum of squares.
128 *
129 * SUMSQ (local input/local output) DOUBLE PRECISION
130 * On entry, the value sumsq in the equation above.
131 * On exit, SUMSQ is overwritten with smsq, the basic sum of
132 * squares from which scl has been factored out.
133 *
134 * =====================================================================
135 *
136 * .. Parameters ..
137  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
138  $ LLD_, MB_, M_, NB_, N_, RSRC_
139  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
140  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
141  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
142  DOUBLE PRECISION ZERO
143  parameter( zero = 0.0d+0 )
144 * ..
145 * .. Local Scalars ..
146  INTEGER I, ICOFF, ICTXT, IIX, IOFF, IROFF, IXCOL,
147  $ IXROW, JJX, LDX, MYCOL, MYROW, NP, NPCOL,
148  $ NPROW, NQ
149  DOUBLE PRECISION TEMP1
150 * ..
151 * .. Local Arrays ..
152  DOUBLE PRECISION WORK( 2 )
153 * ..
154 * .. External Subroutines ..
155  EXTERNAL blacs_gridinfo, dcombssq, infog2l, pdtreecomb
156 * ..
157 * .. External Functions ..
158  INTEGER NUMROC
159  EXTERNAL numroc
160 * ..
161 * .. Intrinsic Functions ..
162  INTRINSIC abs, mod
163 * ..
164 * .. Executable Statements ..
165 *
166 * Get grid parameters.
167 *
168  ictxt = descx( ctxt_ )
169  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
170 *
171 * Figure local indexes
172 *
173  CALL infog2l( ix, jx, descx, nprow, npcol, myrow, mycol, iix, jjx,
174  $ ixrow, ixcol )
175 *
176  ldx = descx( lld_ )
177  IF( incx.EQ.descx( m_ ) ) THEN
178 *
179 * X is rowwise distributed.
180 *
181  IF( myrow.NE.ixrow )
182  $ RETURN
183  icoff = mod( jx, descx( nb_ ) )
184  nq = numroc( n+icoff, descx( nb_ ), mycol, ixcol, npcol )
185  IF( mycol.EQ.ixcol )
186  $ nq = nq - icoff
187 *
188 * Code taken directly from LAPACK's DLASSQ (saves a subroutine call)
189 *
190  IF( nq.GT.0 ) THEN
191  ioff = iix + ( jjx - 1 ) * ldx
192  DO 10 i = 1, nq
193  IF( x( ioff ).NE.zero ) THEN
194  temp1 = abs( x( ioff ) )
195  IF( scale.LT.temp1 ) THEN
196  sumsq = 1 + sumsq * ( scale / temp1 )**2
197  scale = temp1
198  ELSE
199  sumsq = sumsq + ( temp1 / scale )**2
200  END IF
201  END IF
202  ioff = ioff + ldx
203  10 CONTINUE
204  END IF
205 *
206 * Take local result and find global
207 *
208  work( 1 ) = scale
209  work( 2 ) = sumsq
210 *
211  CALL pdtreecomb( ictxt, 'Rowwise', 2, work, -1, ixcol,
212  $ dcombssq )
213 *
214  scale = work( 1 )
215  sumsq = work( 2 )
216 *
217  ELSE IF( incx.EQ.1 ) THEN
218 *
219 * X is columnwise distributed.
220 *
221  IF( mycol.NE.ixcol )
222  $ RETURN
223  iroff = mod( ix, descx( mb_ ) )
224  np = numroc( n+iroff, descx( mb_ ), myrow, ixrow, nprow )
225  IF( myrow.EQ.ixrow )
226  $ np = np - iroff
227 *
228 * Code taken directly from LAPACK's DLASSQ (saves a subroutine call)
229 *
230  IF( np.GT.0 ) THEN
231  ioff = iix + ( jjx - 1 ) * ldx
232  DO 20 i = 1, np
233  IF( x( ioff ).NE.zero ) THEN
234  temp1 = abs( x( ioff ) )
235  IF( scale.LT.temp1 ) THEN
236  sumsq = 1 + sumsq*( scale / temp1 )**2
237  scale = temp1
238  ELSE
239  sumsq = sumsq + ( temp1 / scale )**2
240  END IF
241  END IF
242  ioff = ioff + 1
243  20 CONTINUE
244  END IF
245 *
246 * Take local result and find global
247 *
248  work( 1 ) = scale
249  work( 2 ) = sumsq
250 *
251  CALL pdtreecomb( ictxt, 'Columnwise', 2, work, -1, ixcol,
252  $ dcombssq )
253 *
254  scale = work( 1 )
255  sumsq = work( 2 )
256 *
257  END IF
258 *
259  RETURN
260 *
261 * End of PDLASSQ
262 *
263  END
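
A minimal calling sketch (an illustration, not part of the ScaLAPACK source). It assumes a BLACS grid has been initialized, DESCX is a valid descriptor for the distributed matrix X, and N, IX, JX locate an N-element column of X; XNORM is a local variable introduced here for the example. The 2-norm of sub( X ) follows from SCALE**2 * SUMSQ being the sum of squares.

*     Scaled sum of squares of the distributed column sub( X ).
*     Initializing SCALE = 0 and SUMSQ = 1 makes the output satisfy
*     SCALE**2 * SUMSQ = x( 1 )**2 + ... + x( N )**2.
      DOUBLE PRECISION   SCALE, SUMSQ, XNORM
      INTRINSIC          SQRT
*
      SCALE = 0.0D+0
      SUMSQ = 1.0D+0
      CALL PDLASSQ( N, X, IX, JX, DESCX, 1, SCALE, SUMSQ )
*     The result is valid only in the process column holding sub( X ).
      XNORM = SCALE*SQRT( SUMSQ )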