ScaLAPACK 2.1  2.1 ScaLAPACK: Scalable Linear Algebra PACKage
pzqrt14.f
Go to the documentation of this file.
1  DOUBLE PRECISION FUNCTION pzqrt14( TRANS, M, N, NRHS, A, IA, JA,
2  \$ DESCA, X, IX, JX, DESCX, WORK )
3 *
4 * -- ScaLAPACK routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 1, 1997
8 *
9 * .. Scalar Arguments ..
10  CHARACTER trans
11  INTEGER ia, ix, ja, jx, m, n, nrhs
12 * ..
13 * .. Array Arguments ..
14  INTEGER desca( * ), descx( * )
15  COMPLEX*16 a( * ), work( * ), x( * )
16 * ..
17 *
18 * Purpose
19 * =======
20 *
21 * PZQRT14 checks whether sub( X ) is in the row space of sub( A ) or
22 * sub( A )', where sub( A ) denotes A( IA:IA+M-1, JA:JA+N-1 ) and
23 * sub( X ) denotes X( IX:IX+N-1, JX:JX+NRHS-1 ) if TRANS = 'N', and
24 * X( IX:IX+N-1, JX:JX+NRHS-1 ) otherwise. It does so by scaling both
25 * sub( X ) and sub( A ) such that their norms are in the range
26 * [sqrt(eps), 1/sqrt(eps)], then computing an LQ factorization of
27 * [sub( A )',sub( X )]' (if TRANS = 'N') or a QR factorization of
28 * [sub( A ),sub( X )] otherwise, and returning the norm of the trailing
29 * triangle, scaled by MAX(M,N,NRHS)*eps.
30 *
31 * Notes
32 * =====
33 *
34 * Each global data object is described by an associated description
35 * vector. This vector stores the information required to establish
36 * the mapping between an object element and its corresponding process
37 * and memory location.
38 *
39 * Let A be a generic term for any 2D block cyclicly distributed array.
40 * Such a global array has an associated description vector DESCA.
41 * In the following comments, the character _ should be read as
42 * "of the global array".
43 *
44 * NOTATION STORED IN EXPLANATION
45 * --------------- -------------- --------------------------------------
46 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
47 * DTYPE_A = 1.
48 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
49 * the BLACS process grid A is distribu-
50 * ted over. The context itself is glo-
51 * bal, but the handle (the integer
52 * value) may vary.
53 * M_A (global) DESCA( M_ ) The number of rows in the global
54 * array A.
55 * N_A (global) DESCA( N_ ) The number of columns in the global
56 * array A.
57 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
58 * the rows of the array.
59 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
60 * the columns of the array.
61 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
62 * row of the array A is distributed.
63 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
64 * first column of the array A is
65 * distributed.
66 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
67 * array. LLD_A >= MAX(1,LOCr(M_A)).
68 *
69 * Let K be the number of rows or columns of a distributed matrix,
70 * and assume that its process grid has dimension p x q.
71 * LOCr( K ) denotes the number of elements of K that a process
72 * would receive if K were distributed over the p processes of its
73 * process column.
74 * Similarly, LOCc( K ) denotes the number of elements of K that a
75 * process would receive if K were distributed over the q processes of
76 * its process row.
77 * The values of LOCr() and LOCc() may be determined via a call to the
78 * ScaLAPACK tool function, NUMROC:
79 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
80 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
81 * An upper bound for these quantities may be computed by:
82 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
83 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
84 *
85 * Arguments
86 * =========
87 *
88 * TRANS (global input) CHARACTER*1
89 * = 'N': No transpose, check for sub( X ) in the row space of
90 * sub( A ),
91 * = 'C': Conjugate transpose, check for sub( X ) in row space
92 * of sub( A )'.
93 *
94 * M (global input) INTEGER
95 * The number of rows to be operated on, i.e. the number of rows
96 * of the distributed submatrix sub( A ). M >= 0.
97 *
98 * N (global input) INTEGER
99 * The number of columns to be operated on, i.e. the number of
100 * columns of the distributed submatrix sub( A ). N >= 0.
101 *
102 * NRHS (global input) INTEGER
103 * The number of right hand sides, i.e., the number of columns
104 * of the distributed submatrix sub( X ). NRHS >= 0.
105 *
106 * A (local input) COMPLEX*16 pointer into the local memory
107 * to an array of dimension (LLD_A, LOCc(JA+N-1)). This array
108 * contains the local pieces of the M-by-N distributed matrix
109 * sub( A ).
110 *
111 * IA (global input) INTEGER
112 * The row index in the global array A indicating the first
113 * row of sub( A ).
114 *
115 * JA (global input) INTEGER
116 * The column index in the global array A indicating the
117 * first column of sub( A ).
118 *
119 * DESCA (global and local input) INTEGER array of dimension DLEN_.
120 * The array descriptor for the distributed matrix A.
121 *
122 * X (local input) COMPLEX*16 pointer into the local
123 * memory to an array of dimension (LLD_X,LOCc(JX+NRHS-1)).
124 * On entry, this array contains the local pieces of the
125 * N-by-NRHS distributed submatrix sub( X ) if TRANS = 'N',
126 * and the M-by-NRHS distributed submatrix sub( X ) otherwise.
127 *
128 * IX (global input) INTEGER
129 * The row index in the global array X indicating the first
130 * row of sub( X ).
131 *
132 * JX (global input) INTEGER
133 * The column index in the global array X indicating the
134 * first column of sub( X ).
135 *
136 * DESCX (global and local input) INTEGER array of dimension DLEN_.
137 * The array descriptor for the distributed matrix X.
138 *
139 * WORK (local workspace) COMPLEX*16 array dimension (LWORK)
140 * If TRANS='N', LWORK >= MNRHSP * NQ + LTAU + LWF and
141 * LWORK >= MP * NNRHSQ + LTAU + LWF otherwise, where
142 *
143 * IF TRANS='N', (LQ fact)
144 * MNRHSP = NUMROC( M+NRHS+IROFFA, MB_A, MYROW, IAROW,
145 * NPROW )
146 * LTAU = NUMROC( IA+MIN( M+NRHS, N )-1, MB_A, MYROW,
147 * RSRC_A, NPROW )
148 * LWF = MB_A * ( MB_A + MNRHSP + NQ0 )
149 * ELSE (QR fact)
150 * NNRHSQ = NUMROC( N+NRHS+ICOFFA, NB_A, MYCOL, IACOL,
151 * NPCOL )
152 * LTAU = NUMROC( JA+MIN( M, N+NRHS )-1, NB_A, MYCOL,
153 * CSRC_A, NPCOL )
154 * LWF = NB_A * ( NB_A + MP0 + NNRHSQ )
155 * END IF
156 *
157 * and,
158 *
159 * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ),
160 * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ),
161 * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ),
162 * MP0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ),
163 * NQ0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ).
164 *
165 * INDXG2P and NUMROC are ScaLAPACK tool functions;
166 * MYROW, MYCOL, NPROW and NPCOL can be determined by calling
167 * the subroutine BLACS_GRIDINFO.
168 *
169 *
170 * =====================================================================
171 *
172 * .. Parameters ..
173  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
174  \$ lld_, mb_, m_, nb_, n_, rsrc_
175  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
176  \$ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
177  \$ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
178  DOUBLE PRECISION one, zero
179  parameter( zero = 0.0d+0, one = 1.0d+0 )
180 * ..
181 * .. Local Scalars ..
182  LOGICAL tpsd
183  INTEGER iacol, iarow, icoffa, ictxt, idum, iia, info,
184  \$ iptau, ipw, ipwa, iroffa, iwa, iwx, j, jja,
185  \$ jwa, jwx, ldw, lwork, mpwa, mpw, mqw, mycol,
186  \$ myrow, npcol, nprow, npw, nqwa, nqw
187  DOUBLE PRECISION anrm, err, xnrm
188  COMPLEX*16 amax
189 * ..
190 * .. Local Arrays ..
191  INTEGER descw( dlen_ ), idum1( 1 ), idum2( 1 )
192  DOUBLE PRECISION rwork( 1 )
193 * ..
194 * .. External Functions ..
195  LOGICAL lsame
196  INTEGER numroc
197  DOUBLE PRECISION pdlamch, pzlange
198  EXTERNAL lsame, numroc, pdlamch, pzlange
199 * ..
200 * .. External Subroutines ..
201  EXTERNAL blacs_gridinfo, descset, dgamx2d, infog2l,
202  \$ pxerbla, pzmax1, pzcopy, pzgelqf,
204 * ..
205 * .. Intrinsic Functions ..
206  INTRINSIC abs, dble, max, min, mod
207 * ..
208 * .. Executable Statements ..
209 *
210 * Get grid parameters
211 *
212  ictxt = desca( ctxt_ )
213  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
214 *
215  pzqrt14 = zero
216 *
217  ipwa = 1
218  iroffa = mod( ia-1, desca( mb_ ) )
219  icoffa = mod( ja-1, desca( nb_ ) )
220  iwa = iroffa + 1
221  jwa = icoffa + 1
222  CALL infog2l( ia, ja, desca, nprow, npcol, myrow, mycol, iia,
223  \$ jja, iarow, iacol )
224  mpwa = numroc( m+iroffa, desca( mb_ ), myrow, iarow, nprow )
225  nqwa = numroc( n+icoffa, desca( nb_ ), mycol, iacol, npcol )
226 *
227  info = 0
228  IF( lsame( trans, 'N' ) ) THEN
229  IF( n.LE.0 .OR. nrhs.LE.0 )
230  \$ RETURN
231  tpsd = .false.
232  mpw = numroc( m+nrhs+iroffa, desca( mb_ ), myrow, iarow,
233  \$ nprow )
234  nqw = nqwa
235 *
236 * Assign descriptor DESCW for workspace WORK and pointers to
237 * matrices sub( A ) and sub( X ) in workspace
238 *
239  iwx = iwa + m
240  jwx = jwa
241  ldw = max( 1, mpw )
242  CALL descset( descw, m+nrhs+iroffa, n+icoffa, desca( mb_ ),
243  \$ desca( nb_ ), iarow, iacol, ictxt, ldw )
244 *
245  ELSE IF( lsame( trans, 'C' ) ) THEN
246  IF( m.LE.0 .OR. nrhs.LE.0 )
247  \$ RETURN
248  tpsd = .true.
249  mpw = mpwa
250  nqw = numroc( n+nrhs+icoffa, desca( nb_ ), mycol, iacol,
251  \$ npcol )
252 *
253 * Assign descriptor DESCW for workspace WORK and pointers to
254 * matrices sub( A ) and sub( X ) in workspace
255 *
256  iwx = iwa
257  jwx = jwa + n
258  ldw = max( 1, mpw )
259  CALL descset( descw, m+iroffa, n+nrhs+icoffa, desca( mb_ ),
260  \$ desca( nb_ ), iarow, iacol, ictxt, ldw )
261  ELSE
262  CALL pxerbla( ictxt, 'PZQRT14', -1 )
263  RETURN
264  END IF
265 *
266 * Copy and scale sub( A )
267 *
268  iptau = ipwa + mpw*nqw
269  CALL pzlacpy( 'All', m, n, a, ia, ja, desca, work( ipwa ), iwa,
270  \$ jwa, descw )
271  rwork( 1 ) = zero
272  anrm = pzlange( 'M', m, n, work( ipwa ), iwa, jwa, descw, rwork )
273  IF( anrm.NE.zero )
274  \$ CALL pzlascl( 'G', anrm, one, m, n, work( ipwa ), iwa,
275  \$ jwa, descw, info )
276 *
277 * Copy sub( X ) or sub( X )' into the right place and scale it
278 *
279  IF( tpsd ) THEN
280 *
281 * Copy sub( X ) into columns jwa+n:jwa+n+nrhs-1 of work
282 *
283  DO 10 j = 1, nrhs
284  CALL pzcopy( m, x, ix, jx+j-1, descx, 1, work( ipwa ), iwx,
285  \$ jwx+j-1, descw, 1 )
286  10 CONTINUE
287  xnrm = pzlange( 'M', m, nrhs, work( ipwa ), iwx, jwx, descw,
288  \$ rwork )
289  IF( xnrm.NE.zero )
290  \$ CALL pzlascl( 'G', xnrm, one, m, nrhs, work( ipwa ), iwx,
291  \$ jwx, descw, info )
292 *
293 * Compute QR factorization of work(iwa:iwa+m-1,jwa:jwa+n+nrhs-1)
294 *
295  mqw = numroc( m+icoffa, desca( nb_ ), mycol, iacol, npcol )
296  ipw = iptau + min( mqw, nqw )
297  lwork = descw( nb_ ) * ( mpw + nqw + descw( nb_ ) )
298  CALL pzgeqrf( m, n+nrhs, work( ipwa ), iwa, jwa, descw,
299  \$ work( iptau ), work( ipw ), lwork, info )
300 *
301 * Compute largest entry in upper triangle of
302 * work(iwa+n:iwa+m-1,jwa+n:jwa+n+nrhs-1)
303 *
304  err = zero
305  IF( n.LT.m ) THEN
306  DO 20 j = jwx, jwa+n+nrhs-1
307  CALL pzmax1( min(m-n,j-jwx+1), amax, idum, work( ipwa ),
308  \$ iwa+n, j, descw, 1 )
309  err = max( err, abs( amax ) )
310  20 CONTINUE
311  END IF
312  CALL dgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, idum1, idum2,
313  \$ -1, -1, 0 )
314 *
315  ELSE
316 *
317 * Copy sub( X )' into rows iwa+m:iwa+m+nrhs-1 of work
318 *
319  DO 30 j = 1, nrhs
320  CALL pzcopy( n, x, ix, jx+j-1, descx, 1, work( ipwa ),
321  \$ iwx+j-1, jwx, descw, descw( m_ ) )
322  CALL pzlacgv( n, work( ipwa ), iwx+j-1, jwx, descw,
323  \$ descw( m_ ) )
324  30 CONTINUE
325 *
326  xnrm = pzlange( 'M', nrhs, n, work( ipwa ), iwx, jwx, descw,
327  \$ rwork )
328  IF( xnrm.NE.zero )
329  \$ CALL pzlascl( 'G', xnrm, one, nrhs, n, work( ipwa ), iwx,
330  \$ jwx, descw, info )
331 *
332 * Compute LQ factorization of work(iwa:iwa+m+nrhs-1,jwa:jwa+n-1)
333 *
334  npw = numroc( n+iroffa, desca( mb_ ), myrow, iarow, nprow )
335  ipw = iptau + min( mpw, npw )
336  lwork = descw( mb_ ) * ( mpw + nqw + descw( mb_ ) )
337  CALL pzgelqf( m+nrhs, n, work( ipwa ), iwa, jwa, descw,
338  \$ work( iptau ), work( ipw ), lwork, info )
339 *
340 * Compute largest entry in lower triangle in
341 * work(iwa+m:iwa+m+nrhs-1,jwa+m:jwa+n-1)
342 *
343  err = zero
344  DO 40 j = jwa+m, min( jwa+n-1, jwa+m+nrhs-1 )
345  CALL pzmax1( jwa+m+nrhs-j, amax, idum, work( ipwa ),
346  \$ iwx+j-jwa-m, j, descw, 1 )
347  err = max( err, abs( amax ) )
348  40 CONTINUE
349  CALL dgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, idum1, idum2,
350  \$ -1, -1, 0 )
351 *
352  END IF
353 *
354  pzqrt14 = err / ( dble( max( m, n, nrhs ) ) *
355  \$ pdlamch( ictxt, 'Epsilon' ) )
356 *
357  RETURN
358 *
359 * End of PZQRT14
360 *
361  END
pzgeqrf
subroutine pzgeqrf(M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO)
Definition: pzgeqrf.f:3
max
#define max(A, B)
Definition: pcgemr.c:180
pzlange
double precision function pzlange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pzlange.f:3
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pzgelqf
subroutine pzgelqf(M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO)
Definition: pzgelqf.f:3
lsame
logical function lsame(CA, CB)
Definition: tools.f:1724
pzlascl
subroutine pzlascl(TYPE, CFROM, CTO, M, N, A, IA, JA, DESCA, INFO)
Definition: pzlascl.f:3
descset
subroutine descset(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD)
Definition: descset.f:3
pzlacgv
subroutine pzlacgv(N, X, IX, JX, DESCX, INCX)
Definition: pzlacgv.f:2
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
pzlacpy
subroutine pzlacpy(UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, DESCB)
Definition: pzlacpy.f:3
pzqrt14
double precision function pzqrt14(TRANS, M, N, NRHS, A, IA, JA, DESCA, X, IX, JX, DESCX, WORK)
Definition: pzqrt14.f:3
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
pzmax1
subroutine pzmax1(N, AMAX, INDX, X, IX, JX, DESCX, INCX)
Definition: pzmax1.f:2
pdlamch
double precision function pdlamch(ICTXT, CMACH)
Definition: pdblastst.f:6769
min
#define min(A, B)
Definition: pcgemr.c:181