ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdggrqf.f
Go to the documentation of this file.
1  SUBROUTINE pdggrqf( M, P, N, A, IA, JA, DESCA, TAUA, B, IB, JB,
2  $ DESCB, TAUB, WORK, LWORK, INFO )
3 *
4 * -- ScaLAPACK routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 1, 1997
8 *
9 * .. Scalar Arguments ..
10  INTEGER IA, IB, INFO, JA, JB, LWORK, M, N, P
11 * ..
12 * .. Array Arguments ..
13  INTEGER DESCA( * ), DESCB( * )
14  DOUBLE PRECISION A( * ), B( * ), TAUA( * ), TAUB( * ), WORK( * )
15 * ..
16 *
17 * Purpose
18 * =======
19 *
20 * PDGGRQF computes a generalized RQ factorization of
21 * an M-by-N matrix sub( A ) = A(IA:IA+M-1,JA:JA+N-1)
22 * and a P-by-N matrix sub( B ) = B(IB:IB+P-1,JB:JB+N-1):
23 *
24 * sub( A ) = R*Q, sub( B ) = Z*T*Q,
25 *
26 * where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal
27 * matrix, and R and T assume one of the forms:
28 *
29 * if M <= N, R = ( 0 R12 ) M, or if M > N, R = ( R11 ) M-N,
30 * N-M M ( R21 ) N
31 * N
32 *
33 * where R12 or R21 is upper triangular, and
34 *
35 * if P >= N, T = ( T11 ) N , or if P < N, T = ( T11 T12 ) P,
36 * ( 0 ) P-N P N-P
37 * N
38 *
39 * where T11 is upper triangular.
40 *
41 * In particular, if sub( B ) is square and nonsingular, the GRQ
42 * factorization of sub( A ) and sub( B ) implicitly gives the RQ
43 * factorization of sub( A )*inv( sub( B ) ):
44 *
45 * sub( A )*inv( sub( B ) ) = (R*inv(T))*Z'
46 *
47 * where inv( sub( B ) ) denotes the inverse of the matrix sub( B ),
48 * and Z' denotes the transpose of matrix Z.
49 *
50 * Notes
51 * =====
52 *
53 * Each global data object is described by an associated description
54 * vector. This vector stores the information required to establish
55 * the mapping between an object element and its corresponding process
56 * and memory location.
57 *
58 * Let A be a generic term for any 2D block cyclicly distributed array.
59 * Such a global array has an associated description vector DESCA.
60 * In the following comments, the character _ should be read as
61 * "of the global array".
62 *
63 * NOTATION STORED IN EXPLANATION
64 * --------------- -------------- --------------------------------------
65 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
66 * DTYPE_A = 1.
67 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
68 * the BLACS process grid A is distribu-
69 * ted over. The context itself is glo-
70 * bal, but the handle (the integer
71 * value) may vary.
72 * M_A (global) DESCA( M_ ) The number of rows in the global
73 * array A.
74 * N_A (global) DESCA( N_ ) The number of columns in the global
75 * array A.
76 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
77 * the rows of the array.
78 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
79 * the columns of the array.
80 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
81 * row of the array A is distributed.
82 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
83 * first column of the array A is
84 * distributed.
85 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
86 * array. LLD_A >= MAX(1,LOCr(M_A)).
87 *
88 * Let K be the number of rows or columns of a distributed matrix,
89 * and assume that its process grid has dimension p x q.
90 * LOCr( K ) denotes the number of elements of K that a process
91 * would receive if K were distributed over the p processes of its
92 * process column.
93 * Similarly, LOCc( K ) denotes the number of elements of K that a
94 * process would receive if K were distributed over the q processes of
95 * its process row.
96 * The values of LOCr() and LOCc() may be determined via a call to the
97 * ScaLAPACK tool function, NUMROC:
98 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
99 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
100 * An upper bound for these quantities may be computed by:
101 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
102 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
103 *
104 * Arguments
105 * =========
106 *
107 * M (global input) INTEGER
108 * The number of rows to be operated on i.e the number of
109 * rows of the distributed submatrix sub( A ). M >= 0.
110 *
111 * P (global input) INTEGER
112 * The number of rows to be operated on i.e the number of
113 * rows of the distributed submatrix sub( B ). P >= 0.
114 *
115 * N (global input) INTEGER
116 * The number of columns to be operated on i.e the number of
117 * columns of the distributed submatrices sub( A ) and sub( B ).
118 * N >= 0.
119 *
120 * A (local input/local output) DOUBLE PRECISION pointer into the
121 * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
122 * On entry, the local pieces of the M-by-N distributed matrix
123 * sub( A ) which is to be factored. On exit, if M <= N, the
124 * upper triangle of A( IA:IA+M-1, JA+N-M:JA+N-1 ) contains the
125 * M by M upper triangular matrix R; if M >= N, the elements on
126 * and above the (M-N)-th subdiagonal contain the M by N upper
127 * trapezoidal matrix R; the remaining elements, with the array
128 * TAUA, represent the orthogonal matrix Q as a product of
129 * elementary reflectors (see Further Details).
130 *
131 * IA (global input) INTEGER
132 * The row index in the global array A indicating the first
133 * row of sub( A ).
134 *
135 * JA (global input) INTEGER
136 * The column index in the global array A indicating the
137 * first column of sub( A ).
138 *
139 * DESCA (global and local input) INTEGER array of dimension DLEN_.
140 * The array descriptor for the distributed matrix A.
141 *
142 * TAUA (local output) DOUBLE PRECISION array, dimension LOCr(IA+M-1)
143 * This array contains the scalar factors of the elementary
144 * reflectors which represent the orthogonal unitary matrix Q.
145 * TAUA is tied to the distributed matrix A (see Further
146 * Details).
147 *
148 * B (local input/local output) DOUBLE PRECISION pointer into the
149 * local memory to an array of dimension (LLD_B, LOCc(JB+N-1)).
150 * On entry, the local pieces of the P-by-N distributed matrix
151 * sub( B ) which is to be factored. On exit, the elements on
152 * and above the diagonal of sub( B ) contain the min(P,N) by N
153 * upper trapezoidal matrix T (T is upper triangular if P >= N);
154 * the elements below the diagonal, with the array TAUB,
155 * represent the orthogonal matrix Z as a product of elementary
156 * reflectors (see Further Details).
157 *
158 * IB (global input) INTEGER
159 * The row index in the global array B indicating the first
160 * row of sub( B ).
161 *
162 * JB (global input) INTEGER
163 * The column index in the global array B indicating the
164 * first column of sub( B ).
165 *
166 * DESCB (global and local input) INTEGER array of dimension DLEN_.
167 * The array descriptor for the distributed matrix B.
168 *
169 * TAUB (local output) DOUBLE PRECISION array, dimension
170 * LOCc(JB+MIN(P,N)-1). This array contains the scalar factors
171 * TAUB of the elementary reflectors which represent the
172 * orthogonal matrix Z. TAUB is tied to the distributed matrix
173 * B (see Further Details).
174 *
175 * WORK (local workspace/local output) DOUBLE PRECISION array,
176 * dimension (LWORK)
177 * On exit, WORK(1) returns the minimal and optimal LWORK.
178 *
179 * LWORK (local or global input) INTEGER
180 * The dimension of the array WORK.
181 * LWORK is local input and must be at least
182 * LWORK >= MAX( MB_A * ( MpA0 + NqA0 + MB_A ),
183 * MAX( (MB_A*(MB_A-1))/2, (PpB0 + NqB0)*MB_A ) +
184 * MB_A * MB_A,
185 * NB_B * ( PpB0 + NqB0 + NB_B ) ), where
186 *
187 * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ),
188 * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ),
189 * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ),
190 * MpA0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ),
191 * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ),
192 *
193 * IROFFB = MOD( IB-1, MB_B ), ICOFFB = MOD( JB-1, NB_B ),
194 * IBROW = INDXG2P( IB, MB_B, MYROW, RSRC_B, NPROW ),
195 * IBCOL = INDXG2P( JB, NB_B, MYCOL, CSRC_B, NPCOL ),
196 * PpB0 = NUMROC( P+IROFFB, MB_B, MYROW, IBROW, NPROW ),
197 * NqB0 = NUMROC( N+ICOFFB, NB_B, MYCOL, IBCOL, NPCOL ),
198 *
199 * and NUMROC, INDXG2P are ScaLAPACK tool functions;
200 * MYROW, MYCOL, NPROW and NPCOL can be determined by calling
201 * the subroutine BLACS_GRIDINFO.
202 *
203 * If LWORK = -1, then LWORK is global input and a workspace
204 * query is assumed; the routine only calculates the minimum
205 * and optimal size for all work arrays. Each of these
206 * values is returned in the first entry of the corresponding
207 * work array, and no error message is issued by PXERBLA.
208 *
209 * INFO (global output) INTEGER
210 * = 0: successful exit
211 * < 0: If the i-th argument is an array and the j-entry had
212 * an illegal value, then INFO = -(i*100+j), if the i-th
213 * argument is a scalar and had an illegal value, then
214 * INFO = -i.
215 *
216 * Further Details
217 * ===============
218 *
219 * The matrix Q is represented as a product of elementary reflectors
220 *
221 * Q = H(ia) H(ia+1) . . . H(ia+k-1), where k = min(m,n).
222 *
223 * Each H(i) has the form
224 *
225 * H(i) = I - taua * v * v'
226 *
227 * where taua is a real scalar, and v is a real vector with
228 * v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in
229 * A(ia+m-k+i-1,ja:ja+n-k+i-2), and taua in TAUA(ia+m-k+i-1).
230 * To form Q explicitly, use ScaLAPACK subroutine PDORGRQ.
231 * To use Q to update another matrix, use ScaLAPACK subroutine PDORMRQ.
232 *
233 * The matrix Z is represented as a product of elementary reflectors
234 *
235 * Z = H(jb) H(jb+1) . . . H(jb+k-1), where k = min(p,n).
236 *
237 * Each H(i) has the form
238 *
239 * H(i) = I - taub * v * v'
240 *
241 * where taub is a real scalar, and v is a real vector with
242 * v(1:i-1) = 0 and v(i) = 1; v(i+1:p) is stored on exit in
243 * B(ib+i:ib+p-1,jb+i-1), and taub in TAUB(jb+i-1).
244 * To form Z explicitly, use ScaLAPACK subroutine PDORGQR.
245 * To use Z to update another matrix, use ScaLAPACK subroutine PDORMQR.
246 *
247 * Alignment requirements
248 * ======================
249 *
250 * The distributed submatrices sub( A ) and sub( B ) must verify some
251 * alignment properties, namely the following expression should be true:
252 *
253 * ( NB_A.EQ.NB_B .AND. ICOFFA.EQ.ICOFFB .AND. IACOL.EQ.IBCOL )
254 *
255 * =====================================================================
256 *
257 * .. Parameters ..
258  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
259  $ lld_, mb_, m_, nb_, n_, rsrc_
260  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
261  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
262  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
263 * .. Local Scalars ..
264  LOGICAL LQUERY
265  INTEGER IACOL, IAROW, IBCOL, IBROW, ICOFFA, ICOFFB,
266  $ ictxt, iroffa, iroffb, lwmin, mpa0, mycol,
267  $ myrow, npcol, nprow, nqa0, nqb0, ppb0
268 * ..
269 * .. External Subroutines ..
270  EXTERNAL blacs_gridinfo, chk1mat, pchk2mat, pdgeqrf,
272 * ..
273 * .. Local Arrays ..
274  INTEGER IDUM1( 1 ), IDUM2( 1 )
275 * ..
276 * .. External Functions ..
277  INTEGER INDXG2P, NUMROC
278  EXTERNAL indxg2p, numroc
279 * ..
280 * .. Intrinsic Functions ..
281  INTRINSIC dble, int, max, min, mod
282 * ..
283 * .. Executable Statements ..
284 *
285 * Get grid parameters
286 *
287  ictxt = desca( ctxt_ )
288  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
289 *
290 * Test the input parameters
291 *
292  info = 0
293  IF( nprow.EQ.-1 ) THEN
294  info = -707
295  ELSE
296  CALL chk1mat( m, 1, n, 3, ia, ja, desca, 7, info )
297  CALL chk1mat( p, 2, n, 3, ib, jb, descb, 12, info )
298  IF( info.EQ.0 ) THEN
299  iroffa = mod( ia-1, desca( mb_ ) )
300  icoffa = mod( ja-1, desca( nb_ ) )
301  iroffb = mod( ib-1, descb( mb_ ) )
302  icoffb = mod( jb-1, descb( nb_ ) )
303  iarow = indxg2p( ia, desca( mb_ ), myrow, desca( rsrc_ ),
304  $ nprow )
305  iacol = indxg2p( ja, desca( nb_ ), mycol, desca( csrc_ ),
306  $ npcol )
307  ibrow = indxg2p( ib, descb( mb_ ), myrow, descb( rsrc_ ),
308  $ nprow )
309  ibcol = indxg2p( jb, descb( nb_ ), mycol, descb( csrc_ ),
310  $ npcol )
311  mpa0 = numroc( m+iroffa, desca( mb_ ), myrow, iarow, nprow )
312  nqa0 = numroc( n+icoffa, desca( nb_ ), mycol, iacol, npcol )
313  ppb0 = numroc( p+iroffb, descb( mb_ ), myrow, ibrow, nprow )
314  nqb0 = numroc( n+icoffb, descb( nb_ ), mycol, ibcol, npcol )
315  lwmin = max( desca( mb_ ) * ( mpa0 + nqa0 + desca( mb_ ) ),
316  $ max( max( ( desca( mb_ )*( desca( mb_ ) - 1 ) ) / 2,
317  $ ( ppb0 + nqb0 ) * desca( mb_ ) ) +
318  $ desca( mb_ ) * desca( mb_ ),
319  $ descb( nb_ ) * ( ppb0 + nqb0 + descb( nb_ ) ) ) )
320 *
321  work( 1 ) = dble( lwmin )
322  lquery = ( lwork.EQ.-1 )
323  IF( iacol.NE.ibcol .OR. icoffa.NE.icoffb ) THEN
324  info = -11
325  ELSE IF( desca( nb_ ).NE.descb( nb_ ) ) THEN
326  info = -1204
327  ELSE IF( ictxt.NE.descb( ctxt_ ) ) THEN
328  info = -1207
329  ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
330  info = -15
331  END IF
332  END IF
333  IF( lwork.EQ.-1 ) THEN
334  idum1( 1 ) = -1
335  ELSE
336  idum1( 1 ) = 1
337  END IF
338  idum2( 1 ) = 15
339  CALL pchk2mat( m, 1, n, 3, ia, ja, desca, 7, p, 2, n, 3, ib,
340  $ jb, descb, 12, 1, idum1, idum2, info )
341  END IF
342 *
343  IF( info.NE.0 ) THEN
344  CALL pxerbla( ictxt, 'PDGGRQF', -info )
345  RETURN
346  ELSE IF( lquery ) THEN
347  RETURN
348  END IF
349 *
350 * RQ factorization of M-by-N matrix sub( A ): sub( A ) = R*Q
351 *
352  CALL pdgerqf( m, n, a, ia, ja, desca, taua, work, lwork, info )
353  lwmin = int( work( 1 ) )
354 *
355 * Update sub( B ) := sub( B )*Q'
356 *
357  CALL pdormrq( 'Right', 'Transpose', p, n, min( m, n ), a,
358  $ max( ia, ia+m-n ), ja, desca, taua, b, ib, jb,
359  $ descb, work, lwork, info )
360  lwmin = max( lwmin, int( work( 1 ) ) )
361 *
362 * QR factorization of P-by-N matrix sub( B ): sub( B ) = Z*T
363 *
364  CALL pdgeqrf( p, n, b, ib, jb, descb, taub, work, lwork, info )
365  work( 1 ) = dble( max( lwmin, int( work( 1 ) ) ) )
366 *
367  RETURN
368 *
369 * End of PDGGRQF
370 *
371  END
max
#define max(A, B)
Definition: pcgemr.c:180
pdgerqf
subroutine pdgerqf(M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO)
Definition: pdgerqf.f:3
pdggrqf
subroutine pdggrqf(M, P, N, A, IA, JA, DESCA, TAUA, B, IB, JB, DESCB, TAUB, WORK, LWORK, INFO)
Definition: pdggrqf.f:3
pdormrq
subroutine pdormrq(SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, C, IC, JC, DESCC, WORK, LWORK, INFO)
Definition: pdormrq.f:3
pchk2mat
subroutine pchk2mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, MB, MBPOS0, NB, NBPOS0, IB, JB, DESCB, DESCBPOS0, NEXTRA, EX, EXPOS, INFO)
Definition: pchkxmat.f:175
pdgeqrf
subroutine pdgeqrf(M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO)
Definition: pdgeqrf.f:3
chk1mat
subroutine chk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, INFO)
Definition: chk1mat.f:3
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
min
#define min(A, B)
Definition: pcgemr.c:181