SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ pzgebrd()

subroutine pzgebrd ( integer  m,
integer  n,
complex*16, dimension( * )  a,
integer  ia,
integer  ja,
integer, dimension( * )  desca,
double precision, dimension( * )  d,
double precision, dimension( * )  e,
complex*16, dimension( * )  tauq,
complex*16, dimension( * )  taup,
complex*16, dimension( * )  work,
integer  lwork,
integer  info 
)

Definition at line 1 of file pzgebrd.f.

3*
4* -- ScaLAPACK routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* May 25, 2001
8*
9* .. Scalar Arguments ..
10 INTEGER IA, INFO, JA, LWORK, M, N
11* ..
12* .. Array Arguments ..
13 INTEGER DESCA( * )
14 DOUBLE PRECISION D( * ), E( * )
15 COMPLEX*16 A( * ), TAUP( * ), TAUQ( * ), WORK( * )
16* ..
17*
18* Purpose
19* =======
20*
21* PZGEBRD reduces a complex general M-by-N distributed matrix
22* sub( A ) = A(IA:IA+M-1,JA:JA+N-1) to upper or lower bidiagonal
23* form B by an unitary transformation: Q' * sub( A ) * P = B.
24*
25* If M >= N, B is upper bidiagonal; if M < N, B is lower bidiagonal.
26*
27* Notes
28* =====
29*
30* Each global data object is described by an associated description
31* vector. This vector stores the information required to establish
32* the mapping between an object element and its corresponding process
33* and memory location.
34*
35* Let A be a generic term for any 2D block cyclicly distributed array.
36* Such a global array has an associated description vector DESCA.
37* In the following comments, the character _ should be read as
38* "of the global array".
39*
40* NOTATION STORED IN EXPLANATION
41* --------------- -------------- --------------------------------------
42* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
43* DTYPE_A = 1.
44* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
45* the BLACS process grid A is distribu-
46* ted over. The context itself is glo-
47* bal, but the handle (the integer
48* value) may vary.
49* M_A (global) DESCA( M_ ) The number of rows in the global
50* array A.
51* N_A (global) DESCA( N_ ) The number of columns in the global
52* array A.
53* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
54* the rows of the array.
55* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
56* the columns of the array.
57* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
58* row of the array A is distributed.
59* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
60* first column of the array A is
61* distributed.
62* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
63* array. LLD_A >= MAX(1,LOCr(M_A)).
64*
65* Let K be the number of rows or columns of a distributed matrix,
66* and assume that its process grid has dimension p x q.
67* LOCr( K ) denotes the number of elements of K that a process
68* would receive if K were distributed over the p processes of its
69* process column.
70* Similarly, LOCc( K ) denotes the number of elements of K that a
71* process would receive if K were distributed over the q processes of
72* its process row.
73* The values of LOCr() and LOCc() may be determined via a call to the
74* ScaLAPACK tool function, NUMROC:
75* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
76* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
77* An upper bound for these quantities may be computed by:
78* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
79* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
80*
81* Arguments
82* =========
83*
84* M (global input) INTEGER
85* The number of rows to be operated on, i.e. the number of rows
86* of the distributed submatrix sub( A ). M >= 0.
87*
88* N (global input) INTEGER
89* The number of columns to be operated on, i.e. the number of
90* columns of the distributed submatrix sub( A ). N >= 0.
91*
92* A (local input/local output) COMPLEX*16 pointer into the
93* local memory to an array of dimension (LLD_A,LOCc(JA+N-1)).
94* On entry, this array contains the local pieces of the
95* general distributed matrix sub( A ). On exit, if M >= N,
96* the diagonal and the first superdiagonal of sub( A ) are
97* overwritten with the upper bidiagonal matrix B; the elements
98* below the diagonal, with the array TAUQ, represent the
99* unitary matrix Q as a product of elementary reflectors, and
100* the elements above the first superdiagonal, with the array
101* TAUP, represent the orthogonal matrix P as a product of
102* elementary reflectors. If M < N, the diagonal and the first
103* subdiagonal are overwritten with the lower bidiagonal
104* matrix B; the elements below the first subdiagonal, with the
105* array TAUQ, represent the unitary matrix Q as a product of
106* elementary reflectors, and the elements above the diagonal,
107* with the array TAUP, represent the orthogonal matrix P as a
108* product of elementary reflectors. See Further Details.
109*
110* IA (global input) INTEGER
111* The row index in the global array A indicating the first
112* row of sub( A ).
113*
114* JA (global input) INTEGER
115* The column index in the global array A indicating the
116* first column of sub( A ).
117*
118* DESCA (global and local input) INTEGER array of dimension DLEN_.
119* The array descriptor for the distributed matrix A.
120*
121* D (local output) DOUBLE PRECISION array, dimension
122* LOCc(JA+MIN(M,N)-1) if M >= N; LOCr(IA+MIN(M,N)-1) otherwise.
123* The distributed diagonal elements of the bidiagonal matrix
124* B: D(i) = A(i,i). D is tied to the distributed matrix A.
125*
126* E (local output) DOUBLE PRECISION array, dimension
127* LOCr(IA+MIN(M,N)-1) if M >= N; LOCc(JA+MIN(M,N)-2) otherwise.
128* The distributed off-diagonal elements of the bidiagonal
129* distributed matrix B:
130* if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
131* if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
132* E is tied to the distributed matrix A.
133*
134* TAUQ (local output) COMPLEX*16 array dimension
135* LOCc(JA+MIN(M,N)-1). The scalar factors of the elementary
136* reflectors which represent the unitary matrix Q. TAUQ is
137* tied to the distributed matrix A. See Further Details.
138*
139* TAUP (local output) COMPLEX*16 array, dimension
140* LOCr(IA+MIN(M,N)-1). The scalar factors of the elementary
141* reflectors which represent the unitary matrix P. TAUP is
142* tied to the distributed matrix A. See Further Details.
143*
144* WORK (local workspace/local output) COMPLEX*16 array,
145* dimension (LWORK)
146* On exit, WORK( 1 ) returns the minimal and optimal LWORK.
147*
148* LWORK (local or global input) INTEGER
149* The dimension of the array WORK.
150* LWORK is local input and must be at least
151* LWORK >= NB*( MpA0 + NqA0 + 1 ) + NqA0
152*
153* where NB = MB_A = NB_A,
154* IROFFA = MOD( IA-1, NB ), ICOFFA = MOD( JA-1, NB ),
155* IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ),
156* IACOL = INDXG2P( JA, NB, MYCOL, CSRC_A, NPCOL ),
157* MpA0 = NUMROC( M+IROFFA, NB, MYROW, IAROW, NPROW ),
158* NqA0 = NUMROC( N+ICOFFA, NB, MYCOL, IACOL, NPCOL ).
159*
160* INDXG2P and NUMROC are ScaLAPACK tool functions;
161* MYROW, MYCOL, NPROW and NPCOL can be determined by calling
162* the subroutine BLACS_GRIDINFO.
163*
164* If LWORK = -1, then LWORK is global input and a workspace
165* query is assumed; the routine only calculates the minimum
166* and optimal size for all work arrays. Each of these
167* values is returned in the first entry of the corresponding
168* work array, and no error message is issued by PXERBLA.
169*
170* INFO (global output) INTEGER
171* = 0: successful exit
172* < 0: If the i-th argument is an array and the j-entry had
173* an illegal value, then INFO = -(i*100+j), if the i-th
174* argument is a scalar and had an illegal value, then
175* INFO = -i.
176*
177* Further Details
178* ===============
179*
180* The matrices Q and P are represented as products of elementary
181* reflectors:
182*
183* If m >= n,
184*
185* Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1)
186*
187* Each H(i) and G(i) has the form:
188*
189* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u'
190*
191* where tauq and taup are complex scalars, and v and u are complex
192* vectors;
193* v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in
194* A(ia+i:ia+m-1,ja+i-1);
195* u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in
196* A(ia+i-1,ja+i+1:ja+n-1);
197* tauq is stored in TAUQ(ja+i-1) and taup in TAUP(ia+i-1).
198*
199* If m < n,
200*
201* Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m)
202*
203* Each H(i) and G(i) has the form:
204*
205* H(i) = I - tauq * v * v' and G(i) = I - taup * u * u'
206*
207* where tauq and taup are complex scalars, and v and u are complex
208* vectors;
209* v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in
210* A(ia+i+1:ia+m-1,ja+i-1);
211* u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in
212* A(ia+i-1,ja+i:ja+n-1);
213* tauq is stored in TAUQ(ja+i-1) and taup in TAUP(ia+i-1).
214*
215* The contents of sub( A ) on exit are illustrated by the following
216* examples:
217*
218* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n):
219*
220* ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 )
221* ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 )
222* ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 )
223* ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 )
224* ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 )
225* ( v1 v2 v3 v4 v5 )
226*
227* where d and e denote diagonal and off-diagonal elements of B, vi
228* denotes an element of the vector defining H(i), and ui an element of
229* the vector defining G(i).
230*
231* Alignment requirements
232* ======================
233*
234* The distributed submatrix sub( A ) must verify some alignment proper-
235* ties, namely the following expressions should be true:
236* ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA )
237*
238* =====================================================================
239*
240* .. Parameters ..
241 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
242 $ LLD_, MB_, M_, NB_, N_, RSRC_
243 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
244 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
245 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
246 COMPLEX*16 ONE
247 parameter( one = ( 1.0d+0, 0.0d+0 ) )
248* ..
249* .. Local Scalars ..
250 LOGICAL LQUERY
251 CHARACTER COLCTOP, ROWCTOP
252 INTEGER I, IACOL, IAROW, ICTXT, IINFO, IOFF, IPW, IPY,
253 $ IW, J, JB, JS, JW, K, L, LWMIN, MN, MP, MYCOL,
254 $ MYROW, NB, NPCOL, NPROW, NQ
255* ..
256* .. Local Arrays ..
257 INTEGER DESCWX( DLEN_ ), DESCWY( DLEN_ ), IDUM1( 1 ),
258 $ IDUM2( 1 )
259* ..
260* .. External Subroutines ..
261 EXTERNAL blacs_gridinfo, chk1mat, descset, pchk1mat,
262 $ pb_topget, pb_topset, pxerbla, pzelset,
263 $ pzgebd2, pzgemm, pzlabrd
264* ..
265* .. External Functions ..
266 INTEGER INDXG2L, INDXG2P, NUMROC
267 EXTERNAL indxg2l, indxg2p, numroc
268* ..
269* .. Intrinsic Functions ..
270 INTRINSIC dcmplx, dble, max, min, mod
271* ..
272* .. Executable Statements ..
273*
274* Get grid parameters
275*
276 ictxt = desca( ctxt_ )
277 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
278*
279* Test the input parameters
280*
281 info = 0
282 IF( nprow.EQ.-1 ) THEN
283 info = -(600+ctxt_)
284 ELSE
285 CALL chk1mat( m, 1, n, 2, ia, ja, desca, 6, info )
286 IF( info.EQ.0 ) THEN
287 nb = desca( mb_ )
288 ioff = mod( ia-1, desca( mb_ ) )
289 iarow = indxg2p( ia, nb, myrow, desca( rsrc_ ), nprow )
290 iacol = indxg2p( ja, nb, mycol, desca( csrc_ ), npcol )
291 mp = numroc( m+ioff, nb, myrow, iarow, nprow )
292 nq = numroc( n+ioff, nb, mycol, iacol, npcol )
293 lwmin = nb*( mp+nq+1 ) + nq
294*
295 work( 1 ) = dcmplx( dble( lwmin ) )
296 lquery = ( lwork.EQ.-1 )
297 IF( ioff.NE.mod( ja-1, desca( nb_ ) ) ) THEN
298 info = -5
299 ELSE IF( nb.NE.desca( nb_ ) ) THEN
300 info = -(600+nb_)
301 ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
302 info = -12
303 END IF
304 END IF
305 IF( lquery ) THEN
306 idum1( 1 ) = -1
307 ELSE
308 idum1( 1 ) = 1
309 END IF
310 idum2( 1 ) = 12
311 CALL pchk1mat( m, 1, n, 2, ia, ja, desca, 6, 1, idum1, idum2,
312 $ info )
313 END IF
314*
315 IF( info.LT.0 ) THEN
316 CALL pxerbla( ictxt, 'PZGEBRD', -info )
317 RETURN
318 ELSE IF( lquery ) THEN
319 RETURN
320 END IF
321*
322* Quick return if possible
323*
324 mn = min( m, n )
325 IF( mn.EQ.0 )
326 $ RETURN
327*
328* Initialize parameters.
329*
330 CALL pb_topget( ictxt, 'Combine', 'Columnwise', colctop )
331 CALL pb_topget( ictxt, 'Combine', 'Rowwise', rowctop )
332 CALL pb_topset( ictxt, 'Combine', 'Columnwise', '1-tree' )
333 CALL pb_topset( ictxt, 'Combine', 'Rowwise', '1-tree' )
334*
335 ipy = mp * nb + 1
336 ipw = nq * nb + ipy
337*
338 CALL descset( descwx, m+ioff, nb, nb, nb, iarow, iacol, ictxt,
339 $ max( 1, mp ) )
340 CALL descset( descwy, nb, n+ioff, nb, nb, iarow, iacol, ictxt,
341 $ nb )
342*
343 mp = numroc( m+ia-1, nb, myrow, desca( rsrc_ ), nprow )
344 nq = numroc( n+ja-1, nb, mycol, desca( csrc_ ), npcol )
345 k = 1
346 jb = nb - ioff
347 iw = ioff + 1
348 jw = ioff + 1
349*
350 DO 10 l = 1, mn+ioff-nb, nb
351 i = ia + k - 1
352 j = ja + k - 1
353*
354* Reduce rows and columns i:i+nb-1 to bidiagonal form and return
355* the matrices X and Y which are needed to update the unreduced
356* part of the matrix.
357*
358 CALL pzlabrd( m-k+1, n-k+1, jb, a, i, j, desca, d, e, tauq,
359 $ taup, work, iw, jw, descwx, work( ipy ), iw,
360 $ jw, descwy, work( ipw ) )
361*
362* Update the trailing submatrix A(i+nb:ia+m-1,j+nb:ja+n-1), using
363* an update of the form A := A - V*Y' - X*U'.
364*
365 CALL pzgemm( 'No transpose', 'No transpose', m-k-jb+1,
366 $ n-k-jb+1, jb, -one, a, i+jb, j, desca,
367 $ work( ipy ), iw, jw+jb, descwy, one, a, i+jb,
368 $ j+jb, desca )
369 CALL pzgemm( 'No transpose', 'No transpose', m-k-jb+1,
370 $ n-k-jb+1, jb, -one, work, iw+jb, jw, descwx, a, i,
371 $ j+jb, desca, one, a, i+jb, j+jb, desca )
372*
373* Copy last off-diagonal elements of B back into sub( A ).
374*
375 IF( m.GE.n ) THEN
376 js = min( indxg2l( i+jb-1, nb, 0, desca( rsrc_ ), nprow ),
377 $ mp )
378 IF( js.GT.0 )
379 $ CALL pzelset( a, i+jb-1, j+jb, desca, dcmplx( e( js ) ) )
380 ELSE
381 js = min( indxg2l( j+jb-1, nb, 0, desca( csrc_ ), npcol ),
382 $ nq )
383 IF( js.GT.0 )
384 $ CALL pzelset( a, i+jb, j+jb-1, desca, dcmplx( e( js ) ) )
385 END IF
386*
387 k = k + jb
388 jb = nb
389 iw = 1
390 jw = 1
391 descwx( m_ ) = descwx( m_ ) - jb
392 descwx( rsrc_ ) = mod( descwx( rsrc_ ) + 1, nprow )
393 descwx( csrc_ ) = mod( descwx( csrc_ ) + 1, npcol )
394 descwy( n_ ) = descwy( n_ ) - jb
395 descwy( rsrc_ ) = mod( descwy( rsrc_ ) + 1, nprow )
396 descwy( csrc_ ) = mod( descwy( csrc_ ) + 1, npcol )
397*
398 10 CONTINUE
399*
400* Use unblocked code to reduce the remainder of the matrix.
401*
402 CALL pzgebd2( m-k+1, n-k+1, a, ia+k-1, ja+k-1, desca, d, e, tauq,
403 $ taup, work, lwork, iinfo )
404*
405 CALL pb_topset( ictxt, 'Combine', 'Columnwise', colctop )
406 CALL pb_topset( ictxt, 'Combine', 'Rowwise', rowctop )
407*
408 work( 1 ) = dcmplx( dble( lwmin ) )
409*
410 RETURN
411*
412* End of PZGEBRD
413*
subroutine chk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, info)
Definition chk1mat.f:3
subroutine descset(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld)
Definition descset.f:3
integer function indxg2l(indxglob, nb, iproc, isrcproc, nprocs)
Definition indxg2l.f:2
integer function indxg2p(indxglob, nb, iproc, isrcproc, nprocs)
Definition indxg2p.f:2
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pchk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, nextra, ex, expos, info)
Definition pchkxmat.f:3
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2
subroutine pzelset(a, ia, ja, desca, alpha)
Definition pzelset.f:2
subroutine pzgebd2(m, n, a, ia, ja, desca, d, e, tauq, taup, work, lwork, info)
Definition pzgebd2.f:3
subroutine pzlabrd(m, n, nb, a, ia, ja, desca, d, e, tauq, taup, x, ix, jx, descx, y, iy, jy, descy, work)
Definition pzlabrd.f:3
Here is the call graph for this function:
Here is the caller graph for this function: