ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
psgehdrv.f
Go to the documentation of this file.
1  SUBROUTINE psgehdrv( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK )
2 *
3 * -- ScaLAPACK routine (version 1.7) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * May 28, 2001
7 *
8 * .. Scalar Arguments ..
9  INTEGER IA, IHI, ILO, JA, N
10 * ..
11 * .. Array Arguments ..
12  INTEGER DESCA( * )
13  REAL A( * ), TAU( * ), WORK( * )
14 * ..
15 *
16 * Purpose
17 * =======
18 *
19 * PSGEHDRV computes sub( A ) = A(IA:IA+N-1,JA:JA+N-1) from the
20 * orthogonal matrix Q, the Hessenberg matrix, and the array TAU
21 * returned by PSGEHRD:
22 * sub( A ) := Q * H * Q'
23 *
24 * Arguments
25 * =========
26 *
27 * N (global input) INTEGER
28 * The number of rows and columns to be operated on, i.e. the
29 * order of the distributed submatrix sub( A ). N >= 0.
30 *
31 * ILO (global input) INTEGER
32 * IHI (global input) INTEGER
33 * It is assumed that sub( A ) is already upper triangular in
34 * rows and columns 1:ILO-1 and IHI+1:N. If N > 0,
35 * 1 <= ILO <= IHI <= N; otherwise set ILO = 1, IHI = N.
36 *
37 * A (local input/local output) REAL pointer into the
38 * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)).
39 * On entry, this array contains the local pieces of the N-by-N
40 * general distributed matrix sub( A ) reduced to Hessenberg
41 * form by PSGEHRD. The upper triangle and the first sub-
42 * diagonal of sub( A ) contain the upper Hessenberg matrix H,
43 * and the elements below the first subdiagonal, with the array
44 * TAU, represent the orthogonal matrix Q as a product of
45 * elementary reflectors. On exit, the original distributed
46 * N-by-N matrix sub( A ) is recovered.
47 *
48 * IA (global input) INTEGER
49 * The row index in the global array A indicating the first
50 * row of sub( A ).
51 *
52 * JA (global input) INTEGER
53 * The column index in the global array A indicating the
54 * first column of sub( A ).
55 *
56 * DESCA (global and local input) INTEGER array of dimension DLEN_.
57 * The array descriptor for the distributed matrix A.
58 *
59 * TAU (local input) REAL array, dimension LOCc(JA+N-2)
60 * The scalar factors of the elementary reflectors returned by
61 * PSGEHRD. TAU is tied to the distributed matrix A.
62 *
63 * WORK (local workspace) REAL array, dimension (LWORK).
64 * LWORK >= NB*NB + NB*IHLP + MAX[ NB*( IHLP+INLQ ),
65 * NB*( IHLQ + MAX[ IHIP,
66 * IHLP+NUMROC( NUMROC( IHI-ILO+LOFF+1, NB, 0, 0,
67 * NPCOL ), NB, 0, 0, LCMQ ) ] ) ]
68 *
69 * where NB = MB_A = NB_A,
70 * LCM is the least common multiple of NPROW and NPCOL,
71 * LCM = ILCM( NPROW, NPCOL ), LCMQ = LCM / NPCOL,
72 *
73 * IROFFA = MOD( IA-1, NB ),
74 * IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ),
75 * IHIP = NUMROC( IHI+IROFFA, NB, MYROW, IAROW, NPROW ),
76 *
77 * ILROW = INDXG2P( IA+ILO-1, NB, MYROW, RSRC_A, NPROW ),
78 * ILCOL = INDXG2P( JA+ILO-1, NB, MYCOL, CSRC_A, NPCOL ),
79 * IHLP = NUMROC( IHI-ILO+IROFFA+1, NB, MYROW, ILROW, NPROW ),
80 * IHLQ = NUMROC( IHI-ILO+IROFFA+1, NB, MYCOL, ILCOL, NPCOL ),
81 * INLQ = NUMROC( N-ILO+IROFFA+1, NB, MYCOL, ILCOL, NPCOL ).
82 *
83 * ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions;
84 * MYROW, MYCOL, NPROW and NPCOL can be determined by calling
85 * the subroutine BLACS_GRIDINFO.
86 *
87 * =====================================================================
88 *
89 * .. Parameters ..
90  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
91  $ LLD_, MB_, M_, NB_, N_, RSRC_
92  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
93  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
94  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
95  REAL ZERO
96  parameter( zero = 0.0e+0 )
97 * ..
98 * .. Local Scalars ..
99  INTEGER I, IACOL, IAROW, ICTXT, IHLP, II, IOFF, IPT,
100  $ IPV, IPW, IV, J, JB, JJ, JL, K, MYCOL, MYROW,
101  $ NB, NPCOL, NPROW
102 * ..
103 * .. Local Arrays ..
104  INTEGER DESCV( DLEN_ )
105 * ..
106 * .. External Functions ..
107  INTEGER INDXG2P, NUMROC
108  EXTERNAL indxg2p, numroc
109 * ..
110 * .. External Subroutines ..
111  EXTERNAL blacs_gridinfo, descset, infog2l, pslarfb,
113 * ..
114 * .. Intrinsic Functions ..
115  INTRINSIC max, min, mod
116 * ..
117 * .. Executable statements ..
118 *
119 * Get grid parameters
120 *
121  ictxt = desca( ctxt_ )
122  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
123 *
124 * Quick return if possible
125 *
126  IF( ihi-ilo.LE.0 )
127  $ RETURN
128 *
129  nb = desca( mb_ )
130  ioff = mod( ia+ilo-2, nb )
131  CALL infog2l( ia+ilo-1, ja+ilo-1, desca, nprow, npcol, myrow,
132  $ mycol, ii, jj, iarow, iacol )
133  ihlp = numroc( ihi-ilo+ioff+1, nb, myrow, iarow, nprow )
134 *
135  ipt = 1
136  ipv = ipt + nb * nb
137  ipw = ipv + ihlp * nb
138  jl = max( ( ( ja+ihi-2 ) / nb ) * nb + 1, ja + ilo - 1 )
139  CALL descset( descv, ihi-ilo+ioff+1, nb, nb, nb, iarow,
140  $ indxg2p( jl, desca( nb_ ), mycol, desca( csrc_ ),
141  $ npcol ), ictxt, max( 1, ihlp ) )
142 *
143  DO 10 j = jl, ilo+ja+nb-ioff-1, -nb
144  jb = min( ja+ihi-j-1, nb )
145  i = ia + j - ja
146  k = i - ia + 1
147  iv = k - ilo + ioff + 1
148 *
149 * Compute upper triangular matrix T from TAU.
150 *
151  CALL pslarft( 'Forward', 'Columnwise', ihi-k, jb, a, i+1, j,
152  $ desca, tau, work( ipt ), work( ipw ) )
153 *
154 * Copy Householder vectors into workspace.
155 *
156  CALL pslacpy( 'All', ihi-k, jb, a, i+1, j, desca, work( ipv ),
157  $ iv+1, 1, descv )
158 *
159 * Zero out the strict lower triangular part of A.
160 *
161  CALL pslaset( 'Lower', ihi-k-1, jb, zero, zero, a, i+2, j,
162  $ desca )
163 *
164 * Apply block Householder transformation from Left.
165 *
166  CALL pslarfb( 'Left', 'No transpose', 'Forward', 'Columnwise',
167  $ ihi-k, n-k+1, jb, work( ipv ), iv+1, 1, descv,
168  $ work( ipt ), a, i+1, j, desca, work( ipw ) )
169 *
170 * Apply block Householder transformation from Right.
171 *
172  CALL pslarfb( 'Right', 'Transpose', 'Forward', 'Columnwise',
173  $ ihi, ihi-k, jb, work( ipv ), iv+1, 1, descv,
174  $ work( ipt ), a, ia, j+1, desca, work( ipw ) )
175 *
176  descv( csrc_ ) = mod( descv( csrc_ ) + npcol - 1, npcol )
177 *
178  10 CONTINUE
179 *
180 * Handle the first block separately
181 *
182  iv = ioff + 1
183  i = ia + ilo - 1
184  j = ja + ilo - 1
185  jb = min( nb-ioff, ja+ihi-j-1 )
186 *
187 * Compute upper triangular matrix T from TAU.
188 *
189  CALL pslarft( 'Forward', 'Columnwise', ihi-ilo, jb, a, i+1, j,
190  $ desca, tau, work( ipt ), work( ipw ) )
191 *
192 * Copy Householder vectors into workspace.
193 *
194  CALL pslacpy( 'All', ihi-ilo, jb, a, i+1, j, desca, work( ipv ),
195  $ iv+1, 1, descv )
196 *
197 * Zero out the strict lower triangular part of A.
198 *
199  IF( ihi-ilo.GT.0 )
200  $ CALL pslaset( 'Lower', ihi-ilo-1, jb, zero, zero, a, i+2, j,
201  $ desca )
202 *
203 * Apply block Householder transformation from Left.
204 *
205  CALL pslarfb( 'Left', 'No transpose', 'Forward', 'Columnwise',
206  $ ihi-ilo, n-ilo+1, jb, work( ipv ), iv+1, 1, descv,
207  $ work( ipt ), a, i+1, j, desca, work( ipw ) )
208 *
209 * Apply block Householder transformation from Right.
210 *
211  CALL pslarfb( 'Right', 'Transpose', 'Forward', 'Columnwise', ihi,
212  $ ihi-ilo, jb, work( ipv ), iv+1, 1, descv,
213  $ work( ipt ), a, ia, j+1, desca, work( ipw ) )
214 *
215  RETURN
216 *
217 * End of PSGEHDRV
218 *
219  END
max
#define max(A, B)
Definition: pcgemr.c:180
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pslacpy
subroutine pslacpy(UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, DESCB)
Definition: pslacpy.f:3
descset
subroutine descset(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD)
Definition: descset.f:3
psgehdrv
subroutine psgehdrv(N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK)
Definition: psgehdrv.f:2
pslarfb
subroutine pslarfb(SIDE, TRANS, DIRECT, STOREV, M, N, K, V, IV, JV, DESCV, T, C, IC, JC, DESCC, WORK)
Definition: pslarfb.f:3
pslarft
subroutine pslarft(DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU, T, WORK)
Definition: pslarft.f:3
pslaset
subroutine pslaset(UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA)
Definition: psblastst.f:6863
min
#define min(A, B)
Definition: pcgemr.c:181