ScaLAPACK 2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pclauu2.f
Go to the documentation of this file.
*  PCLAUU2: unblocked, communication-free product of a triangular
*  factor with its conjugate transpose (single precision complex).
*
      SUBROUTINE PCLAUU2( UPLO, N, A, IA, JA, DESCA )
*
*  -- ScaLAPACK auxiliary routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     May 1, 1997
*
*     .. Scalar Arguments ..
      CHARACTER          UPLO
      INTEGER            IA, JA, N
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * )
      COMPLEX            A( * )
*     ..
*
*  Purpose
*  =======
*
*  PCLAUU2 forms the product U * U' or L' * L, where ' denotes the
*  conjugate transpose and the triangular factor U or L occupies the
*  upper or lower triangle of sub( A ) = A(IA:IA+N-1,JA:JA+N-1).
*
*  UPLO = 'U' or 'u':  the upper triangle of U * U' overwrites U.
*  UPLO = 'L' or 'l':  the lower triangle of L' * L overwrites L.
*
*  This is the unblocked (Level 2 BLAS) variant.  The routine does no
*  communication: sub( A ) must reside entirely on a single process.
*  Only the process that owns A(IA,JA) does any work; every other
*  process returns right after the index computation.
*
*  Only the real part of each diagonal entry of the factor is used.
*
*  Notes
*  =====
*
*  A global data object is accompanied by a description vector that
*  holds what is needed to map an element of the object to the process
*  owning it and to its location in that process' memory.
*
*  Let A stand for any 2D block cyclicly distributed array, and DESCA
*  for its description vector.  Below, the suffix _A is to be read as
*  "of the global array A".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- -----------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ )Descriptor type; here DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ ) BLACS context handle identifying the
*                                 process grid A is distributed over.
*                                 The context is global, though the
*                                 integer handle may vary.
*  M_A    (global) DESCA( M_ )    Number of rows of the global array.
*  N_A    (global) DESCA( N_ )    Number of columns of the global
*                                 array.
*  MB_A   (global) DESCA( MB_ )   Blocking factor used to distribute
*                                 the rows.
*  NB_A   (global) DESCA( NB_ )   Blocking factor used to distribute
*                                 the columns.
*  RSRC_A (global) DESCA( RSRC_ ) Process row holding the first row
*                                 of A.
*  CSRC_A (global) DESCA( CSRC_ ) Process column holding the first
*                                 column of A.
*  LLD_A  (local)  DESCA( LLD_ )  Leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  For a matrix dimension K distributed over a p x q process grid,
*  LOCr( K ) is the number of the K elements a process would receive
*  when K is spread over the p processes of its process column, and
*  LOCc( K ) the number it would receive when K is spread over the q
*  processes of its process row.  Both can be obtained from the
*  ScaLAPACK tool function NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  Upper bounds are
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Arguments
*  =========
*
*  UPLO    (global input) CHARACTER*1
*          Selects the triangle of sub( A ) that holds the factor:
*          = 'U':  Upper triangular,
*          = 'L':  Lower triangular.
*          Any value other than 'U' is handled as 'L'.
*
*  N       (global input) INTEGER
*          The number of rows and columns to be operated on, i.e. the
*          order of the triangular factor U or L.  N >= 0.
*
*  A       (local input/local output) COMPLEX pointer into the
*          local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
*          On entry, the local pieces of the triangular factor L or U.
*          On exit, if UPLO = 'U', the upper triangle of sub( A ) holds
*          the upper triangle of U * U'; if UPLO = 'L', the lower
*          triangle of sub( A ) holds the lower triangle of L' * L.
*
*  IA      (global input) INTEGER
*          Global row index of the first row of sub( A ).
*
*  JA      (global input) INTEGER
*          Global column index of the first column of sub( A ).
*
*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix A.
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, DLEN_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9 )
      INTEGER            DTYPE_, CTXT_, M_, N_, MB_, NB_, RSRC_,
     $                   CSRC_, LLD_
      PARAMETER          ( DTYPE_ = 1, CTXT_ = 2, M_ = 3, N_ = 4,
     $                     MB_ = 5, NB_ = 6, RSRC_ = 7, CSRC_ = 8,
     $                     LLD_ = 9 )
      COMPLEX            ONE
      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
*     ..
*     .. Local Scalars ..
*     I      : index of the row/column being processed (1..N-1)
*     NTAIL  : N - I, entries of the factor beyond the diagonal
*     KDIAG  : position in A of the current diagonal entry
*     KNEXT  : position in A of the first off-diagonal factor entry
*     KHEAD  : position in A of the start of the column (upper case)
*              or row (lower case) that is being overwritten
*     ILOC, JLOC : local row/column indices returned by INFOG2L
*     PROW, PCOL : process coordinates returned by INFOG2L
      INTEGER            I, ILOC, JLOC, KDIAG, KHEAD, KNEXT, LLDA,
     $                   MYCOL, MYROW, NPCOL, NPROW, NTAIL, PCOL, PROW
      REAL               DIAGRE
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, INFOG2L, CGEMV, CLACGV,
     $                   CSSCAL
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      COMPLEX            CDOTC
      EXTERNAL           LSAME, CDOTC
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          CMPLX, REAL
*     ..
*     .. Executable Statements ..
*
*     Nothing to do for an empty matrix
*
      IF( N.EQ.0 ) RETURN
*
*     Query the process grid and locate A(IA,JA) in local storage
*
      CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL )
      CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, ILOC,
     $              JLOC, PROW, PCOL )
*
*     Processes other than the one at (PROW,PCOL) have no work
*
      IF( MYROW.NE.PROW .OR. MYCOL.NE.PCOL ) RETURN
*
      LLDA = DESCA( LLD_ )
      KDIAG = ILOC + ( JLOC-1 )*LLDA
      KHEAD = KDIAG
*
      IF( LSAME( UPLO, 'U' ) ) THEN
*
*        Upper case: U * U'.  The factor entries right of the diagonal
*        lie along a row (stride LLDA); the overwritten part of the
*        column above the diagonal is contiguous (stride 1).
*
         DO 10 I = 1, N - 1
            NTAIL = N - I
            KNEXT = KDIAG + LLDA
            DIAGRE = REAL( A( KDIAG ) )
            A( KDIAG ) = DIAGRE*DIAGRE +
     $                   REAL( CDOTC( NTAIL, A( KNEXT ), LLDA,
     $                                A( KNEXT ), LLDA ) )
*
*           Conjugate the row for the matrix-vector update, then
*           restore it
*
            CALL CLACGV( NTAIL, A( KNEXT ), LLDA )
            CALL CGEMV( 'No transpose', I-1, NTAIL, ONE,
     $                  A( KHEAD+LLDA ), LLDA, A( KNEXT ), LLDA,
     $                  CMPLX( DIAGRE ), A( KHEAD ), 1 )
            CALL CLACGV( NTAIL, A( KNEXT ), LLDA )
*
*           Advance to the next diagonal entry and the next column
*
            KDIAG = KDIAG + LLDA + 1
            KHEAD = KHEAD + LLDA
   10    CONTINUE
*
*        Last column: scale by the real part of its diagonal entry
*
         DIAGRE = REAL( A( KDIAG ) )
         CALL CSSCAL( N, DIAGRE, A( KHEAD ), 1 )
*
      ELSE
*
*        Lower case: L' * L.  The factor entries below the diagonal are
*        contiguous (stride 1); the overwritten part of the row left of
*        the diagonal has stride LLDA.
*
         DO 20 I = 1, N - 1
            NTAIL = N - I
            KNEXT = KDIAG + 1
            DIAGRE = REAL( A( KDIAG ) )
            A( KDIAG ) = DIAGRE*DIAGRE +
     $                   REAL( CDOTC( NTAIL, A( KNEXT ), 1,
     $                                A( KNEXT ), 1 ) )
*
*           Conjugate the row for the matrix-vector update, then
*           restore it
*
            CALL CLACGV( I-1, A( KHEAD ), LLDA )
            CALL CGEMV( 'Conjugate transpose', NTAIL, I-1, ONE,
     $                  A( KHEAD+1 ), LLDA, A( KNEXT ), 1,
     $                  CMPLX( DIAGRE ), A( KHEAD ), LLDA )
            CALL CLACGV( I-1, A( KHEAD ), LLDA )
*
*           Advance to the next diagonal entry and the next row
*
            KDIAG = KDIAG + LLDA + 1
            KHEAD = KHEAD + 1
   20    CONTINUE
*
*        Last row: scale by the real part of its diagonal entry
*
         DIAGRE = REAL( A( KDIAG ) )
         CALL CSSCAL( N, DIAGRE, A( KHEAD ), LLDA )
*
      END IF
*
      RETURN
*
*     End of PCLAUU2
*
      END
cmplx
float cmplx[2]
Definition: pblas.h:132
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pclauu2
subroutine pclauu2(UPLO, N, A, IA, JA, DESCA)
Definition: pclauu2.f:2