ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdlatra.f
Go to the documentation of this file.
1  DOUBLE PRECISION FUNCTION pdlatra( N, A, IA, JA, DESCA )
2 *
3 * -- ScaLAPACK auxiliary routine (version 1.7) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * May 1, 1997
7 *
8 * .. Scalar Arguments ..
9  INTEGER ia, ja, n
10 * ..
11 * .. Array Arguments ..
12  INTEGER desca( * )
13  DOUBLE PRECISION a( * )
14 * ..
15 *
16 * Purpose
17 * =======
18 *
19 * PDLATRA computes the trace of an N-by-N distributed matrix sub( A )
20 * denoting A( IA:IA+N-1, JA:JA+N-1 ). The result is left on every
21 * process of the grid.
22 *
23 * Notes
24 * =====
25 *
26 * Each global data object is described by an associated description
27 * vector. This vector stores the information required to establish
28 * the mapping between an object element and its corresponding process
29 * and memory location.
30 *
31 * Let A be a generic term for any 2D block cyclicly distributed array.
32 * Such a global array has an associated description vector DESCA.
33 * In the following comments, the character _ should be read as
34 * "of the global array".
35 *
36 * NOTATION STORED IN EXPLANATION
37 * --------------- -------------- --------------------------------------
38 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
39 * DTYPE_A = 1.
40 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
41 * the BLACS process grid A is distribu-
42 * ted over. The context itself is glo-
43 * bal, but the handle (the integer
44 * value) may vary.
45 * M_A (global) DESCA( M_ ) The number of rows in the global
46 * array A.
47 * N_A (global) DESCA( N_ ) The number of columns in the global
48 * array A.
49 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
50 * the rows of the array.
51 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
52 * the columns of the array.
53 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
54 * row of the array A is distributed.
55 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
56 * first column of the array A is
57 * distributed.
58 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
59 * array. LLD_A >= MAX(1,LOCr(M_A)).
60 *
61 * Let K be the number of rows or columns of a distributed matrix,
62 * and assume that its process grid has dimension p x q.
63 * LOCr( K ) denotes the number of elements of K that a process
64 * would receive if K were distributed over the p processes of its
65 * process column.
66 * Similarly, LOCc( K ) denotes the number of elements of K that a
67 * process would receive if K were distributed over the q processes of
68 * its process row.
69 * The values of LOCr() and LOCc() may be determined via a call to the
70 * ScaLAPACK tool function, NUMROC:
71 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
72 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
73 * An upper bound for these quantities may be computed by:
74 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
75 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
76 *
77 * Arguments
78 * =========
79 *
80 * N (global input) INTEGER
81 * The number of rows and columns to be operated on i.e the
82 * order of the distributed submatrix sub( A ). N >= 0.
83 *
84 * A (local input) DOUBLE PRECISION pointer into the local memory
85 * to an array of dimension ( LLD_A, LOCc(JA+N-1) ). This array
86 * contains the local pieces of the distributed matrix the trace
87 * is to be computed.
88 *
89 * IA (global input) INTEGER
90 * The row index in the global array A indicating the first
91 * row of sub( A ).
92 *
93 * JA (global input) INTEGER
94 * The column index in the global array A indicating the
95 * first column of sub( A ).
96 *
97 * DESCA (global and local input) INTEGER array of dimension DLEN_.
98 * The array descriptor for the distributed matrix A.
99 *
100 * ====================================================================
101 *
102 * .. Parameters ..
103  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
104  $ lld_, mb_, m_, nb_, n_, rsrc_
105  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
106  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
107  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
108  DOUBLE PRECISION zero
109  parameter( zero = 0.0d+0 )
110 * ..
111 * .. Local Scalars ..
112  INTEGER icurcol, icurrow, ii, ioffa, j, jb, jj, jn,
113  $ lda, ll, mycol, myrow, npcol, nprow
114  DOUBLE PRECISION trace
115 * ..
116 * .. External Subroutines ..
117  EXTERNAL blacs_gridinfo, dgsum2d, infog2l
118 * ..
119 * .. External Functions ..
120  INTEGER iceil
121  EXTERNAL iceil
122 * ..
123 * .. Intrinsic Functions ..
124  INTRINSIC min, mod
125 * ..
126 * .. Executable Statements ..
127 *
128 * Get grid parameters
129 *
130  CALL blacs_gridinfo( desca( ctxt_ ), nprow, npcol, myrow, mycol )
131 *
132  trace = zero
133  IF( n.EQ.0 ) THEN
134  pdlatra = trace
135  RETURN
136  END IF
137 *
138  CALL infog2l( ia, ja, desca, nprow, npcol, myrow, mycol, ii, jj,
139  $ icurrow, icurcol )
140 *
141  jn = min( iceil( ja, desca( nb_ ) ) * desca( nb_ ), ja+n-1 )
142  jb = jn-ja+1
143  lda = desca( lld_ )
144  ioffa = ii + ( jj - 1 ) * lda
145 *
146 * Handle first diagonal block separately
147 *
148  IF( myrow.EQ.icurrow .AND. mycol.EQ.icurcol ) THEN
149  DO 10 ll = ioffa, ioffa + (jb-1)*(lda+1), lda+1
150  trace = trace + a( ll )
151  10 CONTINUE
152  END IF
153  IF( myrow.EQ.icurrow )
154  $ ioffa = ioffa + jb
155  IF( mycol.EQ.icurcol )
156  $ ioffa = ioffa + jb*lda
157  icurrow = mod( icurrow+1, nprow )
158  icurcol = mod( icurcol+1, npcol )
159 *
160 * Loop over the remaining block of columns
161 *
162  DO 30 j = jn+1, ja+n-1, desca( nb_ )
163  jb = min( ja+n-j, desca( nb_ ) )
164 *
165  IF( myrow.EQ.icurrow .AND. mycol.EQ.icurcol ) THEN
166  DO 20 ll = ioffa, ioffa + (jb-1)*(lda+1), lda+1
167  trace = trace + a( ll )
168  20 CONTINUE
169  END IF
170  IF( myrow.EQ.icurrow )
171  $ ioffa = ioffa + jb
172  IF( mycol.EQ.icurcol )
173  $ ioffa = ioffa + jb*lda
174  icurrow = mod( icurrow+1, nprow )
175  icurcol = mod( icurcol+1, npcol )
176  30 CONTINUE
177 *
178  CALL dgsum2d( desca( ctxt_ ), 'All', ' ', 1, 1, trace, 1, -1,
179  $ mycol )
180 *
181  pdlatra = trace
182 *
183  RETURN
184 *
185 * End of PDLATRA
186 *
187  END
pdlatra
double precision function pdlatra(N, A, IA, JA, DESCA)
Definition: pdlatra.f:2
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
min
#define min(A, B)
Definition: pcgemr.c:181
iceil
integer function iceil(INUM, IDENOM)
Definition: iceil.f:2