2
3
4
5
6
7
8
9 INTEGER IA, JA, M, N
10
11
12 INTEGER DESCA( * ), IPIV( * )
13 DOUBLE PRECISION A( * ), WORK( * )
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
145 $ LLD_, MB_, M_, NB_, N_, RSRC_
146 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
147 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
148 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
149 DOUBLE PRECISION ONE, ZERO
150 parameter( one = 1.0d+0, zero = 0.0d+0 )
151
152
153 CHARACTER COLBTOP, ROWBTOP
154 INTEGER IACOL, IAROW, ICTXT, IL, IPL, IPU, IROFF, J,
155 $ JB, JL, JN, MN, MP, MYCOL, MYROW, NPCOL, NPROW
156
157 INTEGER DESCIP( DLEN_ ), DESCL( DLEN_ ),
158 $ DESCU( DLEN_ ), IDUM( 1 )
159
160
163
164
165 INTEGER ICEIL, INDXG2P, NUMROC
167
168
170
171
172
173
174
175 ictxt = desca( ctxt_ )
176 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
177
178 iroff = mod( ia-1, desca( mb_ ) )
179 iarow =
indxg2p( ia, desca( mb_ ), myrow, desca( rsrc_ ), nprow )
180 mp =
numroc( m+iroff, desca( mb_ ), myrow, iarow, nprow )
181 ipl = 1
182 ipu = ipl + mp * desca( nb_ )
183 CALL pb_topget( ictxt, 'Broadcast', 'Rowwise', rowbtop )
184 CALL pb_topget( ictxt, 'Broadcast', 'Columnwise', colbtop )
185 CALL pb_topset( ictxt, 'Broadcast', 'Rowwise', 'S-ring' )
186 CALL pb_topset( ictxt, 'Broadcast', 'Columnwise', ' ' )
187
188
189
191 il =
max( ( ( ia+mn-2 ) / desca( mb_ ) ) * desca( mb_ ) + 1, ia )
192 jl =
max( ( ( ja+mn-2 ) / desca( nb_ ) ) * desca( nb_ ) + 1, ja )
193 jn =
min(
iceil( ja, desca( nb_ ) )*desca( nb_ ), ja+mn-1 )
194 iarow =
indxg2p( il, desca( mb_ ), myrow, desca( rsrc_ ), nprow )
195 iacol =
indxg2p( jl, desca( nb_ ), mycol, desca( csrc_ ), npcol )
196
197 CALL descset( descl, ia+m-il, desca( nb_ ), desca( mb_ ),
198 $ desca( nb_ ), iarow, iacol, ictxt,
max( 1, mp ) )
199
200 CALL descset( descu, desca( mb_ ), ja+n-jl, desca( mb_ ),
201 $ desca( nb_ ), iarow, iacol, ictxt, desca( mb_ ) )
202
203 CALL descset( descip, desca( m_ ) + desca( mb_ )*nprow, 1,
204 $ desca( mb_ ), 1, desca( rsrc_ ), mycol, ictxt,
205 $
numroc( desca( m_ ), desca( mb_ ), myrow,
206 $ desca( rsrc_ ), nprow ) + desca( mb_ ) )
207
208
209 DO 10 j = jl, jn+1, -desca( nb_ )
210
211 jb =
min( ja+mn-j, desca( nb_ ) )
212
213
214
215 CALL pdlacpy(
'Lower', m-il+ia, jb, a, il, j, desca,
216 $ work( ipl ), 1, 1, descl )
217 CALL pdlaset(
'Upper', m-il+ia, jb, zero, one, work( ipl ),
218 $ 1, 1, descl )
219
220
221
222 CALL pdlacpy(
'Upper', jb, ja+n-j, a, il, j, desca,
223 $ work( ipu ), 1, 1, descu )
224 CALL pdlaset(
'Lower', jb-1, ja+n-j, zero, zero,
225 $ work( ipu ), 2, 1, descu )
226
227
228
229 CALL pdlaset(
'Lower', ia+m-il-1, jb, zero, zero, a, il+1, j,
230 $ desca )
231
232
233
234 CALL pdlaset(
'Upper', jb, ja+n-j, zero, zero, a, il, j,
235 $ desca )
236
237
238
239 CALL pdgemm( 'No transpose', 'No transpose', ia+m-il,
240 $ ja+n-j, jb, one, work( ipl ), 1, 1, descl,
241 $ work( ipu ), 1, 1, descu, one, a, il, j, desca )
242
243 il = il - desca( mb_ )
244 descl( m_ ) = descl( m_ ) + descl( mb_ )
245 descl( rsrc_ ) = mod( descl( rsrc_ ) + nprow - 1, nprow )
246 descl( csrc_ ) = mod( descl( csrc_ ) + npcol - 1, npcol )
247 descu( n_ ) = descu( n_ ) + descu( nb_ )
248 descu( rsrc_ ) = descl( rsrc_ )
249 descu( csrc_ ) = descl( csrc_ )
250
251 10 CONTINUE
252
253
254
255 jb =
min( jn-ja+1, desca( nb_ ) )
256
257
258
259 CALL pdlacpy(
'Lower', m, jb, a, ia, ja, desca, work( ipl ),
260 $ 1, 1, descl )
261 CALL pdlaset(
'Upper', m, jb, zero, one, work( ipl ), 1, 1,
262 $ descl )
263
264
265
266 CALL pdlacpy(
'Upper', jb, n, a, ia, ja, desca, work( ipu ), 1,
267 $ 1, descu )
268 CALL pdlaset(
'Lower', jb-1, n, zero, zero, work( ipu ), 2, 1,
269 $ descu )
270
271
272
273 CALL pdlaset(
'Lower', m-1, jb, zero, zero, a, ia+1, ja, desca )
274
275
276
277 CALL pdlaset(
'Upper', jb, n, zero, zero, a, ia, ja, desca )
278
279
280
281 CALL pdgemm( 'No transpose', 'No transpose', m, n, jb, one,
282 $ work( ipl ), 1, 1, descl, work( ipu ), 1, 1,
283 $ descu, one, a, ia, ja, desca )
284
285
286
287 CALL pdlapiv(
'Backward',
'Row',
'Col',
min( m, n ), n, a, ia, ja,
288 $ desca, ipiv, ia, 1, descip, idum )
289
290 CALL pb_topset( ictxt, 'Broadcast', 'Rowwise', rowbtop )
291 CALL pb_topset( ictxt, 'Broadcast', 'Columnwise', colbtop )
292
293 RETURN
294
295
296
subroutine descset(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld)
integer function iceil(inum, idenom)
integer function indxg2p(indxglob, nb, iproc, isrcproc, nprocs)
integer function numroc(n, nb, iproc, isrcproc, nprocs)
subroutine pdlaset(uplo, m, n, alpha, beta, a, ia, ja, desca)
subroutine pdlacpy(uplo, m, n, a, ia, ja, desca, b, ib, jb, descb)
subroutine pdlapiv(direc, rowcol, pivroc, m, n, a, ia, ja, desca, ipiv, ip, jp, descip, iwork)