2
3
4
5
6
7
8
9 INTEGER IA, JA, M, N
10
11
12 INTEGER DESCA( * ), IPIV( * )
13 REAL A( * ), WORK( * )
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
145 $ LLD_, MB_, M_, NB_, N_, RSRC_
146 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
147 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
148 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
149 REAL ONE, ZERO
150 parameter( one = 1.0e+0, zero = 0.0e+0 )
151
152
153 CHARACTER COLBTOP, ROWBTOP
154 INTEGER IACOL, IAROW, ICTXT, IL, IPL, IPU, IROFF, J,
155 $ JB, JL, JN, MN, MP, MYCOL, MYROW, NPCOL, NPROW
156
157 INTEGER DESCIP( DLEN_ ), DESCL( DLEN_ ),
158 $ DESCU( DLEN_ ), IDUM( 1 )
159
160
163
164
165 INTEGER ICEIL, INDXG2P, NUMROC
167
168
170
171
172
173
174
175 ictxt = desca( ctxt_ )
176 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
177
178 iroff = mod( ia-1, desca( mb_ ) )
179 iarow =
indxg2p( ia, desca( mb_ ), myrow, desca( rsrc_ ), nprow )
180 mp =
numroc( m+iroff, desca( mb_ ), myrow, iarow, nprow )
181 ipl = 1
182 ipu = ipl + mp * desca( nb_ )
183 CALL pb_topget( ictxt, 'Broadcast', 'Rowwise', rowbtop )
184 CALL pb_topget( ictxt, 'Broadcast', 'Columnwise', colbtop )
185 CALL pb_topset( ictxt, 'Broadcast', 'Rowwise', 'S-ring' )
186 CALL pb_topset( ictxt, 'Broadcast', 'Columnwise', ' ' )
187
188
189
191 il =
max( ( ( ia+mn-2 ) / desca( mb_ ) ) * desca( mb_ ) + 1, ia )
192 jl =
max( ( ( ja+mn-2 ) / desca( nb_ ) ) * desca( nb_ ) + 1, ja )
193 jn =
min(
iceil( ja, desca( nb_ ) )*desca( nb_ ), ja+mn-1 )
194 iarow =
indxg2p( il, desca( mb_ ), myrow, desca( rsrc_ ), nprow )
195 iacol =
indxg2p( jl, desca( nb_ ), mycol, desca( csrc_ ), npcol )
196
197 CALL descset( descl, ia+m-il, desca( nb_ ), desca( mb_ ),
198 $ desca( nb_ ), iarow, iacol, ictxt,
max( 1, mp ) )
199
200 CALL descset( descu, desca( mb_ ), ja+n-jl, desca( mb_ ),
201 $ desca( nb_ ), iarow, iacol, ictxt, desca( mb_ ) )
202
203 CALL descset( descip, desca( m_ ) + desca( mb_ )*nprow, 1,
204 $ desca( mb_ ), 1, desca( rsrc_ ), mycol, ictxt,
205 $
numroc( desca( m_ ), desca( mb_ ), myrow,
206 $ desca( rsrc_ ), nprow ) + desca( mb_ ) )
207
208
209 DO 10 j = jl, jn+1, -desca( nb_ )
210
211 jb =
min( ja+mn-j, desca( nb_ ) )
212
213
214
215 CALL pslacpy(
'Lower', m-il+ia, jb, a, il, j, desca,
216 $ work( ipl ), 1, 1, descl )
217 CALL pslaset(
'Upper', m-il+ia, jb, zero, one, work( ipl ),
218 $ 1, 1, descl )
219
220
221
222 CALL pslacpy(
'Upper', jb, ja+n-j, a, il, j, desca,
223 $ work( ipu ), 1, 1, descu )
224 CALL pslaset(
'Lower', jb-1, ja+n-j, zero, zero,
225 $ work( ipu ), 2, 1, descu )
226
227
228
229 CALL pslaset(
'Lower', ia+m-il-1, jb, zero, zero, a, il+1, j,
230 $ desca )
231
232
233
234 CALL pslaset(
'Upper', jb, ja+n-j, zero, zero, a, il, j,
235 $ desca )
236
237
238
239 CALL psgemm( 'No transpose', 'No transpose', ia+m-il,
240 $ ja+n-j, jb, one, work( ipl ), 1, 1, descl,
241 $ work( ipu ), 1, 1, descu, one, a, il, j, desca )
242
243 il = il - desca( mb_ )
244 descl( m_ ) = descl( m_ ) + descl( mb_ )
245 descl( rsrc_ ) = mod( descl( rsrc_ ) + nprow - 1, nprow )
246 descl( csrc_ ) = mod( descl( csrc_ ) + npcol - 1, npcol )
247 descu( n_ ) = descu( n_ ) + descu( nb_ )
248 descu( rsrc_ ) = descl( rsrc_ )
249 descu( csrc_ ) = descl( csrc_ )
250
251 10 CONTINUE
252
253
254
255 jb =
min( jn-ja+1, desca( nb_ ) )
256
257
258
259 CALL pslacpy(
'Lower', m, jb, a, ia, ja, desca, work( ipl ),
260 $ 1, 1, descl )
261 CALL pslaset(
'Upper', m, jb, zero, one, work( ipl ), 1, 1,
262 $ descl )
263
264
265
266 CALL pslacpy(
'Upper', jb, n, a, ia, ja, desca, work( ipu ), 1,
267 $ 1, descu )
268 CALL pslaset(
'Lower', jb-1, n, zero, zero, work( ipu ), 2, 1,
269 $ descu )
270
271
272
273 CALL pslaset(
'Lower', m-1, jb, zero, zero, a, ia+1, ja, desca )
274
275
276
277 CALL pslaset(
'Upper', jb, n, zero, zero, a, ia, ja, desca )
278
279
280
281 CALL psgemm( 'No transpose', 'No transpose', m, n, jb, one,
282 $ work( ipl ), 1, 1, descl, work( ipu ), 1, 1,
283 $ descu, one, a, ia, ja, desca )
284
285
286
287 CALL pslapiv(
'Backward',
'Row',
'Col',
min( m, n ), n, a, ia, ja,
288 $ desca, ipiv, ia, 1, descip, idum )
289
290 CALL pb_topset( ictxt, 'Broadcast', 'Rowwise', rowbtop )
291 CALL pb_topset( ictxt, 'Broadcast', 'Columnwise', colbtop )
292
293 RETURN
294
295
296
subroutine descset(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld)
integer function iceil(inum, idenom)
integer function indxg2p(indxglob, nb, iproc, isrcproc, nprocs)
integer function numroc(n, nb, iproc, isrcproc, nprocs)
subroutine pslaset(uplo, m, n, alpha, beta, a, ia, ja, desca)
subroutine pslacpy(uplo, m, n, a, ia, ja, desca, b, ib, jb, descb)
subroutine pslapiv(direc, rowcol, pivroc, m, n, a, ia, ja, desca, ipiv, ip, jp, descip, iwork)