3
4
5
6
7
8
9
10 CHARACTER DIRECT, SIDE, STOREV, TRANS
11 INTEGER IC, IV, JC, JV, K, L, M, N
12
13
14 INTEGER DESCC( * ), DESCV( * )
15 REAL C( * ), T( * ), V( * ), WORK( * )
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
223 $ LLD_, MB_, M_, NB_, N_, RSRC_
224 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
225 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
226 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
227 REAL ONE, ZERO
228 parameter( one = 1.0e+0, zero = 0.0e+0 )
229
230
231 LOGICAL LEFT
232 CHARACTER COLBTOP, TRANST
233 INTEGER ICCOL1, ICCOL2, ICOFFC1, ICOFFC2, ICOFFV,
234 $ ICROW1, ICROW2, ICTXT, IIBEG, IIC1, IIC2,
235 $ IIEND, IINXT, IIV, ILEFT, INFO, IOFFC2, IOFFV,
236 $ IPT, IPV, IPW, IROFFC1, IROFFC2, ITOP, IVCOL,
237 $ IVROW, JJBEG, JJEND, JJNXT, JJC1, JJC2, JJV,
238 $ LDC, LDV, LV, LW, MBC, MBV, MPC1, MPC2, MPC20,
239 $ MQV, MQV0, MYCOL, MYDIST, MYROW, NBC, NBV,
240 $ NPCOL, NPROW, NQC1, NQC2, NQCALL, NQV
241
242
243 EXTERNAL blacs_abort, blacs_gridinfo,
infog2l,
245 $ sgebr2d, sgebs2d, sgemm,
246 $ sgsum2d, slamov, slaset, strbr2d,
247 $ strbs2d, strmm
248
249
251
252
253 LOGICAL LSAME
254 INTEGER ICEIL, NUMROC
256
257
258
259
260
261 IF( m.LE.0 .OR. n.LE.0 .OR. k.LE.0 )
262 $ RETURN
263
264
265
266 ictxt = descc( ctxt_ )
267 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
268
269
270
271 info = 0
272 IF( .NOT.
lsame( direct,
'B' ) )
THEN
273 info = -3
274 ELSE IF( .NOT.
lsame( storev,
'R' ) )
THEN
275 info = -4
276 END IF
277 IF( info.NE.0 ) THEN
278 CALL pxerbla( ictxt,
'PSLARZB', -info )
279 CALL blacs_abort( ictxt, 1 )
280 RETURN
281 END IF
282
283 left =
lsame( side,
'L' )
284 IF(
lsame( trans,
'N' ) )
THEN
285 transt = 'T'
286 ELSE
287 transt = 'N'
288 END IF
289
290 CALL infog2l( iv, jv, descv, nprow, npcol, myrow, mycol, iiv, jjv,
291 $ ivrow, ivcol )
292 mbv = descv( mb_ )
293 nbv = descv( nb_ )
294 icoffv = mod( jv-1, nbv )
295 nqv =
numroc( l+icoffv, nbv, mycol, ivcol, npcol )
296 IF( mycol.EQ.ivcol )
297 $ nqv = nqv - icoffv
298 ldv = descv( lld_ )
299 iiv =
min( iiv, ldv )
300 jjv =
min( jjv,
max( 1,
numroc( descv( n_ ), nbv, mycol,
301 $ descv( csrc_ ), npcol ) ) )
302 ioffv = iiv + ( jjv-1 ) * ldv
303 mbc = descc( mb_ )
304 nbc = descc( nb_ )
305 nqcall =
numroc( descc( n_ ), nbc, mycol, descc( csrc_ ), npcol )
306 CALL infog2l( ic, jc, descc, nprow, npcol, myrow, mycol, iic1,
307 $ jjc1, icrow1, iccol1 )
308 ldc = descc( lld_ )
309 iic1 =
min( iic1, ldc )
310 jjc1 =
min( jjc1,
max( 1, nqcall ) )
311
312 IF( left ) THEN
313 iroffc1 = mod( ic-1, mbc )
314 mpc1 =
numroc( k+iroffc1, mbc, myrow, icrow1, nprow )
315 IF( myrow.EQ.icrow1 )
316 $ mpc1 = mpc1 - iroffc1
317 icoffc1 = mod( jc-1, nbc )
318 nqc1 =
numroc( n+icoffc1, nbc, mycol, iccol1, npcol )
319 IF( mycol.EQ.iccol1 )
320 $ nqc1 = nqc1 - icoffc1
321 CALL infog2l( ic+m-l, jc, descc, nprow, npcol, myrow, mycol,
322 $ iic2, jjc2, icrow2, iccol2 )
323 iroffc2 = mod( ic+m-l-1, mbc )
324 mpc2 =
numroc( l+iroffc2, mbc, myrow, icrow2, nprow )
325 IF( myrow.EQ.icrow2 )
326 $ mpc2 = mpc2 - iroffc2
327 icoffc2 = icoffc1
328 nqc2 = nqc1
329 ELSE
330 iroffc1 = mod( ic-1, mbc )
331 mpc1 =
numroc( m+iroffc1, mbc, myrow, icrow1, nprow )
332 IF( myrow.EQ.icrow1 )
333 $ mpc1 = mpc1 - iroffc1
334 icoffc1 = mod( jc-1, nbc )
335 nqc1 =
numroc( k+icoffc1, nbc, mycol, iccol1, npcol )
336 IF( mycol.EQ.iccol1 )
337 $ nqc1 = nqc1 - icoffc1
338 CALL infog2l( ic, jc+n-l, descc, nprow, npcol, myrow, mycol,
339 $ iic2, jjc2, icrow2, iccol2 )
340 iroffc2 = iroffc1
341 mpc2 = mpc1
342 icoffc2 = mod( jc+n-l-1, nbc )
343 nqc2 =
numroc( l+icoffc2, nbc, mycol, iccol2, npcol )
344 IF( mycol.EQ.iccol2 )
345 $ nqc2 = nqc2 - icoffc2
346 END IF
347 iic2 =
min( iic2, ldc )
348 jjc2 =
min( jjc2, nqcall )
349 ioffc2 = iic2 + ( jjc2-1 ) * ldc
350
351 IF(
lsame( side,
'L' ) )
THEN
352
353
354
355
356
357
358 mqv0 =
numroc( m+icoffv, nbv, mycol, ivcol, npcol )
359 IF( mycol.EQ.ivcol ) THEN
360 mqv = mqv0 - icoffv
361 ELSE
362 mqv = mqv0
363 END IF
364 IF( myrow.EQ.icrow2 ) THEN
365 mpc20 = mpc2 + iroffc2
366 ELSE
367 mpc20 = mpc2
368 END IF
369
370
371
372
373
374
375 ipv = 1
376 ipw = ipv + mpc20 * k
377 ipt = ipw + k * mqv0
380
381 IF( myrow.EQ.ivrow ) THEN
382 IF( mycol.EQ.ivcol ) THEN
383 CALL slamov( 'All', k, mqv, v( ioffv ), ldv,
384 $ work( ipw+icoffv*lw ), lw )
385 ELSE
386 CALL slamov( 'All', k, mqv, v( ioffv ), ldv,
387 $ work( ipw ), lw )
388 END IF
389 END IF
390
391
392
393 CALL pbstran( ictxt,
'Rowwise',
'Transpose', k, m+icoffv,
394 $ descv( nb_ ), work( ipw ), lw, zero,
395 $ work( ipv ), lv, ivrow, ivcol, icrow2, -1,
396 $ work( ipt ) )
397
398
399
400 IF( myrow.EQ.icrow2 )
401 $ ipv = ipv + iroffc2
402
403
404
405
407
408 IF( mpc2.GT.0 ) THEN
409 CALL sgemm( 'Transpose', 'No transpose', nqc2, k, mpc2,
410 $ one, c( ioffc2 ), ldc, work( ipv ), lv, zero,
411 $ work( ipw ), lw )
412 ELSE
413 CALL slaset( 'All', nqc2, k, zero, zero, work( ipw ), lw )
414 END IF
415
416
417
418 IF( mpc1.GT.0 ) THEN
419 mydist = mod( myrow-icrow1+nprow, nprow )
420 itop =
max( 0, mydist * mbc - iroffc1 )
421 iibeg = iic1
422 iiend = iic1 + mpc1 - 1
423 iinxt =
min(
iceil( iibeg, mbc ) * mbc, iiend )
424
425 10 CONTINUE
426 IF( iibeg.LE.iinxt ) THEN
427 CALL pbsmatadd( ictxt,
'Transpose', nqc2, iinxt-iibeg+1,
428 $ one, c( iibeg+(jjc1-1)*ldc ), ldc, one,
429 $ work( ipw+itop ), lw )
430 mydist = mydist + nprow
431 itop = mydist * mbc - iroffc1
432 iibeg = iinxt +1
433 iinxt =
min( iinxt+mbc, iiend )
434 GO TO 10
435 END IF
436 END IF
437
438 CALL sgsum2d( ictxt, 'Columnwise', ' ', nqc2, k, work( ipw ),
439 $ lw, ivrow, mycol )
440
441
442
443 IF( myrow.EQ.ivrow ) THEN
444 IF( mycol.EQ.ivcol ) THEN
445
446
447
448 CALL strbs2d( ictxt, 'Rowwise', ' ', 'Lower', 'Non unit',
449 $ k, k, t, mbv )
450 ELSE
451 CALL strbr2d( ictxt, 'Rowwise', ' ', 'Lower', 'Non unit',
452 $ k, k, t, mbv, myrow, ivcol )
453 END IF
454 CALL strmm( 'Right', 'Lower', transt, 'Non unit', nqc2, k,
455 $ one, t, mbv, work( ipw ), lw )
456
457 CALL sgebs2d( ictxt, 'Columnwise', ' ', nqc2, k,
458 $ work( ipw ), lw )
459 ELSE
460 CALL sgebr2d( ictxt, 'Columnwise', ' ', nqc2, k,
461 $ work( ipw ), lw, ivrow, mycol )
462 END IF
463
464
465
466 IF( mpc1.GT.0 ) THEN
467 mydist = mod( myrow-icrow1+nprow, nprow )
468 itop =
max( 0, mydist * mbc - iroffc1 )
469 iibeg = iic1
470 iiend = iic1 + mpc1 - 1
471 iinxt =
min(
iceil( iibeg, mbc ) * mbc, iiend )
472
473 20 CONTINUE
474 IF( iibeg.LE.iinxt ) THEN
475 CALL pbsmatadd( ictxt,
'Transpose', iinxt-iibeg+1, nqc2,
476 $ -one, work( ipw+itop ), lw, one,
477 $ c( iibeg+(jjc1-1)*ldc ), ldc )
478 mydist = mydist + nprow
479 itop = mydist * mbc - iroffc1
480 iibeg = iinxt +1
481 iinxt =
min( iinxt+mbc, iiend )
482 GO TO 20
483 END IF
484 END IF
485
486
487
488
489
490 CALL sgemm( 'No transpose', 'Transpose', mpc2, nqc2, k, -one,
491 $ work( ipv ), lv, work( ipw ), lw, one,
492 $ c( ioffc2 ), ldc )
493
494 ELSE
495
496
497
498
499
500
501
502 ipv = 1
503 ipw = ipv + k * nqc2
506
507
508
509 CALL pb_topget( ictxt, 'Broadcast', 'Columnwise', colbtop )
510 IF( myrow.EQ.ivrow ) THEN
511 CALL sgebs2d( ictxt, 'Columnwise', colbtop, k, nqc2,
512 $ v( ioffv ), ldv )
513 IF( mycol.EQ.ivcol )
514 $ CALL strbs2d( ictxt, 'Columnwise', colbtop, 'Lower',
515 $ 'Non unit', k, k, t, mbv )
516 CALL slamov( 'All', k, nqc2, v( ioffv ), ldv, work( ipv ),
517 $ lv )
518 ELSE
519 CALL sgebr2d( ictxt, 'Columnwise', colbtop, k, nqc2,
520 $ work( ipv ), lv, ivrow, mycol )
521 IF( mycol.EQ.ivcol )
522 $ CALL strbr2d( ictxt, 'Columnwise', colbtop, 'Lower',
523 $ 'Non unit', k, k, t, mbv, ivrow, mycol )
524 END IF
525
526
527
528
529 IF( nqc2.GT.0 ) THEN
530 CALL sgemm( 'No Transpose', 'Transpose', mpc2, k, nqc2,
531 $ one, c( ioffc2 ), ldc, work( ipv ), lv, zero,
532 $ work( ipw ), lw )
533 ELSE
534 CALL slaset( 'All', mpc2, k, zero, zero, work( ipw ), lw )
535 END IF
536
537
538
539 IF( nqc1.GT.0 ) THEN
540 mydist = mod( mycol-iccol1+npcol, npcol )
541 ileft =
max( 0, mydist * nbc - icoffc1 )
542 jjbeg = jjc1
543 jjend = jjc1 + nqc1 - 1
544 jjnxt =
min(
iceil( jjbeg, nbc ) * nbc, jjend )
545
546 30 CONTINUE
547 IF( jjbeg.LE.jjnxt ) THEN
548 CALL pbsmatadd( ictxt,
'No transpose', mpc2,
549 $ jjnxt-jjbeg+1, one,
550 $ c( iic1+(jjbeg-1)*ldc ), ldc, one,
551 $ work( ipw+ileft*lw ), lw )
552 mydist = mydist + npcol
553 ileft = mydist * nbc - icoffc1
554 jjbeg = jjnxt +1
555 jjnxt =
min( jjnxt+nbc, jjend )
556 GO TO 30
557 END IF
558 END IF
559
560 CALL sgsum2d( ictxt, 'Rowwise', ' ', mpc2, k, work( ipw ),
561 $ lw, myrow, ivcol )
562
563
564
565 IF( mycol.EQ.ivcol ) THEN
566 CALL strmm( 'Right', 'Lower', trans, 'Non unit', mpc2, k,
567 $ one, t, mbv, work( ipw ), lw )
568 CALL sgebs2d( ictxt, 'Rowwise', ' ', mpc2, k, work( ipw ),
569 $ lw )
570 ELSE
571 CALL sgebr2d( ictxt, 'Rowwise', ' ', mpc2, k, work( ipw ),
572 $ lw, myrow, ivcol )
573 END IF
574
575
576
577 IF( nqc1.GT.0 ) THEN
578 mydist = mod( mycol-iccol1+npcol, npcol )
579 ileft =
max( 0, mydist * nbc - icoffc1 )
580 jjbeg = jjc1
581 jjend = jjc1 + nqc1 - 1
582 jjnxt =
min(
iceil( jjbeg, nbc ) * nbc, jjend )
583
584 40 CONTINUE
585 IF( jjbeg.LE.jjnxt ) THEN
586 CALL pbsmatadd( ictxt,
'No transpose', mpc2,
587 $ jjnxt-jjbeg+1, -one,
588 $ work( ipw+ileft*lw ), lw, one,
589 $ c( iic1+(jjbeg-1)*ldc ), ldc )
590 mydist = mydist + npcol
591 ileft = mydist * nbc - icoffc1
592 jjbeg = jjnxt +1
593 jjnxt =
min( jjnxt+nbc, jjend )
594 GO TO 40
595 END IF
596 END IF
597
598
599
600
601
602 IF( ioffc2.GT.0 )
603 $ CALL sgemm( 'No transpose', 'No transpose', mpc2, nqc2, k,
604 $ -one, work( ipw ), lw, work( ipv ), lv, one,
605 $ c( ioffc2 ), ldc )
606
607 END IF
608
609 RETURN
610
611
612
integer function iceil(inum, idenom)
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
integer function numroc(n, nb, iproc, isrcproc, nprocs)
subroutine pbsmatadd(icontxt, mode, m, n, alpha, a, lda, beta, b, ldb)
subroutine pbstran(icontxt, adist, trans, m, n, nb, a, lda, beta, c, ldc, iarow, iacol, icrow, iccol, work)
subroutine pxerbla(ictxt, srname, info)