3
4
5
6
7
8
9 CHARACTER DIRECT, SIDE, STOREV, TRANS
10 INTEGER IC, IV, JC, JV, K, L, M, N
11
12
13 INTEGER DESCC( * ), DESCV( * )
14 DOUBLE PRECISION C( * ), T( * ), V( * ), WORK( * )
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
222 $ LLD_, MB_, M_, NB_, N_, RSRC_
223 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
224 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
225 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
226 DOUBLE PRECISION ONE, ZERO
227 parameter( one = 1.0d+0, zero = 0.0d+0 )
228
229
230 LOGICAL LEFT
231 CHARACTER COLBTOP, TRANST
232 INTEGER ICCOL1, ICCOL2, ICOFFC1, ICOFFC2, ICOFFV,
233 $ ICROW1, ICROW2, ICTXT, IIBEG, IIC1, IIC2,
234 $ IIEND, IINXT, IIV, ILEFT, INFO, IOFFC2, IOFFV,
235 $ IPT, IPV, IPW, IROFFC1, IROFFC2, ITOP, IVCOL,
236 $ IVROW, JJBEG, JJEND, JJNXT, JJC1, JJC2, JJV,
237 $ LDC, LDV, LV, LW, MBC, MBV, MPC1, MPC2, MPC20,
238 $ MQV, MQV0, MYCOL, MYDIST, MYROW, NBC, NBV,
239 $ NPCOL, NPROW, NQC1, NQC2, NQCALL, NQV
240
241
242 EXTERNAL blacs_abort, blacs_gridinfo, dgebr2d,
243 $ dgebs2d,dgemm, dgsum2d, dlamov,
244 $ dlaset, dtrbr2d, dtrbs2d, dtrmm,
246
247
249
250
251 LOGICAL LSAME
252 INTEGER ICEIL, NUMROC
254
255
256
257
258
259 IF( m.LE.0 .OR. n.LE.0 .OR. k.LE.0 )
260 $ RETURN
261
262
263
264 ictxt = descc( ctxt_ )
265 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
266
267
268
269 info = 0
270 IF( .NOT.
lsame( direct,
'B' ) )
THEN
271 info = -3
272 ELSE IF( .NOT.
lsame( storev,
'R' ) )
THEN
273 info = -4
274 END IF
275 IF( info.NE.0 ) THEN
276 CALL pxerbla( ictxt,
'PDLARZB', -info )
277 CALL blacs_abort( ictxt, 1 )
278 RETURN
279 END IF
280
281 left =
lsame( side,
'L' )
282 IF(
lsame( trans,
'N' ) )
THEN
283 transt = 'T'
284 ELSE
285 transt = 'N'
286 END IF
287
288 CALL infog2l( iv, jv, descv, nprow, npcol, myrow, mycol, iiv, jjv,
289 $ ivrow, ivcol )
290 mbv = descv( mb_ )
291 nbv = descv( nb_ )
292 icoffv = mod( jv-1, nbv )
293 nqv =
numroc( l+icoffv, nbv, mycol, ivcol, npcol )
294 IF( mycol.EQ.ivcol )
295 $ nqv = nqv - icoffv
296 ldv = descv( lld_ )
297 iiv =
min( iiv, ldv )
298 jjv =
min( jjv,
max( 1,
numroc( descv( n_ ), nbv, mycol,
299 $ descv( csrc_ ), npcol ) ) )
300 ioffv = iiv + ( jjv-1 ) * ldv
301 mbc = descc( mb_ )
302 nbc = descc( nb_ )
303 nqcall =
numroc( descc( n_ ), nbc, mycol, descc( csrc_ ), npcol )
304 CALL infog2l( ic, jc, descc, nprow, npcol, myrow, mycol, iic1,
305 $ jjc1, icrow1, iccol1 )
306 ldc = descc( lld_ )
307 iic1 =
min( iic1, ldc )
308 jjc1 =
min( jjc1,
max( 1, nqcall ) )
309
310 IF( left ) THEN
311 iroffc1 = mod( ic-1, mbc )
312 mpc1 =
numroc( k+iroffc1, mbc, myrow, icrow1, nprow )
313 IF( myrow.EQ.icrow1 )
314 $ mpc1 = mpc1 - iroffc1
315 icoffc1 = mod( jc-1, nbc )
316 nqc1 =
numroc( n+icoffc1, nbc, mycol, iccol1, npcol )
317 IF( mycol.EQ.iccol1 )
318 $ nqc1 = nqc1 - icoffc1
319 CALL infog2l( ic+m-l, jc, descc, nprow, npcol, myrow, mycol,
320 $ iic2, jjc2, icrow2, iccol2 )
321 iroffc2 = mod( ic+m-l-1, mbc )
322 mpc2 =
numroc( l+iroffc2, mbc, myrow, icrow2, nprow )
323 IF( myrow.EQ.icrow2 )
324 $ mpc2 = mpc2 - iroffc2
325 icoffc2 = icoffc1
326 nqc2 = nqc1
327 ELSE
328 iroffc1 = mod( ic-1, mbc )
329 mpc1 =
numroc( m+iroffc1, mbc, myrow, icrow1, nprow )
330 IF( myrow.EQ.icrow1 )
331 $ mpc1 = mpc1 - iroffc1
332 icoffc1 = mod( jc-1, nbc )
333 nqc1 =
numroc( k+icoffc1, nbc, mycol, iccol1, npcol )
334 IF( mycol.EQ.iccol1 )
335 $ nqc1 = nqc1 - icoffc1
336 CALL infog2l( ic, jc+n-l, descc, nprow, npcol, myrow, mycol,
337 $ iic2, jjc2, icrow2, iccol2 )
338 iroffc2 = iroffc1
339 mpc2 = mpc1
340 icoffc2 = mod( jc+n-l-1, nbc )
341 nqc2 =
numroc( l+icoffc2, nbc, mycol, iccol2, npcol )
342 IF( mycol.EQ.iccol2 )
343 $ nqc2 = nqc2 - icoffc2
344 END IF
345 iic2 =
min( iic2, ldc )
346 jjc2 =
min( jjc2, nqcall )
347 ioffc2 = iic2 + ( jjc2-1 ) * ldc
348
349 IF(
lsame( side,
'L' ) )
THEN
350
351
352
353
354
355
356 mqv0 =
numroc( m+icoffv, nbv, mycol, ivcol, npcol )
357 IF( mycol.EQ.ivcol ) THEN
358 mqv = mqv0 - icoffv
359 ELSE
360 mqv = mqv0
361 END IF
362 IF( myrow.EQ.icrow2 ) THEN
363 mpc20 = mpc2 + iroffc2
364 ELSE
365 mpc20 = mpc2
366 END IF
367
368
369
370
371
372
373 ipv = 1
374 ipw = ipv + mpc20 * k
375 ipt = ipw + k * mqv0
378
379 IF( myrow.EQ.ivrow ) THEN
380 IF( mycol.EQ.ivcol ) THEN
381 CALL dlamov( 'All', k, mqv, v( ioffv ), ldv,
382 $ work( ipw+icoffv*lw ), lw )
383 ELSE
384 CALL dlamov( 'All', k, mqv, v( ioffv ), ldv,
385 $ work( ipw ), lw )
386 END IF
387 END IF
388
389
390
391 CALL pbdtran( ictxt,
'Rowwise',
'Transpose', k, m+icoffv,
392 $ descv( nb_ ), work( ipw ), lw, zero,
393 $ work( ipv ), lv, ivrow, ivcol, icrow2, -1,
394 $ work( ipt ) )
395
396
397
398 IF( myrow.EQ.icrow2 )
399 $ ipv = ipv + iroffc2
400
401
402
403
405
406 IF( mpc2.GT.0 ) THEN
407 CALL dgemm( 'Transpose', 'No transpose', nqc2, k, mpc2,
408 $ one, c( ioffc2 ), ldc, work( ipv ), lv, zero,
409 $ work( ipw ), lw )
410 ELSE
411 CALL dlaset( 'All', nqc2, k, zero, zero, work( ipw ), lw )
412 END IF
413
414
415
416 IF( mpc1.GT.0 ) THEN
417 mydist = mod( myrow-icrow1+nprow, nprow )
418 itop =
max( 0, mydist * mbc - iroffc1 )
419 iibeg = iic1
420 iiend = iic1 + mpc1 - 1
421 iinxt =
min(
iceil( iibeg, mbc ) * mbc, iiend )
422
423 10 CONTINUE
424 IF( iibeg.LE.iinxt ) THEN
425 CALL pbdmatadd( ictxt,
'Transpose', nqc2, iinxt-iibeg+1,
426 $ one, c( iibeg+(jjc1-1)*ldc ), ldc, one,
427 $ work( ipw+itop ), lw )
428 mydist = mydist + nprow
429 itop = mydist * mbc - iroffc1
430 iibeg = iinxt +1
431 iinxt =
min( iinxt+mbc, iiend )
432 GO TO 10
433 END IF
434 END IF
435
436 CALL dgsum2d( ictxt, 'Columnwise', ' ', nqc2, k, work( ipw ),
437 $ lw, ivrow, mycol )
438
439
440
441 IF( myrow.EQ.ivrow ) THEN
442 IF( mycol.EQ.ivcol ) THEN
443
444
445
446 CALL dtrbs2d( ictxt, 'Rowwise', ' ', 'Lower', 'Non unit',
447 $ k, k, t, mbv )
448 ELSE
449 CALL dtrbr2d( ictxt, 'Rowwise', ' ', 'Lower', 'Non unit',
450 $ k, k, t, mbv, myrow, ivcol )
451 END IF
452 CALL dtrmm( 'Right', 'Lower', transt, 'Non unit', nqc2, k,
453 $ one, t, mbv, work( ipw ), lw )
454
455 CALL dgebs2d( ictxt, 'Columnwise', ' ', nqc2, k,
456 $ work( ipw ), lw )
457 ELSE
458 CALL dgebr2d( ictxt, 'Columnwise', ' ', nqc2, k,
459 $ work( ipw ), lw, ivrow, mycol )
460 END IF
461
462
463
464 IF( mpc1.GT.0 ) THEN
465 mydist = mod( myrow-icrow1+nprow, nprow )
466 itop =
max( 0, mydist * mbc - iroffc1 )
467 iibeg = iic1
468 iiend = iic1 + mpc1 - 1
469 iinxt =
min(
iceil( iibeg, mbc ) * mbc, iiend )
470
471 20 CONTINUE
472 IF( iibeg.LE.iinxt ) THEN
473 CALL pbdmatadd( ictxt,
'Transpose', iinxt-iibeg+1, nqc2,
474 $ -one, work( ipw+itop ), lw, one,
475 $ c( iibeg+(jjc1-1)*ldc ), ldc )
476 mydist = mydist + nprow
477 itop = mydist * mbc - iroffc1
478 iibeg = iinxt +1
479 iinxt =
min( iinxt+mbc, iiend )
480 GO TO 20
481 END IF
482 END IF
483
484
485
486
487
488 CALL dgemm( 'No transpose', 'Transpose', mpc2, nqc2, k, -one,
489 $ work( ipv ), lv, work( ipw ), lw, one,
490 $ c( ioffc2 ), ldc )
491
492 ELSE
493
494
495
496
497
498
499
500 ipv = 1
501 ipw = ipv + k * nqc2
504
505
506
507 CALL pb_topget( ictxt, 'Broadcast', 'Columnwise', colbtop )
508 IF( myrow.EQ.ivrow ) THEN
509 CALL dgebs2d( ictxt, 'Columnwise', colbtop, k, nqc2,
510 $ v( ioffv ), ldv )
511 IF( mycol.EQ.ivcol )
512 $ CALL dtrbs2d( ictxt, 'Columnwise', colbtop, 'Lower',
513 $ 'Non unit', k, k, t, mbv )
514 CALL dlamov( 'All', k, nqc2, v( ioffv ), ldv, work( ipv ),
515 $ lv )
516 ELSE
517 CALL dgebr2d( ictxt, 'Columnwise', colbtop, k, nqc2,
518 $ work( ipv ), lv, ivrow, mycol )
519 IF( mycol.EQ.ivcol )
520 $ CALL dtrbr2d( ictxt, 'Columnwise', colbtop, 'Lower',
521 $ 'Non unit', k, k, t, mbv, ivrow, mycol )
522 END IF
523
524
525
526
527 IF( nqc2.GT.0 ) THEN
528 CALL dgemm( 'No Transpose', 'Transpose', mpc2, k, nqc2,
529 $ one, c( ioffc2 ), ldc, work( ipv ), lv, zero,
530 $ work( ipw ), lw )
531 ELSE
532 CALL dlaset( 'All', mpc2, k, zero, zero, work( ipw ), lw )
533 END IF
534
535
536
537 IF( nqc1.GT.0 ) THEN
538 mydist = mod( mycol-iccol1+npcol, npcol )
539 ileft =
max( 0, mydist * nbc - icoffc1 )
540 jjbeg = jjc1
541 jjend = jjc1 + nqc1 - 1
542 jjnxt =
min(
iceil( jjbeg, nbc ) * nbc, jjend )
543
544 30 CONTINUE
545 IF( jjbeg.LE.jjnxt ) THEN
546 CALL pbdmatadd( ictxt,
'No transpose', mpc2,
547 $ jjnxt-jjbeg+1, one,
548 $ c( iic1+(jjbeg-1)*ldc ), ldc, one,
549 $ work( ipw+ileft*lw ), lw )
550 mydist = mydist + npcol
551 ileft = mydist * nbc - icoffc1
552 jjbeg = jjnxt +1
553 jjnxt =
min( jjnxt+nbc, jjend )
554 GO TO 30
555 END IF
556 END IF
557
558 CALL dgsum2d( ictxt, 'Rowwise', ' ', mpc2, k, work( ipw ),
559 $ lw, myrow, ivcol )
560
561
562
563 IF( mycol.EQ.ivcol ) THEN
564 CALL dtrmm( 'Right', 'Lower', trans, 'Non unit', mpc2, k,
565 $ one, t, mbv, work( ipw ), lw )
566 CALL dgebs2d( ictxt, 'Rowwise', ' ', mpc2, k, work( ipw ),
567 $ lw )
568 ELSE
569 CALL dgebr2d( ictxt, 'Rowwise', ' ', mpc2, k, work( ipw ),
570 $ lw, myrow, ivcol )
571 END IF
572
573
574
575 IF( nqc1.GT.0 ) THEN
576 mydist = mod( mycol-iccol1+npcol, npcol )
577 ileft =
max( 0, mydist * nbc - icoffc1 )
578 jjbeg = jjc1
579 jjend = jjc1 + nqc1 - 1
580 jjnxt =
min(
iceil( jjbeg, nbc ) * nbc, jjend )
581
582 40 CONTINUE
583 IF( jjbeg.LE.jjnxt ) THEN
584 CALL pbdmatadd( ictxt,
'No transpose', mpc2,
585 $ jjnxt-jjbeg+1, -one,
586 $ work( ipw+ileft*lw ), lw, one,
587 $ c( iic1+(jjbeg-1)*ldc ), ldc )
588 mydist = mydist + npcol
589 ileft = mydist * nbc - icoffc1
590 jjbeg = jjnxt +1
591 jjnxt =
min( jjnxt+nbc, jjend )
592 GO TO 40
593 END IF
594 END IF
595
596
597
598
599
600 IF( ioffc2.GT.0 )
601 $ CALL dgemm( 'No transpose', 'No transpose', mpc2, nqc2, k,
602 $ -one, work( ipw ), lw, work( ipv ), lv, one,
603 $ c( ioffc2 ), ldc )
604
605 END IF
606
607 RETURN
608
609
610
integer function iceil(inum, idenom)
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
integer function numroc(n, nb, iproc, isrcproc, nprocs)
subroutine pbdmatadd(icontxt, mode, m, n, alpha, a, lda, beta, b, ldb)
subroutine pbdtran(icontxt, adist, trans, m, n, nb, a, lda, beta, c, ldc, iarow, iacol, icrow, iccol, work)
subroutine pxerbla(ictxt, srname, info)