3
4
5
6
7
8
9 CHARACTER TRANS
10 INTEGER BWU, BWL, IB, INFO, JA, LAF, LWORK, N, NRHS
11
12
13 INTEGER DESCA( * ), DESCB( * ), IPIV(*)
14 COMPLEX A( * ), AF( * ), B( * ), WORK( * )
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372 REAL ONE, ZERO
373 parameter( one = 1.0e+0 )
374 parameter( zero = 0.0e+0 )
375 COMPLEX CONE, CZERO
376 parameter( cone = ( 1.0e+0, 0.0e+0 ) )
377 parameter( czero = ( 0.0e+0, 0.0e+0 ) )
378 INTEGER INT_ONE
379 parameter( int_one = 1 )
380 INTEGER DESCMULT, BIGNUM
381 parameter( descmult = 100, bignum = descmult*descmult )
382 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
383 $ LLD_, MB_, M_, NB_, N_, RSRC_
384 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
385 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
386 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
387
388
389 INTEGER APTR, BBPTR, BM, BMN, BN, BNN, BW, CSRC,
390 $ FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE,
391 $ IDUM2, IDUM3, J, JA_NEW, L, LBWL, LBWU, LDBB,
392 $ LDW, LLDA, LLDB, LM, LMJ, LN, LPTR, MYCOL,
393 $ MYROW, NB, NEICOL, NP, NPACT, NPCOL, NPROW,
394 $ NPSTR, NP_SAVE, ODD_SIZE, PART_OFFSET,
395 $ RECOVERY_VAL, RETURN_CODE, STORE_M_B,
396 $ STORE_N_A, WORK_SIZE_MIN, WPTR
397
398
399 INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ),
400 $ PARAM_CHECK( 17, 3 )
401
402
405
406
407 LOGICAL LSAME
408 INTEGER NUMROC
411
412
413 INTRINSIC ichar, mod
414
415
416
417
418
419
420 info = 0
421
422
423
424
425 desca_1xp( 1 ) = 501
426 descb_px1( 1 ) = 502
427
429
430 IF( return_code .NE. 0) THEN
431 info = -( 8*100 + 2 )
432 ENDIF
433
435
436 IF( return_code .NE. 0) THEN
437 info = -( 11*100 + 2 )
438 ENDIF
439
440
441
442
443 IF( desca_1xp( 2 ) .NE. descb_px1( 2 ) ) THEN
444 info = -( 11*100 + 2 )
445 ENDIF
446
447
448
449
450
451 IF( desca_1xp( 4 ) .NE. descb_px1( 4 ) ) THEN
452 info = -( 11*100 + 4 )
453 ENDIF
454
455
456
457 IF( desca_1xp( 5 ) .NE. descb_px1( 5 ) ) THEN
458 info = -( 11*100 + 5 )
459 ENDIF
460
461
462
463 ictxt = desca_1xp( 2 )
464 csrc = desca_1xp( 5 )
465 nb = desca_1xp( 4 )
466 llda = desca_1xp( 6 )
467 store_n_a = desca_1xp( 3 )
468 lldb = descb_px1( 6 )
469 store_m_b = descb_px1( 3 )
470
471
472
473
474 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
475 np = nprow * npcol
476
477
478
479 IF(
lsame( trans,
'N' ) )
THEN
480 idum2 = ichar( 'N' )
481 ELSE IF (
lsame( trans,
'C' ) )
THEN
482 idum2 = ichar( 'C' )
483 ELSE
484 info = -1
485 END IF
486
487 IF( lwork .LT. -1) THEN
488 info = -16
489 ELSE IF ( lwork .EQ. -1 ) THEN
490 idum3 = -1
491 ELSE
492 idum3 = 1
493 ENDIF
494
495 IF( n .LT. 0 ) THEN
496 info = -2
497 ENDIF
498
499 IF( n+ja-1 .GT. store_n_a ) THEN
500 info = -( 8*100 + 6 )
501 ENDIF
502
503 IF(( bwl .GT. n-1 ) .OR.
504 $ ( bwl .LT. 0 ) ) THEN
505 info = -3
506 ENDIF
507
508 IF(( bwu .GT. n-1 ) .OR.
509 $ ( bwu .LT. 0 ) ) THEN
510 info = -4
511 ENDIF
512
513 IF( llda .LT. (2*bwl+2*bwu+1) ) THEN
514 info = -( 8*100 + 6 )
515 ENDIF
516
517 IF( nb .LE. 0 ) THEN
518 info = -( 8*100 + 4 )
519 ENDIF
520
521 bw = bwu+bwl
522
523 IF( n+ib-1 .GT. store_m_b ) THEN
524 info = -( 11*100 + 3 )
525 ENDIF
526
527 IF( lldb .LT. nb ) THEN
528 info = -( 11*100 + 6 )
529 ENDIF
530
531 IF( nrhs .LT. 0 ) THEN
532 info = -5
533 ENDIF
534
535
536
537 IF( ja .NE. ib) THEN
538 info = -7
539 ENDIF
540
541
542
543 IF( nprow .NE. 1 ) THEN
544 info = -( 8*100+2 )
545 ENDIF
546
547 IF( n .GT. np*nb-mod( ja-1, nb )) THEN
548 info = -( 2 )
550 $ 'PCGBTRS, D&C alg.: only 1 block per proc',
551 $ -info )
552 RETURN
553 ENDIF
554
555 IF((ja+n-1.GT.nb) .AND. ( nb.LT.(bwl+bwu+1) )) THEN
556 info = -( 8*100+4 )
558 $ 'PCGBTRS, D&C alg.: NB too small',
559 $ -info )
560 RETURN
561 ENDIF
562
563
564
565
566 work_size_min = nrhs*(nb+2*bwl+4*bwu)
567
568 work( 1 ) = work_size_min
569
570 IF( lwork .LT. work_size_min ) THEN
571 IF( lwork .NE. -1 ) THEN
572 info = -16
574 $ 'PCGBTRS: worksize error ',
575 $ -info )
576 ENDIF
577 RETURN
578 ENDIF
579
580
581
582 param_check( 17, 1 ) = descb(5)
583 param_check( 16, 1 ) = descb(4)
584 param_check( 15, 1 ) = descb(3)
585 param_check( 14, 1 ) = descb(2)
586 param_check( 13, 1 ) = descb(1)
587 param_check( 12, 1 ) = ib
588 param_check( 11, 1 ) = desca(5)
589 param_check( 10, 1 ) = desca(4)
590 param_check( 9, 1 ) = desca(3)
591 param_check( 8, 1 ) = desca(1)
592 param_check( 7, 1 ) = ja
593 param_check( 6, 1 ) = nrhs
594 param_check( 5, 1 ) = bwu
595 param_check( 4, 1 ) = bwl
596 param_check( 3, 1 ) = n
597 param_check( 2, 1 ) = idum3
598 param_check( 1, 1 ) = idum2
599
600 param_check( 17, 2 ) = 1105
601 param_check( 16, 2 ) = 1104
602 param_check( 15, 2 ) = 1103
603 param_check( 14, 2 ) = 1102
604 param_check( 13, 2 ) = 1101
605 param_check( 12, 2 ) = 10
606 param_check( 11, 2 ) = 805
607 param_check( 10, 2 ) = 804
608 param_check( 9, 2 ) = 803
609 param_check( 8, 2 ) = 801
610 param_check( 7, 2 ) = 7
611 param_check( 6, 2 ) = 5
612 param_check( 5, 2 ) = 4
613 param_check( 4, 2 ) = 3
614 param_check( 3, 2 ) = 2
615 param_check( 2, 2 ) = 16
616 param_check( 1, 2 ) = 1
617
618
619
620
621
622 IF( info.GE.0 ) THEN
623 info = bignum
624 ELSE IF( info.LT.-descmult ) THEN
625 info = -info
626 ELSE
627 info = -info * descmult
628 END IF
629
630
631
632 CALL globchk( ictxt, 17, param_check, 17,
633 $ param_check( 1, 3 ), info )
634
635
636
637
638 IF( info.EQ.bignum ) THEN
639 info = 0
640 ELSE IF( mod( info, descmult ) .EQ. 0 ) THEN
641 info = -info / descmult
642 ELSE
643 info = -info
644 END IF
645
646 IF( info.LT.0 ) THEN
647 CALL pxerbla( ictxt,
'PCGBTRS', -info )
648 RETURN
649 END IF
650
651
652
653 IF( n.EQ.0 )
654 $ RETURN
655
656 IF( nrhs.EQ.0 )
657 $ RETURN
658
659
660
661
662
663 part_offset = nb*( (ja-1)/(npcol*nb) )
664
665 IF ( (mycol-csrc) .LT. (ja-part_offset-1)/nb ) THEN
666 part_offset = part_offset + nb
667 ENDIF
668
669 IF ( mycol .LT. csrc ) THEN
670 part_offset = part_offset - nb
671 ENDIF
672
673
674
675
676
677
678
679 first_proc = mod( ( ja-1 )/nb+csrc, npcol )
680
681
682
683 ja_new = mod( ja-1, nb ) + 1
684
685
686
687 np_save = np
688 np = ( ja_new+n-2 )/nb + 1
689
690
691
692 CALL reshape( ictxt, int_one, ictxt_new, int_one,
693 $ first_proc, int_one, np )
694
695
696
697 ictxt_save = ictxt
698 ictxt = ictxt_new
699 desca_1xp( 2 ) = ictxt_new
700 descb_px1( 2 ) = ictxt_new
701
702
703
704 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
705
706
707
708 IF( myrow .LT. 0 ) THEN
709 GOTO 1234
710 ENDIF
711
712
713
714
715
716
717
718 IF (mycol .LT. npcol-1) THEN
719 CALL cgesd2d( ictxt, bwu, nrhs, b(nb-bwu+1), lldb,
720 $ 0, mycol + 1)
721 ENDIF
722
723 IF (mycol .LT. npcol-1) THEN
724 lm = nb-bwu
725 ELSE
726 lm = nb
727 ENDIF
728
729 IF (mycol .GT. 0) THEN
730 wptr = bwu+1
731 ELSE
732 wptr = 1
733 ENDIF
734
735 ldw = nb+bwu + 2*bw+bwu
736
737 CALL clamov( 'G', lm, nrhs, b(1), lldb, work( wptr ), ldw )
738
739
740
741 DO 1501 j=1, nrhs
742 DO 1502 l=wptr+lm, ldw
743 work( (j-1)*ldw+l ) = czero
744 1502 CONTINUE
745 1501 CONTINUE
746
747 IF (mycol .GT. 0) THEN
748 CALL cgerv2d( ictxt, bwu, nrhs, work(1), ldw,
749 $ 0, mycol-1)
750 ENDIF
751
752
753
754
755
756
757
758 odd_size =
numroc( n, nb, mycol, 0, npcol )
759
760 IF (mycol .NE. 0) THEN
761 lbwl = bw
762 lbwu = 0
763 aptr = 1
764 ELSE
765 lbwl = bwl
766 lbwu = bwu
767 aptr = 1+bwu
768 ENDIF
769
770 IF (mycol .NE. npcol-1) THEN
771 lm = nb - lbwu
772 ln = nb - bw
773 ELSE IF (mycol .NE. 0) THEN
774 lm = odd_size + bwu
775 ln =
max(odd_size-bw,0)
776 ELSE
777 lm = n
779 ENDIF
780
781 DO 21 j = 1, ln
782
784 l = ipiv( j )
785
786 IF( l.NE.j ) THEN
787 CALL cswap(nrhs, work(l), ldw, work(j), ldw)
788 ENDIF
789
790 lptr = bw+1 + (j-1)*llda + aptr
791
792 CALL cgeru(lmj,nrhs,-cone, a(lptr),1, work(j),ldw,
793 $ work(j+1),ldw)
794
795 21 CONTINUE
796
797
798
799
800
801
802
803
804 IF (mycol .NE. npcol-1) THEN
805 bm = bw - lbwu
806 bn = bw
807 ELSE
808 bm =
min(bw,odd_size) + bwu
809 bn =
min(bw,odd_size)
810 ENDIF
811
812
813
814
815 bbptr = (nb+bwu)*bw + 1
816 ldbb = 2*bw + bwu
817
818 IF (npcol.EQ.1) THEN
819
820
821
822 CALL cgetrs( 'N', n-ln, nrhs, af(bbptr+bw*ldbb), ldbb,
823 $ ipiv(ln+1), work( ln+1 ), ldw, info)
824
825 ENDIF
826
827
828
829
830
831
832
833 npact = npcol
834 npstr = 1
835
836
837 200 IF (npact .LE. 1) GOTO 300
838
839
840 IF (mod(mycol,npstr) .EQ. 0) THEN
841
842
843
844 IF (mod(mycol,2*npstr) .EQ. 0) THEN
845
846 neicol = mycol + npstr
847
848 IF (neicol/npstr .LE. npact-1) THEN
849
850 IF (neicol/npstr .LT. npact-1) THEN
851 bmn = bw
852 ELSE
853 bmn =
min(bw,
numroc(n, nb, neicol, 0, npcol))+bwu
854 ENDIF
855
856 CALL cgesd2d( ictxt, bm, nrhs,
857 $ work(ln+1), ldw, 0, neicol )
858
859 IF( npact .NE. 2 )THEN
860
861
862
863 CALL cgerv2d(ictxt, bm+bmn-bw, nrhs,
864 $ work( ln+1 ), ldw, 0, neicol )
865
866 bm = bm+bmn-bw
867
868 ENDIF
869
870 ENDIF
871
872 ELSE
873
874 neicol = mycol - npstr
875
876 IF (neicol .EQ. 0) THEN
877 bmn = bw - bwu
878 ELSE
879 bmn = bw
880 ENDIF
881
882 CALL clamov( 'G', bm, nrhs, work(ln+1), ldw,
883 $ work(nb+bwu+bmn+1), ldw )
884
885 CALL cgerv2d( ictxt, bmn, nrhs, work( nb+bwu+1 ),
886 $ ldw, 0, neicol )
887
888
889
890 IF (npact .NE. 2) THEN
891
892
893
894 CALL claswp( nrhs, work(nb+bwu+1), ldw, 1, bw,
895 $ ipiv(ln+1), 1)
896
897 CALL ctrsm('L','L','N','U', bw, nrhs, cone,
898 $ af(bbptr+bw*ldbb), ldbb, work(nb+bwu+1), ldw)
899
900
901
902 CALL cgemm( 'N', 'N', bm+bmn-bw, nrhs, bw,
903 $ -cone, af(bbptr+bw*ldbb+bw), ldbb,
904 $ work(nb+bwu+1), ldw,
905 $ cone, work(nb+bwu+1+bw), ldw )
906
907
908
909 CALL cgesd2d( ictxt, bm+bmn-bw, nrhs,
910 $ work(nb+bwu+1+bw), ldw, 0, neicol )
911
912 ELSE
913
914
915
916 CALL claswp( nrhs, work(nb+bwu+1), ldw, 1, bm+bmn,
917 $ ipiv(ln+1), 1)
918
919 CALL ctrsm('L','L','N','U', bm+bmn, nrhs, cone,
920 $ af(bbptr+bw*ldbb), ldbb, work(nb+bwu+1), ldw)
921 ENDIF
922
923 ENDIF
924
925 npact = (npact + 1)/2
926 npstr = npstr * 2
927 GOTO 200
928
929 ENDIF
930
931 300 CONTINUE
932
933
934
935
936
937
938
939
940 IF (npcol.EQ.1) THEN
941
942
943
944
945
946
947 ENDIF
948
949
950
951
952 recovery_val = npact*npstr - npcol
953
954
955
956
957 2200 IF( npact .GE. npcol ) GOTO 2300
958
959 npstr = npstr/2
960
961 npact = npact*2
962
963
964
965 npact = npact-mod( (recovery_val/npstr), 2 )
966
967
968
969 IF( mycol/npstr .LT. npact-1 ) THEN
970 bn = bw
971 ELSE
972 bn =
min(bw,
numroc(n, nb, npcol-1, 0, npcol) )
973 ENDIF
974
975
976
977 IF( mod( mycol, 2*npstr ) .EQ. 0 ) THEN
978
979 neicol = mycol+npstr
980
981 IF( neicol/npstr .LE. npact-1 ) THEN
982
983 IF( neicol/npstr .LT. npact-1 ) THEN
984 bmn = bw
985 bnn = bw
986 ELSE
987 bmn =
min(bw,
numroc(n, nb, neicol, 0, npcol))+bwu
988 bnn =
min(bw,
numroc(n, nb, neicol, 0, npcol) )
989 ENDIF
990
991 IF( npact .GT. 2 ) THEN
992
993 CALL cgesd2d( ictxt, 2*bw, nrhs, work( ln+1 ),
994 $ ldw, 0, neicol )
995
996 CALL cgerv2d( ictxt, bw, nrhs, work( ln+1 ),
997 $ ldw, 0, neicol )
998
999 ELSE
1000
1001 CALL cgerv2d( ictxt, bw, nrhs, work( ln+1 ),
1002 $ ldw, 0, neicol )
1003
1004 ENDIF
1005
1006 ENDIF
1007
1008 ELSE
1009
1010
1011 neicol = mycol - npstr
1012
1013 IF (neicol .EQ. 0) THEN
1014 bmn = bw - bwu
1015 ELSE
1016 bmn = bw
1017 ENDIF
1018
1019 IF( neicol .LT. npcol-1 ) THEN
1020 bnn = bw
1021 ELSE
1022 bnn =
min(bw,
numroc(n, nb, neicol, 0, npcol) )
1023 ENDIF
1024
1025 IF( npact .GT. 2 ) THEN
1026
1027
1028
1029 CALL clamov( 'G', bw, nrhs, work(nb+bwu+1),
1030 $ ldw, work(nb+bwu+bw+1), ldw )
1031
1032 CALL cgerv2d( ictxt, 2*bw, nrhs, work( ln+1 ),
1033 $ ldw, 0, neicol )
1034
1035 CALL cgemm( 'N', 'N', bw, nrhs, bn,
1036 $ -cone, af(bbptr), ldbb,
1037 $ work(ln+1), ldw,
1038 $ cone, work(nb+bwu+bw+1), ldw )
1039
1040
1041 IF( mycol .GT. npstr ) THEN
1042
1043 CALL cgemm( 'N', 'N', bw, nrhs, bw,
1044 $ -cone, af(bbptr+2*bw*ldbb), ldbb,
1045 $ work(ln+bw+1), ldw,
1046 $ cone, work(nb+bwu+bw+1), ldw )
1047
1048 ENDIF
1049
1050 CALL ctrsm('L','U','N','N', bw, nrhs, cone,
1051 $ af(bbptr+bw*ldbb), ldbb, work(nb+bwu+bw+1), ldw)
1052
1053
1054
1055 CALL cgesd2d( ictxt, bw, nrhs,
1056 $ work( nb+bwu+bw+1 ), ldw, 0, neicol )
1057
1058
1059
1060 CALL clamov( 'G', bw, nrhs, work(nb+bwu+1+bw),
1061 $ ldw, work(ln+bw+1), ldw )
1062
1063 ELSE
1064
1065
1066
1067 CALL ctrsm( 'L','U','N','N', bn+bnn, nrhs, cone,
1068 $ af(bbptr+bw*ldbb), ldbb, work(nb+bwu+1), ldw)
1069
1070
1071
1072 CALL cgesd2d( ictxt, bw, nrhs,
1073 $ work(nb+bwu+1), ldw, 0, neicol )
1074
1075
1076
1077 CALL clamov( 'G', bnn+bn-bw, nrhs, work(nb+bwu+1+bw),
1078 $ ldw, work(ln+1), ldw )
1079
1080
1081 IF( (nb+bwu+1) .NE. (ln+1+bw) ) THEN
1082
1083
1084
1085 DO 1064 j=1, bw
1086 CALL ccopy( nrhs, work(nb+bwu+j), ldw,
1087 $ work(ln+bw+j), ldw )
1088 1064 CONTINUE
1089
1090 ENDIF
1091
1092 ENDIF
1093
1094 ENDIF
1095
1096 GOTO 2200
1097
1098 2300 CONTINUE
1099
1100
1101
1102
1103
1104
1105
1106
1107 IF (mycol .NE. npcol-1) THEN
1108 bm = bw - lbwu
1109 ELSE
1110 bm =
min(bw,odd_size) + bwu
1111 ENDIF
1112
1113
1114
1115 IF( mycol .LT. npcol-1 ) THEN
1116
1117 CALL cgesd2d( ictxt, bw, nrhs, work( nb-bw+1 ),
1118 $ ldw, 0, mycol+1 )
1119
1120 ENDIF
1121
1122 IF( mycol .GT. 0 ) THEN
1123
1124 CALL cgerv2d( ictxt, bw, nrhs, work( nb+bwu+1 ),
1125 $ ldw, 0, mycol-1 )
1126
1127
1128
1129 CALL cgemm( 'N', 'N', lm-bm, nrhs, bw, -cone,
1130 $ af( 1 ), lm, work( nb+bwu+1 ), ldw, cone,
1131 $ work( 1 ), ldw )
1132
1133 ENDIF
1134
1135 DO 2021 j = ln, 1, -1
1136
1137 lmj =
min( bw, odd_size-1 )
1138
1139 lptr = bw-1+j*llda+aptr
1140
1141
1142
1143
1144 CALL cgemv( 'T', lmj, nrhs, -cone, work( j+1), ldw,
1145 $ a( lptr ), llda-1, cone, work( j ), ldw )
1146
1147
1148
1149 CALL cscal( nrhs, cone/a( lptr-llda+1 ),
1150 $ work( j ), ldw )
1151 2021 CONTINUE
1152
1153
1154
1155 CALL clamov( 'G', odd_size, nrhs, work( 1 ), ldw,
1156 $ b( 1 ), lldb )
1157
1158
1159
1160 ictxt = ictxt_save
1161 IF( ictxt .NE. ictxt_new ) THEN
1162 CALL blacs_gridexit( ictxt_new )
1163 ENDIF
1164
1165 1234 CONTINUE
1166
1167
1168
1169 np = np_save
1170
1171
1172
1173 work( 1 ) = work_size_min
1174
1175 RETURN
1176
1177
1178
subroutine desc_convert(desc_in, desc_out, info)
integer function numroc(n, nb, iproc, isrcproc, nprocs)
subroutine globchk(ictxt, n, x, ldx, iwork, info)
subroutine pxerbla(ictxt, srname, info)
void reshape(Int *context_in, Int *major_in, Int *context_out, Int *major_out, Int *first_proc, Int *nprow_new, Int *npcol_new)