3
4
5
6
7
8
9 INTEGER BWL, BWU, INFO, JA, LAF, LWORK, N
10
11
12 INTEGER DESCA( * ), IPIV( * )
13 DOUBLE PRECISION A( * ), AF( * ), WORK( * )
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356 DOUBLE PRECISION ONE
357 parameter( one = 1.0d+0 )
358 DOUBLE PRECISION ZERO
359 parameter( zero = 0.0d+0 )
360 INTEGER INT_ONE
361 parameter( int_one = 1 )
362 INTEGER DESCMULT, BIGNUM
363 parameter( descmult = 100, bignum = descmult*descmult )
364 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
365 $ LLD_, MB_, M_, NB_, N_, RSRC_
366 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
367 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
368 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
369
370
371 INTEGER APTR, BBPTR, BIPTR, BM, BM1, BM2, BMN, BN, BW,
372 $ CSRC, DBPTR, FIRST_PROC, I, I1, I2, ICTXT,
373 $ ICTXT_NEW, ICTXT_SAVE, IDUM3, J, JA_NEW, JPTR,
374 $ L, LAF_MIN, LBWL, LBWU, LDB, LDBB, LLDA, LM,
375 $ LMJ, LN, LNJ, LPTR, MYCOL, MYROW, MY_NUM_COLS,
376 $ NB, NEICOL, NP, NPACT, NPCOL, NPROW, NPSTR,
377 $ NP_SAVE, NRHS, ODD_N, ODD_SIZE, ODPTR, OFST,
378 $ PART_OFFSET, PART_SIZE, RETURN_CODE, STORE_N_A,
379 $ WORK_SIZE_MIN
380
381
382 INTEGER DESCA_1XP( 7 ), PARAM_CHECK( 9, 3 )
383
384
386 $ dgbtrf, dgemm, dger, dgerv2d, dgesd2d, dgetrf,
387 $ dlamov, dlaswp,
dlatcpy, dswap, dtrrv2d,
388 $ dtrsd2d, dtrsm,
globchk, igamx2d, igebr2d,
390
391
392 INTEGER NUMROC
394
395
397
398
399
400
401
402
403 info = 0
404
405
406
407
408 desca_1xp( 1 ) = 501
409
411
412 IF( return_code.NE.0 ) THEN
413 info = -( 6*100+2 )
414 END IF
415
416
417
418 ictxt = desca_1xp( 2 )
419 csrc = desca_1xp( 5 )
420 nb = desca_1xp( 4 )
421 llda = desca_1xp( 6 )
422 store_n_a = desca_1xp( 3 )
423
424
425
426
427 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
428 np = nprow*npcol
429
430
431
432 IF( lwork.LT.-1 ) THEN
433 info = -11
434 ELSE IF( lwork.EQ.-1 ) THEN
435 idum3 = -1
436 ELSE
437 idum3 = 1
438 END IF
439
440 IF( n.LT.0 ) THEN
441 info = -1
442 END IF
443
444 IF( n+ja-1.GT.store_n_a ) THEN
445 info = -( 6*100+6 )
446 END IF
447
448 IF( ( bwl.GT.n-1 ) .OR. ( bwl.LT.0 ) ) THEN
449 info = -2
450 END IF
451
452 IF( ( bwu.GT.n-1 ) .OR. ( bwu.LT.0 ) ) THEN
453 info = -3
454 END IF
455
456 IF( llda.LT.( 2*bwl+2*bwu+1 ) ) THEN
457 info = -( 6*100+6 )
458 END IF
459
460 IF( nb.LE.0 ) THEN
461 info = -( 6*100+4 )
462 END IF
463
464 bw = bwu + bwl
465
466
467
468 IF( nprow.NE.1 ) THEN
469 info = -( 6*100+2 )
470 END IF
471
472 IF( n.GT.np*nb-mod( ja-1, nb ) ) THEN
473 info = -( 1 )
474 CALL pxerbla( ictxt,
'PDGBTRF, D&C alg.: only 1 block per proc'
475 $ , -info )
476 RETURN
477 END IF
478
479 IF( ( ja+n-1.GT.nb ) .AND. ( nb.LT.( bwl+bwu+1 ) ) ) THEN
480 info = -( 6*100+4 )
481 CALL pxerbla( ictxt,
'PDGBTRF, D&C alg.: NB too small', -info )
482 RETURN
483 END IF
484
485
486
487
488 laf_min = ( nb+bwu )*( bwl+bwu ) + 6*( bwl+bwu )*( bwl+2*bwu )
489
490 IF( laf.LT.laf_min ) THEN
491 info = -9
492
493 af( 1 ) = laf_min
494 CALL pxerbla( ictxt,
'PDGBTRF: auxiliary storage error ',
495 $ -info )
496 RETURN
497 END IF
498
499
500
501 work_size_min = 1
502
503 work( 1 ) = work_size_min
504
505 IF( lwork.LT.work_size_min ) THEN
506 IF( lwork.NE.-1 ) THEN
507 info = -11
508
509 work( 1 ) = work_size_min
510 CALL pxerbla( ictxt,
'PDGBTRF: worksize error ', -info )
511 END IF
512 RETURN
513 END IF
514
515
516
517 param_check( 9, 1 ) = desca( 5 )
518 param_check( 8, 1 ) = desca( 4 )
519 param_check( 7, 1 ) = desca( 3 )
520 param_check( 6, 1 ) = desca( 1 )
521 param_check( 5, 1 ) = ja
522 param_check( 4, 1 ) = bwu
523 param_check( 3, 1 ) = bwl
524 param_check( 2, 1 ) = n
525 param_check( 1, 1 ) = idum3
526
527 param_check( 9, 2 ) = 605
528 param_check( 8, 2 ) = 604
529 param_check( 7, 2 ) = 603
530 param_check( 6, 2 ) = 601
531 param_check( 5, 2 ) = 5
532 param_check( 4, 2 ) = 3
533 param_check( 3, 2 ) = 2
534 param_check( 2, 2 ) = 1
535 param_check( 1, 2 ) = 11
536
537
538
539
540
541 IF( info.GE.0 ) THEN
542 info = bignum
543 ELSE IF( info.LT.-descmult ) THEN
544 info = -info
545 ELSE
546 info = -info*descmult
547 END IF
548
549
550
551 CALL globchk( ictxt, 9, param_check, 9, param_check( 1, 3 ),
552 $ info )
553
554
555
556
557 IF( info.EQ.bignum ) THEN
558 info = 0
559 ELSE IF( mod( info, descmult ).EQ.0 ) THEN
560 info = -info / descmult
561 ELSE
562 info = -info
563 END IF
564
565 IF( info.LT.0 ) THEN
566 CALL pxerbla( ictxt,
'PDGBTRF', -info )
567 RETURN
568 END IF
569
570
571
572 IF( n.EQ.0 )
573 $ RETURN
574
575
576
577
578
579 part_offset = nb*( ( ja-1 ) / ( npcol*nb ) )
580
581 IF( ( mycol-csrc ).LT.( ja-part_offset-1 ) / nb ) THEN
582 part_offset = part_offset + nb
583 END IF
584
585 IF( mycol.LT.csrc ) THEN
586 part_offset = part_offset - nb
587 END IF
588
589
590
591
592
593
594
595 first_proc = mod( ( ja-1 ) / nb+csrc, npcol )
596
597
598
599 ja_new = mod( ja-1, nb ) + 1
600
601
602
603 np_save = np
604 np = ( ja_new+n-2 ) / nb + 1
605
606
607
608 CALL reshape( ictxt, int_one, ictxt_new, int_one, first_proc,
609 $ int_one, np )
610
611
612
613 ictxt_save = ictxt
614 ictxt = ictxt_new
615 desca_1xp( 2 ) = ictxt_new
616
617
618
619 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
620
621
622
623 IF( myrow.LT.0 ) THEN
624 GO TO 210
625 END IF
626
627
628
629
630
631
632 part_size = nb
633
634
635
636 my_num_cols =
numroc( n, part_size, mycol, 0, npcol )
637
638
639
640 IF( mycol.EQ.0 ) THEN
641 part_offset = part_offset + mod( ja_new-1, part_size )
642 my_num_cols = my_num_cols - mod( ja_new-1, part_size )
643 END IF
644
645
646
647 ofst = part_offset*llda
648
649
650
651 odd_size =
numroc( n, part_size, mycol, 0, npcol )
652
653
654
655
656 DO 10 i = 1, laf_min
657 af( i ) = zero
658 10 CONTINUE
659
660 DO 30 j = 1, odd_size
661 DO 20 i = 1, bw
662 a( i+( j-1 )*llda ) = zero
663 20 CONTINUE
664 30 CONTINUE
665
666
667
668
669
670
671
672
673
674
675
676 IF( mycol.LE.npcol-2 ) THEN
677
678
679
680 biptr = ( nb-bw )*llda + 2*bw + 1
681
682 CALL dtrsd2d( ictxt, 'U', 'N',
683 $
min( bw, bwu+
numroc( n, nb, mycol+1, 0,
684 $ npcol ) ), bw, a( biptr ), llda-1, 0, mycol+1 )
685
686 END IF
687
688
689
690
691
692
693
694
695
696
697
698
699 IF( mycol.NE.0 ) THEN
700 lbwl = bw
701 lbwu = 0
702 aptr = 1
703 ELSE
704 lbwl = bwl
705 lbwu = bwu
706 aptr = 1 + bwu
707 END IF
708
709 IF( mycol.NE.npcol-1 ) THEN
710 lm = nb - lbwu
711 ln = nb - bw
712 ELSE IF( mycol.NE.0 ) THEN
713 lm = odd_size + bwu
714 ln =
max( odd_size-bw, 0 )
715 ELSE
716 lm = n
718 END IF
719
720 IF( ln.GT.0 ) THEN
721
722 CALL dgbtrf( lm, ln, lbwl, lbwu, a( aptr ), llda, ipiv, info )
723
724 IF( info.NE.0 ) THEN
725 info = info + nb*mycol
726 GO TO 80
727 END IF
728
729 nrhs = bw
730 ldb = llda - 1
731
732
733
734
735
736
737 DO 40 j =
max( ln-bw+1, 1 ), ln
738
739 lmj =
min( lbwl, lm-j )
740 lnj =
min( bw, j+bw-ln+aptr-1 )
741
742 l = ipiv( j )
743
744 jptr = j - ( ln+1 ) + 2*bw + 1 - lbwl + ln*llda
745
746 IF( l.NE.j ) THEN
747
748
749
750
751
752
753 lptr = l - ( ln+1 ) + 2*bw + 1 - lbwl + ln*llda
754
755 CALL dswap( lnj, a( lptr ), ldb, a( jptr ), ldb )
756
757 END IF
758
759
760
761
762 lptr = bw + 1 + aptr + ( j-1 )*llda
763
764 CALL dger( lmj, lnj, -one, a( lptr ), 1, a( jptr ), ldb,
765 $ a( jptr+1 ), ldb )
766 40 CONTINUE
767
768 END IF
769
770
771
772
773
774 IF( mycol.GT.0 ) THEN
775 CALL dtrrv2d( ictxt,
'U',
'N',
min( bw, lm ), bw, af( 1 ), bw,
776 $ 0, mycol-1 )
777
778
779
780 DO 60 i2 = 1,
min( bw, lm )
781 DO 50 i1 = i2 + 1, bw
782 af( i1+( i2-1 )*bw ) = af( i2+( i1-1 )*bw )
783 af( i2+( i1-1 )*bw ) = zero
784 50 CONTINUE
785 60 CONTINUE
786
787
788
789 DO 70 j = 1, ln
790
791 lmj =
min( lbwl, lm-j )
792 l = ipiv( j )
793
794 IF( l.NE.j ) THEN
795 CALL dswap( bw, af( ( l-1 )*bw+1 ), 1,
796 $ af( ( j-1 )*bw+1 ), 1 )
797 END IF
798
799 lptr = bw + 1 + aptr + ( j-1 )*llda
800
801 CALL dger( nrhs, lmj, -one, af( ( j-1 )*bw+1 ), 1,
802 $ a( lptr ), 1, af( j*bw+1 ), bw )
803
804 70 CONTINUE
805
806 END IF
807
808 80 CONTINUE
809
810
811
812
813
814
815
816
817 IF( mycol.NE.npcol-1 ) THEN
818 bm = bw - lbwu
819 bn = bw
820 ELSE
821 bm =
min( bw, odd_size ) + bwu
822 bn =
min( bw, odd_size )
823 END IF
824
825
826
827
828 bbptr = ( nb+bwu )*bw + 1
829 ldbb = 2*bw + bwu
830
831
832
833
834 dbptr = bw + 1 + lbwu + ln*llda
835
836 CALL dlamov( 'G', bm, bn, a( dbptr ), llda-1, af( bbptr+bw*ldbb ),
837 $ ldbb )
838
839
840
841 DO 100 j = 1, bm
842 DO 90 i = j + lbwl, bm - 1
843 af( bbptr+bw*ldbb+( j-1 )*ldbb+i ) = zero
844 90 CONTINUE
845 100 CONTINUE
846
847 IF( mycol.NE.0 ) THEN
848
849
850
851 odptr = ( lm-bm )*bw + 1
852 CALL dlatcpy(
'G', bw, bm, af( odptr ), bw,
853 $ af( bbptr+2*bw*ldbb ), ldbb )
854 END IF
855
856 IF( npcol.EQ.1 ) THEN
857
858
859
860 CALL dgetrf( n-ln, n-ln, af( bbptr+bw*ldbb ), ldbb,
861 $ ipiv( ln+1 ), info )
862
863 END IF
864
865
866
867
868
869
870
871 npact = npcol
872 npstr = 1
873
874
875
876 110 CONTINUE
877 IF( npact.LE.1 )
878 $ GO TO 190
879
880
881
882 IF( mod( mycol, npstr ).EQ.0 ) THEN
883
884
885
886
887 IF( mod( mycol, 2*npstr ).EQ.0 ) THEN
888
889
890
891 neicol = mycol + npstr
892
893 IF( neicol / npstr.LT.npact-1 ) THEN
894 bmn = bw
895 ELSE IF( neicol / npstr.EQ.npact-1 ) THEN
896 odd_n =
numroc( n, nb, npcol-1, 0, npcol )
897 bmn =
min( bw, odd_n ) + bwu
898 ELSE
899
900
901 GO TO 180
902 END IF
903
904
905
906 bm1 = bm
907 bm2 = bmn
908
909 IF( neicol / npstr.LE.npact-1 ) THEN
910
911 CALL dgesd2d( ictxt, bm, 2*bw, af( bbptr+bw*ldbb ), ldbb,
912 $ 0, neicol )
913
914 CALL dgerv2d( ictxt, bmn, 2*bw, af( bbptr+bm ), ldbb, 0,
915 $ neicol )
916
917 IF( npact.EQ.2 ) THEN
918
919
920
921 CALL dlamov( 'G', bmn, bw, af( bbptr+bm ), ldbb,
922 $ af( bbptr+2*bw*ldbb+bm ), ldbb )
923 END IF
924
925 END IF
926
927 ELSE
928
929
930
931
932
933 neicol = mycol - npstr
934
935 IF( neicol.EQ.0 ) THEN
936 bmn = bw - bwu
937 ELSE
938 bmn = bw
939 END IF
940
941 bm1 = bmn
942 bm2 = bm
943
944 CALL dgesd2d( ictxt, bm, 2*bw, af( bbptr+bw*ldbb ), ldbb, 0,
945 $ neicol )
946
947 CALL dlamov( 'G', bm, 2*bw, af( bbptr+bw*ldbb ), ldbb,
948 $ af( bbptr+bmn ), ldbb )
949
950 DO 130 j = bbptr + 2*bw*ldbb, bbptr + 3*bw*ldbb - 1, ldbb
951 DO 120 i = 0, ldbb - 1
952 af( i+j ) = zero
953 120 CONTINUE
954 130 CONTINUE
955
956 CALL dgerv2d( ictxt, bmn, 2*bw, af( bbptr+bw*ldbb ), ldbb,
957 $ 0, neicol )
958
959 IF( npact.EQ.2 ) THEN
960
961
962
963 CALL dlamov( 'G', bm, bw, af( bbptr+bmn ), ldbb,
964 $ af( bbptr+2*bw*ldbb+bmn ), ldbb )
965 END IF
966
967 END IF
968
969
970
971 IF( npact.NE.2 ) THEN
972
973 CALL dgetrf( bm+bmn, bw, af( bbptr+bw*ldbb ), ldbb,
974 $ ipiv( ln+1 ), info )
975
976
977
978 DO 150 j = bbptr, bbptr + bw*ldbb - 1, ldbb
979 DO 140 i = 0, bm1 - 1
980 af( i+j ) = zero
981 140 CONTINUE
982 150 CONTINUE
983
984 CALL dlaswp( bw, af( bbptr ), ldbb, 1, bw, ipiv( ln+1 ), 1 )
985
986 CALL dtrsm( 'L', 'L', 'N', 'U', bw, bw, one,
987 $ af( bbptr+bw*ldbb ), ldbb, af( bbptr ), ldbb )
988
989
990
991 CALL dgemm( 'N', 'N', bm+bmn-bw, bw, bw, -one,
992 $ af( bbptr+bw*ldbb+bw ), ldbb, af( bbptr ), ldbb,
993 $ one, af( bbptr+bw ), ldbb )
994
995
996
997 nrhs = bw
998
999 CALL dlaswp( nrhs, af( bbptr+2*bw*ldbb ), ldbb, 1, bw,
1000 $ ipiv( ln+1 ), 1 )
1001
1002 CALL dtrsm( 'L', 'L', 'N', 'U', bw, nrhs, one,
1003 $ af( bbptr+bw*ldbb ), ldbb,
1004 $ af( bbptr+2*bw*ldbb ), ldbb )
1005
1006
1007
1008 CALL dgemm( 'N', 'N', bm+bmn-bw, nrhs, bw, -one,
1009 $ af( bbptr+bw*ldbb+bw ), ldbb,
1010 $ af( bbptr+2*bw*ldbb ), ldbb, one,
1011 $ af( bbptr+2*bw*ldbb+bw ), ldbb )
1012
1013
1014
1015
1016 IF( mod( mycol, 2*npstr ).EQ.0 ) THEN
1017
1018
1019
1020 bm = bm1 + bm2 - bw
1021
1022
1023
1024
1025 CALL dlamov( 'G', bm, bw, af( bbptr+bw ), ldbb,
1026 $ af( bbptr+bw*ldbb ), ldbb )
1027 CALL dlamov( 'G', bm, bw, af( bbptr+2*bw*ldbb+bw ), ldbb,
1028 $ af( bbptr+2*bw*ldbb ), ldbb )
1029
1030
1031
1032 DO 170 j = 0, bw - 1
1033 DO 160 i = 0, bm - 1
1034 af( bbptr+2*bw*ldbb+bw+j*ldbb+i ) = zero
1035 160 CONTINUE
1036 170 CONTINUE
1037
1038 END IF
1039
1040 ELSE
1041
1042
1043
1044 CALL dgetrf( bm+bmn, bm+bmn, af( bbptr+bw*ldbb ), ldbb,
1045 $ ipiv( ln+1 ), info )
1046 END IF
1047
1048 END IF
1049
1050
1051
1052 180 CONTINUE
1053
1054 npact = ( npact+1 ) / 2
1055 npstr = npstr*2
1056 GO TO 110
1057
1058 190 CONTINUE
1059
1060
1061 200 CONTINUE
1062
1063
1064
1065
1066 ictxt = ictxt_save
1067 IF( ictxt.NE.ictxt_new ) THEN
1068 CALL blacs_gridexit( ictxt_new )
1069 END IF
1070
1071 210 CONTINUE
1072
1073
1074
1075
1076
1077 ictxt = ictxt_save
1078 np = np_save
1079
1080
1081
1082 work( 1 ) = work_size_min
1083
1084
1085
1086 CALL igamx2d( ictxt, 'A', ' ', 1, 1, info, 1, info, info, -1, 0,
1087 $ 0 )
1088
1089 IF( mycol.EQ.0 ) THEN
1090 CALL igebs2d( ictxt, 'A', ' ', 1, 1, info, 1 )
1091 ELSE
1092 CALL igebr2d( ictxt, 'A', ' ', 1, 1, info, 1, 0, 0 )
1093 END IF
1094
1095
1096 RETURN
1097
1098
1099
subroutine desc_convert(desc_in, desc_out, info)
subroutine dlatcpy(uplo, m, n, a, lda, b, ldb)
integer function numroc(n, nb, iproc, isrcproc, nprocs)
subroutine globchk(ictxt, n, x, ldx, iwork, info)
subroutine pxerbla(ictxt, srname, info)
void reshape(Int *context_in, Int *major_in, Int *context_out, Int *major_out, Int *first_proc, Int *nprow_new, Int *npcol_new)