SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage

◆ pddttrsv()

subroutine pddttrsv ( character  uplo,
character  trans,
integer  n,
integer  nrhs,
double precision, dimension( * )  dl,
double precision, dimension( * )  d,
double precision, dimension( * )  du,
integer  ja,
integer, dimension( * )  desca,
double precision, dimension( * )  b,
integer  ib,
integer, dimension( * )  descb,
double precision, dimension( * )  af,
integer  laf,
double precision, dimension( * )  work,
integer  lwork,
integer  info 
)

Definition at line 1 of file pddttrsv.f.
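For orientation before the listing, here is a hedged sketch of one way the routine might be driven: set up a 1-by-P BLACS grid, fill type-501/502 one-dimensional descriptors by hand, factor with PDDTTRF, and apply a single lower-triangular solve with PDDTTRSV. The program name, problem size, block size, matrix values, and the deliberately generous LAF/LWORK sizes are illustrative assumptions, not values taken from this file; a complete solution applies both triangular factors, which is normally left to the driver PDDTTRS.

*     Hypothetical driver for PDDTTRF / PDDTTRSV.  All sizes and data
*     values below are assumptions chosen for illustration only.
      PROGRAM EXDTTRSV
      INTEGER            N, NB, NRHS, LAF, LWORK
      PARAMETER          ( N = 16, NB = 4, NRHS = 1 )
      PARAMETER          ( LAF = 100, LWORK = 100 )
      INTEGER            IAM, NPROCS, ICTXT, NPROW, NPCOL, MYROW,
     $                   MYCOL, INFO, I
      INTEGER            DESCA( 7 ), DESCB( 7 )
      DOUBLE PRECISION   DL( NB ), D( NB ), DU( NB ), B( NB )
      DOUBLE PRECISION   AF( LAF ), WORK( LWORK )
*
*     One block of size NB per process: run on at least N/NB = 4
*     processes so that P*NB >= N (the non-cyclic restriction below).
      CALL BLACS_PINFO( IAM, NPROCS )
      CALL BLACS_GET( -1, 0, ICTXT )
      CALL BLACS_GRIDINIT( ICTXT, 'R', 1, NPROCS )
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*
*     1D descriptors: type 501 (1-by-P) for A, type 502 (P-by-1) for B.
      DESCA( 1 ) = 501
      DESCA( 2 ) = ICTXT
      DESCA( 3 ) = N
      DESCA( 4 ) = NB
      DESCA( 5 ) = 0
      DESCA( 6 ) = NB
      DESCA( 7 ) = 0
      DESCB( 1 ) = 502
      DESCB( 2 ) = ICTXT
      DESCB( 3 ) = N
      DESCB( 4 ) = NB
      DESCB( 5 ) = 0
      DESCB( 6 ) = NB
      DESCB( 7 ) = 0
*
*     Local pieces of a diagonally dominant tridiagonal matrix and an
*     all-ones right hand side (NRHS = 1).
      DO 10 I = 1, NB
         DL( I ) = -1.0D+0
         D( I )  =  4.0D+0
         DU( I ) = -1.0D+0
         B( I )  =  1.0D+0
   10 CONTINUE
*
*     Factor, then apply the lower-triangular solve.  LAF and LWORK
*     are generous here; see the LAF and LWORK descriptions below for
*     the documented minimum sizes.
      CALL PDDTTRF( N, DL, D, DU, 1, DESCA, AF, LAF, WORK, LWORK,
     $              INFO )
      CALL PDDTTRSV( 'L', 'N', N, NRHS, DL, D, DU, 1, DESCA, B, 1,
     $               DESCB, AF, LAF, WORK, LWORK, INFO )
*
      CALL BLACS_GRIDEXIT( ICTXT )
      CALL BLACS_EXIT( 0 )
      END

Link against ScaLAPACK and the BLACS in the usual way; the exact link line depends on the installation.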

3*
4* -- ScaLAPACK routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* April 3, 2000
8*
9* .. Scalar Arguments ..
10 CHARACTER TRANS, UPLO
11 INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS
12* ..
13* .. Array Arguments ..
14 INTEGER DESCA( * ), DESCB( * )
15 DOUBLE PRECISION AF( * ), B( * ), D( * ), DL( * ), DU( * ),
16 $ WORK( * )
17* ..
18*
19*
20* Purpose
21* =======
22*
23* PDDTTRSV solves a tridiagonal triangular system of linear equations
24*
25* A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS)
26* or
27* A(1:N, JA:JA+N-1)^T * X = B(IB:IB+N-1, 1:NRHS)
28*
29* where A(1:N, JA:JA+N-1) is a tridiagonal
30* triangular matrix factor produced by the
31* Gaussian elimination code PDDTTRF
32* and is stored in A(1:N,JA:JA+N-1) and AF.
33* The matrix stored in A(1:N, JA:JA+N-1) is either
34* upper or lower triangular according to UPLO,
35* and the choice of solving A(1:N, JA:JA+N-1) or A(1:N, JA:JA+N-1)^T
36* is dictated by the user by the parameter TRANS.
37*
38* Routine PDDTTRF MUST be called first.
39*
40* =====================================================================
41*
42* Arguments
43* =========
44*
45* UPLO (global input) CHARACTER
46* = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored;
47* = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored.
48*
49* TRANS (global input) CHARACTER
50* = 'N': Solve with A(1:N, JA:JA+N-1);
51* = 'T' or 'C': Solve with A(1:N, JA:JA+N-1)^T;
52*
53* N (global input) INTEGER
54* The number of rows and columns to be operated on, i.e. the
55* order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0.
56*
57* NRHS (global input) INTEGER
58* The number of right hand sides, i.e., the number of columns
59* of the distributed submatrix B(IB:IB+N-1, 1:NRHS).
60* NRHS >= 0.
61*
62* DL (local input/local output) DOUBLE PRECISION pointer to local
63* part of global vector storing the lower diagonal of the
64* matrix. Globally, DL(1) is not referenced, and DL must be
65* aligned with D.
66* Must be of size >= DESCA( NB_ ).
67* On exit, this array contains information on the
68* factors of the matrix.
69*
70* D (local input/local output) DOUBLE PRECISION pointer to local
71* part of global vector storing the main diagonal of the
72* matrix.
73* On exit, this array contains information on the
74* factors of the matrix.
75* Must be of size >= DESCA( NB_ ).
76*
77* DU (local input/local output) DOUBLE PRECISION pointer to local
78* part of global vector storing the upper diagonal of the
79* matrix. Globally, DU(n) is not referenced, and DU must be
80* aligned with D.
81* On exit, this array contains information on the
82* factors of the matrix.
83* Must be of size >= DESCA( NB_ ).
84*
85* JA (global input) INTEGER
86* The index in the global array A that points to the start of
87* the matrix to be operated on (which may be either all of A
88* or a submatrix of A).
89*
90* DESCA (global and local input) INTEGER array of dimension DLEN.
91* if 1D type (DTYPE_A=501 or 502), DLEN >= 7;
92* if 2D type (DTYPE_A=1), DLEN >= 9.
93* The array descriptor for the distributed matrix A.
94* Contains information of mapping of A to memory. Please
95* see NOTES below for full description and options.
96*
97* B (local input/local output) DOUBLE PRECISION pointer into
98* local memory to an array of local leading dimension lld_b>=NB.
99* On entry, this array contains the
100* local pieces of the right hand sides
101* B(IB:IB+N-1, 1:NRHS).
102* On exit, this array contains the local piece of the
103* solution distributed matrix X.
104*
105* IB (global input) INTEGER
106* The row index in the global array B that points to the first
107* row of the matrix to be operated on (which may be either
108* all of B or a submatrix of B).
109*
110* DESCB (global and local input) INTEGER array of dimension DLEN.
111* if 1D type (DTYPE_B=502), DLEN >=7;
112* if 2D type (DTYPE_B=1), DLEN >= 9.
113* The array descriptor for the distributed matrix B.
114* Contains information of mapping of B to memory. Please
115* see NOTES below for full description and options.
116*
117* AF (local output) DOUBLE PRECISION array, dimension LAF.
118* Auxiliary Fillin Space.
119* Fillin is created during the factorization routine
120* PDDTTRF and this is stored in AF. If a linear system
121* is to be solved using PDDTTRS after the factorization
122* routine, AF *must not be altered* after the factorization.
123*
124* LAF (local input) INTEGER
125* Size of user-input Auxiliary Fillin space AF. Must be >=
126* 2*(NB+2)
127* If LAF is not large enough, an error code will be returned
128* and the minimum acceptable size will be returned in AF( 1 )
129*
130* WORK (local workspace/local output)
131* DOUBLE PRECISION temporary workspace. This space may
132* be overwritten in between calls to routines. WORK must be
133* the size given in LWORK.
134* On exit, WORK( 1 ) contains the minimal LWORK.
135*
136* LWORK (local input or global input) INTEGER
137* Size of user-input workspace WORK.
138* If LWORK is too small, the minimal acceptable size will be
139* returned in WORK(1) and an error code is returned. LWORK>=
140* 10*NPCOL+4*NRHS
141*
142* INFO (local output) INTEGER
143* = 0: successful exit
144* < 0: If the i-th argument is an array and the j-th entry had
145* an illegal value, then INFO = -(i*100+j); if the i-th
146* argument is a scalar and had an illegal value, then
147* INFO = -i.
148*
149* =====================================================================
150*
151*
152* Restrictions
153* ============
154*
155* The following are restrictions on the input parameters. Some of these
156* are temporary and will be removed in future releases, while others
157* may reflect fundamental technical limitations.
158*
159* Non-cyclic restriction: VERY IMPORTANT!
160* P*NB>= mod(JA-1,NB)+N.
161* The mapping for matrices must be blocked, reflecting the nature
162* of the divide and conquer algorithm as a task-parallel algorithm.
163* This formula in words is: no processor may have more than one
164* chunk of the matrix.
165*
166* Blocksize cannot be too small:
167* If the matrix spans more than one processor, the following
168* restriction on NB, the size of each block on each processor,
169* must hold:
170* NB >= 2
171* The bulk of parallel computation is done on the matrix of size
172* O(NB) on each processor. If this is too small, divide and conquer
173* is a poor choice of algorithm.
174*
175* Submatrix reference:
176* JA = IB
177* Alignment restriction that prevents unnecessary communication.
178*
179*
180* =====================================================================
181*
182*
183* Notes
184* =====
185*
186* If the factorization routine and the solve routine are to be called
187* separately (to solve various sets of righthand sides using the same
188* coefficient matrix), the auxiliary space AF *must not be altered*
189* between calls to the factorization routine and the solve routine.
190*
191* The best algorithm for solving banded and tridiagonal linear systems
192* depends on a variety of parameters, especially the bandwidth.
193* Currently, only algorithms designed for the case N/P >> bw are
194* implemented. These go by many names, including Divide and Conquer,
195* Partitioning, domain decomposition-type, etc.
196* For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C
197* algorithms are the appropriate choice.
198*
199* Algorithm description: Divide and Conquer
200*
201* The Divide and Conquer algorithm assumes the matrix is narrowly
202* banded compared with the number of equations. In this situation,
203* it is best to distribute the input matrix A one-dimensionally,
204* with columns atomic and rows divided amongst the processes.
205* The basic algorithm divides the tridiagonal matrix up into
206* P pieces with one stored on each processor,
207* and then proceeds in 2 phases for the factorization or 3 for the
208* solution of a linear system.
209* 1) Local Phase:
210* The individual pieces are factored independently and in
211* parallel. These factors are applied to the matrix creating
212* fillin, which is stored in a non-inspectable way in auxiliary
213* space AF. Mathematically, this is equivalent to reordering
214* the matrix A as P A P^T and then factoring the principal
215* leading submatrix of size equal to the sum of the sizes of
216* the matrices factored on each processor. The factors of
217* these submatrices overwrite the corresponding parts of A
218* in memory.
219* 2) Reduced System Phase:
220* A small (P-1) system is formed representing
221* interaction of the larger blocks, and is stored (as are its
222* factors) in the space AF. A parallel Block Cyclic Reduction
223* algorithm is used. For a linear system, a parallel front solve
224* followed by an analogous backsolve, both using the structure
225* of the factored matrix, are performed.
226* 3) Backsubstitution Phase:
227* For a linear system, a local backsubstitution is performed on
228* each processor in parallel.
229*
230*
231* Descriptors
232* ===========
233*
234* Descriptors now have *types* and differ from ScaLAPACK 1.0.
235*
236* Note: tridiagonal codes can use either the old two dimensional
237* or new one-dimensional descriptors, though the processor grid in
238* both cases *must be one-dimensional*. We describe both types below.
239*
240* Each global data object is described by an associated description
241* vector. This vector stores the information required to establish
242* the mapping between an object element and its corresponding process
243* and memory location.
244*
245* Let A be a generic term for any 2D block cyclically distributed array.
246* Such a global array has an associated description vector DESCA.
247* In the following comments, the character _ should be read as
248* "of the global array".
249*
250* NOTATION STORED IN EXPLANATION
251* --------------- -------------- --------------------------------------
252* DTYPE_A(global) DESCA( DTYPE_ ) The descriptor type. In this case,
253* DTYPE_A = 1.
254* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
255* the BLACS process grid A is distribu-
256* ted over. The context itself is glo-
257* bal, but the handle (the integer
258* value) may vary.
259* M_A (global) DESCA( M_ ) The number of rows in the global
260* array A.
261* N_A (global) DESCA( N_ ) The number of columns in the global
262* array A.
263* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
264* the rows of the array.
265* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
266* the columns of the array.
267* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
268* row of the array A is distributed.
269* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
270* first column of the array A is
271* distributed.
272* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
273* array. LLD_A >= MAX(1,LOCr(M_A)).
274*
275* Let K be the number of rows or columns of a distributed matrix,
276* and assume that its process grid has dimension p x q.
277* LOCr( K ) denotes the number of elements of K that a process
278* would receive if K were distributed over the p processes of its
279* process column.
280* Similarly, LOCc( K ) denotes the number of elements of K that a
281* process would receive if K were distributed over the q processes of
282* its process row.
283* The values of LOCr() and LOCc() may be determined via a call to the
284* ScaLAPACK tool function, NUMROC:
285* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
286* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
287* An upper bound for these quantities may be computed by:
288* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
289* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
290*
291*
292* One-dimensional descriptors:
293*
294* One-dimensional descriptors are a new addition to ScaLAPACK since
295* version 1.0. They simplify and shorten the descriptor for 1D
296* arrays.
297*
298* Since ScaLAPACK supports two-dimensional arrays as the fundamental
299* object, we allow 1D arrays to be distributed either over the
300* first dimension of the array (as if the grid were P-by-1) or the
301* 2nd dimension (as if the grid were 1-by-P). This choice is
302* indicated by the descriptor type (501 or 502)
303* as described below.
304* However, for tridiagonal matrices, since the objects being
305* distributed are the individual vectors storing the diagonals, we
306* have adopted the convention that both the P-by-1 descriptor and
307* the 1-by-P descriptor are allowed and are equivalent for
308* tridiagonal matrices. Thus, for tridiagonal matrices,
309* DTYPE_A = 501 or 502 can be used interchangeably
310* without any other change.
311* We require that the distributed vectors storing the diagonals of a
312* tridiagonal matrix be aligned with each other. Because of this, a
313* single descriptor, DESCA, serves to describe the distribution of
314* all diagonals simultaneously.
315*
316* IMPORTANT NOTE: the actual BLACS grid represented by the
317* CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P
318* irrespective of which one-dimensional descriptor type
319* (501 or 502) is input.
320* This routine will interpret the grid properly either way.
321* ScaLAPACK routines *do not support intercontext operations* so that
322* the grid passed to a single ScaLAPACK routine *must be the same*
323* for all array descriptors passed to that routine.
324*
325* NOTE: In all cases where 1D descriptors are used, 2D descriptors
326* may also be used, since a one-dimensional array is a special case
327* of a two-dimensional array with one dimension of size unity.
328* The two-dimensional array used in this case *must* be of the
329* proper orientation:
330* If the appropriate one-dimensional descriptor is DTYPEA=501
331* (1 by P type), then the two dimensional descriptor must
332* have a CTXT value that refers to a 1 by P BLACS grid;
333* If the appropriate one-dimensional descriptor is DTYPEA=502
334* (P by 1 type), then the two dimensional descriptor must
335* have a CTXT value that refers to a P by 1 BLACS grid.
336*
337*
338* Summary of allowed descriptors, types, and BLACS grids:
339*    DTYPE         501          502          1        1
340*    BLACS grid    1xP or Px1   1xP or Px1   1xP      Px1
341*    ---------------------------------------------------------
342*    A             OK           OK           OK       NO
343*    B             NO           OK           NO       OK
344*
345* Note that a consequence of this chart is that it is not possible
346* for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead
347* to opposite requirements for the orientation of the BLACS grid,
348* and as noted before, the *same* BLACS context must be used in
349* all descriptors in a single ScaLAPACK subroutine call.
350*
351* Let A be a generic term for any 1D block cyclically distributed array.
352* Such a global array has an associated description vector DESCA.
353* In the following comments, the character _ should be read as
354* "of the global array".
355*
356* NOTATION STORED IN EXPLANATION
357* --------------- ---------- ------------------------------------------
358* DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids,
359* TYPE_A = 501: 1-by-P grid.
360* TYPE_A = 502: P-by-1 grid.
361* CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating
362* the BLACS process grid A is distribu-
363* ted over. The context itself is glo-
364* bal, but the handle (the integer
365* value) may vary.
366* N_A (global) DESCA( 3 ) The size of the array dimension being
367* distributed.
368* NB_A (global) DESCA( 4 ) The blocking factor used to distribute
369* the distributed dimension of the array.
370* SRC_A (global) DESCA( 5 ) The process row or column over which the
371* first row or column of the array
372* is distributed.
373* Ignored DESCA( 6 ) Ignored for tridiagonal matrices.
374* Reserved DESCA( 7 ) Reserved for future use.
375*
376*
377*
378* =====================================================================
379*
380* Code Developer: Andrew J. Cleary, University of Tennessee.
381* Current address: Lawrence Livermore National Labs.
382*
383* =====================================================================
384*
385* .. Parameters ..
386 DOUBLE PRECISION ONE
387 parameter( one = 1.0d+0 )
388 DOUBLE PRECISION ZERO
389 parameter( zero = 0.0d+0 )
390 INTEGER INT_ONE
391 parameter( int_one = 1 )
392 INTEGER DESCMULT, BIGNUM
393 parameter( descmult = 100, bignum = descmult*descmult )
394 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
395 $ LLD_, MB_, M_, NB_, N_, RSRC_
396 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
397 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
398 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
399* ..
400* .. Local Scalars ..
401 INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE,
402 $ IDUM1, IDUM2, IDUM3, JA_NEW, LEVEL_DIST, LLDA,
403 $ LLDB, MYCOL, MYROW, MY_NUM_COLS, NB, NP, NPCOL,
404 $ NPROW, NP_SAVE, ODD_SIZE, PART_OFFSET,
405 $ PART_SIZE, RETURN_CODE, STORE_M_B, STORE_N_A,
406 $ TEMP, WORK_SIZE_MIN, WORK_U
407* ..
408* .. Local Arrays ..
409 INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ),
410 $ PARAM_CHECK( 16, 3 )
411* ..
412* .. External Subroutines ..
413 EXTERNAL blacs_gridexit, blacs_gridinfo, daxpy, ddttrsv,
414 $ desc_convert, dgemm, dgerv2d, dgesd2d, dmatadd,
415 $ dtbtrs, globchk, pxerbla, reshape
416* ..
417* .. External Functions ..
418 LOGICAL LSAME
419 INTEGER NUMROC
420 EXTERNAL lsame, numroc
421* ..
422* .. Intrinsic Functions ..
423 INTRINSIC ichar, min, mod
424* ..
425* .. Executable Statements ..
426*
427* Test the input parameters
428*
429 info = 0
430*
431* Convert descriptor into standard form for easy access to
432* parameters, and check that the grid is of the right shape.
433*
434 desca_1xp( 1 ) = 501
435 descb_px1( 1 ) = 502
436*
437 temp = desca( dtype_ )
438 IF( temp.EQ.502 ) THEN
439* Temporarily set the descriptor type to 1xP type
440 desca( dtype_ ) = 501
441 END IF
442*
443 CALL desc_convert( desca, desca_1xp, return_code )
444*
445 desca( dtype_ ) = temp
446*
447 IF( return_code.NE.0 ) THEN
448 info = -( 9*100+2 )
449 END IF
450*
451 CALL desc_convert( descb, descb_px1, return_code )
452*
453 IF( return_code.NE.0 ) THEN
454 info = -( 12*100+2 )
455 END IF
456*
457* Consistency checks for DESCA and DESCB.
458*
459* Context must be the same
460 IF( desca_1xp( 2 ).NE.descb_px1( 2 ) ) THEN
461 info = -( 12*100+2 )
462 END IF
463*
464* These are alignment restrictions that may or may not be removed
465* in future releases. -Andy Cleary, April 14, 1996.
466*
467* Block sizes must be the same
468 IF( desca_1xp( 4 ).NE.descb_px1( 4 ) ) THEN
469 info = -( 12*100+4 )
470 END IF
471*
472* Source processor must be the same
473*
474 IF( desca_1xp( 5 ).NE.descb_px1( 5 ) ) THEN
475 info = -( 12*100+5 )
476 END IF
477*
478* Get values out of descriptor for use in code.
479*
480 ictxt = desca_1xp( 2 )
481 csrc = desca_1xp( 5 )
482 nb = desca_1xp( 4 )
483 llda = desca_1xp( 6 )
484 store_n_a = desca_1xp( 3 )
485 lldb = descb_px1( 6 )
486 store_m_b = descb_px1( 3 )
487*
488* Get grid parameters
489*
490*
491 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
492 np = nprow*npcol
493*
494*
495*
496 IF( lsame( uplo, 'U' ) ) THEN
497 idum1 = ichar( 'U' )
498 ELSE IF( lsame( uplo, 'L' ) ) THEN
499 idum1 = ichar( 'L' )
500 ELSE
501 info = -1
502 END IF
503*
504 IF( lsame( trans, 'N' ) ) THEN
505 idum2 = ichar( 'N' )
506 ELSE IF( lsame( trans, 'T' ) ) THEN
507 idum2 = ichar( 'T' )
508 ELSE IF( lsame( trans, 'C' ) ) THEN
509 idum2 = ichar( 'T' )
510 ELSE
511 info = -2
512 END IF
513*
514 IF( lwork.LT.-1 ) THEN
515 info = -16
516 ELSE IF( lwork.EQ.-1 ) THEN
517 idum3 = -1
518 ELSE
519 idum3 = 1
520 END IF
521*
522 IF( n.LT.0 ) THEN
523 info = -3
524 END IF
525*
526 IF( n+ja-1.GT.store_n_a ) THEN
527 info = -( 9*100+6 )
528 END IF
529*
530 IF( n+ib-1.GT.store_m_b ) THEN
531 info = -( 12*100+3 )
532 END IF
533*
534 IF( lldb.LT.nb ) THEN
535 info = -( 12*100+6 )
536 END IF
537*
538 IF( nrhs.LT.0 ) THEN
539 info = -4
540 END IF
541*
542* Current alignment restriction
543*
544 IF( ja.NE.ib ) THEN
545 info = -8
546 END IF
547*
548* Argument checking that is specific to Divide & Conquer routine
549*
550 IF( nprow.NE.1 ) THEN
551 info = -( 9*100+2 )
552 END IF
553*
554 IF( n.GT.np*nb-mod( ja-1, nb ) ) THEN
555 info = -( 3 )
556 CALL pxerbla( ictxt,
557 $ 'PDDTTRSV, D&C alg.: only 1 block per proc',
558 $ -info )
559 RETURN
560 END IF
561*
562 IF( ( ja+n-1.GT.nb ) .AND. ( nb.LT.2*int_one ) ) THEN
563 info = -( 9*100+4 )
564 CALL pxerbla( ictxt, 'PDDTTRSV, D&C alg.: NB too small',
565 $ -info )
566 RETURN
567 END IF
568*
569*
570 work_size_min = int_one*nrhs
571*
572 work( 1 ) = work_size_min
573*
574 IF( lwork.LT.work_size_min ) THEN
575 IF( lwork.NE.-1 ) THEN
576 info = -16
577 CALL pxerbla( ictxt, 'PDDTTRSV: worksize error', -info )
578 END IF
579 RETURN
580 END IF
581*
582* Pack params and positions into arrays for global consistency check
583*
584 param_check( 16, 1 ) = descb( 5 )
585 param_check( 15, 1 ) = descb( 4 )
586 param_check( 14, 1 ) = descb( 3 )
587 param_check( 13, 1 ) = descb( 2 )
588 param_check( 12, 1 ) = descb( 1 )
589 param_check( 11, 1 ) = ib
590 param_check( 10, 1 ) = desca( 5 )
591 param_check( 9, 1 ) = desca( 4 )
592 param_check( 8, 1 ) = desca( 3 )
593 param_check( 7, 1 ) = desca( 1 )
594 param_check( 6, 1 ) = ja
595 param_check( 5, 1 ) = nrhs
596 param_check( 4, 1 ) = n
597 param_check( 3, 1 ) = idum3
598 param_check( 2, 1 ) = idum2
599 param_check( 1, 1 ) = idum1
600*
601 param_check( 16, 2 ) = 1205
602 param_check( 15, 2 ) = 1204
603 param_check( 14, 2 ) = 1203
604 param_check( 13, 2 ) = 1202
605 param_check( 12, 2 ) = 1201
606 param_check( 11, 2 ) = 11
607 param_check( 10, 2 ) = 905
608 param_check( 9, 2 ) = 904
609 param_check( 8, 2 ) = 903
610 param_check( 7, 2 ) = 901
611 param_check( 6, 2 ) = 8
612 param_check( 5, 2 ) = 4
613 param_check( 4, 2 ) = 3
614 param_check( 3, 2 ) = 16
615 param_check( 2, 2 ) = 2
616 param_check( 1, 2 ) = 1
617*
618* Want to find errors with MIN( ), so if no error, set it to a big
619* number. If there already is an error, multiply by the
620* descriptor multiplier.
621*
622 IF( info.GE.0 ) THEN
623 info = bignum
624 ELSE IF( info.LT.-descmult ) THEN
625 info = -info
626 ELSE
627 info = -info*descmult
628 END IF
629*
630* Check consistency across processors
631*
632 CALL globchk( ictxt, 16, param_check, 16, param_check( 1, 3 ),
633 $ info )
634*
635* Prepare output: set info = 0 if no error, and divide by DESCMULT
636* if error is not in a descriptor entry.
637*
638 IF( info.EQ.bignum ) THEN
639 info = 0
640 ELSE IF( mod( info, descmult ).EQ.0 ) THEN
641 info = -info / descmult
642 ELSE
643 info = -info
644 END IF
645*
646 IF( info.LT.0 ) THEN
647 CALL pxerbla( ictxt, 'PDDTTRSV', -info )
648 RETURN
649 END IF
650*
651* Quick return if possible
652*
653 IF( n.EQ.0 )
654 $ RETURN
655*
656 IF( nrhs.EQ.0 )
657 $ RETURN
658*
659*
660* Adjust addressing into matrix space to properly get into
661* the beginning part of the relevant data
662*
663 part_offset = nb*( ( ja-1 ) / ( npcol*nb ) )
664*
665 IF( ( mycol-csrc ).LT.( ja-part_offset-1 ) / nb ) THEN
666 part_offset = part_offset + nb
667 END IF
668*
669 IF( mycol.LT.csrc ) THEN
670 part_offset = part_offset - nb
671 END IF
672*
673* Form a new BLACS grid (the "standard form" grid) with only procs
674* holding part of the matrix, of size 1xNP where NP is adjusted,
675* starting at csrc=0, with JA modified to reflect dropped procs.
676*
677* First processor to hold part of the matrix:
678*
679 first_proc = mod( ( ja-1 ) / nb+csrc, npcol )
680*
681* Calculate the new JA while dropping off unused processors.
682*
683 ja_new = mod( ja-1, nb ) + 1
684*
685* Save and compute new value of NP
686*
687 np_save = np
688 np = ( ja_new+n-2 ) / nb + 1
689*
690* Call utility routine that forms "standard-form" grid
691*
692 CALL reshape( ictxt, int_one, ictxt_new, int_one, first_proc,
693 $ int_one, np )
694*
695* Use new context from standard grid as context.
696*
697 ictxt_save = ictxt
698 ictxt = ictxt_new
699 desca_1xp( 2 ) = ictxt_new
700 descb_px1( 2 ) = ictxt_new
701*
702* Get information about new grid.
703*
704 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
705*
706* Drop out processors that do not have part of the matrix.
707*
708 IF( myrow.LT.0 ) THEN
709 GO TO 180
710 END IF
711*
712* ********************************
713* Values reused throughout routine
714*
715* User-input value of partition size
716*
717 part_size = nb
718*
719* Number of columns in each processor
720*
721 my_num_cols = numroc( n, part_size, mycol, 0, npcol )
722*
723* Offset in columns to beginning of main partition in each proc
724*
725 IF( mycol.EQ.0 ) THEN
726 part_offset = part_offset + mod( ja_new-1, part_size )
727 my_num_cols = my_num_cols - mod( ja_new-1, part_size )
728 END IF
729*
730* Size of main (or odd) partition in each processor
731*
732 odd_size = my_num_cols
733 IF( mycol.LT.np-1 ) THEN
734 odd_size = odd_size - int_one
735 END IF
736*
737* Offset to workspace for Upper triangular factor
738*
739 work_u = int_one*odd_size + 3
740*
741*
742*
743* Begin main code
744*
745 IF( lsame( uplo, 'L' ) ) THEN
746*
747 IF( lsame( trans, 'N' ) ) THEN
748*
749* Frontsolve
750*
751*
752******************************************
753* Local computation phase
754******************************************
755*
756* Use main partition in each processor to solve locally
757*
758 CALL ddttrsv( uplo, 'N', odd_size, nrhs,
759 $ dl( part_offset+2 ), d( part_offset+1 ),
760 $ du( part_offset+1 ), b( part_offset+1 ), lldb,
761 $ info )
762*
763*
764 IF( mycol.LT.np-1 ) THEN
765* Use factorization of odd-even connection block to modify
766* locally stored portion of right hand side(s)
767*
768 CALL daxpy( nrhs, -dl( part_offset+odd_size+1 ),
769 $ b( part_offset+odd_size ), lldb,
770 $ b( part_offset+odd_size+1 ), lldb )
771*
772 END IF
773*
774*
775 IF( mycol.NE.0 ) THEN
776* Use the "spike" fillin to calculate contribution to previous
777* processor's righthand-side.
778*
779 CALL dgemm( 'T', 'N', int_one, nrhs, odd_size, -one,
780 $ af( 1 ), odd_size, b( part_offset+1 ), lldb,
781 $ zero, work( 1+int_one-int_one ), int_one )
782 END IF
783*
784*
785************************************************
786* Formation and solution of reduced system
787************************************************
788*
789*
790* Send modifications to prior processor's right hand sides
791*
792 IF( mycol.GT.0 ) THEN
793*
794 CALL dgesd2d( ictxt, int_one, nrhs, work( 1 ), int_one,
795 $ 0, mycol-1 )
796*
797 END IF
798*
799* Receive modifications to processor's right hand sides
800*
801 IF( mycol.LT.npcol-1 ) THEN
802*
803 CALL dgerv2d( ictxt, int_one, nrhs, work( 1 ), int_one,
804 $ 0, mycol+1 )
805*
806* Combine contribution to locally stored right hand sides
807*
808 CALL dmatadd( int_one, nrhs, one, work( 1 ), int_one,
809 $ one, b( part_offset+odd_size+1 ), lldb )
810*
811 END IF
812*
813*
814* The last processor does not participate in the solution of the
815* reduced system, having sent its contribution already.
816 IF( mycol.EQ.npcol-1 ) THEN
817 GO TO 30
818 END IF
819*
820*
821* *************************************
822* Modification Loop
823*
824* The distance for sending and receiving for each level starts
825* at 1 for the first level.
826 level_dist = 1
827*
828* Do until this proc is needed to modify other procs' equations
829*
830 10 CONTINUE
831 IF( mod( ( mycol+1 ) / level_dist, 2 ).NE.0 )
832 $ GO TO 20
833*
834* Receive and add contribution to righthand sides from left
835*
836 IF( mycol-level_dist.GE.0 ) THEN
837*
838 CALL dgerv2d( ictxt, int_one, nrhs, work( 1 ), int_one,
839 $ 0, mycol-level_dist )
840*
841 CALL dmatadd( int_one, nrhs, one, work( 1 ), int_one,
842 $ one, b( part_offset+odd_size+1 ), lldb )
843*
844 END IF
845*
846* Receive and add contribution to righthand sides from right
847*
848 IF( mycol+level_dist.LT.npcol-1 ) THEN
849*
850 CALL dgerv2d( ictxt, int_one, nrhs, work( 1 ), int_one,
851 $ 0, mycol+level_dist )
852*
853 CALL dmatadd( int_one, nrhs, one, work( 1 ), int_one,
854 $ one, b( part_offset+odd_size+1 ), lldb )
855*
856 END IF
857*
858 level_dist = level_dist*2
859*
860 GO TO 10
861 20 CONTINUE
862* [End of GOTO Loop]
863*
864*
865*
866* *********************************
867* Calculate and use this proc's blocks to modify other procs
868*
869* Solve with diagonal block
870*
871 CALL dtbtrs( 'L', 'N', 'U', int_one,
872 $ min( int_one, int_one-1 ), nrhs,
873 $ af( odd_size+2 ), int_one+1,
874 $ b( part_offset+odd_size+1 ), lldb, info )
875*
876 IF( info.NE.0 ) THEN
877 GO TO 170
878 END IF
879*
880*
881*
882* *********
883 IF( mycol / level_dist.LE.( npcol-1 ) / level_dist-2 ) THEN
884*
885* Calculate contribution from this block to next diagonal block
886*
887 CALL dgemm( 'T', 'N', int_one, nrhs, int_one, -one,
888 $ af( ( odd_size )*int_one+1 ), int_one,
889 $ b( part_offset+odd_size+1 ), lldb, zero,
890 $ work( 1 ), int_one )
891*
892* Send contribution to diagonal block's owning processor.
893*
894 CALL dgesd2d( ictxt, int_one, nrhs, work( 1 ), int_one,
895 $ 0, mycol+level_dist )
896*
897 END IF
898* End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
899*
900* ************
901 IF( ( mycol / level_dist.GT.0 ) .AND.
902 $ ( mycol / level_dist.LE.( npcol-1 ) / level_dist-1 ) )
903 $ THEN
904*
905*
906* Use offdiagonal block to calculate modification to diag block
907* of processor to the left
908*
909 CALL dgemm( 'N', 'N', int_one, nrhs, int_one, -one,
910 $ af( odd_size*int_one+2+1 ), int_one,
911 $ b( part_offset+odd_size+1 ), lldb, zero,
912 $ work( 1 ), int_one )
913*
914* Send contribution to diagonal block's owning processor.
915*
916 CALL dgesd2d( ictxt, int_one, nrhs, work( 1 ), int_one,
917 $ 0, mycol-level_dist )
918*
919 END IF
920* End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
921*
922 30 CONTINUE
923*
924 ELSE
925*
926******************** BACKSOLVE *************************************
927*
928********************************************************************
929* .. Begin reduced system phase of algorithm ..
930********************************************************************
931*
932*
933*
934* The last processor does not participate in the solution of the
935* reduced system and just waits to receive its solution.
936 IF( mycol.EQ.npcol-1 ) THEN
937 GO TO 80
938 END IF
939*
940* Determine number of steps in tree loop
941*
942 level_dist = 1
943 40 CONTINUE
944 IF( mod( ( mycol+1 ) / level_dist, 2 ).NE.0 )
945 $ GO TO 50
946*
947 level_dist = level_dist*2
948*
949 GO TO 40
950 50 CONTINUE
951*
952*
953 IF( ( mycol / level_dist.GT.0 ) .AND.
954 $ ( mycol / level_dist.LE.( npcol-1 ) / level_dist-1 ) )
955 $ THEN
956*
957* Receive solution from processor to left
958*
959 CALL dgerv2d( ictxt, int_one, nrhs, work( 1 ), int_one,
960 $ 0, mycol-level_dist )
961*
962*
963* Use offdiagonal block to calculate modification to RHS stored
964* on this processor
965*
966 CALL dgemm( 'T', 'N', int_one, nrhs, int_one, -one,
967 $ af( odd_size*int_one+2+1 ), int_one,
968 $ work( 1 ), int_one, one,
969 $ b( part_offset+odd_size+1 ), lldb )
970 END IF
971* End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
972*
973*
974 IF( mycol / level_dist.LE.( npcol-1 ) / level_dist-2 ) THEN
975*
976* Receive solution from processor to right
977*
978 CALL dgerv2d( ictxt, int_one, nrhs, work( 1 ), int_one,
979 $ 0, mycol+level_dist )
980*
981* Calculate contribution from this block to next diagonal block
982*
983 CALL dgemm( 'N', 'N', int_one, nrhs, int_one, -one,
984 $ af( ( odd_size )*int_one+1 ), int_one,
985 $ work( 1 ), int_one, one,
986 $ b( part_offset+odd_size+1 ), lldb )
987*
988 END IF
989* End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
990*
991*
992* Solve with diagonal block
993*
994 CALL dtbtrs( 'L', 'T', 'U', int_one,
995 $ min( int_one, int_one-1 ), nrhs,
996 $ af( odd_size+2 ), int_one+1,
997 $ b( part_offset+odd_size+1 ), lldb, info )
998*
999 IF( info.NE.0 ) THEN
1000 GO TO 170
1001 END IF
1002*
1003*
1004*
1005***Modification Loop *******
1006*
1007 60 CONTINUE
1008 IF( level_dist.EQ.1 )
1009 $ GO TO 70
1010*
1011 level_dist = level_dist / 2
1012*
1013* Send solution to the right
1014*
1015 IF( mycol+level_dist.LT.npcol-1 ) THEN
1016*
1017 CALL dgesd2d( ictxt, int_one, nrhs,
1018 $ b( part_offset+odd_size+1 ), lldb, 0,
1019 $ mycol+level_dist )
1020*
1021 END IF
1022*
1023* Send solution to left
1024*
1025 IF( mycol-level_dist.GE.0 ) THEN
1026*
1027 CALL dgesd2d( ictxt, int_one, nrhs,
1028 $ b( part_offset+odd_size+1 ), lldb, 0,
1029 $ mycol-level_dist )
1030*
1031 END IF
1032*
1033 GO TO 60
1034 70 CONTINUE
1035* [End of GOTO Loop]
1036*
1037 80 CONTINUE
1038* [Processor npcol - 1 jumped to here to await next stage]
1039*
1040*******************************
1041* Reduced system has been solved, communicate solutions to nearest
1042* neighbors in preparation for local computation phase.
1043*
1044*
1045* Send elements of solution to next proc
1046*
1047 IF( mycol.LT.npcol-1 ) THEN
1048*
1049 CALL dgesd2d( ictxt, int_one, nrhs,
1050 $ b( part_offset+odd_size+1 ), lldb, 0,
1051 $ mycol+1 )
1052*
1053 END IF
1054*
1055* Receive modifications to processor's right hand sides
1056*
1057 IF( mycol.GT.0 ) THEN
1058*
1059 CALL dgerv2d( ictxt, int_one, nrhs, work( 1 ), int_one,
1060 $ 0, mycol-1 )
1061*
1062 END IF
1063*
1064*
1065*
1066**********************************************
1067* Local computation phase
1068**********************************************
1069*
1070 IF( mycol.NE.0 ) THEN
1071* Use the "spike" fillin to calculate contribution from previous
1072* processor's solution.
1073*
1074 CALL dgemm( 'N', 'N', odd_size, nrhs, int_one, -one,
1075 $ af( 1 ), odd_size, work( 1+int_one-int_one ),
1076 $ int_one, one, b( part_offset+1 ), lldb )
1077*
1078 END IF
1079*
1080*
1081 IF( mycol.LT.np-1 ) THEN
1082* Use factorization of odd-even connection block to modify
1083* locally stored portion of right hand side(s)
1084*
1085 CALL daxpy( nrhs, -( dl( part_offset+odd_size+1 ) ),
1086 $ b( part_offset+odd_size+1 ), lldb,
1087 $ b( part_offset+odd_size ), lldb )
1088*
1089 END IF
1090*
1091* Use main partition in each processor to solve locally
1092*
1093 CALL ddttrsv( uplo, 'T', odd_size, nrhs,
1094 $ dl( part_offset+2 ), d( part_offset+1 ),
1095 $ du( part_offset+1 ), b( part_offset+1 ), lldb,
1096 $ info )
1097*
1098 END IF
1099* End of "IF( LSAME( TRANS, 'N' ) )"...
1100*
1101*
1102 ELSE
1103***************************************************************
1104* CASE UPLO = 'U' *
1105***************************************************************
1106 IF( lsame( trans, 'T' ) ) THEN
1107*
1108* Frontsolve
1109*
1110*
1111******************************************
1112* Local computation phase
1113******************************************
1114*
1115* Use main partition in each processor to solve locally
1116*
1117 CALL ddttrsv( uplo, 'T', odd_size, nrhs,
1118 $ dl( part_offset+2 ), d( part_offset+1 ),
1119 $ du( part_offset+1 ), b( part_offset+1 ), lldb,
1120 $ info )
1121*
1122*
1123 IF( mycol.LT.np-1 ) THEN
1124* Use factorization of odd-even connection block to modify
1125* locally stored portion of right hand side(s)
1126*
1127 CALL daxpy( nrhs, -( du( part_offset+odd_size ) ),
1128 $ b( part_offset+odd_size ), lldb,
1129 $ b( part_offset+odd_size+1 ), lldb )
1130*
1131 END IF
1132*
1133*
1134 IF( mycol.NE.0 ) THEN
1135* Use the "spike" fillin to calculate contribution to previous
1136* processor's righthand-side.
1137*
1138 CALL dgemm( 'T', 'N', int_one, nrhs, odd_size, -one,
1139 $ af( work_u+1 ), odd_size, b( part_offset+1 ),
1140 $ lldb, zero, work( 1+int_one-int_one ),
1141 $ int_one )
1142 END IF
1143*
1144*
1145************************************************
1146* Formation and solution of reduced system
1147************************************************
1148*
1149*
1150* Send modifications to prior processor's right hand sides
1151*
1152 IF( mycol.GT.0 ) THEN
1153*
1154 CALL dgesd2d( ictxt, int_one, nrhs, work( 1 ), int_one,
1155 $ 0, mycol-1 )
1156*
1157 END IF
1158*
1159* Receive modifications to processor's right hand sides
1160*
1161 IF( mycol.LT.npcol-1 ) THEN
1162*
1163 CALL dgerv2d( ictxt, int_one, nrhs, work( 1 ), int_one,
1164 $ 0, mycol+1 )
1165*
1166* Combine contribution to locally stored right hand sides
1167*
1168 CALL dmatadd( int_one, nrhs, one, work( 1 ), int_one,
1169 $ one, b( part_offset+odd_size+1 ), lldb )
1170*
1171 END IF
1172*
1173*
1174* The last processor does not participate in the solution of the
1175* reduced system, having sent its contribution already.
1176 IF( mycol.EQ.npcol-1 ) THEN
1177 GO TO 110
1178 END IF
1179*
1180*
1181* *************************************
1182* Modification Loop
1183*
1184* The distance for sending and receiving for each level starts
1185* at 1 for the first level.
1186 level_dist = 1
1187*
1188* Do until this proc is needed to modify other procs' equations
1189*
1190 90 CONTINUE
1191 IF( mod( ( mycol+1 ) / level_dist, 2 ).NE.0 )
1192 $ GO TO 100
1193*
1194* Receive and add contribution to righthand sides from left
1195*
1196 IF( mycol-level_dist.GE.0 ) THEN
1197*
1198 CALL dgerv2d( ictxt, int_one, nrhs, work( 1 ), int_one,
1199 $ 0, mycol-level_dist )
1200*
1201 CALL dmatadd( int_one, nrhs, one, work( 1 ), int_one,
1202 $ one, b( part_offset+odd_size+1 ), lldb )
1203*
1204 END IF
1205*
1206* Receive and add contribution to righthand sides from right
1207*
1208 IF( mycol+level_dist.LT.npcol-1 ) THEN
1209*
1210 CALL dgerv2d( ictxt, int_one, nrhs, work( 1 ), int_one,
1211 $ 0, mycol+level_dist )
1212*
1213 CALL dmatadd( int_one, nrhs, one, work( 1 ), int_one,
1214 $ one, b( part_offset+odd_size+1 ), lldb )
1215*
1216 END IF
1217*
1218 level_dist = level_dist*2
1219*
1220 GO TO 90
1221 100 CONTINUE
1222* [End of GOTO Loop]
1223*
1224*
1225*
1226* *********************************
1227* Calculate and use this proc's blocks to modify other procs
1228*
1229* Solve with diagonal block
1230*
1231 CALL dtbtrs( 'U', 'T', 'N', int_one,
1232 $ min( int_one, int_one-1 ), nrhs,
1233 $ af( odd_size+2 ), int_one+1,
1234 $ b( part_offset+odd_size+1 ), lldb, info )
1235*
1236 IF( info.NE.0 ) THEN
1237 GO TO 170
1238 END IF
1239*
1240*
1241*
1242* *********
1243 IF( mycol / level_dist.LE.( npcol-1 ) / level_dist-2 ) THEN
1244*
1245* Calculate contribution from this block to next diagonal block
1246*
1247 CALL dgemm( 'T', 'N', int_one, nrhs, int_one, -one,
1248 $ af( work_u+( odd_size )*int_one+1 ), int_one,
1249 $ b( part_offset+odd_size+1 ), lldb, zero,
1250 $ work( 1 ), int_one )
1251*
1252* Send contribution to diagonal block's owning processor.
1253*
1254 CALL dgesd2d( ictxt, int_one, nrhs, work( 1 ), int_one,
1255 $ 0, mycol+level_dist )
1256*
1257 END IF
1258* End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
1259*
1260* ************
1261 IF( ( mycol / level_dist.GT.0 ) .AND.
1262 $ ( mycol / level_dist.LE.( npcol-1 ) / level_dist-1 ) )
1263 $ THEN
1264*
1265*
1266* Use offdiagonal block to calculate modification to diag block
1267* of processor to the left
1268*
1269 CALL dgemm( 'N', 'N', int_one, nrhs, int_one, -one,
1270 $ af( work_u+odd_size*int_one+2+1 ), int_one,
1271 $ b( part_offset+odd_size+1 ), lldb, zero,
1272 $ work( 1 ), int_one )
1273*
1274* Send contribution to diagonal block's owning processor.
1275*
1276 CALL dgesd2d( ictxt, int_one, nrhs, work( 1 ), int_one,
1277 $ 0, mycol-level_dist )
1278*
1279 END IF
1280* End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
1281*
1282 110 CONTINUE
1283*
1284 ELSE
1285*
1286******************** BACKSOLVE *************************************
1287*
1288********************************************************************
1289* .. Begin reduced system phase of algorithm ..
1290********************************************************************
1291*
1292*
1293*
1294* The last processor does not participate in the solution of the
1295* reduced system and just waits to receive its solution.
1296 IF( mycol.EQ.npcol-1 ) THEN
1297 GO TO 160
1298 END IF
1299*
1300* Determine number of steps in tree loop
1301*
1302 level_dist = 1
1303 120 CONTINUE
1304 IF( mod( ( mycol+1 ) / level_dist, 2 ).NE.0 )
1305 $ GO TO 130
1306*
1307 level_dist = level_dist*2
1308*
1309 GO TO 120
1310 130 CONTINUE
1311*
1312*
1313 IF( ( mycol / level_dist.GT.0 ) .AND.
1314 $ ( mycol / level_dist.LE.( npcol-1 ) / level_dist-1 ) )
1315 $ THEN
1316*
1317* Receive solution from processor to left
1318*
1319 CALL dgerv2d( ictxt, int_one, nrhs, work( 1 ), int_one,
1320 $ 0, mycol-level_dist )
1321*
1322*
1323* Use offdiagonal block to calculate modification to RHS stored
1324* on this processor
1325*
1326 CALL dgemm( 'T', 'N', int_one, nrhs, int_one, -one,
1327 $ af( work_u+odd_size*int_one+2+1 ), int_one,
1328 $ work( 1 ), int_one, one,
1329 $ b( part_offset+odd_size+1 ), lldb )
1330 END IF
1331* End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
1332*
1333*
1334 IF( mycol / level_dist.LE.( npcol-1 ) / level_dist-2 ) THEN
1335*
1336* Receive solution from processor to right
1337*
1338 CALL dgerv2d( ictxt, int_one, nrhs, work( 1 ), int_one,
1339 $ 0, mycol+level_dist )
1340*
1341* Calculate contribution from this block to next diagonal block
1342*
1343 CALL dgemm( 'N', 'N', int_one, nrhs, int_one, -one,
1344 $ af( work_u+( odd_size )*int_one+1 ), int_one,
1345 $ work( 1 ), int_one, one,
1346 $ b( part_offset+odd_size+1 ), lldb )
1347*
1348 END IF
1349* End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
1350*
1351*
1352* Solve with diagonal block
1353*
1354 CALL dtbtrs( 'U', 'N', 'N', int_one,
1355 $ min( int_one, int_one-1 ), nrhs,
1356 $ af( odd_size+2 ), int_one+1,
1357 $ b( part_offset+odd_size+1 ), lldb, info )
1358*
1359 IF( info.NE.0 ) THEN
1360 GO TO 170
1361 END IF
1362*
1363*
1364*
1365***Modification Loop *******
1366*
1367 140 CONTINUE
1368 IF( level_dist.EQ.1 )
1369 $ GO TO 150
1370*
1371 level_dist = level_dist / 2
1372*
1373* Send solution to the right
1374*
1375 IF( mycol+level_dist.LT.npcol-1 ) THEN
1376*
1377 CALL dgesd2d( ictxt, int_one, nrhs,
1378 $ b( part_offset+odd_size+1 ), lldb, 0,
1379 $ mycol+level_dist )
1380*
1381 END IF
1382*
1383* Send solution to left
1384*
1385 IF( mycol-level_dist.GE.0 ) THEN
1386*
1387 CALL dgesd2d( ictxt, int_one, nrhs,
1388 $ b( part_offset+odd_size+1 ), lldb, 0,
1389 $ mycol-level_dist )
1390*
1391 END IF
1392*
1393 GO TO 140
1394 150 CONTINUE
1395* [End of GOTO Loop]
1396*
1397 160 CONTINUE
1398* [Processor npcol - 1 jumped to here to await next stage]
1399*
1400*******************************
1401* Reduced system has been solved, communicate solutions to nearest
1402* neighbors in preparation for local computation phase.
1403*
1404*
1405* Send elements of solution to next proc
1406*
1407 IF( mycol.LT.npcol-1 ) THEN
1408*
1409 CALL dgesd2d( ictxt, int_one, nrhs,
1410 $ b( part_offset+odd_size+1 ), lldb, 0,
1411 $ mycol+1 )
1412*
1413 END IF
1414*
1415* Receive modifications to processor's right hand sides
1416*
1417 IF( mycol.GT.0 ) THEN
1418*
1419 CALL dgerv2d( ictxt, int_one, nrhs, work( 1 ), int_one,
1420 $ 0, mycol-1 )
1421*
1422 END IF
1423*
1424*
1425*
1426**********************************************
1427* Local computation phase
1428**********************************************
1429*
1430 IF( mycol.NE.0 ) THEN
1431* Use the "spike" fillin to calculate contribution from previous
1432* processor's solution.
1433*
1434 CALL dgemm( 'N', 'N', odd_size, nrhs, int_one, -one,
1435 $ af( work_u+1 ), odd_size,
1436 $ work( 1+int_one-int_one ), int_one, one,
1437 $ b( part_offset+1 ), lldb )
1438*
1439 END IF
1440*
1441*
1442 IF( mycol.LT.np-1 ) THEN
1443* Use factorization of odd-even connection block to modify
1444* locally stored portion of right hand side(s)
1445*
1446 CALL daxpy( nrhs, -( du( part_offset+odd_size ) ),
1447 $ b( part_offset+odd_size+1 ), lldb,
1448 $ b( part_offset+odd_size ), lldb )
1449*
1450 END IF
1451*
1452* Use main partition in each processor to solve locally
1453*
1454 CALL ddttrsv( uplo, 'N', odd_size, nrhs,
1455 $ du( part_offset+2 ), d( part_offset+1 ),
1456 $ du( part_offset+1 ), b( part_offset+1 ), lldb,
1457 $ info )
1458*
1459 END IF
1460* End of "IF( LSAME( TRANS, 'T' ) )"...
1461*
1462*
1463 END IF
1464* End of "IF( LSAME( UPLO, 'L' ) )"...
1465 170 CONTINUE
1466*
1467*
1468* Free BLACS space used to hold standard-form grid.
1469*
1470 IF( ictxt_save.NE.ictxt_new ) THEN
1471 CALL blacs_gridexit( ictxt_new )
1472 END IF
1473*
1474 180 CONTINUE
1475*
1476* Restore saved input parameters
1477*
1478 ictxt = ictxt_save
1479 np = np_save
1480*
1481* Output minimum worksize
1482*
1483 work( 1 ) = work_size_min
1484*
1485*
1486 RETURN
1487*
1488* End of PDDTTRSV
1489*
subroutine ddttrsv(uplo, trans, n, nrhs, dl, d, du, b, ldb, info)
Definition ddttrsv.f:3
subroutine desc_convert(desc_in, desc_out, info)
Definition desc_convert.f:2
subroutine dmatadd(m, n, alpha, a, lda, beta, c, ldc)
Definition dmatadd.f:2
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
#define min(A, B)
Definition pcgemr.c:181
subroutine globchk(ictxt, n, x, ldx, iwork, info)
Definition pchkxmat.f:403
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2
void reshape(Int *context_in, Int *major_in, Int *context_out, Int *major_out, Int *first_proc, Int *nprow_new, Int *npcol_new)
Definition reshape.c:80
logical function lsame(ca, cb)
Definition tools.f:1724
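The Restrictions and Descriptors sections in the header above reduce to a couple of integer formulas: NUMROC gives the local length of the distributed dimension, and P*NB >= mod(JA-1,NB)+N enforces one block of the matrix per process. The fragment below is a hypothetical helper (the name DTTSIZE and its argument list are invented for illustration, not part of ScaLAPACK) that a caller could use to size the local diagonal arrays and pre-check that restriction.

*     Hypothetical sizing helper: compute the local storage needed for
*     one diagonal and verify the "one block per process" restriction.
      SUBROUTINE DTTSIZE( N, NB, JA, MYCOL, CSRC, NPCOL, LOCLEN, OK )
      INTEGER            N, NB, JA, MYCOL, CSRC, NPCOL, LOCLEN
      LOGICAL            OK
      INTEGER            NUMROC
      EXTERNAL           NUMROC
*
*     Local length of the distributed dimension on this process
*     column; the header above requires DL, D, DU to hold at least
*     DESCA( NB_ ) entries, so take the larger of the two.
      LOCLEN = MAX( NB, NUMROC( N, NB, MYCOL, CSRC, NPCOL ) )
*
*     Non-cyclic restriction from the Restrictions section:
*     no process may own more than one block of the matrix.
      OK = ( NPCOL*NB.GE.MOD( JA-1, NB )+N )
      RETURN
      END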
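The reduced-system phase described in the Notes walks a binary tree driven by the LEVEL_DIST doubling loops in the body above. The toy program below, a serial illustration with made-up names and an assumed process count of 8, replays that loop and prints which process column each participating column would receive contributions from at every level; no BLACS communication is involved.

*     Toy replay of the LEVEL_DIST tree used in the reduced-system
*     phase.  NPCOL is an assumed process count for illustration.
      PROGRAM TREEDEMO
      INTEGER            NPCOL, MYCOL, LEVEL
      PARAMETER          ( NPCOL = 8 )
*     The last column (NPCOL-1) does not participate, as noted above.
      DO 20 MYCOL = 0, NPCOL-2
         LEVEL = 1
   10    CONTINUE
*        A column leaves the doubling loop as soon as (MYCOL+1)/LEVEL
*        is odd, mirroring the test in the modification loops above.
         IF( MOD( ( MYCOL+1 ) / LEVEL, 2 ).NE.0 ) GO TO 20
         IF( MYCOL-LEVEL.GE.0 )
     $      WRITE( *, * ) 'col', MYCOL, ' receives from', MYCOL-LEVEL
         IF( MYCOL+LEVEL.LT.NPCOL-1 )
     $      WRITE( *, * ) 'col', MYCOL, ' receives from', MYCOL+LEVEL
         LEVEL = LEVEL*2
         GO TO 10
   20 CONTINUE
      END

Doubling LEVEL_DIST halves the number of active columns at each level, which is what gives the block cyclic reduction of the reduced system its logarithmic depth in the number of processes.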