SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
pcpttrsv.f
1 SUBROUTINE pcpttrsv( UPLO, TRANS, N, NRHS, D, E, JA, DESCA, B, IB,
2 $ DESCB, AF, LAF, WORK, LWORK, INFO )
3*
4*
5*
6* -- ScaLAPACK routine (version 1.7) --
7* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8* and University of California, Berkeley.
9* November 15, 1997
10*
11* .. Scalar Arguments ..
12 CHARACTER TRANS, UPLO
13 INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS
14* ..
15* .. Array Arguments ..
16 INTEGER DESCA( * ), DESCB( * )
17 COMPLEX AF( * ), B( * ), E( * ), WORK( * )
18 REAL D( * )
19* ..
20*
21*
22* Purpose
23* =======
24*
25* PCPTTRSV solves a tridiagonal triangular system of linear equations
26*
27* A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS)
28* or
29* A(1:N, JA:JA+N-1)^H * X = B(IB:IB+N-1, 1:NRHS)
30*
31* where A(1:N, JA:JA+N-1) is a tridiagonal
32* triangular matrix factor produced by the
33* Cholesky factorization code PCPTTRF
34* and is stored in A(1:N,JA:JA+N-1) and AF.
35* The matrix stored in A(1:N, JA:JA+N-1) is either
36* upper or lower triangular according to UPLO,
37* and the choice of solving A(1:N, JA:JA+N-1) or A(1:N, JA:JA+N-1)^H
38* is dictated by the user via the parameter TRANS.
39*
40* Routine PCPTTRF MUST be called first.
41*
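*
*  Illustrative calling sequence (a sketch, not taken from this file):
*  the factorization is computed once by PCPTTRF and its output is then
*  reused here.  A complete solve of A*X = B requires both the forward
*  and the back substitution; the driver PCPTTRS handles that
*  sequencing.  Array names and workspace sizes below are placeholders
*  chosen only for illustration.
*
*     CALL PCPTTRF( N, D, E, JA, DESCA, AF, LAF, WORK, LWORK, INFO )
*     IF( INFO.EQ.0 ) THEN
*        CALL PCPTTRSV( 'L', 'N', N, NRHS, D, E, JA, DESCA, B, IB,
*    $                  DESCB, AF, LAF, WORK, LWORK, INFO )
*     END IF
*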
42* =====================================================================
43*
44* Arguments
45* =========
46*
47* UPLO (global input) CHARACTER
48* = 'U': Upper triangle of A(1:N, JA:JA+N-1) is stored;
49* = 'L': Lower triangle of A(1:N, JA:JA+N-1) is stored.
50*
51* TRANS (global input) CHARACTER
52* = 'N': Solve with A(1:N, JA:JA+N-1);
53* = 'C': Solve with conjugate_transpose( A(1:N, JA:JA+N-1) );
54*
55* N (global input) INTEGER
56* The number of rows and columns to be operated on, i.e. the
57* order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0.
58*
59* NRHS (global input) INTEGER
60* The number of right hand sides, i.e., the number of columns
61* of the distributed submatrix B(IB:IB+N-1, 1:NRHS).
62* NRHS >= 0.
63*
64* D (local input/local output) REAL pointer to local
65* part of global vector storing the main diagonal of the
66* matrix.
67* On exit, this array contains information on the
68* factors of the matrix.
69* Must be of size >= DESCA( NB_ ).
70*
71* E (local input/local output) COMPLEX pointer to local
72* part of global vector storing the upper diagonal of the
73* matrix. Globally, E(n) is not referenced, and E must be
74* aligned with D.
75* On exit, this array contains information on the
76* factors of the matrix.
77* Must be of size >= DESCA( NB_ ).
78*
79* JA (global input) INTEGER
80* The index in the global array A that points to the start of
81* the matrix to be operated on (which may be either all of A
82* or a submatrix of A).
83*
84* DESCA (global and local input) INTEGER array of dimension DLEN.
85* if 1D type (DTYPE_A=501 or 502), DLEN >= 7;
86* if 2D type (DTYPE_A=1), DLEN >= 9.
87* The array descriptor for the distributed matrix A.
88* Contains information on the mapping of A to memory. Please
89* see NOTES below for full description and options.
90*
91* B (local input/local output) COMPLEX pointer into
92* local memory to an array of local lead dimension lld_b>=NB.
93* On entry, this array contains the
94* local pieces of the right hand sides
95* B(IB:IB+N-1, 1:NRHS).
96* On exit, this contains the local piece of the solution
97* distributed matrix X.
98*
99* IB (global input) INTEGER
100* The row index in the global array B that points to the first
101* row of the matrix to be operated on (which may be either
102* all of B or a submatrix of B).
103*
104* DESCB (global and local input) INTEGER array of dimension DLEN.
105* if 1D type (DTYPE_B=502), DLEN >=7;
106* if 2D type (DTYPE_B=1), DLEN >= 9.
107* The array descriptor for the distributed matrix B.
108* Contains information on the mapping of B to memory. Please
109* see NOTES below for full description and options.
110*
111* AF (local output) COMPLEX array, dimension LAF.
112* Auxiliary Fillin Space.
113* Fillin is created during the factorization routine
114* PCPTTRF and this is stored in AF. If a linear system
115* is to be solved using PCPTTRS after the factorization
116* routine, AF *must not be altered* after the factorization.
117*
118* LAF (local input) INTEGER
119* Size of user-input Auxiliary Fillin space AF. Must be >=
120* (NB+2)
121* If LAF is not large enough, an error code will be returned
122* and the minimum acceptable size will be returned in AF( 1 )
123*
124* WORK (local workspace/local output)
125* COMPLEX temporary workspace. This space may
126* be overwritten in between calls to routines. WORK must be
127* the size given in LWORK.
128* On exit, WORK( 1 ) contains the minimal LWORK.
129*
130* LWORK (local input or global input) INTEGER
131* Size of user-input workspace WORK.
132* If LWORK is too small, the minimal acceptable size will be
133* returned in WORK(1) and an error code is returned. LWORK>=
134* (10+2*min(100,NRHS))*NPCOL+4*NRHS
135*
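*          A minimal workspace-query sketch, assuming the usual
*          ScaLAPACK convention (accepted by the argument checks in
*          this routine) that LWORK = -1 requests only the minimum
*          workspace size, which is returned in WORK( 1 ):
*
*             LWORK = -1
*             CALL PCPTTRSV( UPLO, TRANS, N, NRHS, D, E, JA, DESCA,
*            $               B, IB, DESCB, AF, LAF, WORK, LWORK, INFO )
*             LWORK = INT( WORK( 1 ) )
*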
136* INFO (local output) INTEGER
137* = 0: successful exit
138* < 0: If the i-th argument is an array and the j-th entry had
139* an illegal value, then INFO = -(i*100+j), if the i-th
140* argument is a scalar and had an illegal value, then
141* INFO = -i.
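*
*          For example, INFO = -(8*100+4) = -804 reports an illegal
*          value in entry 4 of the 8th argument, DESCA (for the 1D
*          descriptor types this entry is the blocking factor NB).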
142*
143* =====================================================================
144*
145*
146* Restrictions
147* ============
148*
149* The following are restrictions on the input parameters. Some of these
150* are temporary and will be removed in future releases, while others
151* may reflect fundamental technical limitations.
152*
153* Non-cyclic restriction: VERY IMPORTANT!
154* P*NB>= mod(JA-1,NB)+N.
155* The mapping for matrices must be blocked, reflecting the nature
156* of the divide and conquer algorithm as a task-parallel algorithm.
157* This formula in words is: no processor may have more than one
158* chunk of the matrix.
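*
*   Worked example (values chosen only for illustration): with P = 4
*   processes, JA = 1 and N = 1000, mod(JA-1,NB) = 0 and the
*   restriction reads 4*NB >= 1000, i.e. NB must be at least 250.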
159*
160* Blocksize cannot be too small:
161* If the matrix spans more than one processor, the following
162* restriction on NB, the size of each block on each processor,
163* must hold:
164* NB >= 2
165* The bulk of parallel computation is done on the matrix of size
166* O(NB) on each processor. If this is too small, divide and conquer
167* is a poor choice of algorithm.
168*
169* Submatrix reference:
170* JA = IB
171* Alignment restriction that prevents unnecessary communication.
172*
173*
174* =====================================================================
175*
176*
177* Notes
178* =====
179*
180* If the factorization routine and the solve routine are to be called
181* separately (to solve various sets of righthand sides using the same
182* coefficient matrix), the auxiliary space AF *must not be altered*
183* between calls to the factorization routine and the solve routine.
184*
185* The best algorithm for solving banded and tridiagonal linear systems
186* depends on a variety of parameters, especially the bandwidth.
187* Currently, only algorithms designed for the case N/P >> bw are
188* implemented. These go by many names, including Divide and Conquer,
189* Partitioning, domain decomposition-type, etc.
190* For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C
191* algorithms are the appropriate choice.
192*
193* Algorithm description: Divide and Conquer
194*
195* The Divide and Conquer algorithm assumes the matrix is narrowly
196* banded compared with the number of equations. In this situation,
197* it is best to distribute the input matrix A one-dimensionally,
198* with columns atomic and rows divided amongst the processes.
199* The basic algorithm divides the tridiagonal matrix up into
200* P pieces with one stored on each processor,
201* and then proceeds in 2 phases for the factorization or 3 for the
202* solution of a linear system.
203* 1) Local Phase:
204* The individual pieces are factored independently and in
205* parallel. These factors are applied to the matrix creating
206* fillin, which is stored in a non-inspectable way in auxiliary
207* space AF. Mathematically, this is equivalent to reordering
208* the matrix A as P A P^T and then factoring the principal
209* leading submatrix of size equal to the sum of the sizes of
210* the matrices factored on each processor. The factors of
211* these submatrices overwrite the corresponding parts of A
212* in memory.
213* 2) Reduced System Phase:
214* A small ((P-1) x (P-1)) system is formed representing
215* interaction of the larger blocks, and is stored (as are its
216* factors) in the space AF. A parallel Block Cyclic Reduction
217* algorithm is used. For a linear system, a parallel front solve
218* followed by an analogous backsolve, both using the structure
219* of the factored matrix, are performed.
220* 3) Backsubstitution Phase:
221* For a linear system, a local backsubstitution is performed on
222* each processor in parallel.
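*
*  For example, with P = 4 processes the local phase factors four
*  independent pieces in parallel, and the phase-2 reduced system
*  couples the P-1 = 3 interface unknowns, one per boundary between
*  neighbouring processors.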
223*
224*
225* Descriptors
226* ===========
227*
228* Descriptors now have *types* and differ from ScaLAPACK 1.0.
229*
230* Note: tridiagonal codes can use either the old two dimensional
231* or new one-dimensional descriptors, though the processor grid in
232* both cases *must be one-dimensional*. We describe both types below.
233*
234* Each global data object is described by an associated description
235* vector. This vector stores the information required to establish
236* the mapping between an object element and its corresponding process
237* and memory location.
238*
239* Let A be a generic term for any 2D block cyclically distributed array.
240* Such a global array has an associated description vector DESCA.
241* In the following comments, the character _ should be read as
242* "of the global array".
243*
244* NOTATION STORED IN EXPLANATION
245* --------------- -------------- --------------------------------------
246* DTYPE_A(global) DESCA( DTYPE_ ) The descriptor type. In this case,
247* DTYPE_A = 1.
248* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
249* the BLACS process grid A is distribu-
250* ted over. The context itself is glo-
251* bal, but the handle (the integer
252* value) may vary.
253* M_A (global) DESCA( M_ ) The number of rows in the global
254* array A.
255* N_A (global) DESCA( N_ ) The number of columns in the global
256* array A.
257* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
258* the rows of the array.
259* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
260* the columns of the array.
261* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
262* row of the array A is distributed.
263* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
264* first column of the array A is
265* distributed.
266* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
267* array. LLD_A >= MAX(1,LOCr(M_A)).
268*
269* Let K be the number of rows or columns of a distributed matrix,
270* and assume that its process grid has dimension p x q.
271* LOCr( K ) denotes the number of elements of K that a process
272* would receive if K were distributed over the p processes of its
273* process column.
274* Similarly, LOCc( K ) denotes the number of elements of K that a
275* process would receive if K were distributed over the q processes of
276* its process row.
277* The values of LOCr() and LOCc() may be determined via a call to the
278* ScaLAPACK tool function, NUMROC:
279* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
280* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
281* An upper bound for these quantities may be computed by:
282* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
283* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
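*
*  For example, M = 10, MB_A = 2, RSRC_A = 0 and NPROW = 3 give
*  LOCr( M ) = 4, 4 and 2 on process rows 0, 1 and 2 respectively,
*  while the bound above evaluates to ceil(5/3)*2 = 4.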
284*
285*
286* One-dimensional descriptors:
287*
288* One-dimensional descriptors are a new addition to ScaLAPACK since
289* version 1.0. They simplify and shorten the descriptor for 1D
290* arrays.
291*
292* Since ScaLAPACK supports two-dimensional arrays as the fundamental
293* object, we allow 1D arrays to be distributed either over the
294* first dimension of the array (as if the grid were P-by-1) or the
295* 2nd dimension (as if the grid were 1-by-P). This choice is
296* indicated by the descriptor type (501 or 502)
297* as described below.
298* However, for tridiagonal matrices, since the objects being
299* distributed are the individual vectors storing the diagonals, we
300* have adopted the convention that both the P-by-1 descriptor and
301* the 1-by-P descriptor are allowed and are equivalent for
302* tridiagonal matrices. Thus, for tridiagonal matrices,
303* DTYPE_A = 501 or 502 can be used interchangeably
304* without any other change.
305* We require that the distributed vectors storing the diagonals of a
306* tridiagonal matrix be aligned with each other. Because of this, a
307* single descriptor, DESCA, serves to describe the distribution of
308* all diagonals simultaneously.
309*
310* IMPORTANT NOTE: the actual BLACS grid represented by the
311* CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P
312* irrespective of which one-dimensional descriptor type
313* (501 or 502) is input.
314* This routine will interpret the grid properly either way.
315* ScaLAPACK routines *do not support intercontext operations* so that
316* the grid passed to a single ScaLAPACK routine *must be the same*
317* for all array descriptors passed to that routine.
318*
319* NOTE: In all cases where 1D descriptors are used, 2D descriptors
320* may also be used, since a one-dimensional array is a special case
321* of a two-dimensional array with one dimension of size unity.
322* The two-dimensional array used in this case *must* be of the
323* proper orientation:
324* If the appropriate one-dimensional descriptor is DTYPE_A=501
325* (1 by P type), then the two dimensional descriptor must
326* have a CTXT value that refers to a 1 by P BLACS grid;
327* If the appropriate one-dimensional descriptor is DTYPE_A=502
328* (P by 1 type), then the two dimensional descriptor must
329* have a CTXT value that refers to a P by 1 BLACS grid.
330*
331*
332* Summary of allowed descriptors, types, and BLACS grids:
333* DTYPE        501         502        1       1
334* BLACS grid   1xP or Px1  1xP or Px1 1xP     Px1
335* ------------------------------------------------
336* A            OK          OK         OK      NO
337* B            NO          OK         NO      OK
338*
339* Note that a consequence of this chart is that it is not possible
340* for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead
341* to opposite requirements for the orientation of the BLACS grid,
342* and as noted before, the *same* BLACS context must be used in
343* all descriptors in a single ScaLAPACK subroutine call.
344*
345* Let A be a generic term for any 1D block cyclically distributed array.
346* Such a global array has an associated description vector DESCA.
347* In the following comments, the character _ should be read as
348* "of the global array".
349*
350* NOTATION STORED IN EXPLANATION
351* --------------- ---------- ------------------------------------------
352* DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids,
353* DTYPE_A = 501: 1-by-P grid.
354* DTYPE_A = 502: P-by-1 grid.
355* CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating
356* the BLACS process grid A is distribu-
357* ted over. The context itself is glo-
358* bal, but the handle (the integer
359* value) may vary.
360* N_A (global) DESCA( 3 ) The size of the array dimension being
361* distributed.
362* NB_A (global) DESCA( 4 ) The blocking factor used to distribute
363* the distributed dimension of the array.
364* SRC_A (global) DESCA( 5 ) The process row or column over which the
365* first row or column of the array
366* is distributed.
367* Ignored DESCA( 6 ) Ignored for tridiagonal matrices.
368* Reserved DESCA( 7 ) Reserved for future use.
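*
*  A minimal sketch of filling in the two one-dimensional descriptors
*  by hand, following the tables above.  ICTXT, NB and LLDB are
*  assumed to have been obtained already, and the source process is
*  assumed to be 0; entry 6 of DESCA is ignored and entry 7 of both
*  descriptors is reserved, so they are simply zeroed here.
*
*     DESCA( 1 ) = 501
*     DESCA( 2 ) = ICTXT
*     DESCA( 3 ) = N
*     DESCA( 4 ) = NB
*     DESCA( 5 ) = 0
*     DESCA( 6 ) = 0
*     DESCA( 7 ) = 0
*
*     DESCB( 1 ) = 502
*     DESCB( 2 ) = ICTXT
*     DESCB( 3 ) = N
*     DESCB( 4 ) = NB
*     DESCB( 5 ) = 0
*     DESCB( 6 ) = LLDB
*     DESCB( 7 ) = 0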
369*
370*
371*
372* =====================================================================
373*
374* Code Developer: Andrew J. Cleary, University of Tennessee.
375* Current address: Lawrence Livermore National Labs.
376* This version released: August, 2001.
377*
378* =====================================================================
379*
380* ..
381* .. Parameters ..
382 REAL ONE, ZERO
383 parameter( one = 1.0e+0 )
384 parameter( zero = 0.0e+0 )
385 COMPLEX CONE, CZERO
386 parameter( cone = ( 1.0e+0, 0.0e+0 ) )
387 parameter( czero = ( 0.0e+0, 0.0e+0 ) )
388 INTEGER INT_ONE
389 parameter( int_one = 1 )
390 INTEGER DESCMULT, BIGNUM
391 parameter(descmult = 100, bignum = descmult * descmult)
392 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
393 $ lld_, mb_, m_, nb_, n_, rsrc_
394 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
395 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
396 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
397* ..
398* .. Local Scalars ..
399 INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE,
400 $ idum1, idum2, idum3, ja_new, level_dist, llda,
401 $ lldb, mycol, myrow, my_num_cols, nb, np, npcol,
402 $ nprow, np_save, odd_size, part_offset,
403 $ part_size, return_code, store_m_b, store_n_a,
404 $ temp, work_size_min
405* ..
406* .. Local Arrays ..
407 INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ),
408 $ param_check( 16, 3 )
409* ..
410* .. External Subroutines ..
411 EXTERNAL blacs_get, blacs_gridexit, blacs_gridinfo,
412 $ cgemm, cgerv2d, cgesd2d, clamov, cmatadd,
413 $ ctbtrs, ctrmm, ctrtrs, desc_convert, globchk,
414 $ pxerbla, reshape
415* ..
416* .. External Functions ..
417 LOGICAL LSAME
418 INTEGER NUMROC
419 EXTERNAL lsame, numroc
420* ..
421* .. Intrinsic Functions ..
422 INTRINSIC ichar, min, mod
423* ..
424* .. Executable Statements ..
425*
426* Test the input parameters
427*
428 info = 0
429*
430* Convert descriptor into standard form for easy access to
431* parameters, check that grid is of right shape.
432*
433 desca_1xp( 1 ) = 501
434 descb_px1( 1 ) = 502
435*
436 temp = desca( dtype_ )
437 IF( temp .EQ. 502 ) THEN
438* Temporarily set the descriptor type to 1xP type
439 desca( dtype_ ) = 501
440 ENDIF
441*
442 CALL desc_convert( desca, desca_1xp, return_code )
443*
444 desca( dtype_ ) = temp
445*
446 IF( return_code .NE. 0) THEN
447 info = -( 8*100 + 2 )
448 ENDIF
449*
450 CALL desc_convert( descb, descb_px1, return_code )
451*
452 IF( return_code .NE. 0) THEN
453 info = -( 11*100 + 2 )
454 ENDIF
455*
456* Consistency checks for DESCA and DESCB.
457*
458* Context must be the same
459 IF( desca_1xp( 2 ) .NE. descb_px1( 2 ) ) THEN
460 info = -( 11*100 + 2 )
461 ENDIF
462*
463* These are alignment restrictions that may or may not be removed
464* in future releases. -Andy Cleary, April 14, 1996.
465*
466* Block sizes must be the same
467 IF( desca_1xp( 4 ) .NE. descb_px1( 4 ) ) THEN
468 info = -( 11*100 + 4 )
469 ENDIF
470*
471* Source processor must be the same
472*
473 IF( desca_1xp( 5 ) .NE. descb_px1( 5 ) ) THEN
474 info = -( 11*100 + 5 )
475 ENDIF
476*
477* Get values out of descriptor for use in code.
478*
479 ictxt = desca_1xp( 2 )
480 csrc = desca_1xp( 5 )
481 nb = desca_1xp( 4 )
482 llda = desca_1xp( 6 )
483 store_n_a = desca_1xp( 3 )
484 lldb = descb_px1( 6 )
485 store_m_b = descb_px1( 3 )
486*
487* Get grid parameters
488*
489*
490 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
491 np = nprow * npcol
492*
493*
494*
495 IF( lsame( uplo, 'U' ) ) THEN
496 idum1 = ichar( 'U' )
497 ELSE IF ( lsame( uplo, 'L' ) ) THEN
498 idum1 = ichar( 'L' )
499 ELSE
500 info = -1
501 END IF
502*
503 IF( lsame( trans, 'N' ) ) THEN
504 idum2 = ichar( 'N' )
505 ELSE IF ( lsame( trans, 'C' ) ) THEN
506 idum2 = ichar( 'C' )
507 ELSE
508 info = -2
509 END IF
510*
511 IF( lwork .LT. -1) THEN
512 info = -15
513 ELSE IF ( lwork .EQ. -1 ) THEN
514 idum3 = -1
515 ELSE
516 idum3 = 1
517 ENDIF
518*
519 IF( n .LT. 0 ) THEN
520 info = -3
521 ENDIF
522*
523 IF( n+ja-1 .GT. store_n_a ) THEN
524 info = -( 8*100 + 6 )
525 ENDIF
526*
527 IF( n+ib-1 .GT. store_m_b ) THEN
528 info = -( 11*100 + 3 )
529 ENDIF
530*
531 IF( lldb .LT. nb ) THEN
532 info = -( 11*100 + 6 )
533 ENDIF
534*
535 IF( nrhs .LT. 0 ) THEN
536 info = -4
537 ENDIF
538*
539* Current alignment restriction
540*
541 IF( ja .NE. ib) THEN
542 info = -7
543 ENDIF
544*
545* Argument checking that is specific to Divide & Conquer routine
546*
547 IF( nprow .NE. 1 ) THEN
548 info = -( 8*100+2 )
549 ENDIF
550*
551 IF( n .GT. np*nb-mod( ja-1, nb )) THEN
552 info = -( 3 )
553 CALL pxerbla( ictxt,
554 $ 'PCPTTRSV, D&C alg.: only 1 block per proc',
555 $ -info )
556 RETURN
557 ENDIF
558*
559 IF((ja+n-1.GT.nb) .AND. ( nb.LT.2*int_one )) THEN
560 info = -( 8*100+4 )
561 CALL pxerbla( ictxt,
562 $ 'PCPTTRSV, D&C alg.: NB too small',
563 $ -info )
564 RETURN
565 ENDIF
566*
567*
568 work_size_min =
569 $ int_one*nrhs
570*
571 work( 1 ) = work_size_min
572*
573 IF( lwork .LT. work_size_min ) THEN
574 IF( lwork .NE. -1 ) THEN
575 info = -15
576 CALL pxerbla( ictxt,
577 $ 'PCPTTRSV: worksize error',
578 $ -info )
579 ENDIF
580 RETURN
581 ENDIF
582*
583* Pack params and positions into arrays for global consistency check
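* (Column 1 below holds the local value of each checked quantity and
*  column 2 its encoded position: the argument number, or
*  argument*100 + descriptor entry, matching the INFO convention
*  documented above, so that GLOBCHK can report which argument
*  disagrees across processes.)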
584*
585 param_check( 16, 1 ) = descb(5)
586 param_check( 15, 1 ) = descb(4)
587 param_check( 14, 1 ) = descb(3)
588 param_check( 13, 1 ) = descb(2)
589 param_check( 12, 1 ) = descb(1)
590 param_check( 11, 1 ) = ib
591 param_check( 10, 1 ) = desca(5)
592 param_check( 9, 1 ) = desca(4)
593 param_check( 8, 1 ) = desca(3)
594 param_check( 7, 1 ) = desca(1)
595 param_check( 6, 1 ) = ja
596 param_check( 5, 1 ) = nrhs
597 param_check( 4, 1 ) = n
598 param_check( 3, 1 ) = idum3
599 param_check( 2, 1 ) = idum2
600 param_check( 1, 1 ) = idum1
601*
602 param_check( 16, 2 ) = 1105
603 param_check( 15, 2 ) = 1104
604 param_check( 14, 2 ) = 1103
605 param_check( 13, 2 ) = 1102
606 param_check( 12, 2 ) = 1101
607 param_check( 11, 2 ) = 10
608 param_check( 10, 2 ) = 805
609 param_check( 9, 2 ) = 804
610 param_check( 8, 2 ) = 803
611 param_check( 7, 2 ) = 801
612 param_check( 6, 2 ) = 7
613 param_check( 5, 2 ) = 4
614 param_check( 4, 2 ) = 3
615 param_check( 3, 2 ) = 14
616 param_check( 2, 2 ) = 2
617 param_check( 1, 2 ) = 1
618*
619* Want to find errors with MIN( ), so if no error, set it to a big
620* number. If there already is an error, multiply by the
621* descriptor multiplier.
622*
623 IF( info.GE.0 ) THEN
624 info = bignum
625 ELSE IF( info.LT.-descmult ) THEN
626 info = -info
627 ELSE
628 info = -info * descmult
629 END IF
630*
631* Check consistency across processors
632*
633 CALL globchk( ictxt, 16, param_check, 16,
634 $ param_check( 1, 3 ), info )
635*
636* Prepare output: set info = 0 if no error, and divide by DESCMULT
637* if error is not in a descriptor entry.
638*
639 IF( info.EQ.bignum ) THEN
640 info = 0
641 ELSE IF( mod( info, descmult ) .EQ. 0 ) THEN
642 info = -info / descmult
643 ELSE
644 info = -info
645 END IF
646*
647 IF( info.LT.0 ) THEN
648 CALL pxerbla( ictxt, 'PCPTTRSV', -info )
649 RETURN
650 END IF
651*
652* Quick return if possible
653*
654 IF( n.EQ.0 )
655 $ RETURN
656*
657 IF( nrhs.EQ.0 )
658 $ RETURN
659*
660*
661* Adjust addressing into matrix space to properly get into
662* the beginning part of the relevant data
663*
664 part_offset = nb*( (ja-1)/(npcol*nb) )
665*
666 IF ( (mycol-csrc) .LT. (ja-part_offset-1)/nb ) THEN
667 part_offset = part_offset + nb
668 ENDIF
669*
670 IF ( mycol .LT. csrc ) THEN
671 part_offset = part_offset - nb
672 ENDIF
673*
674* Form a new BLACS grid (the "standard form" grid) with only procs
675* holding part of the matrix, of size 1xNP where NP is adjusted,
676* starting at csrc=0, with JA modified to reflect dropped procs.
677*
678* First processor to hold part of the matrix:
679*
680 first_proc = mod( ( ja-1 )/nb+csrc, npcol )
681*
682* Calculate new JA while dropping off unused processors.
683*
684 ja_new = mod( ja-1, nb ) + 1
685*
686* Save and compute new value of NP
687*
688 np_save = np
689 np = ( ja_new+n-2 )/nb + 1
690*
691* Call utility routine that forms "standard-form" grid
692*
693 CALL reshape( ictxt, int_one, ictxt_new, int_one,
694 $ first_proc, int_one, np )
695*
696* Use new context from standard grid as context.
697*
698 ictxt_save = ictxt
699 ictxt = ictxt_new
700 desca_1xp( 2 ) = ictxt_new
701 descb_px1( 2 ) = ictxt_new
702*
703* Get information about new grid.
704*
705 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
706*
707* Drop out processors that do not have part of the matrix.
708*
709 IF( myrow .LT. 0 ) THEN
710 GOTO 1234
711 ENDIF
712*
713* ********************************
714* Values reused throughout routine
715*
716* User-input value of partition size
717*
718 part_size = nb
719*
720* Number of columns in each processor
721*
722 my_num_cols = numroc( n, part_size, mycol, 0, npcol )
723*
724* Offset in columns to beginning of main partition in each proc
725*
726 IF ( mycol .EQ. 0 ) THEN
727 part_offset = part_offset+mod( ja_new-1, part_size )
728 my_num_cols = my_num_cols - mod(ja_new-1, part_size )
729 ENDIF
730*
731* Size of main (or odd) partition in each processor
732*
733 odd_size = my_num_cols
734 IF ( mycol .LT. np-1 ) THEN
735 odd_size = odd_size - int_one
736 ENDIF
737*
738*
739*
740* Begin main code
741*
742 IF ( lsame( uplo, 'L' ) ) THEN
743*
744 IF ( lsame( trans, 'N' ) ) THEN
745*
746* Frontsolve
747*
748*
749******************************************
750* Local computation phase
751******************************************
752*
753* Use main partition in each processor to solve locally
754*
755 CALL cpttrsv( uplo, 'N', odd_size, nrhs, d( part_offset+1 ),
756 $ e( part_offset+1 ), b( part_offset+1 ), lldb,
757 $ info )
758*
759*
760 IF ( mycol .LT. np-1 ) THEN
761* Use factorization of odd-even connection block to modify
762* locally stored portion of right hand side(s)
763*
764 CALL caxpy( nrhs, -e( part_offset+odd_size ),
765 $ b( part_offset+odd_size ), lldb,
766 $ b( part_offset+odd_size+1 ), lldb )
767*
768 ENDIF
769*
770*
771 IF ( mycol .NE. 0 ) THEN
772* Use the "spike" fillin to calculate contribution to previous
773* processor's righthand-side.
774*
775 CALL cgemm( 'C', 'N', 1, nrhs, odd_size, -cone, af( 1 ),
776 $ odd_size, b( part_offset+1 ), lldb, czero,
777 $ work( 1+int_one-1 ), int_one )
778 ENDIF
779*
780*
781************************************************
782* Formation and solution of reduced system
783************************************************
784*
785*
786* Send modifications to prior processor's right hand sides
787*
788 IF( mycol .GT. 0) THEN
789*
790 CALL cgesd2d( ictxt, int_one, nrhs,
791 $ work( 1 ), int_one,
792 $ 0, mycol - 1 )
793*
794 ENDIF
795*
796* Receive modifications to processor's right hand sides
797*
798 IF( mycol .LT. npcol-1) THEN
799*
800 CALL cgerv2d( ictxt, int_one, nrhs,
801 $ work( 1 ), int_one,
802 $ 0, mycol + 1 )
803*
804* Combine contribution to locally stored right hand sides
805*
806 CALL cmatadd( int_one, nrhs, cone,
807 $ work( 1 ), int_one, cone,
808 $ b( part_offset+odd_size + 1 ), lldb )
809*
810 ENDIF
811*
812*
813* The last processor does not participate in the solution of the
814* reduced system, having sent its contribution already.
815 IF( mycol .EQ. npcol-1 ) THEN
816 GOTO 14
817 ENDIF
818*
819*
820* *************************************
821* Modification Loop
822*
823* The distance for sending and receiving for each level starts
824* at 1 for the first level.
825 level_dist = 1
826*
827* Do until this proc is needed to modify other procs' equations
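* (A process stays in this accumulation loop, adding in contributions
*  received from neighbours at distance LEVEL_DIST, for as long as
*  (MYCOL+1)/LEVEL_DIST is even; once the quotient becomes odd it
*  drops out and handles its own row of the reduced system below.)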
828*
829 12 CONTINUE
830 IF( mod( (mycol+1)/level_dist, 2) .NE. 0 ) GOTO 11
831*
832* Receive and add contribution to righthand sides from left
833*
834 IF( mycol-level_dist .GE. 0 ) THEN
835*
836 CALL cgerv2d( ictxt, int_one, nrhs,
837 $ work( 1 ),
838 $ int_one, 0, mycol-level_dist )
839*
840 CALL cmatadd( int_one, nrhs, cone,
841 $ work( 1 ), int_one, cone,
842 $ b( part_offset+odd_size + 1 ), lldb )
843*
844 ENDIF
845*
846* Receive and add contribution to righthand sides from right
847*
848 IF( mycol+level_dist .LT. npcol-1 ) THEN
849*
850 CALL cgerv2d( ictxt, int_one, nrhs,
851 $ work( 1 ),
852 $ int_one, 0, mycol+level_dist )
853*
854 CALL cmatadd( int_one, nrhs, cone,
855 $ work( 1 ), int_one, cone,
856 $ b( part_offset+odd_size + 1 ), lldb )
857*
858 ENDIF
859*
860 level_dist = level_dist*2
861*
862 GOTO 12
863 11 CONTINUE
864* [End of GOTO Loop]
865*
866*
867*
868* *********************************
869* Calculate and use this proc's blocks to modify other procs
870*
871* Solve with diagonal block
872*
873 CALL ctrtrs( 'L', 'N', 'U', int_one, nrhs, af( odd_size+2 ),
874 $ int_one, b( part_offset+odd_size+1 ), lldb, info )
875*
876 IF( info.NE.0 ) THEN
877 GO TO 1000
878 ENDIF
879*
880*
881*
882* *********
883 IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )THEN
884*
885* Calculate contribution from this block to next diagonal block
886*
887 CALL cgemm( 'C', 'N', int_one, nrhs, int_one, -cone,
888 $ af( (odd_size)*1+1 ),
889 $ int_one,
890 $ b( part_offset+odd_size+1 ),
891 $ lldb, czero,
892 $ work( 1 ),
893 $ int_one )
894*
895* Send contribution to diagonal block's owning processor.
896*
897 CALL cgesd2d( ictxt, int_one, nrhs,
898 $ work( 1 ),
899 $ int_one, 0, mycol+level_dist )
900*
901 ENDIF
902* End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
903*
904* ************
905 IF( (mycol/level_dist .GT. 0 ).AND.
906 $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) ) THEN
907*
908*
909* Use offdiagonal block to calculate modification to diag block
910* of processor to the left
911*
912 CALL cgemm( 'N', 'N', int_one, nrhs, int_one, -cone,
913 $ af( odd_size*1+2+1 ),
914 $ int_one,
915 $ b( part_offset+odd_size+1 ),
916 $ lldb, czero,
917 $ work( 1 ),
918 $ int_one )
919*
920* Send contribution to diagonal block's owning processor.
921*
922 CALL cgesd2d( ictxt, int_one, nrhs,
923 $ work( 1 ),
924 $ int_one, 0, mycol-level_dist )
925*
926 ENDIF
927* End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
928*
929 14 CONTINUE
930*
931 ELSE
932*
933******************** BACKSOLVE *************************************
934*
935********************************************************************
936* .. Begin reduced system phase of algorithm ..
937********************************************************************
938*
939*
940*
941* The last processor does not participate in the solution of the
942* reduced system and just waits to receive its solution.
943 IF( mycol .EQ. npcol-1 ) THEN
944 GOTO 24
945 ENDIF
946*
947* Determine number of steps in tree loop
948*
949 level_dist = 1
950 27 CONTINUE
951 IF( mod( (mycol+1)/level_dist, 2) .NE. 0 ) GOTO 26
952*
953 level_dist = level_dist*2
954*
955 GOTO 27
956 26 CONTINUE
957*
958*
959 IF( (mycol/level_dist .GT. 0 ).AND.
960 $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) ) THEN
961*
962* Receive solution from processor to left
963*
964 CALL cgerv2d( ictxt, int_one, nrhs,
965 $ work( 1 ),
966 $ int_one, 0, mycol-level_dist )
967*
968*
969* Use offdiagonal block to calculate modification to RHS stored
970* on this processor
971*
972 CALL cgemm( 'C', 'N', int_one, nrhs, int_one, -cone,
973 $ af( odd_size*1+2+1 ),
974 $ int_one,
975 $ work( 1 ),
976 $ int_one, cone,
977 $ b( part_offset+odd_size+1 ),
978 $ lldb )
979 ENDIF
980* End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
981*
982*
983 IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )THEN
984*
985* Receive solution from processor to right
986*
987 CALL cgerv2d( ictxt, int_one, nrhs,
988 $ work( 1 ),
989 $ int_one, 0, mycol+level_dist )
990*
991* Calculate contribution from this block to next diagonal block
992*
993 CALL cgemm( 'N', 'N', int_one, nrhs, int_one, -cone,
994 $ af( (odd_size)*1+1 ),
995 $ int_one,
996 $ work( 1 ),
997 $ int_one, cone,
998 $ b( part_offset+odd_size+1 ),
999 $ lldb )
1000*
1001 ENDIF
1002* End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
1003*
1004*
1005* Solve with diagonal block
1006*
1007 CALL ctrtrs( 'L', 'C', 'U', int_one, nrhs, af( odd_size+2 ),
1008 $ int_one, b( part_offset+odd_size+1 ), lldb, info )
1009*
1010 IF( info.NE.0 ) THEN
1011 GO TO 1000
1012 ENDIF
1013*
1014*
1015*
1016***Modification Loop *******
1017*
1018 22 CONTINUE
1019 IF( level_dist .EQ. 1 ) GOTO 21
1020*
1021 level_dist = level_dist/2
1022*
1023* Send solution to the right
1024*
1025 IF( mycol+level_dist .LT. npcol-1 ) THEN
1026*
1027 CALL cgesd2d( ictxt, int_one, nrhs,
1028 $ b( part_offset+odd_size+1 ),
1029 $ lldb, 0, mycol+level_dist )
1030*
1031 ENDIF
1032*
1033* Send solution to left
1034*
1035 IF( mycol-level_dist .GE. 0 ) THEN
1036*
1037 CALL cgesd2d( ictxt, int_one, nrhs,
1038 $ b( part_offset+odd_size+1 ),
1039 $ lldb, 0, mycol-level_dist )
1040*
1041 ENDIF
1042*
1043 GOTO 22
1044 21 CONTINUE
1045* [End of GOTO Loop]
1046*
1047 24 CONTINUE
1048* [Processor npcol - 1 jumped to here to await next stage]
1049*
1050*******************************
1051* Reduced system has been solved, communicate solutions to nearest
1052* neighbors in preparation for local computation phase.
1053*
1054*
1055* Send elements of solution to next proc
1056*
1057 IF( mycol .LT. npcol-1) THEN
1058*
1059 CALL cgesd2d( ictxt, int_one, nrhs,
1060 $ b( part_offset+odd_size+1 ), lldb,
1061 $ 0, mycol +1 )
1062*
1063 ENDIF
1064*
1065* Receive modifications to processor's right hand sides
1066*
1067 IF( mycol .GT. 0) THEN
1068*
1069 CALL cgerv2d( ictxt, int_one, nrhs,
1070 $ work( 1 ), int_one,
1071 $ 0, mycol - 1 )
1072*
1073 ENDIF
1074*
1075*
1076*
1077**********************************************
1078* Local computation phase
1079**********************************************
1080*
1081 IF ( mycol .NE. 0 ) THEN
1082* Use the "spike" fillin to calculate contribution from previous
1083* processor's solution.
1084*
1085 CALL cgemm( 'N', 'N', odd_size, nrhs, 1, -cone, af( 1 ),
1086 $ odd_size, work( 1+int_one-1 ), int_one, cone,
1087 $ b( part_offset+1 ), lldb )
1088*
1089 ENDIF
1090*
1091*
1092 IF ( mycol .LT. np-1 ) THEN
1093* Use factorization of odd-even connection block to modify
1094* locally stored portion of right hand side(s)
1095*
1096 CALL caxpy( nrhs, -conjg( e( part_offset+odd_size ) ),
1097 $ b( part_offset+odd_size+1 ), lldb,
1098 $ b( part_offset+odd_size ), lldb )
1099*
1100 ENDIF
1101*
1102* Use main partition in each processor to solve locally
1103*
1104 CALL cpttrsv( uplo, 'C', odd_size, nrhs, d( part_offset+1 ),
1105 $ e( part_offset+1 ), b( part_offset+1 ), lldb,
1106 $ info )
1107*
1108 ENDIF
1109* End of "IF( LSAME( TRANS, 'N' ) )"...
1110*
1111*
1112 ELSE
1113***************************************************************
1114* CASE UPLO = 'U' *
1115***************************************************************
1116 IF ( lsame( trans, 'C' ) ) THEN
1117*
1118* Frontsolve
1119*
1120*
1121******************************************
1122* Local computation phase
1123******************************************
1124*
1125* Use main partition in each processor to solve locally
1126*
1127 CALL cpttrsv( uplo, 'C', odd_size, nrhs, d( part_offset+1 ),
1128 $ e( part_offset+1 ), b( part_offset+1 ), lldb,
1129 $ info )
1130*
1131*
1132 IF ( mycol .LT. np-1 ) THEN
1133* Use factorization of odd-even connection block to modify
1134* locally stored portion of right hand side(s)
1135*
1136 CALL caxpy( nrhs, -conjg( e( part_offset+odd_size ) ),
1137 $ b( part_offset+odd_size ), lldb,
1138 $ b( part_offset+odd_size+1 ), lldb )
1139*
1140 ENDIF
1141*
1142*
1143 IF ( mycol .NE. 0 ) THEN
1144* Use the "spike" fillin to calculate contribution to previous
1145* processor's righthand-side.
1146*
1147 CALL cgemm( 'T', 'N', 1, nrhs, odd_size, -cone, af( 1 ),
1148 $ odd_size, b( part_offset+1 ), lldb, czero,
1149 $ work( 1+int_one-1 ), int_one )
1150 ENDIF
1151*
1152*
1153************************************************
1154* Formation and solution of reduced system
1155************************************************
1156*
1157*
1158* Send modifications to prior processor's right hand sides
1159*
1160 IF( mycol .GT. 0) THEN
1161*
1162 CALL cgesd2d( ictxt, int_one, nrhs,
1163 $ work( 1 ), int_one,
1164 $ 0, mycol - 1 )
1165*
1166 ENDIF
1167*
1168* Receive modifications to processor's right hand sides
1169*
1170 IF( mycol .LT. npcol-1) THEN
1171*
1172 CALL cgerv2d( ictxt, int_one, nrhs,
1173 $ work( 1 ), int_one,
1174 $ 0, mycol + 1 )
1175*
1176* Combine contribution to locally stored right hand sides
1177*
1178 CALL cmatadd( int_one, nrhs, cone,
1179 $ work( 1 ), int_one, cone,
1180 $ b( part_offset+odd_size + 1 ), lldb )
1181*
1182 ENDIF
1183*
1184*
1185* The last processor does not participate in the solution of the
1186* reduced system, having sent its contribution already.
1187 IF( mycol .EQ. npcol-1 ) THEN
1188 GOTO 44
1189 ENDIF
1190*
1191*
1192* *************************************
1193* Modification Loop
1194*
1195* The distance for sending and receiving for each level starts
1196* at 1 for the first level.
1197 level_dist = 1
1198*
1199* Do until this proc is needed to modify other procs' equations
1200*
1201 42 CONTINUE
1202 IF( mod( (mycol+1)/level_dist, 2) .NE. 0 ) GOTO 41
1203*
1204* Receive and add contribution to righthand sides from left
1205*
1206 IF( mycol-level_dist .GE. 0 ) THEN
1207*
1208 CALL cgerv2d( ictxt, int_one, nrhs,
1209 $ work( 1 ),
1210 $ int_one, 0, mycol-level_dist )
1211*
1212 CALL cmatadd( int_one, nrhs, cone,
1213 $ work( 1 ), int_one, cone,
1214 $ b( part_offset+odd_size + 1 ), lldb )
1215*
1216 ENDIF
1217*
1218* Receive and add contribution to righthand sides from right
1219*
1220 IF( mycol+level_dist .LT. npcol-1 ) THEN
1221*
1222 CALL cgerv2d( ictxt, int_one, nrhs,
1223 $ work( 1 ),
1224 $ int_one, 0, mycol+level_dist )
1225*
1226 CALL cmatadd( int_one, nrhs, cone,
1227 $ work( 1 ), int_one, cone,
1228 $ b( part_offset+odd_size + 1 ), lldb )
1229*
1230 ENDIF
1231*
1232 level_dist = level_dist*2
1233*
1234 GOTO 42
1235 41 CONTINUE
1236* [End of GOTO Loop]
1237*
1238*
1239*
1240* *********************************
1241* Calculate and use this proc's blocks to modify other procs
1242*
1243* Solve with diagonal block
1244*
1245 CALL ctrtrs( 'L', 'N', 'U', int_one, nrhs, af( odd_size+2 ),
1246 $ int_one, b( part_offset+odd_size+1 ), lldb, info )
1247*
1248 IF( info.NE.0 ) THEN
1249 GO TO 1000
1250 ENDIF
1251*
1252*
1253*
1254* *********
1255 IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )THEN
1256*
1257* Calculate contribution from this block to next diagonal block
1258*
1259 CALL cgemm( 'T', 'N', int_one, nrhs, int_one, -cone,
1260 $ af( (odd_size)*1+1 ),
1261 $ int_one,
1262 $ b( part_offset+odd_size+1 ),
1263 $ lldb, czero,
1264 $ work( 1 ),
1265 $ int_one )
1266*
1267* Send contribution to diagonal block's owning processor.
1268*
1269 CALL cgesd2d( ictxt, int_one, nrhs,
1270 $ work( 1 ),
1271 $ int_one, 0, mycol+level_dist )
1272*
1273 ENDIF
1274* End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
1275*
1276* ************
1277 IF( (mycol/level_dist .GT. 0 ).AND.
1278 $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) ) THEN
1279*
1280*
1281* Use offdiagonal block to calculate modification to diag block
1282* of processor to the left
1283*
1284 CALL cgemm( 'C', 'N', int_one, nrhs, int_one, -cone,
1285 $ af( odd_size*1+2+1 ),
1286 $ int_one,
1287 $ b( part_offset+odd_size+1 ),
1288 $ lldb, czero,
1289 $ work( 1 ),
1290 $ int_one )
1291*
1292* Send contribution to diagonal block's owning processor.
1293*
1294 CALL cgesd2d( ictxt, int_one, nrhs,
1295 $ work( 1 ),
1296 $ int_one, 0, mycol-level_dist )
1297*
1298 ENDIF
1299* End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
1300*
1301 44 CONTINUE
1302*
1303 ELSE
1304*
1305******************** BACKSOLVE *************************************
1306*
1307********************************************************************
1308* .. Begin reduced system phase of algorithm ..
1309********************************************************************
1310*
1311*
1312*
1313* The last processor does not participate in the solution of the
1314* reduced system and just waits to receive its solution.
1315 IF( mycol .EQ. npcol-1 ) THEN
1316 GOTO 54
1317 ENDIF
1318*
1319* Determine number of steps in tree loop
1320*
1321 level_dist = 1
1322 57 CONTINUE
1323 IF( mod( (mycol+1)/level_dist, 2) .NE. 0 ) GOTO 56
1324*
1325 level_dist = level_dist*2
1326*
1327 GOTO 57
1328 56 CONTINUE
1329*
1330*
1331 IF( (mycol/level_dist .GT. 0 ).AND.
1332 $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) ) THEN
1333*
1334* Receive solution from processor to left
1335*
1336 CALL cgerv2d( ictxt, int_one, nrhs,
1337 $ work( 1 ),
1338 $ int_one, 0, mycol-level_dist )
1339*
1340*
1341* Use offdiagonal block to calculate modification to RHS stored
1342* on this processor
1343*
1344 CALL cgemm( 'T', 'N', int_one, nrhs, int_one, -cone,
1345 $ af( odd_size*1+2+1 ),
1346 $ int_one,
1347 $ work( 1 ),
1348 $ int_one, cone,
1349 $ b( part_offset+odd_size+1 ),
1350 $ lldb )
1351 ENDIF
1352* End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
1353*
1354*
1355 IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )THEN
1356*
1357* Receive solution from processor to right
1358*
1359 CALL cgerv2d( ictxt, int_one, nrhs,
1360 $ work( 1 ),
1361 $ int_one, 0, mycol+level_dist )
1362*
1363* Calculate contribution from this block to next diagonal block
1364*
1365 CALL cgemm( 'C', 'N', int_one, nrhs, int_one, -cone,
1366 $ af( (odd_size)*1+1 ),
1367 $ int_one,
1368 $ work( 1 ),
1369 $ int_one, cone,
1370 $ b( part_offset+odd_size+1 ),
1371 $ lldb )
1372*
1373 ENDIF
1374* End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
1375*
1376*
1377* Solve with diagonal block
1378*
1379 CALL ctrtrs( 'L', 'C', 'U', int_one, nrhs, af( odd_size+2 ),
1380 $ int_one, b( part_offset+odd_size+1 ), lldb, info )
1381*
1382 IF( info.NE.0 ) THEN
1383 GO TO 1000
1384 ENDIF
1385*
1386*
1387*
1388***Modification Loop *******
1389*
1390 52 CONTINUE
1391 IF( level_dist .EQ. 1 ) GOTO 51
1392*
1393 level_dist = level_dist/2
1394*
1395* Send solution to the right
1396*
1397 IF( mycol+level_dist .LT. npcol-1 ) THEN
1398*
1399 CALL cgesd2d( ictxt, int_one, nrhs,
1400 $ b( part_offset+odd_size+1 ),
1401 $ lldb, 0, mycol+level_dist )
1402*
1403 ENDIF
1404*
1405* Send solution to left
1406*
1407 IF( mycol-level_dist .GE. 0 ) THEN
1408*
1409 CALL cgesd2d( ictxt, int_one, nrhs,
1410 $ b( part_offset+odd_size+1 ),
1411 $ lldb, 0, mycol-level_dist )
1412*
1413 ENDIF
1414*
1415 GOTO 52
1416 51 CONTINUE
1417* [End of GOTO Loop]
1418*
1419 54 CONTINUE
1420* [Processor npcol - 1 jumped to here to await next stage]
1421*
1422*******************************
1423* Reduced system has been solved, communicate solutions to nearest
1424* neighbors in preparation for local computation phase.
1425*
1426*
1427* Send elements of solution to next proc
1428*
1429 IF( mycol .LT. npcol-1) THEN
1430*
1431 CALL cgesd2d( ictxt, int_one, nrhs,
1432 $ b( part_offset+odd_size+1 ), lldb,
1433 $ 0, mycol +1 )
1434*
1435 ENDIF
1436*
1437* Receive modifications to processor's right hand sides
1438*
1439 IF( mycol .GT. 0) THEN
1440*
1441 CALL cgerv2d( ictxt, int_one, nrhs,
1442 $ work( 1 ), int_one,
1443 $ 0, mycol - 1 )
1444*
1445 ENDIF
1446*
1447*
1448*
1449**********************************************
1450* Local computation phase
1451**********************************************
1452*
1453 IF ( mycol .NE. 0 ) THEN
1454* Use the "spike" fillin to calculate contribution from previous
1455* processor's solution.
1456*
1457 CALL cgemm( 'C', 'N', odd_size, nrhs, 1, -cone, af( 1 ),
1458 $ int_one, work( 1 ), int_one, cone,
1459 $ b( part_offset+1 ), lldb )
1460*
1461 ENDIF
1462*
1463*
1464 IF ( mycol .LT. np-1 ) THEN
1465* Use factorization of odd-even connection block to modify
1466* locally stored portion of right hand side(s)
1467*
1468 CALL caxpy( nrhs, -e( part_offset+odd_size ),
1469 $ b( part_offset+odd_size+1 ), lldb,
1470 $ b( part_offset+odd_size ), lldb )
1471*
1472 ENDIF
1473*
1474* Use main partition in each processor to solve locally
1475*
1476 CALL cpttrsv( uplo, 'N', odd_size, nrhs, d( part_offset+1 ),
1477 $ e( part_offset+1 ), b( part_offset+1 ), lldb,
1478 $ info )
1479*
1480 ENDIF
1481* End of "IF( LSAME( TRANS, 'C' ) )"...
1482*
1483*
1484 ENDIF
1485* End of "IF( LSAME( UPLO, 'L' ) )"...
1486 1000 CONTINUE
1487*
1488*
1489* Free BLACS space used to hold standard-form grid.
1490*
1491 IF( ictxt_save .NE. ictxt_new ) THEN
1492 CALL blacs_gridexit( ictxt_new )
1493 ENDIF
1494*
1495 1234 CONTINUE
1496*
1497* Restore saved input parameters
1498*
1499 ictxt = ictxt_save
1500 np = np_save
1501*
1502* Output minimum worksize
1503*
1504 work( 1 ) = work_size_min
1505*
1506*
1507 RETURN
1508*
1509* End of PCPTTRSV
1510*
1511 END