ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pddbtrs.f
Go to the documentation of this file.
1  SUBROUTINE pddbtrs( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, B, IB,
2  $ DESCB, AF, LAF, WORK, LWORK, INFO )
3 *
4 * -- ScaLAPACK routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * April 3, 2000
8 *
9 * .. Scalar Arguments ..
10  CHARACTER TRANS
11  INTEGER BWL, BWU, IB, INFO, JA, LAF, LWORK, N, NRHS
12 * ..
13 * .. Array Arguments ..
14  INTEGER DESCA( * ), DESCB( * )
15  DOUBLE PRECISION A( * ), AF( * ), B( * ), WORK( * )
16 * ..
17 *
18 *
19 * Purpose
20 * =======
21 *
22 * PDDBTRS solves a system of linear equations
23 *
24 * A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS)
25 * or
26 * A(1:N, JA:JA+N-1)' * X = B(IB:IB+N-1, 1:NRHS)
27 *
28 * where A(1:N, JA:JA+N-1) is the matrix used to produce the factors
29 * stored in A(1:N,JA:JA+N-1) and AF by PDDBTRF.
30 * A(1:N, JA:JA+N-1) is an N-by-N real
31 * banded diagonally dominant-like distributed
32 * matrix with bandwidth BWL, BWU.
33 *
34 * Routine PDDBTRF MUST be called first.
35 *
36 * =====================================================================
37 *
38 * Arguments
39 * =========
40 *
41 *
42 * TRANS (global input) CHARACTER
43 * = 'N': Solve with A(1:N, JA:JA+N-1);
44 * = 'T' or 'C': Solve with A(1:N, JA:JA+N-1)^T;
45 *
46 * N (global input) INTEGER
47 * The number of rows and columns to be operated on, i.e. the
48 * order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0.
49 *
50 * BWL (global input) INTEGER
51 * Number of subdiagonals. 0 <= BWL <= N-1
52 *
53 * BWU (global input) INTEGER
54 * Number of superdiagonals. 0 <= BWU <= N-1
55 *
56 * NRHS (global input) INTEGER
57 * The number of right hand sides, i.e., the number of columns
58 * of the distributed submatrix B(IB:IB+N-1, 1:NRHS).
59 * NRHS >= 0.
60 *
61 * A (local input/local output) DOUBLE PRECISION pointer into
62 * local memory to an array with first dimension
63 * LLD_A >=(bwl+bwu+1) (stored in DESCA).
64 * On entry, this array contains the local pieces of the
65 * N-by-N unsymmetric banded distributed Cholesky factor L or
66 * L^T A(1:N, JA:JA+N-1).
67 * This local portion is stored in the packed banded format
68 * used in LAPACK. Please see the Notes below and the
69 * ScaLAPACK manual for more detail on the format of
70 * distributed matrices.
71 *
72 * JA (global input) INTEGER
73 * The index in the global array A that points to the start of
74 * the matrix to be operated on (which may be either all of A
75 * or a submatrix of A).
76 *
77 * DESCA (global and local input) INTEGER array of dimension DLEN.
78 * if 1D type (DTYPE_A=501), DLEN >= 7;
79 * if 2D type (DTYPE_A=1), DLEN >= 9 .
80 * The array descriptor for the distributed matrix A.
81 * Contains information of mapping of A to memory. Please
82 * see NOTES below for full description and options.
83 *
84 * B (local input/local output) DOUBLE PRECISION pointer into
85 * local memory to an array of local lead dimension lld_b>=NB.
86 * On entry, this array contains the
87 * the local pieces of the right hand sides
88 * B(IB:IB+N-1, 1:NRHS).
89 * On exit, this contains the local piece of the solutions
90 * distributed matrix X.
91 *
92 * IB (global input) INTEGER
93 * The row index in the global array B that points to the first
94 * row of the matrix to be operated on (which may be either
95 * all of B or a submatrix of B).
96 *
97 * DESCB (global and local input) INTEGER array of dimension DLEN.
98 * if 1D type (DTYPE_B=502), DLEN >=7;
99 * if 2D type (DTYPE_B=1), DLEN >= 9.
100 * The array descriptor for the distributed matrix B.
101 * Contains information of mapping of B to memory. Please
102 * see NOTES below for full description and options.
103 *
104 * AF (local output) DOUBLE PRECISION array, dimension LAF.
105 * Auxiliary Fillin Space.
106 * Fillin is created during the factorization routine
107 * PDDBTRF and this is stored in AF. If a linear system
108 * is to be solved using PDDBTRS after the factorization
109 * routine, AF *must not be altered* after the factorization.
110 *
111 * LAF (local input) INTEGER
112 * Size of user-input Auxiliary Fillin space AF. Must be >=
113 * NB*(bwl+bwu)+6*max(bwl,bwu)*max(bwl,bwu)
114 * If LAF is not large enough, an error code will be returned
115 * and the minimum acceptable size will be returned in AF( 1 )
116 *
117 * WORK (local workspace/local output)
118 * DOUBLE PRECISION temporary workspace. This space may
119 * be overwritten in between calls to routines. WORK must be
120 * the size given in LWORK.
121 * On exit, WORK( 1 ) contains the minimal LWORK.
122 *
123 * LWORK (local input or global input) INTEGER
124 * Size of user-input workspace WORK.
125 * If LWORK is too small, the minimal acceptable size will be
126 * returned in WORK(1) and an error code is returned. LWORK>=
127 * (max(bwl,bwu)*NRHS)
128 *
129 * INFO (global output) INTEGER
130 * = 0: successful exit
131 * < 0: If the i-th argument is an array and the j-entry had
132 * an illegal value, then INFO = -(i*100+j), if the i-th
133 * argument is a scalar and had an illegal value, then
134 * INFO = -i.
135 *
136 * =====================================================================
137 *
138 *
139 * Restrictions
140 * ============
141 *
142 * The following are restrictions on the input parameters. Some of these
143 * are temporary and will be removed in future releases, while others
144 * may reflect fundamental technical limitations.
145 *
146 * Non-cyclic restriction: VERY IMPORTANT!
147 * P*NB>= mod(JA-1,NB)+N.
148 * The mapping for matrices must be blocked, reflecting the nature
149 * of the divide and conquer algorithm as a task-parallel algorithm.
150 * This formula in words is: no processor may have more than one
151 * chunk of the matrix.
152 *
153 * Blocksize cannot be too small:
154 * If the matrix spans more than one processor, the following
155 * restriction on NB, the size of each block on each processor,
156 * must hold:
157 * NB >= 2*MAX(BWL,BWU)
158 * The bulk of parallel computation is done on the matrix of size
159 * O(NB) on each processor. If this is too small, divide and conquer
160 * is a poor choice of algorithm.
161 *
162 * Submatrix reference:
163 * JA = IB
164 * Alignment restriction that prevents unnecessary communication.
165 *
166 *
167 * =====================================================================
168 *
169 *
170 * Notes
171 * =====
172 *
173 * If the factorization routine and the solve routine are to be called
174 * separately (to solve various sets of righthand sides using the same
175 * coefficient matrix), the auxiliary space AF *must not be altered*
176 * between calls to the factorization routine and the solve routine.
177 *
178 * The best algorithm for solving banded and tridiagonal linear systems
179 * depends on a variety of parameters, especially the bandwidth.
180 * Currently, only algorithms designed for the case N/P >> bw are
181 * implemented. These go by many names, including Divide and Conquer,
182 * Partitioning, domain decomposition-type, etc.
183 *
184 * Algorithm description: Divide and Conquer
185 *
186 * The Divide and Conqer algorithm assumes the matrix is narrowly
187 * banded compared with the number of equations. In this situation,
188 * it is best to distribute the input matrix A one-dimensionally,
189 * with columns atomic and rows divided amongst the processes.
190 * The basic algorithm divides the banded matrix up into
191 * P pieces with one stored on each processor,
192 * and then proceeds in 2 phases for the factorization or 3 for the
193 * solution of a linear system.
194 * 1) Local Phase:
195 * The individual pieces are factored independently and in
196 * parallel. These factors are applied to the matrix creating
197 * fillin, which is stored in a non-inspectable way in auxiliary
198 * space AF. Mathematically, this is equivalent to reordering
199 * the matrix A as P A P^T and then factoring the principal
200 * leading submatrix of size equal to the sum of the sizes of
201 * the matrices factored on each processor. The factors of
202 * these submatrices overwrite the corresponding parts of A
203 * in memory.
204 * 2) Reduced System Phase:
205 * A small (max(bwl,bwu)* (P-1)) system is formed representing
206 * interaction of the larger blocks, and is stored (as are its
207 * factors) in the space AF. A parallel Block Cyclic Reduction
208 * algorithm is used. For a linear system, a parallel front solve
209 * followed by an analagous backsolve, both using the structure
210 * of the factored matrix, are performed.
211 * 3) Backsubsitution Phase:
212 * For a linear system, a local backsubstitution is performed on
213 * each processor in parallel.
214 *
215 *
216 * Descriptors
217 * ===========
218 *
219 * Descriptors now have *types* and differ from ScaLAPACK 1.0.
220 *
221 * Note: banded codes can use either the old two dimensional
222 * or new one-dimensional descriptors, though the processor grid in
223 * both cases *must be one-dimensional*. We describe both types below.
224 *
225 * Each global data object is described by an associated description
226 * vector. This vector stores the information required to establish
227 * the mapping between an object element and its corresponding process
228 * and memory location.
229 *
230 * Let A be a generic term for any 2D block cyclicly distributed array.
231 * Such a global array has an associated description vector DESCA.
232 * In the following comments, the character _ should be read as
233 * "of the global array".
234 *
235 * NOTATION STORED IN EXPLANATION
236 * --------------- -------------- --------------------------------------
237 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
238 * DTYPE_A = 1.
239 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
240 * the BLACS process grid A is distribu-
241 * ted over. The context itself is glo-
242 * bal, but the handle (the integer
243 * value) may vary.
244 * M_A (global) DESCA( M_ ) The number of rows in the global
245 * array A.
246 * N_A (global) DESCA( N_ ) The number of columns in the global
247 * array A.
248 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
249 * the rows of the array.
250 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
251 * the columns of the array.
252 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
253 * row of the array A is distributed.
254 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
255 * first column of the array A is
256 * distributed.
257 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
258 * array. LLD_A >= MAX(1,LOCr(M_A)).
259 *
260 * Let K be the number of rows or columns of a distributed matrix,
261 * and assume that its process grid has dimension p x q.
262 * LOCr( K ) denotes the number of elements of K that a process
263 * would receive if K were distributed over the p processes of its
264 * process column.
265 * Similarly, LOCc( K ) denotes the number of elements of K that a
266 * process would receive if K were distributed over the q processes of
267 * its process row.
268 * The values of LOCr() and LOCc() may be determined via a call to the
269 * ScaLAPACK tool function, NUMROC:
270 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
271 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
272 * An upper bound for these quantities may be computed by:
273 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
274 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
275 *
276 *
277 * One-dimensional descriptors:
278 *
279 * One-dimensional descriptors are a new addition to ScaLAPACK since
280 * version 1.0. They simplify and shorten the descriptor for 1D
281 * arrays.
282 *
283 * Since ScaLAPACK supports two-dimensional arrays as the fundamental
284 * object, we allow 1D arrays to be distributed either over the
285 * first dimension of the array (as if the grid were P-by-1) or the
286 * 2nd dimension (as if the grid were 1-by-P). This choice is
287 * indicated by the descriptor type (501 or 502)
288 * as described below.
289 *
290 * IMPORTANT NOTE: the actual BLACS grid represented by the
291 * CTXT entry in the descriptor may be *either* P-by-1 or 1-by-P
292 * irrespective of which one-dimensional descriptor type
293 * (501 or 502) is input.
294 * This routine will interpret the grid properly either way.
295 * ScaLAPACK routines *do not support intercontext operations* so that
296 * the grid passed to a single ScaLAPACK routine *must be the same*
297 * for all array descriptors passed to that routine.
298 *
299 * NOTE: In all cases where 1D descriptors are used, 2D descriptors
300 * may also be used, since a one-dimensional array is a special case
301 * of a two-dimensional array with one dimension of size unity.
302 * The two-dimensional array used in this case *must* be of the
303 * proper orientation:
304 * If the appropriate one-dimensional descriptor is DTYPEA=501
305 * (1 by P type), then the two dimensional descriptor must
306 * have a CTXT value that refers to a 1 by P BLACS grid;
307 * If the appropriate one-dimensional descriptor is DTYPEA=502
308 * (P by 1 type), then the two dimensional descriptor must
309 * have a CTXT value that refers to a P by 1 BLACS grid.
310 *
311 *
312 * Summary of allowed descriptors, types, and BLACS grids:
313 * DTYPE 501 502 1 1
314 * BLACS grid 1xP or Px1 1xP or Px1 1xP Px1
315 * -----------------------------------------------------
316 * A OK NO OK NO
317 * B NO OK NO OK
318 *
319 * Note that a consequence of this chart is that it is not possible
320 * for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead
321 * to opposite requirements for the orientation of the BLACS grid,
322 * and as noted before, the *same* BLACS context must be used in
323 * all descriptors in a single ScaLAPACK subroutine call.
324 *
325 * Let A be a generic term for any 1D block cyclicly distributed array.
326 * Such a global array has an associated description vector DESCA.
327 * In the following comments, the character _ should be read as
328 * "of the global array".
329 *
330 * NOTATION STORED IN EXPLANATION
331 * --------------- ---------- ------------------------------------------
332 * DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids,
333 * TYPE_A = 501: 1-by-P grid.
334 * TYPE_A = 502: P-by-1 grid.
335 * CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating
336 * the BLACS process grid A is distribu-
337 * ted over. The context itself is glo-
338 * bal, but the handle (the integer
339 * value) may vary.
340 * N_A (global) DESCA( 3 ) The size of the array dimension being
341 * distributed.
342 * NB_A (global) DESCA( 4 ) The blocking factor used to distribute
343 * the distributed dimension of the array.
344 * SRC_A (global) DESCA( 5 ) The process row or column over which the
345 * first row or column of the array
346 * is distributed.
347 * LLD_A (local) DESCA( 6 ) The leading dimension of the local array
348 * storing the local blocks of the distri-
349 * buted array A. Minimum value of LLD_A
350 * depends on TYPE_A.
351 * TYPE_A = 501: LLD_A >=
352 * size of undistributed dimension, 1.
353 * TYPE_A = 502: LLD_A >=NB_A, 1.
354 * Reserved DESCA( 7 ) Reserved for future use.
355 *
356 *
357 *
358 * =====================================================================
359 *
360 * Code Developer: Andrew J. Cleary, University of Tennessee.
361 * Current address: Lawrence Livermore National Labs.
362 *
363 * =====================================================================
364 *
365 * .. Parameters ..
366  INTEGER INT_ONE
367  parameter( int_one = 1 )
368  INTEGER DESCMULT, BIGNUM
369  parameter( descmult = 100, bignum = descmult*descmult )
370  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
371  $ lld_, mb_, m_, nb_, n_, rsrc_
372  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
373  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
374  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
375 * ..
376 * .. Local Scalars ..
377  INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE,
378  $ idum2, idum3, ja_new, llda, lldb, mycol, myrow,
379  $ nb, np, npcol, nprow, np_save, part_offset,
380  $ return_code, store_m_b, store_n_a,
381  $ work_size_min
382 * ..
383 * .. Local Arrays ..
384  INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ),
385  $ param_check( 17, 3 )
386 * ..
387 * .. External Subroutines ..
388  EXTERNAL blacs_gridexit, blacs_gridinfo, desc_convert,
390 * ..
391 * .. External Functions ..
392  LOGICAL LSAME
393  EXTERNAL lsame
394 * ..
395 * .. Intrinsic Functions ..
396  INTRINSIC ichar, max, mod
397 * ..
398 * .. Executable Statements ..
399 *
400 * Test the input parameters
401 *
402  info = 0
403 *
404 * Convert descriptor into standard form for easy access to
405 * parameters, check that grid is of right shape.
406 *
407  desca_1xp( 1 ) = 501
408  descb_px1( 1 ) = 502
409 *
410  CALL desc_convert( desca, desca_1xp, return_code )
411 *
412  IF( return_code.NE.0 ) THEN
413  info = -( 8*100+2 )
414  END IF
415 *
416  CALL desc_convert( descb, descb_px1, return_code )
417 *
418  IF( return_code.NE.0 ) THEN
419  info = -( 11*100+2 )
420  END IF
421 *
422 * Consistency checks for DESCA and DESCB.
423 *
424 * Context must be the same
425  IF( desca_1xp( 2 ).NE.descb_px1( 2 ) ) THEN
426  info = -( 11*100+2 )
427  END IF
428 *
429 * These are alignment restrictions that may or may not be removed
430 * in future releases. -Andy Cleary, April 14, 1996.
431 *
432 * Block sizes must be the same
433  IF( desca_1xp( 4 ).NE.descb_px1( 4 ) ) THEN
434  info = -( 11*100+4 )
435  END IF
436 *
437 * Source processor must be the same
438 *
439  IF( desca_1xp( 5 ).NE.descb_px1( 5 ) ) THEN
440  info = -( 11*100+5 )
441  END IF
442 *
443 * Get values out of descriptor for use in code.
444 *
445  ictxt = desca_1xp( 2 )
446  csrc = desca_1xp( 5 )
447  nb = desca_1xp( 4 )
448  llda = desca_1xp( 6 )
449  store_n_a = desca_1xp( 3 )
450  lldb = descb_px1( 6 )
451  store_m_b = descb_px1( 3 )
452 *
453 * Get grid parameters
454 *
455 *
456  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
457  np = nprow*npcol
458 *
459 *
460 *
461  IF( lsame( trans, 'N' ) ) THEN
462  idum2 = ichar( 'N' )
463  ELSE IF( lsame( trans, 'T' ) ) THEN
464  idum2 = ichar( 'T' )
465  ELSE IF( lsame( trans, 'C' ) ) THEN
466  idum2 = ichar( 'T' )
467  ELSE
468  info = -1
469  END IF
470 *
471  IF( lwork.LT.-1 ) THEN
472  info = -15
473  ELSE IF( lwork.EQ.-1 ) THEN
474  idum3 = -1
475  ELSE
476  idum3 = 1
477  END IF
478 *
479  IF( n.LT.0 ) THEN
480  info = -2
481  END IF
482 *
483  IF( n+ja-1.GT.store_n_a ) THEN
484  info = -( 8*100+6 )
485  END IF
486 *
487  IF( ( bwl.GT.n-1 ) .OR. ( bwl.LT.0 ) ) THEN
488  info = -3
489  END IF
490 *
491  IF( ( bwu.GT.n-1 ) .OR. ( bwu.LT.0 ) ) THEN
492  info = -4
493  END IF
494 *
495  IF( llda.LT.( bwl+bwu+1 ) ) THEN
496  info = -( 8*100+6 )
497  END IF
498 *
499  IF( nb.LE.0 ) THEN
500  info = -( 8*100+4 )
501  END IF
502 *
503  IF( n+ib-1.GT.store_m_b ) THEN
504  info = -( 11*100+3 )
505  END IF
506 *
507  IF( lldb.LT.nb ) THEN
508  info = -( 11*100+6 )
509  END IF
510 *
511  IF( nrhs.LT.0 ) THEN
512  info = -5
513  END IF
514 *
515 * Current alignment restriction
516 *
517  IF( ja.NE.ib ) THEN
518  info = -7
519  END IF
520 *
521 * Argument checking that is specific to Divide & Conquer routine
522 *
523  IF( nprow.NE.1 ) THEN
524  info = -( 8*100+2 )
525  END IF
526 *
527  IF( n.GT.np*nb-mod( ja-1, nb ) ) THEN
528  info = -( 2 )
529  CALL pxerbla( ictxt, 'PDDBTRS, D&C alg.: only 1 block per proc'
530  $ , -info )
531  RETURN
532  END IF
533 *
534  IF( ( ja+n-1.GT.nb ) .AND. ( nb.LT.2*max( bwl, bwu ) ) ) THEN
535  info = -( 8*100+4 )
536  CALL pxerbla( ictxt, 'PDDBTRS, D&C alg.: NB too small', -info )
537  RETURN
538  END IF
539 *
540 *
541  work_size_min = ( max( bwl, bwu )*nrhs )
542 *
543  work( 1 ) = work_size_min
544 *
545  IF( lwork.LT.work_size_min ) THEN
546  IF( lwork.NE.-1 ) THEN
547  info = -15
548  CALL pxerbla( ictxt, 'PDDBTRS: worksize error', -info )
549  END IF
550  RETURN
551  END IF
552 *
553 * Pack params and positions into arrays for global consistency check
554 *
555  param_check( 17, 1 ) = descb( 5 )
556  param_check( 16, 1 ) = descb( 4 )
557  param_check( 15, 1 ) = descb( 3 )
558  param_check( 14, 1 ) = descb( 2 )
559  param_check( 13, 1 ) = descb( 1 )
560  param_check( 12, 1 ) = ib
561  param_check( 11, 1 ) = desca( 5 )
562  param_check( 10, 1 ) = desca( 4 )
563  param_check( 9, 1 ) = desca( 3 )
564  param_check( 8, 1 ) = desca( 1 )
565  param_check( 7, 1 ) = ja
566  param_check( 6, 1 ) = nrhs
567  param_check( 5, 1 ) = bwu
568  param_check( 4, 1 ) = bwl
569  param_check( 3, 1 ) = n
570  param_check( 2, 1 ) = idum3
571  param_check( 1, 1 ) = idum2
572 *
573  param_check( 17, 2 ) = 1105
574  param_check( 16, 2 ) = 1104
575  param_check( 15, 2 ) = 1103
576  param_check( 14, 2 ) = 1102
577  param_check( 13, 2 ) = 1101
578  param_check( 12, 2 ) = 10
579  param_check( 11, 2 ) = 805
580  param_check( 10, 2 ) = 804
581  param_check( 9, 2 ) = 803
582  param_check( 8, 2 ) = 801
583  param_check( 7, 2 ) = 7
584  param_check( 6, 2 ) = 5
585  param_check( 5, 2 ) = 4
586  param_check( 4, 2 ) = 3
587  param_check( 3, 2 ) = 2
588  param_check( 2, 2 ) = 15
589  param_check( 1, 2 ) = 1
590 *
591 * Want to find errors with MIN( ), so if no error, set it to a big
592 * number. If there already is an error, multiply by the the
593 * descriptor multiplier.
594 *
595  IF( info.GE.0 ) THEN
596  info = bignum
597  ELSE IF( info.LT.-descmult ) THEN
598  info = -info
599  ELSE
600  info = -info*descmult
601  END IF
602 *
603 * Check consistency across processors
604 *
605  CALL globchk( ictxt, 17, param_check, 17, param_check( 1, 3 ),
606  $ info )
607 *
608 * Prepare output: set info = 0 if no error, and divide by DESCMULT
609 * if error is not in a descriptor entry.
610 *
611  IF( info.EQ.bignum ) THEN
612  info = 0
613  ELSE IF( mod( info, descmult ).EQ.0 ) THEN
614  info = -info / descmult
615  ELSE
616  info = -info
617  END IF
618 *
619  IF( info.LT.0 ) THEN
620  CALL pxerbla( ictxt, 'PDDBTRS', -info )
621  RETURN
622  END IF
623 *
624 * Quick return if possible
625 *
626  IF( n.EQ.0 )
627  $ RETURN
628 *
629  IF( nrhs.EQ.0 )
630  $ RETURN
631 *
632 *
633 * Adjust addressing into matrix space to properly get into
634 * the beginning part of the relevant data
635 *
636  part_offset = nb*( ( ja-1 ) / ( npcol*nb ) )
637 *
638  IF( ( mycol-csrc ).LT.( ja-part_offset-1 ) / nb ) THEN
639  part_offset = part_offset + nb
640  END IF
641 *
642  IF( mycol.LT.csrc ) THEN
643  part_offset = part_offset - nb
644  END IF
645 *
646 * Form a new BLACS grid (the "standard form" grid) with only procs
647 * holding part of the matrix, of size 1xNP where NP is adjusted,
648 * starting at csrc=0, with JA modified to reflect dropped procs.
649 *
650 * First processor to hold part of the matrix:
651 *
652  first_proc = mod( ( ja-1 ) / nb+csrc, npcol )
653 *
654 * Calculate new JA one while dropping off unused processors.
655 *
656  ja_new = mod( ja-1, nb ) + 1
657 *
658 * Save and compute new value of NP
659 *
660  np_save = np
661  np = ( ja_new+n-2 ) / nb + 1
662 *
663 * Call utility routine that forms "standard-form" grid
664 *
665  CALL reshape( ictxt, int_one, ictxt_new, int_one, first_proc,
666  $ int_one, np )
667 *
668 * Use new context from standard grid as context.
669 *
670  ictxt_save = ictxt
671  ictxt = ictxt_new
672  desca_1xp( 2 ) = ictxt_new
673  descb_px1( 2 ) = ictxt_new
674 *
675 * Get information about new grid.
676 *
677  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
678 *
679 * Drop out processors that do not have part of the matrix.
680 *
681  IF( myrow.LT.0 ) THEN
682  GO TO 20
683  END IF
684 *
685 *
686 *
687 * Begin main code
688 *
689  info = 0
690 *
691 * Call frontsolve routine
692 *
693  IF( lsame( trans, 'N' ) ) THEN
694 *
695  CALL pddbtrsv( 'L', 'N', n, bwl, bwu, nrhs, a( part_offset+1 ),
696  $ ja_new, desca_1xp, b, ib, descb_px1, af, laf,
697  $ work, lwork, info )
698 *
699  ELSE
700 *
701  CALL pddbtrsv( 'U', 'T', n, bwl, bwu, nrhs, a( part_offset+1 ),
702  $ ja_new, desca_1xp, b, ib, descb_px1, af, laf,
703  $ work, lwork, info )
704 *
705  END IF
706 *
707 * Call backsolve routine
708 *
709  IF( ( lsame( trans, 'C' ) ) .OR. ( lsame( trans, 'T' ) ) ) THEN
710 *
711  CALL pddbtrsv( 'L', 'T', n, bwl, bwu, nrhs, a( part_offset+1 ),
712  $ ja_new, desca_1xp, b, ib, descb_px1, af, laf,
713  $ work, lwork, info )
714 *
715  ELSE
716 *
717  CALL pddbtrsv( 'U', 'N', n, bwl, bwu, nrhs, a( part_offset+1 ),
718  $ ja_new, desca_1xp, b, ib, descb_px1, af, laf,
719  $ work, lwork, info )
720 *
721  END IF
722  10 CONTINUE
723 *
724 *
725 * Free BLACS space used to hold standard-form grid.
726 *
727  IF( ictxt_save.NE.ictxt_new ) THEN
728  CALL blacs_gridexit( ictxt_new )
729  END IF
730 *
731  20 CONTINUE
732 *
733 * Restore saved input parameters
734 *
735  ictxt = ictxt_save
736  np = np_save
737 *
738 * Output minimum worksize
739 *
740  work( 1 ) = work_size_min
741 *
742 *
743  RETURN
744 *
745 * End of PDDBTRS
746 *
747  END
globchk
subroutine globchk(ICTXT, N, X, LDX, IWORK, INFO)
Definition: pchkxmat.f:403
max
#define max(A, B)
Definition: pcgemr.c:180
pddbtrsv
subroutine pddbtrsv(UPLO, TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, DESCB, AF, LAF, WORK, LWORK, INFO)
Definition: pddbtrsv.f:3
reshape
void reshape(int *context_in, int *major_in, int *context_out, int *major_out, int *first_proc, int *nprow_new, int *npcol_new)
Definition: reshape.c:77
desc_convert
subroutine desc_convert(DESC_IN, DESC_OUT, INFO)
Definition: desc_convert.f:2
pddbtrs
subroutine pddbtrs(TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, DESCB, AF, LAF, WORK, LWORK, INFO)
Definition: pddbtrs.f:3
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2