SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pbdtrnv.f
Go to the documentation of this file.
1 SUBROUTINE pbdtrnv( ICONTXT, XDIST, TRANS, N, NB, NZ, X, INCX,
2 $ BETA, Y, INCY, IXROW, IXCOL, IYROW, IYCOL,
3 $ WORK )
4*
5* -- PB-BLAS routine (version 2.1) --
6* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
7* April 28, 1996
8*
9* Jaeyoung Choi, Oak Ridge National Laboratory
10* Jack Dongarra, University of Tennessee and Oak Ridge National Lab.
11* David Walker, Oak Ridge National Laboratory
12*
13* .. Scalar Arguments ..
14 CHARACTER*1 TRANS, XDIST
15 INTEGER ICONTXT, INCX, INCY, IXCOL, IXROW, IYCOL,
16 $ iyrow, n, nb, nz
17 DOUBLE PRECISION BETA
18* ..
19* .. Array Arguments ..
20 DOUBLE PRECISION WORK( * ), X( * ), Y( * )
21* ..
22*
23* Purpose
24* =======
25*
26* PBDTRNV transposes a column vector to row vector, or a row vector to
27* column vector by reallocating data distribution.
28*
29* Y := X'
30*
31* where X and Y are N vectors.
32*
33* Parameters
34* ==========
35*
36* ICONTXT (input) INTEGER
37* ICONTXT is the BLACS mechanism for partitioning communication
38* space. A defining property of a context is that a message in
39* a context cannot be sent or received in another context. The
40* BLACS context includes the definition of a grid, and each
41* process' coordinates in it.
42*
43* XDIST (input) CHARACTER*1
44* XDIST specifies whether X is a column vector or a row vector,
45*
46* XDIST = 'C', X is a column vector (distributed columnwise)
47* XDIST = 'R', X is a row vector (distributed rowwise)
48*
49* TRANS (input) CHARACTER*1
50* TRANS specifies whether the transposed format is transpose
51* or conjugate transpose. If the vectors X and Y are real,
52* the argument is ignored.
53*
54* TRANS = 'T', transpose
55* TRANS = 'C', conjugate transpose
56*
57* N (input) INTEGER
58* N specifies the (global) number of the vector X and the
59* vector Y. N >= 0.
60*
61* NB (input) INTEGER
62* NB specifies the block size of vectors X and Y. NB >= 0.
63*
64* NZ (input) INTEGER
65* NZ is the column offset to specify the column distance from
66* the beginning of the block to the first element of the
67* vector X, and the row offset to the first element of the
68* vector Y if XDIST = 'C'.
69* Otherwise, it is row offset to specify the row distance
70* from the beginning of the block to the first element of the
71* vector X, and the column offset to the first element of the
72* vector Y. 0 < NZ <= NB.
73*
74* X (input) DOUBLE PRECISION array of dimension at least
75* ( 1 + (Np-1) * abs(INCX)) in IXCOL if XDIST = 'C', or
76* ( 1 + (Nq-1) * abs(INCX)) in IXROW if XDIST = 'R'.
77* The incremented array X must contain the vector X.
78*
79* INCX (input) INTEGER
80* INCX specifies the increment for the elements of X.
81* INCX <> 0.
82*
83* BETA (input) DOUBLE PRECISION
84* BETA specifies scaler beta.
85*
86* Y (input/output) DOUBLE PRECISION array of dimension at least
87* ( 1 + (Nq-1) * abs(INCY)) in IYROW if XDIST = 'C', or
88* ( 1 + (Np-1) * abs(INCY)) in IYCOL if XDIST = 'R', or
89* The incremented array Y must contain the vector Y.
90* Y will not be referenced if beta is zero.
91*
92* INCY (input) INTEGER
93* INCY specifies the increment for the elements of Y.
94* INCY <> 0.
95*
96* IXROW (input) INTEGER
97* IXROW specifies a row of the process template, which holds
98* the first element of the vector X. If X is a row vector and
99* all rows of processes have a copy of X, then set IXROW = -1.
100*
101* IXCOL (input) INTEGER
102* IXCOL specifies a column of the process template,
103* which holds the first element of the vector X. If X is a
104* column block and all columns of processes have a copy of X,
105* then set IXCOL = -1.
106*
107* IYROW (input) INTEGER
108* IYROW specifies the current row process which holds the
109* first element of the vector Y, which is transposed of X.
110* If X is a column vector and the transposed row vector Y is
111* distributed all rows of processes, set IYROW = -1.
112*
113* IYCOL (input) INTEGER
114* IYCOL specifies the current column process which holds
115* the first element of the vector Y, which is transposed of Y.
116* If X is a row block and the transposed column vector Y is
117* distributed all columns of processes, set IYCOL = -1.
118*
119* WORK (workspace) DOUBLE PRECISION array of dimension Size(WORK).
120* It needs extra working space of x**T or x**H.
121*
122* Parameters Details
123* ==================
124*
125* Nx It is a local portion of N owned by a process, where x is
126* replaced by either p (=NPROW) or q (=NPCOL)). The value is
127* determined by N, NB, NZ, x, and MI, where NB is a block size,
128* NZ is a offset from the beginning of the block, and MI is a
129* row or column position in a process template. Nx is equal
130* to or less than Nx0 = CEIL( N+NZ, NB*x ) * NB.
131*
132* Communication Scheme
133* ====================
134*
135* The communication scheme of the routine is set to '1-tree', which is
136* fan-out. (For details, see BLACS user's guide.)
137*
138* Memory Requirement of WORK
139* ==========================
140*
141* NN = N + NZ
142* Npb = CEIL( NN, NB*NPROW )
143* Nqb = CEIL( NN, NB*NPCOL )
144* LCMP = LCM / NPROW
145* LCMQ = LCM / NPCOL
146*
147* (1) XDIST = 'C'
148* (a) IXCOL != -1
149* Size(WORK) = CEIL(Nqb,LCMQ)*NB
150* (b) IXCOL = -1
151* Size(WORK) = CEIL(Nqb,LCMQ)*NB * MIN(LCMQ,CEIL(NN,NB))
152*
153* (2) XDIST = 'R'
154* (a) IXROW != -1
155* Size(WORK) = CEIL(Npb,LCMP)*NB
156* (b) IXROW = -1
157* Size(WORK) = CEIL(Npb,LCMP)*NB * MIN(LCMP,CEIL(NN,NB))
158*
159* Notes
160* -----
161* More precise space can be computed as
162*
163* CEIL(Npb,LCMP)*NB => NUMROC( NUMROC(NN,NB,0,0,NPROW), NB, 0, 0, LCMP)
164* CEIL(Nqb,LCMQ)*NB => NUMROC( NUMROC(NN,NB,0,0,NPCOL), NB, 0, 0, LCMQ)
165*
166* =====================================================================
167*
168* .. Parameters ..
169 DOUBLE PRECISION ONE, ZERO
170 PARAMETER ( ONE = 1.0d+0, zero = 0.0d+0 )
171* ..
172* .. Local Scalars ..
173 LOGICAL COLFORM, ROWFORM
174 INTEGER I, IDEX, IGD, INFO, JDEX, JYCOL, JYROW, JZ, KZ,
175 $ lcm, lcmp, lcmq, mccol, mcrow, mrcol, mrrow,
176 $ mycol, myrow, nn, np, np0, np1, npcol, nprow,
177 $ nq, nq0, nq1
178 DOUBLE PRECISION TBETA
179* ..
180* .. External Functions ..
181 LOGICAL LSAME
182 INTEGER ILCM, ICEIL, NUMROC
183 EXTERNAL lsame, ilcm, iceil, numroc
184* ..
185* .. External Subroutines ..
186 EXTERNAL blacs_gridinfo, dgebr2d, dgebs2d, dgerv2d,
187 $ dgesd2d, pbdtr2a1, pbdtr2b1, pbdtrget,
189* ..
190* .. Intrinsic Functions ..
191 INTRINSIC max, min, mod
192* ..
193* .. Executable Statements ..
194*
195* Quick return if possible.
196*
197 IF( n.EQ.0 ) RETURN
198*
199 CALL blacs_gridinfo( icontxt, nprow, npcol, myrow, mycol )
200*
201 colform = lsame( xdist, 'C' )
202 rowform = lsame( xdist, 'R' )
203*
204* Test the input parameters.
205*
206 info = 0
207 IF( ( .NOT.colform ) .AND. ( .NOT.rowform ) ) THEN
208 info = 2
209 ELSE IF( n .LT.0 ) THEN
210 info = 4
211 ELSE IF( nb .LT.1 ) THEN
212 info = 5
213 ELSE IF( nz .LT.0 .OR. nz.GE.nb ) THEN
214 info = 6
215 ELSE IF( incx.EQ.0 ) THEN
216 info = 8
217 ELSE IF( incy.EQ.0 ) THEN
218 info = 11
219 ELSE IF( ixrow.LT.-1 .OR. ixrow.GE.nprow .OR.
220 $ ( ixrow.EQ.-1 .AND. colform ) ) THEN
221 info = 12
222 ELSE IF( ixcol.LT.-1 .OR. ixcol.GE.npcol .OR.
223 $ ( ixcol.EQ.-1 .AND. rowform ) ) THEN
224 info = 13
225 ELSE IF( iyrow.LT.-1 .OR. iyrow.GE.nprow .OR.
226 $ ( iyrow.EQ.-1 .AND. rowform ) ) THEN
227 info = 14
228 ELSE IF( iycol.LT.-1 .OR. iycol.GE.npcol .OR.
229 $ ( iycol.EQ.-1 .AND. colform ) ) THEN
230 info = 15
231 END IF
232*
233 10 CONTINUE
234 IF( info.NE.0 ) THEN
235 CALL pxerbla( icontxt, 'PBDTRNV ', info )
236 RETURN
237 END IF
238*
239* Start the operations.
240*
241* LCM : the least common multiple of NPROW and NPCOL
242*
243 lcm = ilcm( nprow, npcol )
244 lcmp = lcm / nprow
245 lcmq = lcm / npcol
246 igd = npcol / lcmp
247 nn = n + nz
248*
249* When x is a column vector
250*
251 IF( colform ) THEN
252*
253* Form y <== x' ( x is a column vector )
254*
255* ||
256* ||
257* _____________ ||
258* -----(y)----- <== (x)
259* ||
260* ||
261* ||
262*
263 IF( ixrow.LT.0 .OR. ixrow.GE.nprow ) THEN
264 info = 12
265 ELSE IF( ixcol.LT.-1 .OR. ixcol.GE.npcol ) THEN
266 info = 13
267 ELSE IF( iyrow.LT.-1 .OR. iyrow.GE.nprow ) THEN
268 info = 14
269 ELSE IF( iycol.LT.0 .OR. iycol.GE.npcol ) THEN
270 info = 15
271 END IF
272 IF( info.NE.0 ) GO TO 10
273*
274* MRROW : row relative position in template from IXROW
275* MRCOL : column relative position in template from IYCOL
276*
277 mrrow = mod( nprow+myrow-ixrow, nprow )
278 mrcol = mod( npcol+mycol-iycol, npcol )
279 jyrow = iyrow
280 IF( iyrow.EQ.-1 ) jyrow = ixrow
281*
282 np = numroc( nn, nb, myrow, ixrow, nprow )
283 IF( mrrow.EQ.0 ) np = np - nz
284 nq = numroc( nn, nb, mycol, iycol, npcol )
285 IF( mrcol.EQ.0 ) nq = nq - nz
286 nq0 = numroc( numroc(nn, nb, 0, 0, npcol), nb, 0, 0, lcmq )
287*
288* When a column process of IXCOL has a column block A,
289*
290 IF( ixcol .GE. 0 ) THEN
291 tbeta = zero
292 IF( myrow.EQ.jyrow ) tbeta = beta
293 kz = nz
294*
295 DO 20 i = 0, min( lcm, iceil(nn,nb) ) - 1
296 mcrow = mod( mod(i, nprow) + ixrow, nprow )
297 mccol = mod( mod(i, npcol) + iycol, npcol )
298 IF( lcmq.EQ.1 ) nq0 = numroc( nn, nb, i, 0, npcol )
299 jdex = (i/npcol) * nb
300 IF( mrcol.EQ.0 ) jdex = max(0, jdex-nz)
301*
302* A source node copies the blocks to WORK, and send it
303*
304 IF( myrow.EQ.mcrow .AND. mycol.EQ.ixcol ) THEN
305*
306* The source node is a destination node
307*
308 idex = (i/nprow) * nb
309 IF( mrrow.EQ.0 ) idex = max( 0, idex-nz )
310 IF( myrow.EQ.jyrow .AND. mycol.EQ.mccol ) THEN
311 CALL pbdtr2b1( icontxt, trans, np-idex, nb, kz,
312 $ x(idex*incx+1), incx, tbeta,
313 $ y(jdex*incy+1), incy, lcmp, lcmq )
314*
315* The source node sends blocks to a destination node
316*
317 ELSE
318 CALL pbdtr2b1( icontxt, trans, np-idex, nb, kz,
319 $ x(idex*incx+1), incx, zero, work, 1,
320 $ lcmp, 1 )
321 CALL dgesd2d( icontxt, 1, nq0-kz, work, 1,
322 $ jyrow, mccol )
323 END IF
324*
325* A destination node receives the copied vector
326*
327 ELSE IF( myrow.EQ.jyrow .AND. mycol.EQ.mccol ) THEN
328 IF( lcmq.EQ.1 .AND. tbeta.EQ.zero ) THEN
329 CALL dgerv2d( icontxt, 1, nq0-kz, y, incy,
330 $ mcrow, ixcol )
331 ELSE
332 CALL dgerv2d( icontxt, 1, nq0-kz, work, 1,
333 $ mcrow, ixcol )
334 CALL pbdtr2a1( icontxt, nq-jdex, nb, kz, work, 1, tbeta,
335 $ y(jdex*incy+1), incy, lcmq*nb )
336 END IF
337 END IF
338 kz = 0
339 20 CONTINUE
340*
341* Broadcast a row block of WORK in each column of template
342*
343 IF( iyrow.EQ.-1 ) THEN
344 IF( myrow.EQ.jyrow ) THEN
345 CALL dgebs2d( icontxt, 'Col', '1-tree', 1, nq, y, incy )
346 ELSE
347 CALL dgebr2d( icontxt, 'Col', '1-tree', 1, nq, y, incy,
348 $ jyrow, mycol )
349 END IF
350 END IF
351*
352* When all column procesors have a copy of the column block A,
353*
354 ELSE
355 IF( lcmq.EQ.1 ) nq0 = nq
356*
357* Processors, which have diagonal blocks of X, copy them to
358* WORK array in transposed form
359*
360 kz = 0
361 IF( mrrow.EQ.0 ) kz = nz
362 jz = 0
363 IF( mrrow.EQ.0 .AND. mycol.EQ.iycol ) jz = nz
364*
365 DO 30 i = 0, lcmp - 1
366 IF( mrcol.EQ.mod(nprow*i+mrrow, npcol) ) THEN
367 idex = max( 0, i*nb-kz )
368 IF( lcmq.EQ.1 .AND. (iyrow.EQ.-1.OR.iyrow.EQ.myrow) ) THEN
369 CALL pbdtr2b1( icontxt, trans, np-idex, nb, jz,
370 $ x(idex*incx+1), incx, beta, y, incy,
371 $ lcmp, 1 )
372 ELSE
373 CALL pbdtr2b1( icontxt, trans, np-idex, nb, jz,
374 $ x(idex*incx+1), incx, zero, work, 1,
375 $ lcmp, 1 )
376 END IF
377 END IF
378 30 CONTINUE
379*
380* Get diagonal blocks of A for each column of the template
381*
382 mcrow = mod( mod(mrcol, nprow) + ixrow, nprow )
383 IF( lcmq.GT.1 ) THEN
384 mccol = mod( npcol+mycol-iycol, npcol )
385 CALL pbdtrget( icontxt, 'Row', 1, nq0, iceil( nn, nb ),
386 $ work, 1, mcrow, mccol, igd, myrow, mycol,
387 $ nprow, npcol )
388 END IF
389*
390* Broadcast a row block of WORK in every row of template
391*
392 IF( iyrow.EQ.-1 ) THEN
393 IF( myrow.EQ.mcrow ) THEN
394 IF( lcmq.GT.1 ) THEN
395 kz = 0
396 IF( mycol.EQ.iycol ) kz = nz
397 CALL pbdtrst1( icontxt, 'Row', nq, nb, kz, work, 1,
398 $ beta, y, incy, lcmp, lcmq, nq0 )
399 END IF
400 CALL dgebs2d( icontxt, 'Col', '1-tree', 1, nq, y, incy )
401 ELSE
402 CALL dgebr2d( icontxt, 'Col', '1-tree', 1, nq, y, incy,
403 $ mcrow, mycol )
404 END IF
405*
406* Send a row block of WORK to the destination row
407*
408 ELSE
409 IF( lcmq.EQ.1 ) THEN
410 IF( myrow.EQ.mcrow ) THEN
411 IF( myrow.NE.iyrow )
412 $ CALL dgesd2d( icontxt, 1, nq0, work, 1, iyrow, mycol )
413 ELSE IF( myrow.EQ.iyrow ) THEN
414 IF( beta.EQ.zero ) THEN
415 CALL dgerv2d( icontxt, 1, nq0, y, incy, mcrow, mycol )
416 ELSE
417 CALL dgerv2d( icontxt, 1, nq0, work, 1, mcrow, mycol )
418 CALL pbdvecadd( icontxt, 'G', nq0, one, work, 1,
419 $ beta, y, incy )
420 END IF
421 END IF
422*
423 ELSE
424 nq1 = nq0 * min( lcmq, max( 0, iceil(nn,nb)-mccol ) )
425 IF( myrow.EQ.mcrow ) THEN
426 IF( myrow.NE.iyrow )
427 $ CALL dgesd2d( icontxt, 1, nq1, work, 1, iyrow, mycol )
428 ELSE IF( myrow.EQ.iyrow ) THEN
429 CALL dgerv2d( icontxt, 1, nq1, work, 1, mcrow, mycol )
430 END IF
431*
432 IF( myrow.EQ.iyrow ) THEN
433 kz = 0
434 IF( mycol.EQ.iycol ) kz = nz
435 CALL pbdtrst1( icontxt, 'Row', nq, nb, kz, work, 1,
436 $ beta, y, incy, lcmp, lcmq, nq0 )
437 END IF
438 END IF
439 END IF
440 END IF
441*
442* When x is a row vector
443*
444 ELSE
445*
446* Form y <== x' ( x is a row block )
447*
448* ||
449* ||
450* || _____________
451* (y) <== -----(x)-----
452* ||
453* ||
454* ||
455*
456 IF( ixrow.LT.-1 .OR. ixrow.GE.nprow ) THEN
457 info = 12
458 ELSE IF( ixcol.LT.0 .OR. ixcol.GE.npcol ) THEN
459 info = 13
460 ELSE IF( iyrow.LT.0 .OR. iyrow.GE.nprow ) THEN
461 info = 14
462 ELSE IF( iycol.LT.-1 .OR. iycol.GE.npcol ) THEN
463 info = 15
464 END IF
465 IF( info.NE.0 ) GO TO 10
466*
467* MRROW : row relative position in template from IYROW
468* MRCOL : column relative position in template from IXCOL
469*
470 mrrow = mod( nprow+myrow-iyrow, nprow )
471 mrcol = mod( npcol+mycol-ixcol, npcol )
472 jycol = iycol
473 IF( iycol.EQ.-1 ) jycol = ixcol
474*
475 np = numroc( nn, nb, myrow, iyrow, nprow )
476 IF( mrrow.EQ.0 ) np = np - nz
477 nq = numroc( nn, nb, mycol, ixcol, npcol )
478 IF( mrcol.EQ.0 ) nq = nq - nz
479 np0 = numroc( numroc(nn, nb, 0, 0, nprow), nb, 0, 0, lcmp )
480*
481* When a row process of IXROW has a row block A,
482*
483 IF( ixrow .GE. 0 ) THEN
484 tbeta = zero
485 IF( mycol.EQ.jycol ) tbeta = beta
486 kz = nz
487*
488 DO 40 i = 0, min( lcm, iceil(nn,nb) ) - 1
489 mcrow = mod( mod(i, nprow) + iyrow, nprow )
490 mccol = mod( mod(i, npcol) + ixcol, npcol )
491 IF( lcmp.EQ.1 ) np0 = numroc( nn, nb, i, 0, nprow )
492 jdex = (i/nprow) * nb
493 IF( mrrow.EQ.0 ) jdex = max(0, jdex-nz)
494*
495* A source node copies the blocks to WORK, and send it
496*
497 IF( myrow.EQ.ixrow .AND. mycol.EQ.mccol ) THEN
498*
499* The source node is a destination node
500*
501 idex = (i/npcol) * nb
502 IF( mrcol.EQ.0 ) idex = max( 0, idex-nz )
503 IF( myrow.EQ.mcrow .AND. mycol.EQ.jycol ) THEN
504 CALL pbdtr2b1( icontxt, trans, nq-idex, nb, kz,
505 $ x(idex*incx+1), incx, tbeta,
506 $ y(jdex*incy+1), incy, lcmq, lcmp )
507*
508* The source node sends blocks to a destination node
509*
510 ELSE
511 CALL pbdtr2b1( icontxt, trans, nq-idex, nb, kz,
512 $ x(idex*incx+1), incx, zero, work, 1,
513 $ lcmq, 1 )
514 CALL dgesd2d( icontxt, 1, np0-kz, work, 1,
515 $ mcrow, jycol )
516 END IF
517*
518* A destination node receives the copied blocks
519*
520 ELSE IF( myrow.EQ.mcrow .AND. mycol.EQ.jycol ) THEN
521 IF( lcmp.EQ.1 .AND. tbeta.EQ.zero ) THEN
522 CALL dgerv2d( icontxt, 1, np0-kz, y, incy,
523 $ ixrow, mccol )
524 ELSE
525 CALL dgerv2d( icontxt, 1, np0-kz, work, 1,
526 $ ixrow, mccol )
527 CALL pbdtr2a1( icontxt, np-jdex, nb, kz, work, 1, tbeta,
528 $ y(jdex*incy+1), incy, lcmp*nb )
529 END IF
530 END IF
531 kz = 0
532 40 CONTINUE
533*
534* Broadcast a column vector Y in each row of template
535*
536 IF( iycol.EQ.-1 ) THEN
537 IF( mycol.EQ.jycol ) THEN
538 CALL dgebs2d( icontxt, 'Row', '1-tree', 1, np, y, incy )
539 ELSE
540 CALL dgebr2d( icontxt, 'Row', '1-tree', 1, np, y, incy,
541 $ myrow, jycol )
542 END IF
543 END IF
544*
545* When all row procesors have a copy of the row block A,
546*
547 ELSE
548 IF( lcmp.EQ.1 ) np0 = np
549*
550* Processors, which have diagonal blocks of A, copy them to
551* WORK array in transposed form
552*
553 kz = 0
554 IF( mrcol.EQ.0 ) kz = nz
555 jz = 0
556 IF( mrcol.EQ.0 .AND. myrow.EQ.iyrow ) jz = nz
557*
558 DO 50 i = 0, lcmq-1
559 IF( mrrow.EQ.mod(npcol*i+mrcol, nprow) ) THEN
560 idex = max( 0, i*nb-kz )
561 IF( lcmp.EQ.1 .AND. (iycol.EQ.-1.OR.iycol.EQ.mycol) ) THEN
562 CALL pbdtr2b1( icontxt, trans, nq-idex, nb, jz,
563 $ x(idex*incx+1), incx, beta, y, incy,
564 $ lcmq, 1 )
565 ELSE
566 CALL pbdtr2b1( icontxt, trans, nq-idex, nb, jz,
567 $ x(idex*incx+1), incx, zero, work, 1,
568 $ lcmq, 1 )
569 END IF
570 END IF
571 50 CONTINUE
572*
573* Get diagonal blocks of A for each row of the template
574*
575 mccol = mod( mod(mrrow, npcol) + ixcol, npcol )
576 IF( lcmp.GT.1 ) THEN
577 mcrow = mod( nprow+myrow-iyrow, nprow )
578 CALL pbdtrget( icontxt, 'Col', 1, np0, iceil( nn, nb ),
579 $ work, 1, mcrow, mccol, igd, myrow, mycol,
580 $ nprow, npcol )
581 END IF
582*
583* Broadcast a column block of WORK in every column of template
584*
585 IF( iycol.EQ.-1 ) THEN
586 IF( mycol.EQ.mccol ) THEN
587 IF( lcmp.GT.1 ) THEN
588 kz = 0
589 IF( myrow.EQ.iyrow ) kz = nz
590 CALL pbdtrst1( icontxt, 'Col', np, nb, kz, work, 1,
591 $ beta, y, incy, lcmp, lcmq, np0 )
592 END IF
593 CALL dgebs2d( icontxt, 'Row', '1-tree', 1, np, y, incy )
594 ELSE
595 CALL dgebr2d( icontxt, 'Row', '1-tree', 1, np, y, incy,
596 $ myrow, mccol )
597 END IF
598*
599* Send a column block of WORK to the destination column
600*
601 ELSE
602 IF( lcmp.EQ.1 ) THEN
603 IF( mycol.EQ.mccol ) THEN
604 IF( mycol.NE.iycol )
605 $ CALL dgesd2d( icontxt, 1, np, work, 1, myrow, iycol )
606 ELSE IF( mycol.EQ.iycol ) THEN
607 IF( beta.EQ.zero ) THEN
608 CALL dgerv2d( icontxt, 1, np, y, incy, myrow, mccol )
609 ELSE
610 CALL dgerv2d( icontxt, 1, np, work, 1, myrow, mccol )
611 CALL pbdvecadd( icontxt, 'G', np, one, work, 1, beta,
612 $ y, incy )
613 END IF
614 END IF
615*
616 ELSE
617 np1 = np0 * min( lcmp, max( 0, iceil(nn,nb)-mcrow ) )
618 IF( mycol.EQ.mccol ) THEN
619 IF( mycol.NE.iycol )
620 $ CALL dgesd2d( icontxt, 1, np1, work, 1, myrow, iycol )
621 ELSE IF( mycol.EQ.iycol ) THEN
622 CALL dgerv2d( icontxt, 1, np1, work, 1, myrow, mccol )
623 END IF
624*
625 IF( mycol.EQ.iycol ) THEN
626 kz = 0
627 IF( myrow.EQ.iyrow ) kz = nz
628 CALL pbdtrst1( icontxt, 'Col', np, nb, kz, work, 1,
629 $ beta, y, incy, lcmp, lcmq, np0 )
630 END IF
631 END IF
632 END IF
633 END IF
634 END IF
635*
636 RETURN
637*
638* End of PBDTRNV
639*
640 END
641*
642*=======================================================================
643* SUBROUTINE PBDTR2A1
644*=======================================================================
645*
646 SUBROUTINE pbdtr2a1( ICONTXT, N, NB, NZ, X, INCX, BETA, Y, INCY,
647 $ INTV )
648*
649* -- PB-BLAS routine (version 2.1) --
650* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
651* April 28, 1996
652*
653* .. Scalar Arguments ..
654 INTEGER ICONTXT, N, NB, NZ, INCX, INCY, INTV
655 DOUBLE PRECISION BETA
656* ..
657* .. Array Arguments ..
658 DOUBLE PRECISION X( * ), Y( * )
659* ..
660*
661* Purpose
662* =======
663*
664* y <== x
665* y is a scattered vector, copied from a condensed vector x.
666*
667* ..
668* .. Intrinsic Functions ..
669 INTRINSIC min
670* ..
671* .. External Functions ..
672 INTEGER ICEIL
673 EXTERNAL ICEIL
674* ..
675* .. External Subroutines ..
676 EXTERNAL pbdvecadd
677* ..
678* .. Parameters ..
679 DOUBLE PRECISION ONE
680 PARAMETER ( ONE = 1.0d+0 )
681* ..
682* .. Local Variables ..
683 INTEGER IX, IY, JZ, K, ITER
684*
685 IX = 0
686 iy = 0
687 jz = nz
688 iter = iceil( n+nz, intv )
689*
690 IF( iter.GT.1 ) THEN
691 CALL pbdvecadd( icontxt, 'G', nb-jz, one, x(ix*incx+1), incx,
692 $ beta, y(iy*incy+1), incy )
693 ix = ix + nb - jz
694 iy = iy + intv - jz
695 jz = 0
696*
697 DO 10 k = 2, iter-1
698 CALL pbdvecadd( icontxt, 'G', nb, one, x(ix*incx+1), incx,
699 $ beta, y(iy*incy+1), incy )
700 ix = ix + nb
701 iy = iy + intv
702 10 CONTINUE
703 END IF
704*
705 CALL pbdvecadd( icontxt, 'G', min( n-iy, nb-jz ), one,
706 $ x(ix*incx+1), incx, beta, y(iy*incy+1), incy )
707*
708 RETURN
709*
710* End of PBDTR2A1
711*
712 END
713*
714*=======================================================================
715* SUBROUTINE PBDTR2B1
716*=======================================================================
717*
718 SUBROUTINE pbdtr2b1( ICONTXT, TRANS, N, NB, NZ, X, INCX, BETA, Y,
719 $ INCY, JINX, JINY )
720*
721* -- PB-BLAS routine (version 2.1) --
722* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
723* April 28, 1996
724*
725* .. Scalar Arguments ..
726 CHARACTER*1 TRANS
727 INTEGER ICONTXT, N, NB, NZ, INCX, INCY, JINX, JINY
728 DOUBLE PRECISION BETA
729* ..
730* .. Array Arguments ..
731 DOUBLE PRECISION X( * ), Y( * )
732* ..
733*
734* Purpose
735* =======
736*
737* y <== x + beta * y
738* y is a condensed vector, copied from a scattered vector x
739*
740* ..
741* .. Intrinsic Functions ..
742 INTRINSIC min
743* ..
744* .. External Functions ..
745 INTEGER ICEIL
746 EXTERNAL iceil
747* ..
748* .. External Subroutines ..
749 EXTERNAL pbdvecadd
750* ..
751* .. Parameters ..
752 DOUBLE PRECISION ONE
753 parameter( one = 1.0d+0 )
754* ..
755* .. Local Variables ..
756 INTEGER IX, IY, JZ, K, ITER, LENX, LENY
757*
758 IF( jinx.EQ.1 .AND. jiny.EQ.1 ) THEN
759 CALL pbdvecadd( icontxt, trans, n, one, x, incx, beta,
760 $ y, incy )
761*
762 ELSE
763 ix = 0
764 iy = 0
765 jz = nz
766 lenx = nb * jinx
767 leny = nb * jiny
768 iter = iceil( n+nz, lenx )
769*
770 IF( iter.GT.1 ) THEN
771 CALL pbdvecadd( icontxt, trans, nb-jz, one, x(ix*incx+1),
772 $ incx, beta, y(iy*incy+1), incy )
773 ix = ix + lenx - jz
774 iy = iy + leny - jz
775 jz = 0
776*
777 DO 10 k = 2, iter-1
778 CALL pbdvecadd( icontxt, trans, nb, one, x(ix*incx+1),
779 $ incx, beta, y(iy*incy+1), incy )
780 ix = ix + lenx
781 iy = iy + leny
782 10 CONTINUE
783 END IF
784*
785 CALL pbdvecadd( icontxt, trans, min( n-ix, nb-jz ), one,
786 $ x(ix*incx+1), incx, beta, y(iy*incy+1), incy )
787 END IF
788*
789 RETURN
790*
791* End of PBDTR2B1
792*
793 END
subroutine pbdtrget(icontxt, adist, m, n, mnb, a, lda, mcrow, mccol, igd, myrow, mycol, nprow, npcol)
Definition pbdtrget.f:3
subroutine pbdtrnv(icontxt, xdist, trans, n, nb, nz, x, incx, beta, y, incy, ixrow, ixcol, iyrow, iycol, work)
Definition pbdtrnv.f:4
subroutine pbdtr2a1(icontxt, n, nb, nz, x, incx, beta, y, incy, intv)
Definition pbdtrnv.f:648
subroutine pbdtr2b1(icontxt, trans, n, nb, nz, x, incx, beta, y, incy, jinx, jiny)
Definition pbdtrnv.f:720
subroutine pbdtrst1(icontxt, xdist, n, nb, nz, x, incx, beta, y, incy, lcmp, lcmq, nint)
Definition pbdtrst1.f:3
subroutine pbdvecadd(icontxt, mode, n, alpha, x, incx, beta, y, incy)
Definition pbdvecadd.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2