C      ALGORITHM 826, COLLECTED ALGORITHMS FROM ACM.
C      THIS WORK PUBLISHED IN TRANSACTIONS ON MATHEMATICAL SOFTWARE,
C      VOL. 29,NO. 3, September, 2003, P.  326--336.
#! /bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #! /bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create the files:
#	SLmake.inc
#	Src/
#	Src/Makefile
#	Src/README
#	Src/install
#	Src/pblas.h
#	Src/pzaxpy.c
#	Src/pzdotc.c
#	Src/pzdotu.c
#	Src/pzlaconsb.f
#	Src/pzlacp3.f
#	Src/pzlahqr.f
#	Src/pzlasmsub.f
#	Src/pzlatrs.f
#	Src/pzlawil.f
#	Src/pzrot.c
#	Src/pztrevc.f
#	Src/zlahqr.f
#	Src/zlamsh.f
#	Src/zlanv2.f
#	Src/zlaref.f
#	Testing/
#	Testing/EVC.dat
#	Testing/Makefile
#	Testing/NEP.dat
#	Testing/README
#	Testing/pzevcdriver.f
#	Testing/pzevcinfo.f
#	Testing/pzget22.f
#	Testing/pznepdriver.f
#	Testing/pznepfchk.f
#	Testing/pznepinfo.f
# This archive created: Wed Apr 16 10:13:06 2003
export PATH; PATH=/bin:$PATH
if test -f 'SLmake.inc'
then
	echo shar: will not over-write existing file "'SLmake.inc'"
else
cat << "SHAR_EOF" > 'SLmake.inc'
FORTRAN = f90
OPTS = -O3
DRVOPTS  = $(OPTS)
NOOPT    =
LOADER   = f90
LOADOPTS =

CC = cc
CCFLAGS = $(OPTS)
CDEFS = -DAdd_ $(USEMPI)

ARCH     = ar
ARCHFLAGS= cr
RANLIB   = echo

MACH = origin
PLAT = _irix65

SCALAPACK = /usr/local/usp/PETtools/lib/libscalapackn32.a
BLACSMPI = /usr/local/usp/PETtools/lib/libmpiblacsn32.a

BLASLIB     = -lblas
LAPACKLIB    = $(HOME)/LAPACK/lapack$(PLAT).a
MATGENLIB    = $(HOME)/LAPACK/tmglib$(PLAT).a
PXLAHQRLIB    = $(HOME)/$(MACH)/ScaLAPACK/pxlahqrlib.a

SERLIBS = $(MATGENLIB) $(LAPACKLIB) $(BLASLIB)
PARLIBS = $(PXLAHQRLIB) $(SCALAPACK) $(BLACSMPI) $(SERLIBS) -lscs -lmpi

SHAR_EOF
fi # end of overwriting check
if test ! -d 'Src'
then
	mkdir 'Src'
fi
cd 'Src'
if test -f 'Makefile'
then
	echo shar: will not over-write existing file "'Makefile'"
else
cat << "SHAR_EOF" > 'Makefile'
include ../../SLmake.inc

ZQRSRC = pzlahqr.o pzlaconsb.o pzlasmsub.o pzlacp3.o pzlawil.o pzrot.o \
         zlamsh.o zlaref.o zlanv2.o zlahqr.o
ZVCSRC = pztrevc.o 
#pzlatrs.o pzaxpy.o pzdotc.o pzdotu.o

all: complex16

complex16: $(ZQRSRC) $(ZVCSRC)
	$(ARCH) $(ARCHFLAGS) $(PXLAHQRLIB) $(ZQRSRC) $(ZVCSRC)
	$(RANLIB) $(PXLAHQRLIB)

$(ZQRSRC): $(FRC)
$(ZVCSRC): $(FRC)

FRC:
	@FRC=$(FRC)

clean:
	rm -f *.o

.c.o: 
	$(CC) $(CDEFS) $(CCFLAGS) -c $*.c

.f.o: 
	$(FORTRAN) $(OPTS) -c $*.f

SHAR_EOF
fi # end of overwriting check
if test -f 'README'
then
	echo shar: will not over-write existing file "'README'"
else
cat << "SHAR_EOF" > 'README'
Files included in this distribution
===================================

Makefile      README        pblas.h       pzaxpy.c      pzdotu.c 
pzdotc.c      pzlaconsb.f   pzlacp3.f     pzlahqr.f     pzlatrs.f 
pzlasmsub.f   pzlawil.f     pzrot.c       pztrevc.f     zlahqr.f
zlamsh.f      zlanv2.f      zlaref.f


List of computational routines with dependencies
================================================

PZLAHQR
  PZLACONSB
  PZLACP3
  PZLASMSUB
  PZLAWIL
  PZROT
  ZLAHQR
  ZLAMSH
  ZLANV2
  ZLAREF
PZTREVC
  PZLATRS (optional)
    PZAXPY (optional)
    PZDOTC (optional)
    PZDOTU (optional)


Notes
=====
The routine ZLAHQR included in this distribution is not the same as found 
in LAPACK 1.0.  This routine uses a multiple double-shift strategy as is 
found in DLAHQR.  This is included for completeness, but is not needed.  It 
can be removed from the list of codes in the Makefile so that the default
LAPACK ZLAHQR is used instead.

PZLATRS is an updated version of the routine included in ScaLAPACK 1.0.
It has been updated to use scaling.  It is, however, very slow in comparision 
to PZTRSV which theoretically does the same computation.  Although slow,
it controls scaling for ill-conditioned problems.

PZLATRS calls some PBLAS and BLAS routines.  I found that some of the 
PBLAS routines (ScaLAPACK version 1.0) each have an undocumented bug in 
them.  They are
  PZAXPY  line 363    &desc_Y[LLD_] -> incy
  PZDOTU  line 472    ixcol -> iycol
  PZDOTC  line 471    ixcol -> iycol
Thus, I have included these three update level 1 PBLAS routines with this 
distribution.  
SHAR_EOF
fi # end of overwriting check
if test -f 'install'
then
	echo shar: will not over-write existing file "'install'"
else
cat << "SHAR_EOF" > 'install'
%\section{Installation}
%   \label{sec:install}

%The SBR toolbox comes with a {\tt UNIX} makefile for easy installation and a
%testing program for validation and performance tuning.
%
%In this section we briefly describe the ``standard'' installation procedure.
%Detailed information about the installation process may be found in the
%\NAME{README} file distributed with the software.
%
%The installation consists of the following steps.
%\begin{enumerate}
%   \item Get the SBR toolbox from the TOMS repository at \texttt{netlib}
%	   and unpack it with
%	   \COMMAND{zcat sbr.tar.Z | tar xf -}%
%	   This command 
%	   puts all the SBR software into a new directory \texttt{sbr}.
%   \item Edit the file \NAME{make.inc} to match your system setup (e.g., the
%	   location of the LAPACK library, if the latter is installed on your
%	   machine).
%   \item Type
%	   \COMMAND{make library}%
%	   This command will build the library (called \NAME{libSBR.a} if you
%	   did not change the name).
%   \item (Optional.) Fine-tune the performance of the algorithms (cf.\ the
%	   \NAME{README} file).
%   \item (Recommended.) Run the validation tests by typing
%	   \COMMAND{make checks}%
%	   and have a look at the output files \NAME{DOUTCHK} and \NAME{SOUTCHK}.
%	   They should report no ``skipped'' or ``failed'' tests.
%   \item (Optional.) Run the additional timings (cf.\ the \NAME{README} file).
%   \item (Optional.) Move the SBR library to a directory searched by
%	   the linker.
%\end{enumerate}
%
%The testing programs provide the residuals $|| U \tilde A - A U ||_{F}$,
%the orthogonality errors $|| U^{T} U - I ||_{F}$, and the timings for the
%reduction of a symmetric (full or banded) matrix $A$ to a (narrower) banded
%or tridiagonal matrix $\tilde A$ for matrices of different orders $n$ and
%different semibandwidths.
%If the orthogonal matrix $U$ is not accumulated then the above error
%measures are not available.
%In this case the deviation of the eigenvalues,
%$|| \mbox{spec}( A ) - \mbox{spec}( \tilde A ) ||_{2}$, is computed instead,
%where $\mbox{spec}( A )$ and $\mbox{spec}( \tilde A )$ are the eigenvalues
%of $A$ and $\tilde A$, respectively, in ascending order.

SHAR_EOF
fi # end of overwriting check
if test -f 'pblas.h'
then
	echo shar: will not over-write existing file "'pblas.h'"
else
cat << "SHAR_EOF" > 'pblas.h'
/* ---------------------------------------------------------------------
*
*  -- ScaLAPACK routine (version 1.0) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     November 17, 1996
*
*  ---------------------------------------------------------------------
*/

/*
* This file includes the standard C libraries, as well as system
* dependent include files.  All PBLAS routines include this file.
*/

/*
* ========================================================================
* Machine Specific PBLAS macros
* ========================================================================
*/

#define _HAL_           0
#define _T3D_           1

#ifdef T3D
#define _MACH_          _T3D_
#endif

#ifndef _MACH_
#define _MACH_          _HAL_
#endif

/*
* ========================================================================
* Include files
* ========================================================================
*/
#include <stdio.h>
#include <stdlib.h>

#if( _MACH_ == _T3D_ )
#include <fortran.h>
#endif

/*
* ========================================================================
* FORTRAN <-> C interface
* ========================================================================
*
* These macros define how the PBLAS will be called. _F2C_ADD_ assumes 
* that they will be called by FORTRAN, which expects C routines to have
* an underscore postfixed to the name (Suns, and Intel machines expect
* this). _F2C_NOCHANGE indicates that FORTRAN will be calling, and that
* it expects the name called by FORTRAN to be identical to that compiled
* by the C (RS6K's do this).  _F2C_UPCASE says it expects C routines
* called by FORTRAN to be in all upcase (CRAY wants this).   
* _F2C_F77ISF2C indicates that the fortran "compiler" in use is
* actually f2c, a FORTRAN to C converter.
*/

#define _F2C_ADD_       0
#define _F2C_NOCHANGE   1
#define _F2C_UPCASE     2
#define _F2C_F77ISF2C   3

#ifdef UpCase
#define _F2C_CALL_      _F2C_UPCASE
#endif

#ifdef NoChange
#define _F2C_CALL_      _F2C_NOCHANGE
#endif

#ifdef Add_
#define _F2C_CALL_      _F2C_ADD_
#endif

#ifdef f77IsF2C
#define _F2C_CALL_      _F2C_F77ISF2C
#endif

#ifndef _F2C_CALL_
#define _F2C_CALL_      _F2C_ADD_
#endif

/*
* ========================================================================
* TYPE DEFINITIONS AND CONVERSION UTILITIES
* ========================================================================
*/

typedef struct { float  re, im; } complex;
typedef struct { double re, im; } complex16;

#if( _MACH_ == _T3D_ )
#define float  double
                       /* Type of character argument in a FORTRAN call */
#define F_CHAR          _fcd
                                     /* Character conversion utilities */
#define F2C_CHAR(a)     ( _fcdtocp( (a) ) )
#define C2F_CHAR(a)     ( _cptofcd( (a), 1 ) )
                                          /* Type of FORTRAN functions */
#define F_VOID_FCT      void   fortran                   /* Subroutine */
#define F_INTG_FCT      int    fortran             /* INTEGER function */
#define F_DBLE_FCT      double fortran    /* DOUBLE PRECISION function */

#else
                       /* Type of character argument in a FORTRAN call */
typedef char *          F_CHAR;
                                     /* Character conversion utilities */
#define F2C_CHAR(a)     (a)
#define C2F_CHAR(a)     (a)
                                          /* Type of FORTRAN functions */
#define F_VOID_FCT      void                             /* Subroutine */
#define F_INTG_FCT      int                        /* INTEGER function */
#define F_DBLE_FCT      double            /* DOUBLE PRECISION function */

#endif

/*
* ========================================================================
* #DEFINE MACRO CONSTANTS
* ========================================================================
*/
#define    DLEN_        9                     /* Length of a descriptor */
#define    DT_          0                     /* Descriptor Type        */
#define    CTXT_        1                              /* BLACS context */
#define    M_           2                      /* Global Number of Rows */
#define    N_           3                   /* Global Number of Columns */
#define    MB_          4                          /* Row Blocking Size */
#define    NB_          5                       /* Column Blocking Size */
#define    RSRC_        6                     /* Starting Processor Row */
#define    CSRC_        7                  /* Starting Processor Column */
#define    LLD_         8                    /* Local Leading Dimension */

/*
 * Descriptor types
 */
#define    BLOCK_CYCLIC_2D                1
#define    BLOCK_CYCLIC_INB_2D            2

#define    BROADCAST    "B"              /* Blacs operation definitions */
#define    COMBINE      "C"

#define    ALL          "A"                        /* Scope definitions */
#define    COLUMN       "C"
#define    ROW          "R"

#define    TOPDEF       " " /* Default BLACS topology, PB-BLAS routines */
#define    CTOPDEF      ' '
#define    TOPGET       "!"

#define    YES          "Y"
#define    NO           "N"

#define    MULLENFAC    2

#define    ONE          1.0
#define    ZERO         0.0

/*
* ========================================================================
* PREPROCESSOR MACRO FUNCTIONS USED FOR OPTIMIZATION & CONVENIENCE
* ========================================================================
*/

#define ABS(a)   (((a) < 0) ? -(a) : (a))

#define MIN(a,b) (((a) < (b)) ? (a) : (b))

#define MAX(a,b) (((a) > (b)) ? (a) : (b))

#define CEIL(a,b) ( ((a)+(b)-1) / (b) )

#define Mlowcase(C) ( ((C) > 64 && (C) < 91) ? (C) | 32 : (C) )

#define Mupcase(C) ( ((C) > 96 && (C) < 123) ? (C) & 0xDF : (C) )

#define INDXG2L( iglob, nb, iproc, isrcproc, nprocs )\
    ( (nb) * ( ( (iglob)-1) / ( (nb) * (nprocs) ) ) +\
      ( ( (iglob) - 1 ) % (nb) ) + 1 )

#define INDXL2G( iloc, nb, iproc, isrcproc, nprocs )\
    ( (nprocs) * (nb) * ( ( (iloc) - 1 ) / (nb) ) +\
      ( ( (iloc) - 1 ) % (nb) ) +\
      ( ( (nprocs) + (iproc) - (isrcproc) ) % (nprocs) ) * (nb) + 1 )

#define INDXG2P( iglob, nb, iproc, isrcproc, nprocs ) \
    ( ( (isrcproc) + ( (iglob) - 1 ) / (nb) ) % (nprocs) )

#define MYROC0( nblocks, n, nb, nprocs )\
  ( ( (nblocks) % (nprocs) ) ? ( ( (nblocks) / (nprocs) ) * (nb) + (nb) )\
                   : ( ( (nblocks) / (nprocs) )* (nb) + ( (n) % (nb) ) ) )

#if( _F2C_CALL_ == _F2C_ADD_ )
/*
* These defines set up the naming scheme required to have a FORTRAN
* routine call a C routine (which is what the PBLAS are written in).
* No redefinition necessary to have following FORTRAN to C interface:
*           FORTRAN CALL               C DECLARATION
*           call pdgemm(...)           void pdgemm_(...)
*
* This is the default.
*/

#endif

#if( _F2C_CALL_ == _F2C_UPCASE )
/*
* These defines set up the naming scheme required to have a FORTRAN
* routine call a C routine (which is what the PBLAS are written in)
* following FORTRAN to C interface:
*           FORTRAN CALL               C DECLARATION
*           call pdgemm(...)           void PDGEMM(...)
*/
                                                            /* TOOLS */
#define ilcm_             ILCM
#define infog2l_          INFOG2L
#define numroc_           NUMROC
#define pstreecomb_       PSTREECOMB
#define pdtreecomb_       PDTREECOMB
#define pctreecomb_       PCTREECOMB
#define pztreecomb_       PZTREECOMB
#define scombamax_        SCOMBAMAX
#define dcombamax_        DCOMBAMAX
#define ccombamax_        CCOMBAMAX
#define zcombamax_        ZCOMBAMAX
#define scombnrm2_        SCOMBNRM2
#define dcombnrm2_        DCOMBNRM2
                                                            /* BLACS */
#define blacs_abort_      BLACS_ABORT
#define blacs_gridinfo_   BLACS_GRIDINFO

#define igesd2d_          IGESD2D
#define igebs2d_          IGEBS2D
#define itrsd2d_          ITRSD2D
#define itrbs2d_          ITRBS2D
#define igerv2d_          IGERV2D
#define igebr2d_          IGEBR2D
#define itrrv2d_          ITRRV2D
#define itrbr2d_          ITRBR2D
#define igamx2d_          IGAMX2D
#define igamn2d_          IGAMN2D
#define igsum2d_          IGSUM2D

#define sgesd2d_          SGESD2D
#define sgebs2d_          SGEBS2D
#define strsd2d_          STRSD2D
#define strbs2d_          STRBS2D
#define sgerv2d_          SGERV2D
#define sgebr2d_          SGEBR2D
#define strrv2d_          STRRV2D
#define strbr2d_          STRBR2D
#define sgamx2d_          SGAMX2D
#define sgamn2d_          SGAMN2D
#define sgsum2d_          SGSUM2D

#define dgesd2d_          DGESD2D
#define dgebs2d_          DGEBS2D
#define dtrsd2d_          DTRSD2D
#define dtrbs2d_          DTRBS2D
#define dgerv2d_          DGERV2D
#define dgebr2d_          DGEBR2D
#define dtrrv2d_          DTRRV2D
#define dtrbr2d_          DTRBR2D
#define dgamx2d_          DGAMX2D
#define dgamn2d_          DGAMN2D
#define dgsum2d_          DGSUM2D

#define cgesd2d_          CGESD2D
#define cgebs2d_          CGEBS2D
#define ctrsd2d_          CTRSD2D
#define ctrbs2d_          CTRBS2D
#define cgerv2d_          CGERV2D
#define cgebr2d_          CGEBR2D
#define ctrrv2d_          CTRRV2D
#define ctrbr2d_          CTRBR2D
#define cgamx2d_          CGAMX2D
#define cgamn2d_          CGAMN2D
#define cgsum2d_          CGSUM2D

#define zgesd2d_          ZGESD2D
#define zgebs2d_          ZGEBS2D
#define ztrsd2d_          ZTRSD2D
#define ztrbs2d_          ZTRBS2D
#define zgerv2d_          ZGERV2D
#define zgebr2d_          ZGEBR2D
#define ztrrv2d_          ZTRRV2D
#define ztrbr2d_          ZTRBR2D
#define zgamx2d_          ZGAMX2D
#define zgamn2d_          ZGAMN2D
#define zgsum2d_          ZGSUM2D
                                                     /* Level-1 BLAS */
#define srotg_            SROTG
#define srotmg_           SROTMG
#define srot_             SROT
#define srotm_            SROTM
#define sswap_            SSWAP
#define sscal_            SSCAL
#define scopy_            SCOPY
#define saxpy_            SAXPY
#define ssdot_            SSDOT
#define isamax_           ISAMAX

#define drotg_            DROTG
#define drotmg_           DROTMG
#define drot_             DROT
#define drotm_            DROTM
#define dswap_            DSWAP
#define dscal_            DSCAL
#define dcopy_            DCOPY
#define daxpy_            DAXPY
#define dddot_            DDDOT
#define dnrm2_            DNRM2
#define dsnrm2_           DSNRM2
#define dasum_            DASUM
#define dsasum_           DSASUM
#define idamax_           IDAMAX

#define cswap_            CSWAP
#define cscal_            CSCAL
#define csscal_           CSSCAL
#define ccopy_            CCOPY
#define caxpy_            CAXPY
#define ccdotu_           CCDOTU
#define ccdotc_           CCDOTC
#define icamax_           ICAMAX

#define zswap_            ZSWAP
#define zscal_            ZSCAL
#define zdscal_           ZDSCAL
#define zcopy_            ZCOPY
#define zaxpy_            ZAXPY
#define zzdotu_           ZZDOTU
#define zzdotc_           ZZDOTC
#define dscnrm2_          DSCNRM2
#define dznrm2_           DZNRM2
#define dscasum_          DSCASUM
#define dzasum_           DZASUM
#define izamax_           IZAMAX
                                                     /* Level-2 BLAS */
#define sgemv_            SGEMV
#define ssymv_            SSYMV
#define strmv_            STRMV
#define strsv_            STRSV
#define sger_             SGER
#define ssyr_             SSYR
#define ssyr2_            SSYR2

#define dgemv_            DGEMV
#define dsymv_            DSYMV
#define dtrmv_            DTRMV
#define dtrsv_            DTRSV
#define dger_             DGER
#define dsyr_             DSYR
#define dsyr2_            DSYR2

#define cgemv_            CGEMV
#define chemv_            CHEMV
#define ctrmv_            CTRMV
#define ctrsv_            CTRSV
#define cgeru_            CGERU
#define cgerc_            CGERC
#define cher_             CHER
#define cher2_            CHER2

#define zgemv_            ZGEMV
#define zhemv_            ZHEMV
#define ztrmv_            ZTRMV
#define ztrsv_            ZTRSV
#define zgeru_            ZGERU
#define zgerc_            ZGERC
#define zher_             ZHER
#define zher2_            ZHER2
                                                     /* Level-3 BLAS */
#define sgemm_            SGEMM
#define ssymm_            SSYMM
#define ssyrk_            SSYRK
#define ssyr2k_           SSYR2K
#define strmm_            STRMM
#define strsm_            STRSM

#define dgemm_            DGEMM
#define dsymm_            DSYMM
#define dsyrk_            DSYRK
#define dsyr2k_           DSYR2K
#define dtrmm_            DTRMM
#define dtrsm_            DTRSM

#define cgemm_            CGEMM
#define chemm_            CHEMM
#define csymm_            CSYMM
#define csyrk_            CSYRK
#define cherk_            CHERK
#define csyr2k_           CSYR2K
#define cher2k_           CHER2K
#define ctrmm_            CTRMM
#define ctrsm_            CTRSM

#define zgemm_            ZGEMM
#define zhemm_            ZHEMM
#define zsymm_            ZSYMM
#define zsyrk_            ZSYRK
#define zherk_            ZHERK
#define zsyr2k_           ZSYR2K
#define zher2k_           ZHER2K
#define ztrmm_            ZTRMM
#define ztrsm_            ZTRSM
                                   /* absolute value auxiliary PBLAS */
#define psatrmv_          PSATRMV
#define pdatrmv_          PDATRMV
#define pcatrmv_          PCATRMV
#define pzatrmv_          PZATRMV
#define psagemv_          PSAGEMV
#define pdagemv_          PDAGEMV
#define pcagemv_          PCAGEMV
#define pzagemv_          PZAGEMV
#define psasymv_          PSASYMV
#define pdasymv_          PDASYMV
#define pcahemv_          PCAHEMV
#define pzahemv_          PZAHEMV
                                                /* Auxiliary PB-BLAS */
#define pbcmatadd_        PBCMATADD
#define pbdmatadd_        PBDMATADD
#define pbsmatadd_        PBSMATADD
#define pbzmatadd_        PBZMATADD
                                                   /* Level-2 PBBLAS */
#define pbcgemv_          PBCGEMV
#define pbcgeru_          PBCGERU
#define pbcgerc_          PBCGERC
#define pbchemv_          PBCHEMV
#define pbcher_           PBCHER
#define pbcher2_          PBCHER2
#define pbctrmv_          PBCTRMV
#define pbctrnv_          PBCTRNV
#define pbctrsv_          PBCTRSV

#define pbdgemv_          PBDGEMV
#define pbdger_           PBDGER
#define pbdsymv_          PBDSYMV
#define pbdsyr_           PBDSYR
#define pbdsyr2_          PBDSYR2
#define pbdtrmv_          PBDTRMV
#define pbdtrnv_          PBDTRNV
#define pbdtrsv_          PBDTRSV

#define pbsgemv_          PBSGEMV
#define pbsger_           PBSGER
#define pbssymv_          PBSSYMV
#define pbssyr_           PBSSYR
#define pbssyr2_          PBSSYR2
#define pbstrmv_          PBSTRMV
#define pbstrnv_          PBSTRNV
#define pbstrsv_          PBSTRSV

#define pbzgemv_          PBZGEMV
#define pbzgeru_          PBZGERU
#define pbzgerc_          PBZGERC
#define pbzhemv_          PBZHEMV
#define pbzher_           PBZHER
#define pbzher2_          PBZHER2
#define pbztrmv_          PBZTRMV
#define pbztrnv_          PBZTRNV
#define pbztrsv_          PBZTRSV
                                                   /* Level-3 PBBLAS */
#define pbcgemm_          PBCGEMM
#define pbchemm_          PBCHEMM
#define pbcher2k_         PBCHER2K
#define pbcherk_          PBCHERK
#define pbcsymm_          PBCSYMM
#define pbcsyr2k_         PBCSYR2K
#define pbcsyrk_          PBCSYRK
#define pbctrmm_          PBCTRMM
#define pbctrsm_          PBCTRSM
#define pbctran_          PBCTRAN

#define pbdgemm_          PBDGEMM
#define pbdsymm_          PBDSYMM
#define pbdsyr2k_         PBDSYR2K
#define pbdsyrk_          PBDSYRK
#define pbdtrmm_          PBDTRMM
#define pbdtrsm_          PBDTRSM
#define pbdtran_          PBDTRAN

#define pbsgemm_          PBSGEMM
#define pbssymm_          PBSSYMM
#define pbssyr2k_         PBSSYR2K
#define pbssyrk_          PBSSYRK
#define pbstrmm_          PBSTRMM
#define pbstrsm_          PBSTRSM
#define pbstran_          PBSTRAN

#define pbzgemm_          PBZGEMM
#define pbzhemm_          PBZHEMM
#define pbzher2k_         PBZHER2K
#define pbzherk_          PBZHERK
#define pbzsymm_          PBZSYMM
#define pbzsyr2k_         PBZSYR2K
#define pbzsyrk_          PBZSYRK
#define pbztrmm_          PBZTRMM
#define pbztrsm_          PBZTRSM
#define pbztran_          PBZTRAN
                                                 /* Auxilliary PBLAS */
#define pberror_          PBERROR
#define pbfreebuf_        PBFREEBUF
#define ptopget_          PTOPGET
#define ptopset_          PTOPSET
                                                    /* Level-1 PBLAS */
#define psrotg_           PSROTG
#define psrotmg_          PSROTMG
#define psrot_            PSROT
#define psrotm_           PSROTM
#define psswap_           PSSWAP
#define psscal_           PSSCAL
#define pscopy_           PSCOPY
#define psaxpy_           PSAXPY
#define psdot_            PSDOT
#define psnrm2_           PSNRM2
#define psasum_           PSASUM
#define psamax_           PSAMAX

#define pdrotg_           PDROTG
#define pdrotmg_          PDROTMG
#define pdrot_            PDROT
#define pdrotm_           PDROTM
#define pdswap_           PDSWAP
#define pdscal_           PDSCAL
#define pdcopy_           PDCOPY
#define pdaxpy_           PDAXPY
#define pddot_            PDDOT
#define pdnrm2_           PDNRM2
#define pdasum_           PDASUM
#define pdamax_           PDAMAX

#define pcswap_           PCSWAP
#define pcscal_           PCSCAL
#define pcsscal_          PCSSCAL
#define pccopy_           PCCOPY
#define pcaxpy_           PCAXPY
#define pcdotu_           PCDOTU
#define pcdotc_           PCDOTC
#define pscnrm2_          PSCNRM2
#define pscasum_          PSCASUM
#define pcamax_           PCAMAX
#define pcrot_            PCROT

#define pzswap_           PZSWAP
#define pzscal_           PZSCAL
#define pzdscal_          PZDSCAL
#define pzcopy_           PZCOPY
#define pzaxpy_           PZAXPY
#define pzdotu_           PZDOTU
#define pzdotc_           PZDOTC
#define pdznrm2_          PDZNRM2
#define pdzasum_          PDZASUM
#define pzamax_           PZAMAX
#define pzrot_            PZROT
                                                    /* Level-2 PBLAS */
#define pcgemv_           PCGEMV
#define pcgeru_           PCGERU
#define pcgerc_           PCGERC
#define pchemv_           PCHEMV
#define pcher_            PCHER
#define pcher2_           PCHER2
#define pctrmv_           PCTRMV
#define pctrsv_           PCTRSV

#define pdgemv_           PDGEMV
#define pdger_            PDGER
#define pdsymv_           PDSYMV
#define pdsyr_            PDSYR
#define pdsyr2_           PDSYR2
#define pdtrmv_           PDTRMV
#define pdtrsv_           PDTRSV

#define psgemv_           PSGEMV
#define psger_            PSGER
#define pssymv_           PSSYMV
#define pssyr_            PSSYR
#define pssyr2_           PSSYR2
#define pstrmv_           PSTRMV
#define pstrsv_           PSTRSV

#define pzgemv_           PZGEMV
#define pzgeru_           PZGERU
#define pzgerc_           PZGERC
#define pzhemv_           PZHEMV
#define pzher_            PZHER
#define pzher2_           PZHER2
#define pztrmv_           PZTRMV
#define pztrsv_           PZTRSV
                                                    /* Level-3 PBLAS */
#define pcgemm_           PCGEMM
#define pchemm_           PCHEMM
#define pcher2k_          PCHER2K
#define pcherk_           PCHERK
#define pcsymm_           PCSYMM
#define pcsyr2k_          PCSYR2K
#define pcsyrk_           PCSYRK
#define pctrmm_           PCTRMM
#define pctrsm_           PCTRSM
#define pctranu_          PCTRANU
#define pctranc_          PCTRANC

#define pdgemm_           PDGEMM
#define pdsymm_           PDSYMM
#define pdsyr2k_          PDSYR2K
#define pdsyrk_           PDSYRK
#define pdtrmm_           PDTRMM
#define pdtrsm_           PDTRSM
#define pdtran_           PDTRAN

#define psgemm_           PSGEMM
#define pssymm_           PSSYMM
#define pssyr2k_          PSSYR2K
#define pssyrk_           PSSYRK
#define pstrmm_           PSTRMM
#define pstrsm_           PSTRSM
#define pstran_           PSTRAN

#define pzgemm_           PZGEMM
#define pzhemm_           PZHEMM
#define pzher2k_          PZHER2K
#define pzherk_           PZHERK
#define pzsymm_           PZSYMM
#define pzsyr2k_          PZSYR2K
#define pzsyrk_           PZSYRK
#define pztrmm_           PZTRMM
#define pztrsm_           PZTRSM
#define pztranu_          PZTRANU
#define pztranc_          PZTRANC

#endif

#if( _F2C_CALL_ == _F2C_NOCHANGE )
/*
* These defines set up the naming scheme required to have a FORTRAN
* routine call a C routine (which is what the PBLAS are written in)
* for following FORTRAN to C interface:
*           FORTRAN CALL               C DECLARATION
*           call pdgemm(...)           void pdgemm(...)
*/
                                                            /* TOOLS */
#define ilcm_             ilcm
#define infog2l_          infog2l
#define numroc_           numroc
#define pstreecomb_       pstreecomb
#define pdtreecomb_       pdtreecomb
#define pctreecomb_       pctreecomb
#define pztreecomb_       pztreecomb
#define scombamax_        scombamax
#define dcombamax_        dcombamax
#define ccombamax_        ccombamax
#define zcombamax_        zcombamax
#define scombnrm2_        scombnrm2
#define dcombnrm2_        dcombnrm2
                                                            /* BLACS */
#define blacs_abort_      blacs_abort
#define blacs_gridinfo_   blacs_gridinfo

#define igesd2d_          igesd2d
#define igebs2d_          igebs2d
#define itrsd2d_          itrsd2d
#define itrbs2d_          itrbs2d
#define igerv2d_          igerv2d
#define igebr2d_          igebr2d
#define itrrv2d_          itrrv2d
#define itrbr2d_          itrbr2d
#define igamx2d_          igamx2d
#define igamn2d_          igamn2d
#define igsum2d_          igsum2d

#define sgesd2d_          sgesd2d
#define sgebs2d_          sgebs2d
#define strsd2d_          strsd2d
#define strbs2d_          strbs2d
#define sgerv2d_          sgerv2d
#define sgebr2d_          sgebr2d
#define strrv2d_          strrv2d
#define strbr2d_          strbr2d
#define sgamx2d_          sgamx2d
#define sgamn2d_          sgamn2d
#define sgsum2d_          sgsum2d

#define dgesd2d_          dgesd2d
#define dgebs2d_          dgebs2d
#define dtrsd2d_          dtrsd2d
#define dtrbs2d_          dtrbs2d
#define dgerv2d_          dgerv2d
#define dgebr2d_          dgebr2d
#define dtrrv2d_          dtrrv2d
#define dtrbr2d_          dtrbr2d
#define dgamx2d_          dgamx2d
#define dgamn2d_          dgamn2d
#define dgsum2d_          dgsum2d

#define cgesd2d_          cgesd2d
#define cgebs2d_          cgebs2d
#define ctrsd2d_          ctrsd2d
#define ctrbs2d_          ctrbs2d
#define cgerv2d_          cgerv2d
#define cgebr2d_          cgebr2d
#define ctrrv2d_          ctrrv2d
#define ctrbr2d_          ctrbr2d
#define cgamx2d_          cgamx2d
#define cgamn2d_          cgamn2d
#define cgsum2d_          cgsum2d

#define zgesd2d_          zgesd2d
#define zgebs2d_          zgebs2d
#define ztrsd2d_          ztrsd2d
#define ztrbs2d_          ztrbs2d
#define zgerv2d_          zgerv2d
#define zgebr2d_          zgebr2d
#define ztrrv2d_          ztrrv2d
#define ztrbr2d_          ztrbr2d
#define zgamx2d_          zgamx2d
#define zgamn2d_          zgamn2d
#define zgsum2d_          zgsum2d
                                                     /* Level-1 BLAS */
#define srotg_            srotg
#define srotmg_           srotmg
#define srot_             srot
#define srotm_            srotm
#define sswap_            sswap
#define sscal_            sscal
#define scopy_            scopy
#define saxpy_            saxpy
#define ssdot_            ssdot
#define isamax_           isamax

#define drotg_            drotg
#define drotmg_           drotmg
#define drot_             drot
#define drotm_            drotm
#define dswap_            dswap
#define dscal_            dscal
#define dcopy_            dcopy
#define daxpy_            daxpy
#define dddot_            dddot
#define dnrm2_            dnrm2
#define dsnrm2_           dsnrm2
#define dasum_            dasum
#define dsasum_           dsasum
#define idamax_           idamax

#define cswap_            cswap
#define cscal_            cscal
#define csscal_           csscal
#define ccopy_            ccopy
#define caxpy_            caxpy
#define ccdotu_           ccdotu
#define ccdotc_           ccdotc
#define icamax_           icamax

#define zswap_            zswap
#define zscal_            zscal
#define zdscal_           zdscal
#define zcopy_            zcopy
#define zaxpy_            zaxpy
#define zzdotu_           zzdotu
#define zzdotc_           zzdotc
#define dscnrm2_          dscnrm2
#define dznrm2_           dznrm2
#define dscasum_          dscasum
#define dzasum_           dzasum
#define izamax_           izamax
                                                     /* Level-2 BLAS */
#define sgemv_            sgemv
#define ssymv_            ssymv
#define strmv_            strmv
#define strsv_            strsv
#define sger_             sger
#define ssyr_             ssyr
#define ssyr2_            ssyr2

#define dgemv_            dgemv
#define dsymv_            dsymv
#define dtrmv_            dtrmv
#define dtrsv_            dtrsv
#define dger_             dger
#define dsyr_             dsyr
#define dsyr2_            dsyr2

#define cgemv_            cgemv
#define chemv_            chemv
#define ctrmv_            ctrmv
#define ctrsv_            ctrsv
#define cgeru_            cgeru
#define cgerc_            cgerc
#define cher_             cher
#define cher2_            cher2

#define zgemv_            zgemv
#define zhemv_            zhemv
#define ztrmv_            ztrmv
#define ztrsv_            ztrsv
#define zgeru_            zgeru
#define zgerc_            zgerc
#define zher_             zher
#define zher2_            zher2
                                                     /* Level-3 BLAS */
#define sgemm_            sgemm
#define ssymm_            ssymm
#define ssyrk_            ssyrk
#define ssyr2k_           ssyr2k
#define strmm_            strmm
#define strsm_            strsm

#define dgemm_            dgemm
#define dsymm_            dsymm
#define dsyrk_            dsyrk
#define dsyr2k_           dsyr2k
#define dtrmm_            dtrmm
#define dtrsm_            dtrsm

#define cgemm_            cgemm
#define chemm_            chemm
#define csymm_            csymm
#define csyrk_            csyrk
#define cherk_            cherk
#define csyr2k_           csyr2k
#define cher2k_           cher2k
#define ctrmm_            ctrmm
#define ctrsm_            ctrsm

#define zgemm_            zgemm
#define zhemm_            zhemm
#define zsymm_            zsymm
#define zsyrk_            zsyrk
#define zherk_            zherk
#define zsyr2k_           zsyr2k
#define zher2k_           zher2k
#define ztrmm_            ztrmm
#define ztrsm_            ztrsm
                                   /* absolute value auxiliary PBLAS */
#define psatrmv_          psatrmv
#define pdatrmv_          pdatrmv
#define pcatrmv_          pcatrmv
#define pzatrmv_          pzatrmv
#define psagemv_          psagemv
#define pdagemv_          pdagemv
#define pcagemv_          pcagemv
#define pzagemv_          pzagemv
#define psasymv_          psasymv
#define pdasymv_          pdasymv
#define pcahemv_          pcahemv
#define pzahemv_          pzahemv
                                                /* Auxiliary PB-BLAS */
#define pbcmatadd_        pbcmatadd
#define pbdmatadd_        pbdmatadd
#define pbsmatadd_        pbsmatadd
#define pbzmatadd_        pbzmatadd
                                                   /* Level-2 PBBLAS */
#define pbcgemv_          pbcgemv
#define pbcgeru_          pbcgeru
#define pbcgerc_          pbcgerc
#define pbchemv_          pbchemv
#define pbcher_           pbcher
#define pbcher2_          pbcher2
#define pbctrmv_          pbctrmv
#define pbctrnv_          pbctrnv
#define pbctrsv_          pbctrsv

#define pbdgemv_          pbdgemv
#define pbdger_           pbdger
#define pbdsymv_          pbdsymv
#define pbdsyr_           pbdsyr
#define pbdsyr2_          pbdsyr2
#define pbdtrmv_          pbdtrmv
#define pbdtrnv_          pbdtrnv
#define pbdtrsv_          pbdtrsv

#define pbsgemv_          pbsgemv
#define pbsger_           pbsger
#define pbssymv_          pbssymv
#define pbssyr_           pbssyr
#define pbssyr2_          pbssyr2
#define pbstrmv_          pbstrmv
#define pbstrnv_          pbstrnv
#define pbstrsv_          pbstrsv

#define pbzgemv_          pbzgemv
#define pbzgeru_          pbzgeru
#define pbzgerc_          pbzgerc
#define pbzhemv_          pbzhemv
#define pbzher_           pbzher
#define pbzher2_          pbzher2
#define pbztrmv_          pbztrmv
#define pbztrnv_          pbztrnv
#define pbztrsv_          pbztrsv
                                                   /* Level-3 PBBLAS */
#define pbcgemm_          pbcgemm
#define pbchemm_          pbchemm
#define pbcher2k_         pbcher2k
#define pbcherk_          pbcherk
#define pbcsymm_          pbcsymm
#define pbcsyr2k_         pbcsyr2k
#define pbcsyrk_          pbcsyrk
#define pbctrmm_          pbctrmm
#define pbctrsm_          pbctrsm
#define pbctran_          pbctran

#define pbdgemm_          pbdgemm
#define pbdsymm_          pbdsymm
#define pbdsyr2k_         pbdsyr2k
#define pbdsyrk_          pbdsyrk
#define pbdtrmm_          pbdtrmm
#define pbdtrsm_          pbdtrsm
#define pbdtran_          pbdtran

#define pbsgemm_          pbsgemm
#define pbssymm_          pbssymm
#define pbssyr2k_         pbssyr2k
#define pbssyrk_          pbssyrk
#define pbstrmm_          pbstrmm
#define pbstrsm_          pbstrsm
#define pbstran_          pbstran

#define pbzgemm_          pbzgemm
#define pbzhemm_          pbzhemm
#define pbzher2k_         pbzher2k
#define pbzherk_          pbzherk
#define pbzsymm_          pbzsymm
#define pbzsyr2k_         pbzsyr2k
#define pbzsyrk_          pbzsyrk
#define pbztrmm_          pbztrmm
#define pbztrsm_          pbztrsm
#define pbztran_          pbztran
                                                 /* Auxilliary PBLAS */
#define pberror_          pberror
#define pbfreebuf_        pbfreebuf
#define ptopget_          ptopget
#define ptopset_          ptopset
                                                    /* Level-1 PBLAS */
#define psrotg_           psrotg
#define psrotmg_          psrotmg
#define psrot_            psrot
#define psrotm_           psrotm
#define psswap_           psswap
#define psscal_           psscal
#define pscopy_           pscopy
#define psaxpy_           psaxpy
#define psdot_            psdot
#define psnrm2_           psnrm2
#define psasum_           psasum
#define psamax_           psamax

#define pdrotg_           pdrotg
#define pdrotmg_          pdrotmg
#define pdrot_            pdrot
#define pdrotm_           pdrotm
#define pdswap_           pdswap
#define pdscal_           pdscal
#define pdcopy_           pdcopy
#define pdaxpy_           pdaxpy
#define pddot_            pddot
#define pdnrm2_           pdnrm2
#define pdasum_           pdasum
#define pdamax_           pdamax

#define pcswap_           pcswap
#define pcscal_           pcscal
#define pcsscal_          pcsscal
#define pccopy_           pccopy
#define pcaxpy_           pcaxpy
#define pcdotu_           pcdotu
#define pcdotc_           pcdotc
#define pscnrm2_          pscnrm2
#define pscasum_          pscasum
#define pcamax_           pcamax
#define pcrot_            pcrot

#define pzswap_           pzswap
#define pzscal_           pzscal
#define pzdscal_          pzdscal
#define pzcopy_           pzcopy
#define pzaxpy_           pzaxpy
#define pzdotu_           pzdotu
#define pzdotc_           pzdotc
#define pdznrm2_          pdznrm2
#define pdzasum_          pdzasum
#define pzamax_           pzamax
#define pzrot_            pzrot
                                                    /* Level-2 PBLAS */
#define pcgemv_           pcgemv
#define pcgeru_           pcgeru
#define pcgerc_           pcgerc
#define pchemv_           pchemv
#define pcher_            pcher
#define pcher2_           pcher2
#define pctrmv_           pctrmv
#define pctrsv_           pctrsv

#define pdgemv_           pdgemv
#define pdger_            pdger
#define pdsymv_           pdsymv
#define pdsyr_            pdsyr
#define pdsyr2_           pdsyr2
#define pdtrmv_           pdtrmv
#define pdtrsv_           pdtrsv

#define psgemv_           psgemv
#define psger_            psger
#define pssymv_           pssymv
#define pssyr_            pssyr
#define pssyr2_           pssyr2
#define pstrmv_           pstrmv
#define pstrsv_           pstrsv

#define pzgemv_           pzgemv
#define pzgeru_           pzgeru
#define pzgerc_           pzgerc
#define pzhemv_           pzhemv
#define pzher_            pzher
#define pzher2_           pzher2
#define pztrmv_           pztrmv
#define pztrsv_           pztrsv
                                                    /* Level-3 PBLAS */
#define pcgemm_           pcgemm
#define pchemm_           pchemm
#define pcher2k_          pcher2k
#define pcherk_           pcherk
#define pcsymm_           pcsymm
#define pcsyr2k_          pcsyr2k
#define pcsyrk_           pcsyrk
#define pctrmm_           pctrmm
#define pctrsm_           pctrsm
#define pctranu_          pctranu
#define pctranc_          pctranc

#define pdgemm_           pdgemm
#define pdsymm_           pdsymm
#define pdsyr2k_          pdsyr2k
#define pdsyrk_           pdsyrk
#define pdtrmm_           pdtrmm
#define pdtrsm_           pdtrsm
#define pdtran_           pdtran

#define psgemm_           psgemm
#define pssymm_           pssymm
#define pssyr2k_          pssyr2k
#define pssyrk_           pssyrk
#define pstrmm_           pstrmm
#define pstrsm_           pstrsm
#define pstran_           pstran

#define pzgemm_           pzgemm
#define pzhemm_           pzhemm
#define pzher2k_          pzher2k
#define pzherk_           pzherk
#define pzsymm_           pzsymm
#define pzsyr2k_          pzsyr2k
#define pzsyrk_           pzsyrk
#define pztrmm_           pztrmm
#define pztrsm_           pztrsm
#define pztranu_          pztranu
#define pztranc_          pztranc

#endif

#if( _F2C_CALL_ == _F2C_F77ISF2C )
/*
* These defines set up the naming scheme required to have a FORTRAN
* routine call a C routine (which is what the PBLAS are written in)
* for systems where the fortran "compiler" is actually f2c (a Fortran
* to C conversion utility).
*/

/*
* Initialization routines
*/
#define blacs_pinfo_    blacs_pinfo__
#define blacs_setup_    blacs_setup__
#define blacs_set_      blacs_set__
#define blacs_get_      blacs_get__
#define blacs_gridinit_ blacs_gridinit__
#define blacs_gridmap_  blacs_gridmap__
/*
* Destruction routines
*/
#define blacs_freebuff_ blacs_freebuff__
#define blacs_gridexit_ blacs_gridexit__
#define blacs_abort_    blacs_abort__
#define blacs_exit_     blacs_exit__
/*
* Informational & misc.
*/
#define blacs_gridinfo_ blacs_gridinfo__
#define blacs_pnum_     blacs_pnum__
#define blacs_pcoord_   blacs_pcoord__
#define blacs_barrier_  blacs_barrier__
#endif
SHAR_EOF
fi # end of overwriting check
if test -f 'pzaxpy.c'
then
	echo shar: will not over-write existing file "'pzaxpy.c'"
else
cat << "SHAR_EOF" > 'pzaxpy.c'
/* ---------------------------------------------------------------------
*
*  Mark R. Fahey
*  August 2000
*  This is a slightly modified version of pzaxpy_ from ScaLAPACK 1.0
*  which fixes a bug in the incx=1 and incy=1 case.
*
*  ---------------------------------------------------------------------
*/
/*
*  Include files
*/
#include "pblas.h"

void pzaxpy_( n, alpha, X, ix, jx, desc_X, incx, Y, iy, jy, desc_Y,
              incy )
/*
*  .. Scalar Arguments ..
*/
   int         * incx, * incy, * ix, * iy, * jx, * jy, * n;
   complex16   * alpha;
/* ..
*  .. Array Arguments ..
*/
   int         desc_X[], desc_Y[];
   complex16   X[], Y[];
{
/*
*  Purpose
*  =======
*
*  PZAXPY adds one distributed vector to another,
*
*     sub( Y ) := sub( Y ) + alpha * sub( X )
*
*  where sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X,
*                         X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X,
*
*        sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y,
*                         Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y.
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclicly distributed array.
*  Such a global array has an associated description vector descA.
*  In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DT_A   (global) descA[ DT_ ]   The descriptor type.  In this case,
*                                 DT_A = 1.
*  CTXT_A (global) descA[ CTXT_ ] The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) descA[ M_ ]    The number of rows in the global
*                                 array A.
*  N_A    (global) descA[ N_ ]    The number of columns in the global
*                                 array A.
*  MB_A   (global) descA[ MB_ ]   The blocking factor used to distribu-
*                                 te the rows of the array.
*  NB_A   (global) descA[ NB_ ]   The blocking factor used to distribu-
*                                 te the columns of the array.
*  RSRC_A (global) descA[ RSRC_ ] The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) descA[ CSRC_ ] The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  descA[ LLD_ ]  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Because vectors may be seen as particular matrices, a distributed
*  vector is considered to be a distributed matrix.
*
*  If INCX = M_X and INCY = M_Y, NB_X must be equal to NB_Y, and the
*  process column having the first entries of sub( Y ) must also contain
*  the first entries of sub( X ). Moreover, the quantity
*  MOD( JX-1, NB_X ) must be equal to MOD( JY-1, NB_Y ).
*
*  If INCX = M_X, INCY = 1 and INCY <> M_Y, NB_X must be equal to MB_Y.
*  Moreover, the quantity MOD( JX-1, NB_X ) must be equal to
*  MOD( IY-1, MB_Y ).
*
*  If INCX = 1, INCX <> M_X and INCY = M_Y, MB_X must be equal to NB_Y.
*  Moreover, the quantity MOD( IX-1, MB_X ) must be equal to
*  MOD( JY-1, NB_Y ).
*
*  If INCX = 1, INCX <> M_X, INCY = 1 and INCY <> M_Y, MB_X must be
*  equal to MB_Y, and the process row having the first entries of
*  sub( Y ) must also contain the first entries of sub( X ). Moreover,
*  the quantity MOD( IX-1, MB_X ) must be equal to MOD( IY-1, MB_Y ).
*
*  Parameters
*  ==========
*
*  N       (global input) pointer to INTEGER.
*          The length of the distributed vectors to be added. N >= 0.
*
*  ALPHA   (global input) pointer to COMPLEX*16
*          The scalar used to multiply each component of sub( X ).
*
*  X       (local input) COMPLEX*16 array containing the local
*          pieces of a distributed matrix of dimension of at least
*              ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) )
*          This array contains the entries of the distributed vector
*          sub( X ).
*
*  IX      (global input) pointer to INTEGER
*          The global row index of the submatrix of the distributed
*          matrix X to operate on.
*
*  JX      (global input) pointer to INTEGER
*          The global column index of the submatrix of the distributed
*          matrix X to operate on.
*
*  DESCX   (global and local input) INTEGER array of dimension 8.
*          The array descriptor of the distributed matrix X.
*
*  INCX    (global input) pointer to INTEGER
*          The global increment for the elements of X. Only two values
*          of INCX are supported in this version, namely 1 and M_X.
*
*  Y       (local input/local output) COMPLEX*16 array
*          containing the local pieces of a distributed matrix of
*          dimension of at least
*                ( (JY-1)*M_Y + IY + ( N - 1 )*abs( INCY ) )
*          This array contains the entries of the distributed vector
*          sub( Y ).
*          On exit sub( Y ) is overwritten by sub( Y ) + alpha*sub( X ).
*
*  IY      (global input) pointer to INTEGER
*          The global row index of the submatrix of the distributed
*          matrix Y to operate on.
*
*  JY      (global input) pointer to INTEGER
*          The global column index of the submatrix of the distributed
*          matrix Y to operate on.
*
*  DESCY   (global and local input) INTEGER array of dimension 8.
*          The array descriptor of the distributed matrix Y.
*
*  INCY    (global input) pointer to INTEGER
*          The global increment for the elements of Y. Only two values
*          of INCY are supported in this version, namely 1 and M_Y.
*
*  =====================================================================
*
*  .. Local Scalars ..
*/
   int         ictxt, info, iix, iiy, ixcol, ixrow, iycol, iyrow, jjx,
               jjy, lcm, lcmp, lcmq, mycol, myrow, nn, np, np0, nprow,
               npcol, nq, nq0, nz, ione=1, tmp1, wksz;
   complex16   one, tmp, zero;
/* ..
*  .. PBLAS Buffer ..
*/
   complex16   * buff;
/* ..
*  .. External Functions ..
*/
   void        blacs_gridinfo_();
   void        zgerv2d_();
   void        zgesd2d_();
   void        pbchkvect();
   void        pberror_();
   char        * getpbbuf();
   F_VOID_FCT  zaxpy_();
   F_VOID_FCT  zcopy_();
   F_VOID_FCT  pbztrnv_();
   F_INTG_FCT  ilcm_();
   F_INTG_FCT  numroc_();
/* ..
*  .. Executable Statements ..
*
*  Get grid parameters
*/
   ictxt = desc_X[CTXT_];
   blacs_gridinfo_( &ictxt, &nprow, &npcol, &myrow, &mycol );
/*
*  Test the input parameters
*/
   info = 0;
   if( nprow == -1 )
      info = -(600+CTXT_+1);
   else
   {
      pbchkvect( *n, 1, *ix, *jx, desc_X, *incx, 6, &iix, &jjx,
                 &ixrow, &ixcol, nprow, npcol, myrow, mycol, &info );
      pbchkvect( *n, 1, *iy, *jy, desc_Y, *incy, 11, &iiy, &jjy,
                 &iyrow, &iycol, nprow, npcol, myrow, mycol, &info );

      if( info == 0 )
      {
         if( *n != 1 )
         {
            if( *incx == desc_X[M_] )
            {                 /* X is distributed along a process row */
               if( *incy == desc_Y[M_] )
               {               /* Y is distributed over a process row */
                  if( ( ixcol != iycol ) ||
                      ( ( (*jx-1) % desc_X[NB_] ) !=
                        ( (*jy-1) % desc_Y[NB_] ) ) )
                     info = -10;
                  else if( desc_Y[NB_] != desc_X[NB_] )
                     info = -(1100+NB_+1);
               }
               else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
               {            /* Y is distributed over a process column */
                  if( ( (*jx-1) % desc_X[NB_] ) != ( (*iy-1) % desc_Y[MB_] ) )
                     info = -9;
                  else if( desc_Y[MB_] != desc_X[NB_] )
                     info = -(1100+MB_+1);
               }
               else
               {
                  info = -12;
               }
            }
            else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) )
            {              /* X is distributed along a process column */
               if( *incy == desc_Y[M_] )
               {               /* Y is distributed over a process row */
                  if( ( (*ix-1) % desc_X[MB_] ) != ( (*jy-1) % desc_Y[NB_] ) )
                     info = -10;
                  else if( desc_Y[NB_] != desc_X[MB_] )
                     info = -(1100+NB_+1);
               }
               else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
               {               /* Y is distributed over a process column */
                  if( ( ixrow != iyrow ) ||
                      ( ( (*ix-1) % desc_X[MB_] ) !=
                        ( (*iy-1) % desc_Y[MB_] ) ) )
                     info = -9;
                  else if( desc_Y[MB_] != desc_X[MB_] )
                     info = -(1100+MB_+1);
               }
               else
               {
                  info = -12;
               }
            }
            else
            {
               info = -7;
            }
         }
         if( ictxt != desc_Y[CTXT_] )
            info = -(1100+CTXT_+1);
      }
   }
   if( info )
   {
      pberror_( &ictxt, "PZAXPY", &info );
      return;
   }
/*
*  Quick return if possible.
*/
   if( *n == 0 )
      return;
/*
*  y <- y + alpha * x
*/
   if( *n == 1 )
   {
      if( ( myrow == iyrow ) && ( mycol == iycol ) )
      {
         if( ( myrow != ixrow ) || ( mycol != ixcol ) )
            zgerv2d_( &ictxt, n, n, &tmp, n, &ixrow, &ixcol );
         else
            tmp = X[iix-1+(jjx-1)*desc_X[LLD_]];
         zaxpy_( n, alpha, &tmp, n, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], n );
      }
      else if( ( myrow == ixrow ) && ( mycol == ixcol ) )
         zgesd2d_( &ictxt, n, n, &X[iix-1+(jjx-1)*desc_X[LLD_]], n,
                   &iyrow, &iycol );
      return;
   }

   one.re = ONE; one.im = ZERO;
   zero.re = ZERO; zero.im = ZERO;
   if( ( *incx == desc_X[M_] ) && ( *incy == desc_Y[M_] ) )
   {               /* X and Y are both distributed over a process row */
      nz = (*jx-1) % desc_Y[NB_];
      nn = *n + nz;
      nq = numroc_( &nn, &desc_X[NB_], &mycol, &ixcol, &npcol );
      if( mycol == ixcol )
         nq -= nz;
      if( ixrow == iyrow )
      {
         if( myrow == ixrow )
            zaxpy_( &nq, alpha,
                    &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
                    &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_] );
      }
      else
      {
         if( myrow == ixrow )
            zgesd2d_( &ictxt, &ione, &nq,
                      &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
                      &iyrow, &mycol );
         else if( myrow == iyrow )
         {
            buff = (complex16 *)getpbbuf( "PZAXPY", nq*sizeof(complex16) );
            zgerv2d_( &ictxt, &nq, &ione, buff, &ione, &ixrow,
                      &mycol );
            zaxpy_( &nq, alpha, buff, &ione,
                    &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_] );
         }
      }
   }
   else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) &&
            ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
   {            /* X and Y are both distributed over a process column */
      nz = (*ix-1) % desc_X[MB_];
      nn = *n + nz;
      np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow );
      if( myrow == ixrow )
         np -= nz;
      if( ixcol == iycol )
      {
         if( mycol == ixcol )
            zaxpy_( &np, alpha,
                    &X[iix-1+(jjx-1)*desc_X[LLD_]], incx,
                    &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy );
      }
      else
      {
         if( mycol == ixcol )
            zgesd2d_( &ictxt, &np, &ione,
                      &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
                      &myrow, &iycol );
         else if( mycol == iycol )
         {
            buff = (complex16 *)getpbbuf( "PZAXPY", np*sizeof(complex16) );
            zgerv2d_( &ictxt, &np, &ione, buff, &ione, &myrow,
                      &ixcol );
            zaxpy_( &np, alpha, buff, &ione,
                    &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy );
         }
      }
   }
   else       /* X and Y are not distributed along the same direction */
   {
      lcm = ilcm_( &nprow, &npcol );
      if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) )
      {                     /* X is distributed over a process column */
         lcmq = lcm / npcol;
         nz = (*ix-1) % desc_X[MB_];
         nn = *n + nz;
         np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow );
         nz = (*jy-1) % desc_Y[NB_];
         nn = *n + nz;
         tmp1 = nn / desc_Y[NB_];
         nq0 = MYROC0( tmp1, nn, desc_Y[NB_], npcol );
         tmp1 = nq0 / desc_Y[NB_];
         wksz = np + MYROC0( tmp1, nq0, desc_Y[NB_], lcmq );

         buff = (complex16 *)getpbbuf( "PZAXPY", wksz*sizeof(complex16) );

         if( myrow == ixrow )
            np -= nz;

         if( mycol == ixcol )
         {
            zcopy_( &np, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx,
                    buff, incx );
            zscal_( &np, alpha, buff, incx );
         }
         pbztrnv_( &ictxt, C2F_CHAR( "C" ), C2F_CHAR( "T" ), n,
                   &desc_X[MB_], &nz, buff, incx, &one,
                   &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_],
                   &ixrow, &ixcol, &iyrow, &iycol, buff+np );
      }
      else                  /* Y is distributed over a process column */
      {
         lcmp = lcm / nprow;
         nz = (*iy-1) % desc_Y[MB_];
         nn = *n + nz;
         tmp1 = nn / desc_Y[MB_];
         np = numroc_( &nn, &desc_Y[MB_], &myrow, &iyrow, &nprow );
         np0 = MYROC0( tmp1, nn, desc_Y[MB_], nprow );
         tmp1 = np0 / desc_Y[MB_];
         wksz = MYROC0( tmp1, np0, desc_Y[MB_], lcmp );
         wksz = np + wksz;

         buff = (complex16 *)getpbbuf( "PZAXPY", wksz*sizeof(complex16) );

         pbztrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n,
                   &desc_X[NB_], &nz, &X[iix-1+(jjx-1)*desc_X[LLD_]],
                   &desc_X[LLD_], &zero, buff, &ione, &ixrow, &ixcol,
                   &iyrow, &iycol, buff+np );
         if( mycol == iycol )
         {
            if( myrow == iyrow )
               np -= nz;
            zaxpy_( &np, alpha, buff, &ione,
                    &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy );
         }
      }
   }
}
SHAR_EOF
fi # end of overwriting check
if test -f 'pzdotc.c'
then
	echo shar: will not over-write existing file "'pzdotc.c'"
else
cat << "SHAR_EOF" > 'pzdotc.c'
/* ---------------------------------------------------------------------
*
*  Mark R. Fahey
*  August 2000
*  This is a slightly modified version of pzaxpy_ from ScaLAPACK 1.0
*  which fixes a bug in the incx=1 and incy=1 case.
*
*  ---------------------------------------------------------------------
*/
/*
*  Include files
*/
#include "pblas.h"

void pzdotc_( n, dotc, X, ix, jx, desc_X, incx, Y, iy, jy, desc_Y,
              incy )
/*
*  .. Scalar Arguments ..
*/
   int         * incx, * incy, * ix, * iy, * jx, * jy, * n;
   complex16   * dotc;
/* ..
*  .. Array Arguments ..
*/
   int         desc_X[], desc_Y[];
   complex16   X[], Y[];
{
/*
*  Purpose
*  =======
*
*  PZDOTC forms the dot product of two distributed vectors,
*
*     dotc := sub( X )**H * sub( Y )
*
*  where sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X,
*                         X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X,
*
*        sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y,
*                         Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y.
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclicly distributed array.
*  Such a global array has an associated description vector descA.
*  In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DT_A   (global) descA[ DT_ ]   The descriptor type.  In this case,
*                                 DT_A = 1.
*  CTXT_A (global) descA[ CTXT_ ] The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) descA[ M_ ]    The number of rows in the global
*                                 array A.
*  N_A    (global) descA[ N_ ]    The number of columns in the global
*                                 array A.
*  MB_A   (global) descA[ MB_ ]   The blocking factor used to distribu-
*                                 te the rows of the array.
*  NB_A   (global) descA[ NB_ ]   The blocking factor used to distribu-
*                                 te the columns of the array.
*  RSRC_A (global) descA[ RSRC_ ] The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) descA[ CSRC_ ] The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  descA[ LLD_ ]  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Because vectors may be seen as particular matrices, a distributed
*  vector is considered to be a distributed matrix.
*
*  If INCX = M_X and INCY = M_Y, NB_X must be equal to NB_Y, and the
*  process column having the first entries of sub( Y ) must also contain
*  the first entries of sub( X ). Moreover, the quantity
*  MOD( JX-1, NB_X ) must be equal to MOD( JY-1, NB_Y ).
*
*  If INCX = M_X, INCY = 1 and INCY <> M_Y, NB_X must be equal to MB_Y.
*  Moreover, the quantity MOD( JX-1, NB_X ) must be equal to
*  MOD( IY-1, MB_Y ).
*
*  If INCX = 1, INCX <> M_X and INCY = M_Y, MB_X must be equal to NB_Y.
*  Moreover, the quantity MOD( IX-1, MB_X ) must be equal to
*  MOD( JY-1, NB_Y ).
*
*  If INCX = 1, INCX <> M_X, INCY = 1 and INCY <> M_Y, MB_X must be
*  equal to MB_Y, and the process row having the first entries of
*  sub( Y ) must also contain the first entries of sub( X ). Moreover,
*  the quantity MOD( IX-1, MB_X ) must be equal to MOD( IY-1, MB_Y ).
*
*
*  Parameters
*  ==========
*
*  N       (global input) pointer to INTEGER
*          The length of the distributed vectors to be multiplied.
*          N >= 0.
*
*  DOTC    (local output) pointer to COMPLEX*16
*          The dot product of sub( X ) and sub( Y ) only in their scope.
*
*  X       (local input) COMPLEX*16 array containing the local
*          pieces of a distributed matrix of dimension of at least
*              ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) )
*          This array contains the entries of the distributed vector
*          sub( X ).
*
*  IX      (global input) pointer to INTEGER
*          The global row index of the submatrix of the distributed
*          matrix X to operate on.
*
*  JX      (global input) pointer to INTEGER
*          The global column index of the submatrix of the distributed
*          matrix X to operate on.
*
*  DESCX   (global and local input) INTEGER array of dimension 8.
*          The array descriptor of the distributed matrix X.
*
*  INCX    (global input) pointer to INTEGER
*          The global increment for the elements of X. Only two values
*          of INCX are supported in this version, namely 1 and M_X.
*
*  Y       (local input) COMPLEX*16 array containing the local
*          pieces of a distributed matrix of dimension of at least
*              ( (JY-1)*M_Y + IY + ( N - 1 )*abs( INCY ) )
*          This array contains the entries of the distributed vector
*          sub( Y ).
*
*  IY      (global input) pointer to INTEGER
*          The global row index of the submatrix of the distributed
*          matrix Y to operate on.
*
*  JY      (global input) pointer to INTEGER
*          The global column index of the submatrix of the distributed
*          matrix Y to operate on.
*
*  DESCY   (global and local input) INTEGER array of dimension 8.
*          The array descriptor of the distributed matrix Y.
*
*  INCY    (global input) pointer to INTEGER
*          The global increment for the elements of Y. Only two values
*          of INCY are supported in this version, namely 1 and M_Y.
*
*  =====================================================================
*
*  .. Local Scalars ..
*/
   char        * cbtop, * cctop, * rbtop, * rctop;
   int         ictxt, iix, iiy, info, ixcol, ixrow, iycol, iyrow, jjx,
               jjy, lcm, lcmp, mone=-1, mycol, myrow, nn, np, np0,
               nprow, npcol, nq, nz, ione=1, tmp1, wksz;
   complex16   xwork[1], ywork[1], zero;
/* ..
*  .. PBLAS Buffer ..
*/
   complex16   * buff;
/* ..
*  .. External Functions ..
*/
   void        blacs_gridinfo_();
   void        zgebr2d_();
   void        zgebs2d_();
   void        zgerv2d_();
   void        zgesd2d_();
   void        zgsum2d_();
   void        pbchkvect();
   void        pberror_();
   char        * getpbbuf();
   char        * ptop();
   F_VOID_FCT  pbztrnv_();
   F_VOID_FCT  zzdotc_();
   F_INTG_FCT  ilcm_();
/* ..
*  .. Executable Statements ..
*
*  Get grid parameters
*/
   ictxt = desc_X[CTXT_];
   blacs_gridinfo_( &ictxt, &nprow, &npcol, &myrow, &mycol );
/*
*  Test the input parameters
*/
   info = 0;
   if( nprow == -1 )
      info = -(600+CTXT_+1);
   else
   {
      pbchkvect( *n, 1, *ix, *jx, desc_X, *incx, 6, &iix, &jjx,
                 &ixrow, &ixcol, nprow, npcol, myrow, mycol, &info );
      pbchkvect( *n, 1, *iy, *jy, desc_Y, *incy, 11, &iiy, &jjy,
                 &iyrow, &iycol, nprow, npcol, myrow, mycol, &info );

      if( info == 0 )
      {
         if( *n != 1 )
         {
            if( *incx == desc_X[M_] )
            {                 /* X is distributed along a process row */
               if( *incy == desc_Y[M_] )
               {               /* Y is distributed over a process row */
                  if( ( ixcol != iycol ) ||
                      ( ( (*jx-1) % desc_X[NB_] ) !=
                        ( (*jy-1) % desc_Y[NB_] ) ) )
                     info = -10;
                  else if( desc_Y[NB_] != desc_X[NB_] )
                     info = -(1100+NB_+1);
               }
               else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
               {            /* Y is distributed over a process column */
                  if( ( (*jx-1) % desc_X[NB_] ) != ( (*iy-1) % desc_Y[MB_] ) )
                     info = -9;
                  else if( desc_Y[MB_] != desc_X[NB_] )
                     info = -(1100+MB_+1);
               }
               else
               {
                  info = -12;
               }
            }
            else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) )
            {              /* X is distributed along a process column */
               if( *incy == desc_Y[M_] )
               {                  /* Y is distributed over a process row */
                  if( ( (*ix-1) % desc_X[MB_] ) != ( (*jy-1) % desc_Y[NB_] ) )
                     info = -10;
                  else if( desc_Y[NB_] != desc_X[MB_] )
                     info = -(1100+NB_+1);
               }
               else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
               {            /* Y is distributed over a process column */
                  if( ( ixrow != iyrow ) ||
                      ( ( (*ix-1) % desc_X[MB_] ) !=
                        ( (*iy-1) % desc_Y[MB_] ) ) )
                     info = -9;
                  else if( desc_Y[MB_] != desc_X[MB_] )
                     info = -(1100+MB_+1);
               }
               else
               {
                  info = -12;
               }
            }
            else
            {
               info = -7;
            }
         }
         if( ictxt != desc_Y[CTXT_] )
            info = -(1100+CTXT_+1);
      }
   }
   if( info )
   {
      pberror_( &ictxt, "PZDOTC", &info );
      return;
   }
/*
*  Quick return if possible.
*/
   dotc->re = ZERO;
   dotc->im = ZERO;
   zero.re  = ZERO;
   zero.im  = ZERO;
   if( *n == 0 ) return;
/*
*  dot <- x^{h} * y
*/
   if( *n == 1 )
   {
      if( ( myrow == ixrow ) && ( mycol == ixcol ) )
      {
         buff = &X[iix-1+(jjx-1)*desc_X[LLD_]];
         if( ( myrow != iyrow ) || ( mycol != iycol ) )
         {
            zgesd2d_( &ictxt, n, n, buff, n, &iyrow, &iycol );
            zgerv2d_( &ictxt, n, n, ywork, n, &iyrow, &iycol );
         }
         else
            *ywork = Y[iiy-1+(jjy-1)*desc_Y[LLD_]];
         zzdotc_( n, dotc, buff, n, ywork, n );
      }
      else if( ( myrow == iyrow ) && ( mycol == iycol ) )
      {
         zgesd2d_( &ictxt, n, n, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], n,
                   &ixrow, &ixcol );
         zgerv2d_( &ictxt, n, n, xwork, n, &ixrow, &ixcol );
         zzdotc_( n, dotc, xwork, n,
                  &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], n );
      }

      if( ( *incx == desc_X[M_] ) && ( desc_X[M_] != 1 ) )
      {
         if( myrow == ixrow )
         {
            rbtop = ptop( BROADCAST, ROW, TOPGET );
            if( mycol == ixcol )
            {
               zgebs2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ),
                         &ione, &ione, dotc, &ione );
            }
            else
            {
               zgebr2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ),
                         &ione, &ione, dotc, &ione, &myrow, &ixcol );
            }
         }
      }
      else if( ( *incx == 1 ) && ( desc_X[M_] != 1 ) )
      {
         if( mycol == ixcol )
         {
            cbtop = ptop( BROADCAST, COLUMN, TOPGET );
            if( myrow == ixrow )
            {
               zgebs2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cbtop ),
                         &ione, &ione, dotc, &ione );
            }
            else
            {
               zgebr2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cbtop ),
                         &ione, &ione, dotc, &ione, &ixrow, &mycol );
            }
         }
      }

      if( ( *incy == desc_Y[M_] ) && ( desc_Y[M_] != 1 ) )
      {
         if( myrow == iyrow )
         {
            rbtop = ptop( BROADCAST, ROW, TOPGET );
            if( mycol == iycol )
            {
               zgebs2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ),
                         &ione, &ione, dotc, &ione );
            }
            else
            {
               zgebr2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ),
                         &ione, &ione, dotc, &ione, &myrow, &iycol );
            }
         }
      }
      else if( ( *incy == 1 ) && ( desc_Y[M_] != 1 ) )
      {
         if( mycol == iycol )
         {
            cbtop = ptop( BROADCAST, COLUMN, TOPGET );
            if( myrow == iyrow )
            {
               zgebs2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cbtop ),
                         &ione, &ione, dotc, &ione );
            }
            else
            {
               zgebr2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cbtop ),
                         &ione, &ione, dotc, &ione, &iyrow, &mycol );
            }
         }
      }
      return;
   }

   if( ( *incx == desc_X[M_] ) && ( *incy == desc_Y[M_] ) )
   {               /* X and Y are both distributed over a process row */
      nz = (*jx-1) % desc_Y[NB_];
      nn = *n + nz;
      nq = numroc_( &nn, &desc_X[NB_], &mycol, &ixcol, &npcol );
      if( mycol == ixcol )
         nq -= nz;
      if( ixrow == iyrow )
      {
         if( myrow == ixrow )
         {
            rctop = ptop( COMBINE, ROW, TOPGET );
            zzdotc_( &nq, dotc,
                     &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
                     &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_] );
            zgsum2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rctop ), &ione,
                      &ione, dotc, &ione, &mone, &mycol );
         }
      }
      else
      {
         if( myrow == ixrow )
         {
            rctop = ptop( COMBINE, ROW, TOPGET );
            zgesd2d_( &ictxt, &ione, &nq,
                      &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
                      &iyrow, &mycol );
            buff = (complex16 *)getpbbuf( "PZDOTC", nq*sizeof(complex16) );
            zgerv2d_( &ictxt, &nq, &ione, buff, &ione,
                      &ixrow, &mycol );
            zzdotc_( &nq, dotc, &X[iix-1+(jjx-1)*desc_X[LLD_]],
                          &desc_X[LLD_], buff, &ione );
            zgsum2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rctop ), &ione,
                      &ione, dotc, &ione, &mone, &mycol );
         }
         else if( myrow == iyrow )
         {
            rctop = ptop( COMBINE, ROW, TOPGET );
            zgesd2d_( &ictxt, &ione, &nq,
                      &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_],
                      &ixrow, &mycol );
            buff = (complex16 *)getpbbuf( "PZDOTC", nq*sizeof(complex16) );
            zgerv2d_( &ictxt, &nq, &ione, buff, &ione, &ixrow,
                      &mycol );
            zzdotc_( &nq, dotc,
                     buff, &ione,
                     &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_] );
            zgsum2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rctop ), &ione,
                      &ione, dotc, &ione, &mone, &mycol );
         }
      }
   }
   else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) &&
            ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
   {            /* X and Y are both distributed over a process column */
      nz = (*ix-1) % desc_X[MB_];
      nn = *n + nz;
      np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow );
      if( myrow == ixrow )
         np -= nz;
      if( ixcol == iycol )
      {
         if( mycol == ixcol )
         {
            cctop = ptop( COMBINE, COLUMN, TOPGET );
            zzdotc_( &np, dotc,
                     &X[iix-1+(jjx-1)*desc_X[LLD_]], incx,
                     &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy );
            zgsum2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cctop ),
                      &ione, &ione, dotc, &ione, &mone, &mycol );
         }
      }
      else
      {
         if( mycol == ixcol )
         {
            cctop = ptop( COMBINE, COLUMN, TOPGET );
            zgesd2d_( &ictxt, &np, &ione,
                      &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
                      &myrow, &iycol );
            buff = (complex16 *)getpbbuf( "PZDOTC", np*sizeof(complex16) );
            zgerv2d_( &ictxt, &np, &ione, buff, &ione,
                      &myrow, &iycol );
            zzdotc_( &np, dotc,
                     &X[iix-1+(jjx-1)*desc_X[LLD_]], incx,
                     buff, &ione );
            zgsum2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cctop ),
                      &ione, &ione, dotc, &ione, &mone, &mycol );
         }
         else if( mycol == iycol )
         {
            cctop = ptop( COMBINE, COLUMN, TOPGET );
            buff = (complex16 *)getpbbuf( "PZDOTC", np*sizeof(complex16) );
            zgerv2d_( &ictxt, &np, &ione, buff, &ione,
                      &myrow, &ixcol );
            zgesd2d_( &ictxt, &np, &ione,
                      &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_],
                      &myrow, &ixcol );
            zzdotc_( &np, dotc,
                     buff, &ione,
                     &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy );
            zgsum2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cctop ),
                      &ione, &ione, dotc, &ione, &mone, &mycol );
         }
      }
   }
   else       /* X and Y are not distributed along the same direction */
   {
      lcm = ilcm_( &nprow, &npcol );
      if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) )
      {                     /* X is distributed over a process column */
         lcmp = lcm / nprow;
         nz = (*jy-1) % desc_Y[NB_];
         nn = *n + nz;
         tmp1 = nn / desc_Y[MB_];
         np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow );
         np0 = MYROC0( tmp1, nn, desc_X[MB_], nprow );
         tmp1 = np0 / desc_X[MB_];
         wksz = MYROC0( tmp1, np0, desc_X[MB_], lcmp );
         wksz = np + wksz;

         buff = (complex16 *)getpbbuf( "PZDOTC", wksz*sizeof(complex16) );

         if( mycol == iycol )
            jjy -= nz;
         if( myrow == ixrow )
            np -= nz;
         pbztrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n,
                   &desc_Y[NB_], &nz, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]],
                   &desc_Y[LLD_], &zero, buff, &ione, &iyrow, &iycol,
                   &ixrow, &ixcol, buff+np );
         if( mycol == ixcol )
         {
            cctop = ptop( COMBINE, COLUMN, TOPGET );
            zzdotc_( &np, dotc, &X[iix-1+(jjx-1)*desc_X[LLD_]],
                     incx, buff, &ione );
            zgsum2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cctop ),
                      &ione, &ione, dotc, &ione, &mone, &mycol );
         }
         if( myrow == iyrow )
         {
            rbtop = ptop( BROADCAST, ROW, TOPGET );
            if( mycol == ixcol )
               zgebs2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ),
                        &ione, &ione, dotc, &ione );
            else
               zgebr2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ),
                        &ione, &ione, dotc, &ione, &myrow, &ixcol );
         }
      }
      else                  /* Y is distributed over a process column */
      {
         lcmp = lcm / nprow;
         nz = (*jx-1) % desc_X[NB_];
         nn = *n + nz;
         tmp1 = nn / desc_X[MB_];
         np = numroc_( &nn, desc_Y+MB_, &myrow, &iyrow, &nprow );
         np0 = MYROC0( tmp1, nn, desc_Y[MB_], nprow );
         tmp1 = np0 / desc_Y[MB_];
         wksz = MYROC0( tmp1, np0, desc_Y[MB_], lcmp );
         wksz = np + wksz;

         buff = (complex16 *)getpbbuf( "PZDOTC", wksz*sizeof(complex16) );

         if( myrow == iyrow )
            np -= nz;
         pbztrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n,
                   &desc_X[NB_], &nz, &X[iix-1+(jjx-1)*desc_X[LLD_]],
                   &desc_X[LLD_], &zero, buff, &ione, &ixrow, &ixcol,
                   &iyrow, &iycol, buff+np );
         if( mycol == iycol )
         {
            cctop = ptop( COMBINE, COLUMN, TOPGET );
            zzdotc_( &np, dotc, buff, &ione,
                     &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy );
            zgsum2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cctop ),
                      &ione, &ione, dotc, &ione, &mone, &mycol );
         }
         if( myrow == ixrow )
         {
            rbtop = ptop( BROADCAST, ROW, TOPGET );
            if( mycol == iycol )
               zgebs2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ),
                         &ione, &ione, dotc, &ione );
            else
               zgebr2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ),
                         &ione, &ione, dotc, &ione, &myrow, &iycol );
         }
      }
   }
}
SHAR_EOF
fi # end of overwriting check
if test -f 'pzdotu.c'
then
	echo shar: will not over-write existing file "'pzdotu.c'"
else
cat << "SHAR_EOF" > 'pzdotu.c'
/* ---------------------------------------------------------------------
*
*  Mark R. Fahey
*  August 2000
*  This is a slightly modified version of pzaxpy_ from ScaLAPACK 1.0
*  which fixes a bug in the incx=1 and incy=1 case.
*
*  ---------------------------------------------------------------------
*/
/*
*  Include files
*/
#include "pblas.h"

void pzdotu_( n, dotu, X, ix, jx, desc_X, incx, Y, iy, jy, desc_Y,
              incy )
/*
*  .. Scalar Arguments ..
*/
   int         * incx, * incy, * ix, * iy, * jx, * jy, * n;
   complex16   * dotu;
/* ..
*  .. Array Arguments ..
*/
   int         desc_X[], desc_Y[];
   complex16   X[], Y[];
{
/*
*  Purpose
*  =======
*
*  PZDOTU forms the dot product of two distributed vectors,
*
*     dotu := sub( X )**T * sub( Y )
*
*  where sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X,
*                         X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X,
*
*        sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y,
*                         Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y.
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclicly distributed array.
*  Such a global array has an associated description vector descA.
*  In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DT_A   (global) descA[ DT_ ]   The descriptor type.  In this case,
*                                 DT_A = 1.
*  CTXT_A (global) descA[ CTXT_ ] The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) descA[ M_ ]    The number of rows in the global
*                                 array A.
*  N_A    (global) descA[ N_ ]    The number of columns in the global
*                                 array A.
*  MB_A   (global) descA[ MB_ ]   The blocking factor used to distribu-
*                                 te the rows of the array.
*  NB_A   (global) descA[ NB_ ]   The blocking factor used to distribu-
*                                 te the columns of the array.
*  RSRC_A (global) descA[ RSRC_ ] The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) descA[ CSRC_ ] The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  descA[ LLD_ ]  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Because vectors may be seen as particular matrices, a distributed
*  vector is considered to be a distributed matrix.
*
*  If INCX = M_X and INCY = M_Y, NB_X must be equal to NB_Y, and the
*  process column having the first entries of sub( Y ) must also contain
*  the first entries of sub( X ). Moreover, the quantity
*  MOD( JX-1, NB_X ) must be equal to MOD( JY-1, NB_Y ).
*
*  If INCX = M_X, INCY = 1 and INCY <> M_Y, NB_X must be equal to MB_Y.
*  Moreover, the quantity MOD( JX-1, NB_X ) must be equal to
*  MOD( IY-1, MB_Y ).
*
*  If INCX = 1, INCX <> M_X and INCY = M_Y, MB_X must be equal to NB_Y.
*  Moreover, the quantity MOD( IX-1, MB_X ) must be equal to
*  MOD( JY-1, NB_Y ).
*
*  If INCX = 1, INCX <> M_X, INCY = 1 and INCY <> M_Y, MB_X must be
*  equal to MB_Y, and the process row having the first entries of
*  sub( Y ) must also contain the first entries of sub( X ). Moreover,
*  the quantity MOD( IX-1, MB_X ) must be equal to MOD( IY-1, MB_Y ).
*
*
*  Parameters
*  ==========
*
*  N       (global input) pointer to INTEGER
*          The length of the distributed vectors to be multiplied.
*          N >= 0.
*
*  DOTU    (local output) pointer to COMPLEX*16
*          The dot product of sub( X ) and sub( Y ) only in their scope.
*
*  X       (local input) COMPLEX*16 array containing the local
*          pieces of a distributed matrix of dimension of at least
*              ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) )
*          This array contains the entries of the distributed vector
*          sub( X ).
*
*  IX      (global input) pointer to INTEGER
*          The global row index of the submatrix of the distributed
*          matrix X to operate on.
*
*  JX      (global input) pointer to INTEGER
*          The global column index of the submatrix of the distributed
*          matrix X to operate on.
*
*  DESCX   (global and local input) INTEGER array of dimension 8.
*          The array descriptor of the distributed matrix X.
*
*  INCX    (global input) pointer to INTEGER
*          The global increment for the elements of X. Only two values
*          of INCX are supported in this version, namely 1 and M_X.
*
*  Y       (local input) COMPLEX*16 array containing the local
*          pieces of a distributed matrix of dimension of at least
*              ( (JY-1)*M_Y + IY + ( N - 1 )*abs( INCY ) )
*          This array contains the entries of the distributed vector
*          sub( Y ).
*
*  IY      (global input) pointer to INTEGER
*          The global row index of the submatrix of the distributed
*          matrix Y to operate on.
*
*  JY      (global input) pointer to INTEGER
*          The global column index of the submatrix of the distributed
*          matrix Y to operate on.
*
*  DESCY   (global and local input) INTEGER array of dimension 8.
*          The array descriptor of the distributed matrix Y.
*
*  INCY    (global input) pointer to INTEGER
*          The global increment for the elements of Y. Only two values
*          of INCY are supported in this version, namely 1 and M_Y.
*
*  =====================================================================
*
*  .. Local Scalars ..
*/
   char        * cbtop, * cctop, * rbtop, * rctop;
   int         ictxt, iix, iiy, info, ixcol, ixrow, iycol, iyrow, jjx,
               jjy, lcm, lcmp, mone=-1, mycol, myrow, nn, np, np0,
               nprow, npcol, nq, nz, ione=1, tmp1, wksz;
   complex16   xwork[1], ywork[1], zero;
/* ..
*  .. PBLAS Buffer ..
*/
   complex16   * buff;
/* ..
*  .. External Functions ..
*/
   void        blacs_gridinfo_();
   void        zgebr2d_();
   void        zgebs2d_();
   void        zgerv2d_();
   void        zgesd2d_();
   void        zgsum2d_();
   void        pbchkvect();
   void        pberror_();
   char        * getpbbuf();
   char        * ptop();
   F_VOID_FCT  pbztrnv_();
   F_VOID_FCT  zzdotu_();
   F_INTG_FCT  ilcm_();
/* ..
*  .. Executable Statements ..
*
*  Get grid parameters
*/
   ictxt = desc_X[CTXT_];
   blacs_gridinfo_( &ictxt, &nprow, &npcol, &myrow, &mycol );
/*
*  Test the input parameters
*/
   info = 0;
   if( nprow == -1 )
      info = -(600+CTXT_+1);
   else
   {
      pbchkvect( *n, 1, *ix, *jx, desc_X, *incx, 6, &iix, &jjx,
                 &ixrow, &ixcol, nprow, npcol, myrow, mycol, &info );
      pbchkvect( *n, 1, *iy, *jy, desc_Y, *incy, 11, &iiy, &jjy,
                 &iyrow, &iycol, nprow, npcol, myrow, mycol, &info );

      if( info == 0 )
      {
         if( *n != 1 )
         {
            if( *incx == desc_X[M_] )
            {                 /* X is distributed along a process row */
               if( *incy == desc_Y[M_] )
               {               /* Y is distributed over a process row */
                  if( ( ixcol != iycol ) ||
                      ( ( (*jx-1) % desc_X[NB_] ) !=
                        ( (*jy-1) % desc_Y[NB_] ) ) )
                     info = -10;
                  else if( desc_Y[NB_] != desc_X[NB_] )
                     info = -(1100+NB_+1);
               }
               else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
               {            /* Y is distributed over a process column */
                  if( ( (*jx-1) % desc_X[NB_] ) != ( (*iy-1) % desc_Y[MB_] ) )
                     info = -9;
                  else if( desc_Y[MB_] != desc_X[NB_] )
                     info = -(1100+MB_+1);
               }
               else
               {
                  info = -12;
               }
            }
            else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) )
            {              /* X is distributed along a process column */
               if( *incy == desc_Y[M_] )
               {                  /* Y is distributed over a process row */
                  if( ( (*ix-1) % desc_X[MB_] ) != ( (*jy-1) % desc_Y[NB_] ) )
                     info = -10;
                  else if( desc_Y[NB_] != desc_X[MB_] )
                     info = -(1100+NB_+1);
               }
               else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
               {            /* Y is distributed over a process column */
                  if( ( ixrow != iyrow ) ||
                      ( ( (*ix-1) % desc_X[MB_] ) !=
                        ( (*iy-1) % desc_Y[MB_] ) ) )
                     info = -9;
                  else if( desc_Y[MB_] != desc_X[MB_] )
                     info = -(1100+MB_+1);
               }
               else
               {
                  info = -12;
               }
            }
            else
            {
               info = -7;
            }
         }
         if( ictxt != desc_Y[CTXT_] )
            info = -(1100+CTXT_+1);
      }
   }
   if( info )
   {
      pberror_( &ictxt, "PZDOTU", &info );
      return;
   }
/*
*  Quick return if possible.
*/
   dotu->re = ZERO;
   dotu->im = ZERO;
   zero.re  = ZERO;
   zero.im  = ZERO;
   if( *n == 0 ) return;
/*
*  dot <- x^{t} * y
*/
   if( *n == 1 )
   {
      if( ( myrow == ixrow ) && ( mycol == ixcol ) )
      {
         buff = &X[iix-1+(jjx-1)*desc_X[LLD_]];
         if( ( myrow != iyrow ) || ( mycol != iycol ) )
         {
            zgesd2d_( &ictxt, n, n, buff, n, &iyrow, &iycol );
            zgerv2d_( &ictxt, n, n, ywork, n, &iyrow, &iycol );
         }
         else
            *ywork = Y[iiy-1+(jjy-1)*desc_Y[LLD_]];
         zzdotu_( n, dotu, buff, n, ywork, n );
      }
      else if( ( myrow == iyrow ) && ( mycol == iycol ) )
      {
         zgesd2d_( &ictxt, n, n, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], n,
                   &ixrow, &ixcol );
         zgerv2d_( &ictxt, n, n, xwork, n, &ixrow, &ixcol );
         zzdotu_( n, dotu, xwork, n,
                  &Y[iiy-1+(jjx-1)*desc_X[LLD_]], n );
      }

      if( ( *incx == desc_X[M_] ) && ( desc_X[M_] != 1 ) )
      {
         if( myrow == ixrow )
         {
            rbtop = ptop( BROADCAST, ROW, TOPGET );
            if( mycol == ixcol )
            {
               zgebs2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ),
                         &ione, &ione, dotu, &ione );
            }
            else
            {
               zgebr2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ),
                         &ione, &ione, dotu, &ione, &myrow, &ixcol );
            }
         }
      }
      else if( ( *incx == 1 ) && ( desc_X[M_] != 1 ) )
      {
         if( mycol == ixcol )
         {
            cbtop = ptop( BROADCAST, COLUMN, TOPGET );
            if( myrow == ixrow )
            {
               zgebs2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cbtop ),
                         &ione, &ione, dotu, &ione );
            }
            else
            {
               zgebr2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cbtop ),
                         &ione, &ione, dotu, &ione, &ixrow, &mycol );
            }
         }
      }

      if( ( *incy == desc_Y[M_] ) && ( desc_Y[M_] != 1 ) )
      {
         if( myrow == iyrow )
         {
            rbtop = ptop( BROADCAST, ROW, TOPGET );
            if( mycol == iycol )
            {
               zgebs2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ),
                         &ione, &ione, dotu, &ione );
            }
            else
            {
               zgebr2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ),
                         &ione, &ione, dotu, &ione, &myrow, &iycol );
            }
         }
      }
      else if( ( *incy == 1 ) && ( desc_Y[M_] != 1 ) )
      {
         if( mycol == iycol )
         {
            cbtop = ptop( BROADCAST, COLUMN, TOPGET );
            if( myrow == iyrow )
            {
               zgebs2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cbtop ),
                         &ione, &ione, dotu, &ione );
            }
            else
            {
               zgebr2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cbtop ),
                         &ione, &ione, dotu, &ione, &iyrow, &mycol );
            }
         }
      }
      return;
   }

   if( ( *incx == desc_X[M_] ) && ( *incy == desc_Y[M_] ) )
   {               /* X and Y are both distributed over a process row */
      nz = (*jx-1) % desc_Y[NB_];
      nn = *n + nz;
      nq = numroc_( &nn, &desc_X[NB_], &mycol, &ixcol, &npcol );
      if( mycol == ixcol )
         nq -= nz;
      if( ixrow == iyrow )
      {
         if( myrow == ixrow )
         {
            rctop = ptop( COMBINE, ROW, TOPGET );
            zzdotu_( &nq, dotu,
                     &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
                     &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_] );
            zgsum2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rctop ), &ione,
                      &ione, dotu, &ione, &mone, &mycol );
         }
      }
      else
      {
         if( myrow == ixrow )
         {
            rctop = ptop( COMBINE, ROW, TOPGET );
            zgesd2d_( &ictxt, &ione, &nq,
                      &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
                      &iyrow, &mycol );
            buff = (complex16 *)getpbbuf( "PZDOTU", nq*sizeof(complex16) );
            zgerv2d_( &ictxt, &nq, &ione, buff, &ione, &ixrow,
                      &mycol );
            zzdotu_( &nq, dotu,
                     &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
                     buff, &ione );
            zgsum2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rctop ), &ione,
                      &ione, dotu, &ione, &mone, &mycol );
         }
         else if( myrow == iyrow )
         {
            rctop = ptop( COMBINE, ROW, TOPGET );
            zgesd2d_( &ictxt, &ione, &nq,
                      &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_],
                      &ixrow, &mycol );
            buff = (complex16 *)getpbbuf( "PZDOTU", nq*sizeof(complex16) );
            zgerv2d_( &ictxt, &nq, &ione, buff, &ione, &ixrow,
                      &mycol );
            zzdotu_( &nq, dotu,
                     buff, &ione,
                     &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_] );
            zgsum2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rctop ), &ione,
                      &ione, dotu, &ione, &mone, &mycol );
         }
      }
   }
   else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) &&
            ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
   {            /* X and Y are both distributed over a process column */
      nz = (*ix-1) % desc_X[MB_];
      nn = *n + nz;
      np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow );
      if( myrow == ixrow )
         np -= nz;
      if( ixcol == iycol )
      {
         if( mycol == ixcol )
         {
            cctop = ptop( COMBINE, COLUMN, TOPGET );
            zzdotu_( &np, dotu,
                     &X[iix-1+(jjx-1)*desc_X[LLD_]], incx,
                     &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy );
            zgsum2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cctop ),
                      &ione, &ione, dotu, &ione, &mone, &mycol );
         }
      }
      else
      {
         if( mycol == ixcol )
         {
            cctop = ptop( COMBINE, COLUMN, TOPGET );
            zgesd2d_( &ictxt, &np, &ione,
                      &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
                      &myrow, &iycol );
            buff = (complex16 *)getpbbuf( "PZDOTU", np*sizeof(complex16) );
            zgerv2d_( &ictxt, &np, &ione, buff, &ione,
                      &myrow, &iycol );
            zzdotu_( &np, dotu,
                     &X[iix-1+(jjx-1)*desc_X[LLD_]], incx,
                     buff, &ione );
            zgsum2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cctop ),
                      &ione, &ione, dotu, &ione, &mone, &mycol );
         }
         else if( mycol == iycol )
         {
            cctop = ptop( COMBINE, COLUMN, TOPGET );
            zgesd2d_( &ictxt, &np, &ione,
                      &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_],
                      &myrow, &ixcol );
            buff = (complex16 *)getpbbuf( "PZDOTU", np*sizeof(complex16) );
            zgerv2d_( &ictxt, &np, &ione, buff, &ione,
                      &myrow, &ixcol );
            zzdotu_( &np, dotu,
                     buff, &ione,
                     &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy );
            zgsum2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cctop ),
                      &ione, &ione, dotu, &ione, &mone, &mycol );
         }
      }
   }
   else       /* X and Y are not distributed along the same direction */
   {
      lcm = ilcm_( &nprow, &npcol );
      if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) )
      {                     /* X is distributed over a process column */
         lcmp = lcm / nprow;
         nz = (*jy-1) % desc_Y[NB_];
         nn = *n + nz;
         tmp1 = nn / desc_Y[MB_];
         np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow );
         np0 = MYROC0( tmp1, nn, desc_X[MB_], nprow );
         tmp1 = np0 / desc_X[MB_];
         wksz = MYROC0( tmp1, np0, desc_X[MB_], lcmp );
         wksz = np + wksz;

         buff = (complex16 *)getpbbuf( "PZDOTU", wksz*sizeof(complex16) );

         if( mycol == iycol )
            jjy -= nz;
         if( myrow == ixrow )
            np -= nz;
         pbztrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n,
                   &desc_Y[NB_], &nz, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]],
                   &desc_Y[LLD_], &zero, buff, &ione, &iyrow, &iycol,
                   &ixrow, &ixcol, buff+np );
         if( mycol == ixcol )
         {
            cctop = ptop( COMBINE, COLUMN, TOPGET );
            zzdotu_( &np, dotu, &X[iix-1+(jjx-1)*desc_X[LLD_]],
                     incx, buff, &ione );
            zgsum2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cctop ),
                      &ione, &ione, dotu, &ione, &mone, &mycol );
         }
         if( myrow == iyrow )
         {
            rbtop = ptop( BROADCAST, ROW, TOPGET );
            if( mycol == ixcol )
               zgebs2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ),
                        &ione, &ione, dotu, &ione );
            else
               zgebr2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ),
                        &ione, &ione, dotu, &ione, &myrow, &ixcol );
         }
      }
      else                  /* Y is distributed over a process column */
      {
         lcmp = lcm / nprow;
         nz = (*jx-1) % desc_X[NB_];
         nn = *n + nz;
         tmp1 = nn / desc_X[MB_];
         np = numroc_( &nn, desc_Y+MB_, &myrow, &iyrow, &nprow );
         np0 = MYROC0( tmp1, nn, desc_Y[MB_], nprow );
         tmp1 = np0 / desc_Y[MB_];
         wksz = MYROC0( tmp1, np0, desc_Y[MB_], lcmp );
         wksz = np + wksz;

         buff = (complex16 *)getpbbuf( "PZDOTU", wksz*sizeof(complex16) );

         if( myrow == iyrow )
            np -= nz;
         pbztrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n,
                   &desc_X[NB_], &nz, &X[iix-1+(jjx-1)*desc_X[LLD_]],
                   &desc_X[LLD_], &zero, buff, &ione, &ixrow, &ixcol,
                   &iyrow, &iycol, buff+np );
         if( mycol == iycol )
         {
            cctop = ptop( COMBINE, COLUMN, TOPGET );
            zzdotu_( &np, dotu, buff, &ione,
                     &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy );
            zgsum2d_( &ictxt, C2F_CHAR( COLUMN ), C2F_CHAR( cctop ),
                      &ione, &ione, dotu, &ione, &mone, &mycol );
         }
         if( myrow == ixrow )
         {
            rbtop = ptop( BROADCAST, ROW, TOPGET );
            if( mycol == iycol )
               zgebs2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ),
                         &ione, &ione, dotu, &ione );
            else
               zgebr2d_( &ictxt, C2F_CHAR( ROW ), C2F_CHAR( rbtop ),
                         &ione, &ione, dotu, &ione, &myrow, &iycol );
         }
      }
   }
}
SHAR_EOF
fi # end of overwriting check
if test -f 'pzlaconsb.f'
then
	echo shar: will not over-write existing file "'pzlaconsb.f'"
else
cat << "SHAR_EOF" > 'pzlaconsb.f'
      SUBROUTINE PZLACONSB( A, DESCA, I, L, M, H44, H33, H43H34, BUF,
     $                      LWORK )
*
*  Mark R. Fahey
*  May 28, 1999
*
*
*     .. Scalar Arguments ..
      INTEGER            I, L, LWORK, M
      COMPLEX*16         H33, H43H34, H44
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * )
      COMPLEX*16         A( * ), BUF( * )
*     ..
*
*  Purpose
*  =======
*
*  PZLACONSB looks for two consecutive small subdiagonal elements by
*     seeing the effect of starting a double shift QR iteration
*     given by H44, H33, & H43H34 and see if this would make a
*     subdiagonal negligible.
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclicly distributed array.
*  Such a global array has an associated description vector DESCA.
*  In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
*                                 DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) DESCA( M_ )    The number of rows in the global
*                                 array A.
*  N_A    (global) DESCA( N_ )    The number of columns in the global
*                                 array A.
*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
*                                 the rows of the array.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
*                                 the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Arguments
*  =========
*
*  A       (global input) COMPLEX*16 array, dimension
*          (DESCA(LLD_),*)
*          On entry, the Hessenberg matrix whose tridiagonal part is
*          being scanned.
*          Unchanged on exit.
*
*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix A.
*
*  I       (global input) INTEGER
*          The global location of the bottom of the unreduced
*          submatrix of A.
*          Unchanged on exit.
*
*  L       (global input) INTEGER
*          The global location of the top of the unreduced submatrix
*          of A.
*          Unchanged on exit.
*
*  M       (global output) INTEGER
*          On exit, this yields the starting location of the QR double
*          shift.  This will satisfy: L <= M  <= I-2.
*
*  H44
*  H33
*  H43H34  (global input) COMPLEX*16
*          These three values are for the double shift QR iteration.
*
*  BUF     (local output) COMPLEX*16 array of size LWORK.
*
*  LWORK   (global input) INTEGER
*          On exit, LWORK is the size of the work buffer.
*          This must be at least 7*Ceil( Ceil( (I-L)/HBL ) /
*                                        LCM(NPROW,NPCOL) )
*          Here LCM is least common multiple, and NPROWxNPCOL is the
*          logical grid size.
*
*  Logic:
*  ======
*
*        Two consecutive small subdiagonal elements will stall
*        convergence of a double shift if their product is small
*        relatively even if each is not very small.  Thus it is
*        necessary to scan the "tridiagonal portion of the matrix."  In
*        the LAPACK algorithm ZLAHQR, a loop of M goes from I-2 down to
*        L and examines
*        H(m,m),H(m+1,m+1),H(m+1,m),H(m,m+1),H(m-1,m-1),H(m,m-1), and
*        H(m+2,m-1).  Since these elements may be on separate
*        processors, the first major loop (10) goes over the tridiagonal
*        and has each node store whatever values of the 7 it has that
*        the node owning H(m,m) does not.  This will occur on a border
*        and can happen in no more than 3 locations per block assuming
*        square blocks.  There are 5 buffers that each node stores these
*        values:  a buffer to send diagonally down and right, a buffer
*        to send up, a buffer to send left, a buffer to send diagonally
*        up and left and a buffer to send right.  Each of these buffers
*        is actually stored in one buffer BUF where BUF(ISTR1+1) starts
*        the first buffer, BUF(ISTR2+1) starts the second, etc..  After
*        the values are stored, if there are any values that a node
*        needs, they will be sent and received.  Then the next major
*        loop passes over the data and searches for two consecutive
*        small subdiagonals.
*
*  Notes:
*
*     This routine does a global maximum and must be called by all
*     processes.
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
     $                   LLD_, MB_, M_, NB_, N_, RSRC_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
*     ..
*     .. Local Scalars ..
      INTEGER            CONTXT, DOWN, HBL, IBUF1, IBUF2, IBUF3, IBUF4,
     $                   IBUF5, ICOL1, II, IRCV1, IRCV2, IRCV3, IRCV4,
     $                   IRCV5, IROW1, ISRC, ISTR1, ISTR2, ISTR3, ISTR4,
     $                   ISTR5, JJ, JSRC, LDA, LEFT, MODKM1, MYCOL,
     $                   MYROW, NPCOL, NPROW, NUM, RIGHT, UP
      DOUBLE PRECISION   S, TST1, ULP
      COMPLEX*16         CDUM, H00, H10, H11, H12, H21, H22, H33S, H44S, 
     $                   V1, V2, V3
*     ..
*     .. External Functions ..
      INTEGER            ILCM
      DOUBLE PRECISION   PDLAMCH
      EXTERNAL           ILCM, PDLAMCH
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, IGAMX2D, INFOG2L, PXERBLA, 
     $                   ZGERV2D, ZGESD2D
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          ABS, DBLE, DIMAG, MOD
*     ..
*     .. Statement Functions ..
      DOUBLE PRECISION   CABS1
*     ..
*     .. Statement Function definitions ..
      CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )
*     ..
*     .. Executable Statements ..
*
      HBL = DESCA( MB_ )
      CONTXT = DESCA( CTXT_ )
      LDA = DESCA( LLD_ )
      ULP = PDLAMCH( CONTXT, 'PRECISION' )
      CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL )
      LEFT = MOD( MYCOL+NPCOL-1, NPCOL )
      RIGHT = MOD( MYCOL+1, NPCOL )
      UP = MOD( MYROW+NPROW-1, NPROW )
      DOWN = MOD( MYROW+1, NPROW )
      NUM = NPROW*NPCOL
*
*     BUFFER1 starts at BUF(ISTR1+1) and will contain IBUF1 elements
*     BUFFER2 starts at BUF(ISTR2+1) and will contain IBUF2 elements
*     BUFFER3 starts at BUF(ISTR3+1) and will contain IBUF3 elements
*     BUFFER4 starts at BUF(ISTR4+1) and will contain IBUF4 elements
*     BUFFER5 starts at BUF(ISTR5+1) and will contain IBUF5 elements
*
      ISTR1 = 0
      ISTR2 = ( ( I-L-1 ) / HBL )
      IF( ISTR2*HBL.LT.( I-L-1 ) )
     $   ISTR2 = ISTR2 + 1
      II = ISTR2 / ILCM( NPROW, NPCOL )
      IF( II*ILCM( NPROW, NPCOL ).LT.ISTR2 ) THEN
         ISTR2 = II + 1
      ELSE
         ISTR2 = II
      END IF
      IF( LWORK.LT.7*ISTR2 ) THEN
         CALL PXERBLA( CONTXT, 'PZLACONSB', 10 )
         RETURN
      END IF
      ISTR3 = 3*ISTR2
      ISTR4 = ISTR3 + ISTR2
      ISTR5 = ISTR3 + ISTR3
      CALL INFOG2L( I-2, I-2, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW1,
     $              ICOL1, II, JJ )
      MODKM1 = MOD( I-3+HBL, HBL )
*
*     Copy our relevant pieces of triadiagonal that we owe into
*     5 buffers to send to whomever owns H(M,M) as M moves diagonally
*     up the tridiagonal
*
      IBUF1 = 0
      IBUF2 = 0
      IBUF3 = 0
      IBUF4 = 0
      IBUF5 = 0
      IRCV1 = 0
      IRCV2 = 0
      IRCV3 = 0
      IRCV4 = 0
      IRCV5 = 0
      DO 10 M = I - 2, L, -1
         IF( ( MODKM1.EQ.0 ) .AND. ( DOWN.EQ.II ) .AND.
     $       ( RIGHT.EQ.JJ ) .AND. ( M.GT.L ) ) THEN
*
*           We must pack H(M-1,M-1) and send it diagonal down
*
            IF( ( DOWN.NE.MYROW ) .OR. ( RIGHT.NE.MYCOL ) ) THEN
               CALL INFOG2L( M-1, M-1, DESCA, NPROW, NPCOL, MYROW,
     $                       MYCOL, IROW1, ICOL1, ISRC, JSRC )
               IBUF1 = IBUF1 + 1
               BUF( ISTR1+IBUF1 ) = A( ( ICOL1-1 )*LDA+IROW1 )
            END IF
         END IF
         IF( ( MODKM1.EQ.0 ) .AND. ( MYROW.EQ.II ) .AND.
     $       ( RIGHT.EQ.JJ ) .AND. ( M.GT.L ) ) THEN
*
*           We must pack H(M  ,M-1) and send it right
*
            IF( NPCOL.GT.1 ) THEN
               CALL INFOG2L( M, M-1, DESCA, NPROW, NPCOL, MYROW, MYCOL,
     $                       IROW1, ICOL1, ISRC, JSRC )
               IBUF5 = IBUF5 + 1
               BUF( ISTR5+IBUF5 ) = A( ( ICOL1-1 )*LDA+IROW1 )
            END IF
         END IF
         IF( ( MODKM1.EQ.HBL-1 ) .AND. ( UP.EQ.II ) .AND.
     $       ( MYCOL.EQ.JJ ) ) THEN
*
*           We must pack H(M+1,M) and send it up
*
            IF( NPROW.GT.1 ) THEN
               CALL INFOG2L( M+1, M, DESCA, NPROW, NPCOL, MYROW, MYCOL,
     $                       IROW1, ICOL1, ISRC, JSRC )
               IBUF2 = IBUF2 + 1
               BUF( ISTR2+IBUF2 ) = A( ( ICOL1-1 )*LDA+IROW1 )
            END IF
         END IF
         IF( ( MODKM1.EQ.HBL-1 ) .AND. ( MYROW.EQ.II ) .AND.
     $       ( LEFT.EQ.JJ ) ) THEN
*
*           We must pack H(M  ,M+1) and send it left
*
            IF( NPCOL.GT.1 ) THEN
               CALL INFOG2L( M, M+1, DESCA, NPROW, NPCOL, MYROW, MYCOL,
     $                       IROW1, ICOL1, ISRC, JSRC )
               IBUF3 = IBUF3 + 1
               BUF( ISTR3+IBUF3 ) = A( ( ICOL1-1 )*LDA+IROW1 )
            END IF
         END IF
         IF( ( MODKM1.EQ.HBL-1 ) .AND. ( UP.EQ.II ) .AND.
     $       ( LEFT.EQ.JJ ) ) THEN
*
*           We must pack H(M+1,M+1) & H(M+2,M+1) and send it
*           diagonally up
*
            IF( ( UP.NE.MYROW ) .OR. ( LEFT.NE.MYCOL ) ) THEN
               CALL INFOG2L( M+1, M+1, DESCA, NPROW, NPCOL, MYROW,
     $                       MYCOL, IROW1, ICOL1, ISRC, JSRC )
               IBUF4 = IBUF4 + 2
               BUF( ISTR4+IBUF4-1 ) = A( ( ICOL1-1 )*LDA+IROW1 )
               BUF( ISTR4+IBUF4 ) = A( ( ICOL1-1 )*LDA+IROW1+1 )
            END IF
         END IF
         IF( ( MODKM1.EQ.HBL-2 ) .AND. ( UP.EQ.II ) .AND.
     $       ( MYCOL.EQ.JJ ) ) THEN
*
*           We must pack H(M+2,M+1) and send it up
*
            IF( NPROW.GT.1 ) THEN
               CALL INFOG2L( M+2, M+1, DESCA, NPROW, NPCOL, MYROW,
     $                       MYCOL, IROW1, ICOL1, ISRC, JSRC )
               IBUF2 = IBUF2 + 1
               BUF( ISTR2+IBUF2 ) = A( ( ICOL1-1 )*LDA+IROW1 )
            END IF
         END IF
*
*        Add up the receives
*
         IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN
            IF( ( MODKM1.EQ.0 ) .AND. ( M.GT.L ) .AND.
     $          ( ( NPROW.GT.1 ) .OR. ( NPCOL.GT.1 ) ) ) THEN
*
*              We must receive H(M-1,M-1) from diagonal up
*
               IRCV1 = IRCV1 + 1
            END IF
            IF( ( MODKM1.EQ.0 ) .AND. ( NPCOL.GT.1 ) .AND. ( M.GT.L ) )
     $           THEN
*
*              We must receive H(M  ,M-1) from left
*
               IRCV5 = IRCV5 + 1
            END IF
            IF( ( MODKM1.EQ.HBL-1 ) .AND. ( NPROW.GT.1 ) ) THEN
*
*              We must receive H(M+1,M  ) from down
*
               IRCV2 = IRCV2 + 1
            END IF
            IF( ( MODKM1.EQ.HBL-1 ) .AND. ( NPCOL.GT.1 ) ) THEN
*
*              We must receive H(M  ,M+1) from right
*
               IRCV3 = IRCV3 + 1
            END IF
            IF( ( MODKM1.EQ.HBL-1 ) .AND.
     $          ( ( NPROW.GT.1 ) .OR. ( NPCOL.GT.1 ) ) ) THEN
*
*              We must receive H(M+1:M+2,M+1) from diagonal down
*
               IRCV4 = IRCV4 + 2
            END IF
            IF( ( MODKM1.EQ.HBL-2 ) .AND. ( NPROW.GT.1 ) ) THEN
*
*              We must receive H(M+2,M+1) from down
*
               IRCV2 = IRCV2 + 1
            END IF
         END IF
*
*        Possibly change owners (occurs only when MOD(M-1,HBL) = 0)
*
         IF( MODKM1.EQ.0 ) THEN
            II = II - 1
            JJ = JJ - 1
            IF( II.LT.0 )
     $         II = NPROW - 1
            IF( JJ.LT.0 )
     $         JJ = NPCOL - 1
         END IF
         MODKM1 = MODKM1 - 1
         IF( MODKM1.LT.0 )
     $      MODKM1 = HBL - 1
   10 CONTINUE
*
*
*     Send data on to the appropriate node if there is any data to send
*
      IF( IBUF1.GT.0 ) THEN
         CALL ZGESD2D( CONTXT, IBUF1, 1, BUF( ISTR1+1 ), IBUF1, DOWN,
     $                 RIGHT )
      END IF
      IF( IBUF2.GT.0 ) THEN
         CALL ZGESD2D( CONTXT, IBUF2, 1, BUF( ISTR2+1 ), IBUF2, UP,
     $                 MYCOL )
      END IF
      IF( IBUF3.GT.0 ) THEN
         CALL ZGESD2D( CONTXT, IBUF3, 1, BUF( ISTR3+1 ), IBUF3, MYROW,
     $                 LEFT )
      END IF
      IF( IBUF4.GT.0 ) THEN
         CALL ZGESD2D( CONTXT, IBUF4, 1, BUF( ISTR4+1 ), IBUF4, UP,
     $                 LEFT )
      END IF
      IF( IBUF5.GT.0 ) THEN
         CALL ZGESD2D( CONTXT, IBUF5, 1, BUF( ISTR5+1 ), IBUF5, MYROW,
     $                 RIGHT )
      END IF
*
*     Receive appropriate data if there is any
*
      IF( IRCV1.GT.0 ) THEN
         CALL ZGERV2D( CONTXT, IRCV1, 1, BUF( ISTR1+1 ), IRCV1, UP,
     $                 LEFT )
      END IF
      IF( IRCV2.GT.0 ) THEN
         CALL ZGERV2D( CONTXT, IRCV2, 1, BUF( ISTR2+1 ), IRCV2, DOWN,
     $                 MYCOL )
      END IF
      IF( IRCV3.GT.0 ) THEN
         CALL ZGERV2D( CONTXT, IRCV3, 1, BUF( ISTR3+1 ), IRCV3, MYROW,
     $                 RIGHT )
      END IF
      IF( IRCV4.GT.0 ) THEN
         CALL ZGERV2D( CONTXT, IRCV4, 1, BUF( ISTR4+1 ), IRCV4, DOWN,
     $                 RIGHT )
      END IF
      IF( IRCV5.GT.0 ) THEN
         CALL ZGERV2D( CONTXT, IRCV5, 1, BUF( ISTR5+1 ), IRCV5, MYROW,
     $                 LEFT )
      END IF
*
*     Start main loop
*
      IBUF1 = 0
      IBUF2 = 0
      IBUF3 = 0
      IBUF4 = 0
      IBUF5 = 0
      CALL INFOG2L( I-2, I-2, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW1,
     $              ICOL1, II, JJ )
      MODKM1 = MOD( I-3+HBL, HBL )
      IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) .AND.
     $    ( MODKM1.NE.HBL-1 ) ) THEN
         CALL INFOG2L( I-2, I-1, DESCA, NPROW, NPCOL, MYROW, MYCOL,
     $                 IROW1, ICOL1, ISRC, JSRC )
      END IF
*
*     Look for two consecutive small subdiagonal elements.
*
      DO 20 M = I - 2, L, -1
*
*        Determine the effect of starting the double-shift QR
*        iteration at row M, and see if this would make H(M,M-1)
*        negligible.
*
         IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN
            IF( MODKM1.EQ.0 ) THEN
               H22 = A( ( ICOL1-1 )*LDA+IROW1+1 )
               H11 = A( ( ICOL1-2 )*LDA+IROW1 )
               V3 = A( ( ICOL1-1 )*LDA+IROW1+2 )
               H21 = A( ( ICOL1-2 )*LDA+IROW1+1 )
               H12 = A( ( ICOL1-1 )*LDA+IROW1 )
               IF( M.GT.L ) THEN
                  IF( NUM.GT.1 ) THEN
                     IBUF1 = IBUF1 + 1
                     H00 = BUF( ISTR1+IBUF1 )
                  ELSE
                     H00 = A( ( ICOL1-3 )*LDA+IROW1-1 )
                  END IF
                  IF( NPCOL.GT.1 ) THEN
                     IBUF5 = IBUF5 + 1
                     H10 = BUF( ISTR5+IBUF5 )
                  ELSE
                     H10 = A( ( ICOL1-3 )*LDA+IROW1 )
                  END IF
               END IF
            END IF
            IF( MODKM1.EQ.HBL-1 ) THEN
               CALL INFOG2L( M, M, DESCA, NPROW, NPCOL, MYROW, MYCOL,
     $                       IROW1, ICOL1, ISRC, JSRC )
               H11 = A( ( ICOL1-1 )*LDA+IROW1 )
               IF( NUM.GT.1 ) THEN
                  IBUF4 = IBUF4 + 2
                  H22 = BUF( ISTR4+IBUF4-1 )
                  V3 = BUF( ISTR4+IBUF4 )
               ELSE
                  H22 = A( ICOL1*LDA+IROW1+1 )
                  V3 = A( ( ICOL1+1 )*LDA+IROW1+1 )
               END IF
               IF( NPROW.GT.1 ) THEN
                  IBUF2 = IBUF2 + 1
                  H21 = BUF( ISTR2+IBUF2 )
               ELSE
                  H21 = A( ( ICOL1-1 )*LDA+IROW1+1 )
               END IF
               IF( NPCOL.GT.1 ) THEN
                  IBUF3 = IBUF3 + 1
                  H12 = BUF( ISTR3+IBUF3 )
               ELSE
                  H12 = A( ICOL1*LDA+IROW1 )
               END IF
               IF( M.GT.L ) THEN
                  H00 = A( ( ICOL1-2 )*LDA+IROW1-1 )
                  H10 = A( ( ICOL1-2 )*LDA+IROW1 )
               END IF
*
*              Adjust ICOL1 for next iteration where MODKM1=HBL-2
*
               ICOL1 = ICOL1 + 1
            END IF
            IF( MODKM1.EQ.HBL-2 ) THEN
               H22 = A( ( ICOL1-1 )*LDA+IROW1+1 )
               H11 = A( ( ICOL1-2 )*LDA+IROW1 )
               IF( NPROW.GT.1 ) THEN
                  IBUF2 = IBUF2 + 1
                  V3 = BUF( ISTR2+IBUF2 )
               ELSE
                  V3 = A( ( ICOL1-1 )*LDA+IROW1+2 )
               END IF
               H21 = A( ( ICOL1-2 )*LDA+IROW1+1 )
               H12 = A( ( ICOL1-1 )*LDA+IROW1 )
               IF( M.GT.L ) THEN
                  H00 = A( ( ICOL1-3 )*LDA+IROW1-1 )
                  H10 = A( ( ICOL1-3 )*LDA+IROW1 )
               END IF
            END IF
            IF( ( MODKM1.LT.HBL-2 ) .AND. ( MODKM1.GT.0 ) ) THEN
               H22 = A( ( ICOL1-1 )*LDA+IROW1+1 )
               H11 = A( ( ICOL1-2 )*LDA+IROW1 )
               V3 = A( ( ICOL1-1 )*LDA+IROW1+2 )
               H21 = A( ( ICOL1-2 )*LDA+IROW1+1 )
               H12 = A( ( ICOL1-1 )*LDA+IROW1 )
               IF( M.GT.L ) THEN
                  H00 = A( ( ICOL1-3 )*LDA+IROW1-1 )
                  H10 = A( ( ICOL1-3 )*LDA+IROW1 )
               END IF
            END IF
            H44S = H44 - H11
            H33S = H33 - H11
            V1 = ( H33S*H44S-H43H34 ) / H21 + H12
            V2 = H22 - H11 - H33S - H44S
            S = CABS1( V1 ) + CABS1( V2 ) + CABS1( V3 )
            V1 = V1 / S
            V2 = V2 / S
            V3 = V3 / S
            IF( M.EQ.L )
     $         GO TO 30
            TST1 = CABS1( V1 )*( CABS1( H00 )+CABS1( H11 )+
     $                           CABS1( H22 ) )
            IF( CABS1( H10 )*( CABS1( V2 )+CABS1( V3 ) ).LE.ULP*TST1 )
     $         GO TO 30
*
*           Slide indices diagonally up one for next iteration
*
            IROW1 = IROW1 - 1
            ICOL1 = ICOL1 - 1
         END IF
         IF( M.EQ.L ) THEN
*
*           Stop regardless of which node we are
*
            GO TO 30
         END IF
*
*        Possibly change owners if on border
*
         IF( MODKM1.EQ.0 ) THEN
            II = II - 1
            JJ = JJ - 1
            IF( II.LT.0 )
     $         II = NPROW - 1
            IF( JJ.LT.0 )
     $         JJ = NPCOL - 1
         END IF
         MODKM1 = MODKM1 - 1
         IF( MODKM1.LT.0 )
     $      MODKM1 = HBL - 1
   20 CONTINUE
   30 CONTINUE
*
      CALL IGAMX2D( CONTXT, 'ALL', ' ', 1, 1, M, 1, L, L, -1, -1, -1 )
*
      RETURN
*
*     End of PZLACONSB
*
      END
SHAR_EOF
fi # end of overwriting check
if test -f 'pzlacp3.f'
then
	echo shar: will not over-write existing file "'pzlacp3.f'"
else
cat << "SHAR_EOF" > 'pzlacp3.f'
      SUBROUTINE PZLACP3( M, I, A, DESCA, B, LDB, II, JJ, REV )
*
*  Mark R. Fahey
*  May 28, 1999
*
*     .. Scalar Arguments ..
      INTEGER            I, II, JJ, LDB, M, REV
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * )
      COMPLEX*16         A( * ), B( LDB, * )
*     ..
*
*  Purpose
*  =======
*
*  PZLACP3 is an auxiliary routine that copies from a global parallel
*    array into a local replicated array or vise versa.  Notice that
*    the entire submatrix that is copied gets placed on one node or
*    more.  The receiving node can be specified precisely, or all nodes
*    can receive, or just one row or column of nodes.
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclicly distributed array.
*  Such a global array has an associated description vector DESCA.
*  In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
*                                 DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) DESCA( M_ )    The number of rows in the global
*                                 array A.
*  N_A    (global) DESCA( N_ )    The number of columns in the global
*                                 array A.
*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
*                                 the rows of the array.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
*                                 the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Arguments
*  =========
*
*  M       (global input) INTEGER
*          M is the order of the square submatrix that is copied.
*          M >= 0.
*          Unchanged on exit
*
*  I       (global input) INTEGER
*          A(I,I) is the global location that the copying starts from.
*          Unchanged on exit.
*
*  A       (global input/output) COMPLEX*16 array, dimension
*          (DESCA(LLD_),*)
*          On entry, the parallel matrix to be copied into or from.
*          On exit, if REV=1, the copied data.
*          Unchanged on exit if REV=0.
*
*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix A.
*
*  B       (local input/output) COMPLEX*16 array of size (LDB,M)
*          If REV=0, this is the global portion of the array
*             A(I:I+M-1,I:I+M-1).
*          If REV=1, this is the unchanged on exit.
*
*  LDB     (local input) INTEGER
*          The leading dimension of B.
*
*  II      (global input) INTEGER
*          By using REV 0 & 1, data can be sent out and returned again.
*          If REV=0, then II is destination row index for the node(s)
*             receiving the replicated B.  
*             If II>=0,JJ>=0, then node (II,JJ) receives the data
*             If II=-1,JJ>=0, then all rows in column JJ receive the data
*             If II>=0,JJ=-1, then all cols in row II receive the data
*             If II=-1,JJ=-1, then all nodes receive the data
*          If REV<>0, then II is the source row index for the node(s)
*             sending the replicated B.
*
*  JJ      (global input) INTEGER
*          Similar description as II above
*
*  REV     (global input) INTEGER
*          Use REV = 0 to send global A into locally replicated B 
*             (on node (II,JJ)).
*          Use REV <> 0 to send locally replicated B from node (II,JJ)
*             to its owner (which changes depending on its location in A)
*             into the global A.
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
     $                   LLD_, MB_, M_, NB_, N_, RSRC_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
      COMPLEX*16         ZERO
      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ) )
*     ..
*     .. Local Scalars ..
      INTEGER            COL, CONTXT, HBL, ICOL1, ICOL2, IDI, IDJ, IFIN,
     $                   III, IROW1, IROW2, ISTOP, ISTOPI, ISTOPJ, ITMP,
     $                   JJJ, LDA, MYCOL, MYROW, NPCOL, NPROW, ROW
*     ..
*     .. External Functions ..
      INTEGER            NUMROC
      EXTERNAL           NUMROC
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, INFOG1L, ZGEBR2D, ZGEBS2D, 
     $                   ZGERV2D, ZGESD2D
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MIN, MOD
*     ..
*     .. Executable Statements ..
*
      IF( M.LE.0 )
     $   RETURN
*
      HBL = DESCA( MB_ )
      CONTXT = DESCA( CTXT_ )
      LDA = DESCA( LLD_ )
*
      CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL )
*
      IF( REV.EQ.0 ) THEN
         DO 20 IDI = 1, M
            DO 10 IDJ = 1, M
               B( IDI, IDJ ) = ZERO
   10       CONTINUE
   20    CONTINUE
      END IF
*
      IFIN = I + M - 1
*
      IF( MOD( I+HBL, HBL ).NE.0 ) THEN
         ISTOP = MIN( I+HBL-MOD( I+HBL, HBL ), IFIN )
      ELSE
         ISTOP = I
      END IF
      IDJ = I
      ISTOPJ = ISTOP
      IF( IDJ.LE.IFIN ) THEN
   30    CONTINUE
         IDI = I
         ISTOPI = ISTOP
         IF( IDI.LE.IFIN ) THEN
   40       CONTINUE
            ROW = MOD( ( IDI-1 ) / HBL, NPROW )
            COL = MOD( ( IDJ-1 ) / HBL, NPCOL )
            CALL INFOG1L( IDI, HBL, NPROW, ROW, 0, IROW1, ITMP )
            IROW2 = NUMROC( ISTOPI, HBL, ROW, 0, NPROW )
            CALL INFOG1L( IDJ, HBL, NPCOL, COL, 0, ICOL1, ITMP )
            ICOL2 = NUMROC( ISTOPJ, HBL, COL, 0, NPCOL )
            IF( ( MYROW.EQ.ROW ) .AND. ( MYCOL.EQ.COL ) ) THEN
               IF( ( II.EQ.-1 ) .AND. ( JJ.EQ.-1 ) ) THEN
*
*                 Send the message to everyone
*
                  IF( REV.EQ.0 ) THEN
                     CALL ZGEBS2D( CONTXT, 'All', ' ', IROW2-IROW1+1,
     $                             ICOL2-ICOL1+1, A( ( ICOL1-1 )*LDA+
     $                             IROW1 ), LDA )
                  END IF
               END IF
               IF( ( II.EQ.-1 ) .AND. ( JJ.NE.-1 ) ) THEN
*
*                 Send the message to Column MYCOL which better be JJ
*
                  IF( REV.EQ.0 ) THEN
                     CALL ZGEBS2D( CONTXT, 'Col', ' ', IROW2-IROW1+1,
     $                             ICOL2-ICOL1+1, A( ( ICOL1-1 )*LDA+
     $                             IROW1 ), LDA )
                  END IF
               END IF
               IF( ( II.NE.-1 ) .AND. ( JJ.EQ.-1 ) ) THEN
*
*                 Send the message to Row MYROW which better be II
*
                  IF( REV.EQ.0 ) THEN
                     CALL ZGEBS2D( CONTXT, 'Row', ' ', IROW2-IROW1+1,
     $                             ICOL2-ICOL1+1, A( ( ICOL1-1 )*LDA+
     $                             IROW1 ), LDA )
                  END IF
               END IF
               IF( ( II.NE.-1 ) .AND. ( JJ.NE.-1 ) .AND.
     $             ( ( MYROW.NE.II ) .OR. ( MYCOL.NE.JJ ) ) ) THEN
*
*                 Recv/Send the message to (II,JJ)
*
                  IF( REV.EQ.0 ) THEN
                     CALL ZGESD2D( CONTXT, IROW2-IROW1+1, ICOL2-ICOL1+1,
     $                             A( ( ICOL1-1 )*LDA+IROW1 ), LDA, II,
     $                             JJ )
                  ELSE
                     CALL ZGERV2D( CONTXT, IROW2-IROW1+1, ICOL2-ICOL1+1,
     $                             B( IDI-I+1, IDJ-I+1 ), LDB, II, JJ )
                  END IF
               END IF
               IF( REV.EQ.0 ) THEN
                  DO 60 JJJ = ICOL1, ICOL2
                     DO 50 III = IROW1, IROW2
                        B( IDI+III-IROW1+1-I, IDJ+JJJ-ICOL1+1-I )
     $                     = A( ( JJJ-1 )*LDA+III )
   50                CONTINUE
   60             CONTINUE
               ELSE
                  DO 80 JJJ = ICOL1, ICOL2
                     DO 70 III = IROW1, IROW2
                        A( ( JJJ-1 )*LDA+III ) = B( IDI+III-IROW1+1-I,
     $                     IDJ+JJJ-ICOL1+1-I )
   70                CONTINUE
   80             CONTINUE
               END IF
            ELSE
               IF( ( II.EQ.-1 ) .AND. ( JJ.EQ.-1 ) ) THEN
                  IF( REV.EQ.0 ) THEN
                     CALL ZGEBR2D( CONTXT, 'All', ' ', IROW2-IROW1+1,
     $                             ICOL2-ICOL1+1, B( IDI-I+1, IDJ-I+1 ),
     $                             LDB, ROW, COL )
                  END IF
               END IF
               IF( ( II.EQ.-1 ) .AND. ( JJ.EQ.MYCOL ) ) THEN
                  IF( REV.EQ.0 ) THEN
                     CALL ZGEBR2D( CONTXT, 'Col', ' ', IROW2-IROW1+1,
     $                             ICOL2-ICOL1+1, B( IDI-I+1, IDJ-I+1 ),
     $                             LDB, ROW, COL )
                  END IF
               END IF
               IF( ( II.EQ.MYROW ) .AND. ( JJ.EQ.-1 ) ) THEN
                  IF( REV.EQ.0 ) THEN
                     CALL ZGEBR2D( CONTXT, 'Row', ' ', IROW2-IROW1+1,
     $                             ICOL2-ICOL1+1, B( IDI-I+1, IDJ-I+1 ),
     $                             LDB, ROW, COL )
                  END IF
               END IF
               IF( ( II.EQ.MYROW ) .AND. ( JJ.EQ.MYCOL ) ) THEN
                  IF( REV.EQ.0 ) THEN
                     CALL ZGERV2D( CONTXT, IROW2-IROW1+1, ICOL2-ICOL1+1,
     $                             B( IDI-I+1, IDJ-I+1 ), LDB, ROW,
     $                             COL )
                  ELSE
                     CALL ZGESD2D( CONTXT, IROW2-IROW1+1, ICOL2-ICOL1+1,
     $                             B( IDI-I+1, IDJ-I+1 ), LDB, ROW,
     $                             COL )
*                    CALL ZGESD2D(CONTXT, IROW2-IROW1+1, ICOL2-ICOL1+1,
*    $                            A((ICOL1-1)*LDA+IROW1),LDA, ROW, COL)
                  END IF
               END IF
            END IF
            IDI = ISTOPI + 1
            ISTOPI = MIN( ISTOPI+HBL, IFIN )
            IF( IDI.LE.IFIN )
     $         GO TO 40
         END IF
         IDJ = ISTOPJ + 1
         ISTOPJ = MIN( ISTOPJ+HBL, IFIN )
         IF( IDJ.LE.IFIN )
     $      GO TO 30
      END IF
      RETURN
*
*     End of PZLACP3
*
      END
SHAR_EOF
fi # end of overwriting check
if test -f 'pzlahqr.f'
then
	echo shar: will not over-write existing file "'pzlahqr.f'"
else
cat << "SHAR_EOF" > 'pzlahqr.f'
      SUBROUTINE PZLAHQR( WANTT, WANTZ, N, ILO, IHI, A, DESCA, W, ILOZ, 
     $                    IHIZ, Z, DESCZ, WORK, LWORK, IWORK, ILWORK, 
     $                    INFO )
*
*  Mark R. Fahey
*  June 22, 2000
*
*     .. Scalar Arguments ..
      LOGICAL            WANTT, WANTZ
      INTEGER            IHI, IHIZ, ILO, ILOZ, ILWORK, INFO, LWORK, N
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * ), DESCZ( * ), IWORK( * )
      COMPLEX*16         A( * ), W( * ), WORK( * ), Z( * )
*     ..
*
*  Purpose
*  =======
*
*  PZLAHQR is an auxiliary routine used to find the Schur decomposition
*    and or eigenvalues of a matrix already in Hessenberg form from
*    cols ILO to IHI.
*  If Z = I, and WANTT=WANTZ=.TRUE., H gets replaced with Z'HZ, 
*    with Z'Z=I, and H in Schur form.
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclicly distributed array.
*  Such a global array has an associated description vector DESCA.
*  In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
*                                 DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) DESCA( M_ )    The number of rows in the global
*                                 array A.
*  N_A    (global) DESCA( N_ )    The number of columns in the global
*                                 array A.
*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
*                                 the rows of the array.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
*                                 the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCp(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCp( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCq( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCp() and LOCq() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCp( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCq( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCp( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCq( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Arguments
*  =========
*
*  WANTT   (global input) LOGICAL
*          = .TRUE. : the full Schur form T is required;
*          = .FALSE.: only eigenvalues are required.
*
*  WANTZ   (global input) LOGICAL
*          = .TRUE. : the matrix of Schur vectors Z is required;
*          = .FALSE.: Schur vectors are not required.
*
*  N       (global input) INTEGER
*          The order of the Hessenberg matrix A (and Z if WANTZ).
*          N >= 0.
*
*  ILO     (global input) INTEGER
*  IHI     (global input) INTEGER
*          It is assumed that A is already upper quasi-triangular in
*          rows and columns IHI+1:N, and that A(ILO,ILO-1) = 0 (unless
*          ILO = 1). PZLAHQR works primarily with the Hessenberg
*          submatrix in rows and columns ILO to IHI, but applies
*          transformations to all of H if WANTT is .TRUE..
*          1 <= ILO <= max(1,IHI); IHI <= N.
*
*  A       (global input/output) COMPLEX*16 array, dimension
*          (DESCA(LLD_),*)
*          On entry, the upper Hessenberg matrix A.
*          On exit, if WANTT is .TRUE., A is upper triangular in rows 
*          and columns ILO:IHI.  If WANTT is .FALSE., the contents of 
*          A are unspecified on exit.
*
*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix A.
*
*  W      (global replicated output) COMPLEX*16 array, dimension (N)
*          The computed eigenvalues ILO to IHI are stored in the 
*          corresponding elements of W.  If WANTT is .TRUE., the
*          eigenvalues are stored in the same order as on the diagonal
*          of the Schur form returned in A.  A may be returned with
*          larger diagonal blocks until the next release.
*
*  ILOZ    (global input) INTEGER
*  IHIZ    (global input) INTEGER
*          Specify the rows of Z to which transformations must be
*          applied if WANTZ is .TRUE..
*          1 <= ILOZ <= ILO; IHI <= IHIZ <= N.
*
*  Z       (global input/output) COMPLEX*16 array.
*          If WANTZ is .TRUE., on entry Z must contain the current
*          matrix Z of transformations accumulated by PZHSEQR, and on
*          exit Z has been updated; transformations are applied only to
*          the submatrix Z(ILOZ:IHIZ,ILO:IHI).
*          If WANTZ is .FALSE., Z is not referenced.
*
*  DESCZ   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix Z.
*
*  WORK    (local output) COMPLEX*16 array of size LWORK
*          (Unless LWORK=-1, in which case WORK must be at least size 1)
*
*  LWORK   (local input) INTEGER
*          WORK(LWORK) is a local array and LWORK is assumed big enough
*          so that LWORK >= 3*N +
*                MAX( 2*MAX(DESCZ(LLD_),DESCA(LLD_)) + 2*LOCq(N),
*                     7*Ceil(N/HBL)/LCM(NPROW,NPCOL)) + 
*                MAX( 2*N, (8*LCM(NPROW,NPCOL)+2)**2 )
*          If LWORK=-1, then WORK(1) gets set to the above number and
*          the code returns immediately.
*
*  IWORK   (global and local input) INTEGER array of size ILWORK
*          This will hold some of the IBLK integer arrays.
*          This is held as a place holder for a future release.
*          Currently unreferenced.
*
*  ILWORK  (local input) INTEGER
*          This will hold the size of the IWORK array.
*          This is held as a place holder for a future release.
*          Currently unreferenced.
*
*  INFO    (global output) INTEGER
*          < 0: parameter number -INFO incorrect or inconsistent
*          = 0: successful exit
*          > 0: PZLAHQR failed to compute all the eigenvalues ILO to IHI
*               in a total of 30*(IHI-ILO+1) iterations; if INFO = i,
*               elements i+1:ihi of W contains those eigenvalues
*               which have been successfully computed.
*
*  Logic:
*       This algorithm is very similar to DLAHQR.  Unlike DLAHQR,
*       instead of sending one double shift through the largest
*       unreduced submatrix, this algorithm sends multiple double shifts
*       and spaces them apart so that there can be parallelism across
*       several processor row/columns.  Another critical difference is
*       that this algorithm aggregrates multiple transforms together in
*       order to apply them in a block fashion.
*
*  Important Local Variables:
*       IBLK = The maximum number of bulges that can be computed.
*           Currently fixed.  Future releases this won't be fixed.
*       HBL  = The square block size (HBL=DESCA(MB_)=DESCA(NB_))
*       ROTN = The number of transforms to block together
*       NBULGE = The number of bulges that will be attempted on the
*           current submatrix.
*       IBULGE = The current number of bulges started.
*       K1(*),K2(*) = The current bulge loops from K1(*) to K2(*).
*
*  Subroutines:
*       From LAPACK, this routine calls:
*           ZLAHQR     -> Serial QR used to determine shifts and eigenvalues
*           ZLARFG     -> Determine the Householder transforms
*
*       This ScaLAPACK, this routine calls:
*           PZLACONSB  -> To determine where to start each iteration
*           ZLAMSH     -> Sends multiple shifts through a small submatrix to
*                         see how the consecutive subdiagonals change (if
*                         PZLACONSB indicates we can start a run in the middle)
*           PZLAWIL    -> Given the shift, get the transformation
*           PZLACP3    -> Parallel array to local replicated array copy & back.
*           ZLAREF     -> Row/column reflector applier.  Core routine here.
*           PZLASMSUB  -> Finds negligible subdiagonal elements.
*
*  Current Notes and/or Restrictions:
*       1.) This code requires the distributed block size to be square
*           and at least six (6); unlike simpler codes like LU, this
*           algorithm is extremely sensitive to block size.  Unwise
*           choices of too small a block size can lead to bad
*           performance.
*       2.) This code requires A and Z to be distributed identically
*           and have identical contxts.  A future version may allow Z to
*           have a different contxt to 1D row map it to all nodes (so no
*           communication on Z is necessary.)
*       3.) This code does not currently block the initial transforms
*           so that none of the rows or columns for any bulge are
*           completed until all are started.  To offset pipeline
*           start-up it is recommended that at least 2*LCM(NPROW,NPCOL)
*           bulges are used (if possible)
*       4.) The maximum number of bulges currently supported is fixed at
*           32.  In future versions this will be limited only by the
*           incoming WORK and IWORK array.
*       5.) The matrix A must be in upper Hessenberg form.  If elements
*           below the subdiagonal are nonzero, the resulting transforms
*           may be nonsimilar.  This is also true with the LAPACK
*           routine ZLAHQR.
*       6.) For this release, this code has only been tested for 
*           RSRC_=CSRC_=0, but it has been written for the general case.
*       7.) Currently, all the eigenvalues are distributed to all the
*           nodes.  Future releases will probably distribute the
*           eigenvalues by the column partitioning.
*       8.) The internals of this routine are subject to change.
*       9.) To optimize this for your architecture, try tuning ZLAREF.
*       10.) This code has only been tested for WANTZ = .TRUE. and may 
*           behave unpredictably for WANTZ set to .FALSE.
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DT_,
     $                   LLD_, MB_, M_, NB_, N_, RSRC_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1,
     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
      DOUBLE PRECISION   RZERO, RONE, HALF
      PARAMETER          ( RZERO = 0.0D+0, RONE = 1.0D+0, HALF = 0.5D+0)
      COMPLEX*16         ZERO, ONE
      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ), 
     $                     ONE = ( 1.0D+0, 0.0D+0 ) )
      DOUBLE PRECISION   CONST
      PARAMETER          ( CONST = 1.50D+0 )
      INTEGER            IBLK
      PARAMETER          ( IBLK = 32 )
*     ..
*     .. Local Scalars ..
      LOGICAL            SKIP
      INTEGER            CONTXT, DOWN, HBL, I, I1, I2, IAFIRST,
     $                   IBULGE, ICBUF, ICOL, ICOL1, ICOL2, IDIA,
     $                   IERR, II, IRBUF, IROW, IROW1, IROW2, ISPEC,
     $                   ISTART, ISTARTCOL, ISTARTROW, ISTOP, ISUB,
     $                   ISUP, ITERMAX, ITMP1, ITMP2, ITN, ITS,
     $                   IZBUF, J, JAFIRST, JBLK, JJ, K, KI, L, LCMRC,
     $                   LDA, LDZ, LEFT, LIHIH, LIHIZ, LILOH, LILOZ,
     $                   LOCALI1, LOCALI2, LOCALK, LOCALM, M, MODKM1,
     $                   MYCOL, MYROW, NBULGE, NH, NODE, NPCOL,
     $                   NPROW, NQ, NR, NUM, NZ, RIGHT, ROTN, UP, 
     $                   VECSIDX
      DOUBLE PRECISION   CS, OVFL, S, SMLNUM, ULP, UNFL
      COMPLEX*16         CDUM, H00, H10, H11, H12, H21, H22, H33, H44,
     $                   H43H34, SN, SUM, T1, T1COPY, T2, T3, V1SAVE, 
     $                   V2, V2SAVE, V3, V3SAVE
*     ..
*     .. Local Arrays ..
      INTEGER            ICURCOL( IBLK ), ICURROW( IBLK ), K1( IBLK ),
     $                   K2( IBLK ), KCOL( IBLK ), KP2COL( IBLK ),
     $                   KP2ROW( IBLK ), KROW( IBLK )
      COMPLEX*16         S1( 2*IBLK, 2*IBLK ), SMALLA( 6, 6, IBLK ),
     $                   VCOPY( 3 )
*     ..
*     .. External Functions ..
      INTEGER            ILCM, NUMROC
      DOUBLE PRECISION   PDLAMCH
      EXTERNAL           ILCM, NUMROC, PDLAMCH
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, IGAMN2D, IGEBR2D, IGEBS2D, 
     $                   INFOG1L, INFOG2L, PDLABAD, PXERBLA, PZLACONSB, 
     $                   PZLACP3, PZLASMSUB, PZLAWIL, ZCOPY, ZGEBR2D,
     $                   ZGEBS2D, ZGERV2D, ZGESD2D, ZGSUM2D, ZLAHQR,
     $                   ZLAMSH, ZLAREF, ZLARFG
*     ..
*     .. Intrinsic Functions ..
*
      INTRINSIC          ABS, DBLE, DCONJG, DIMAG, MAX, MIN, MOD, SQRT
*     ..
*     .. Statement Functions ..
      DOUBLE PRECISION   CABS1
*     ..
*     .. Statement Function definitions ..
      CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )
*     ..
*     .. Executable Statements ..
*
      INFO = 0
*
      ITERMAX = 30*( IHI-ILO+1 )
      IF( N.EQ.0 )
     $   RETURN
*
*     NODE (IAFIRST,JAFIRST) OWNS A(1,1)
*
      HBL = DESCA( MB_ )
      CONTXT = DESCA( CTXT_ )
      LDA = DESCA( LLD_ )
      IAFIRST = DESCA( RSRC_ )
      JAFIRST = DESCA( CSRC_ )
      LDZ = DESCZ( LLD_ )
      CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL )
      NODE = MYROW*NPCOL + MYCOL
      NUM = NPROW*NPCOL
      LEFT = MOD( MYCOL+NPCOL-1, NPCOL )
      RIGHT = MOD( MYCOL+1, NPCOL )
      UP = MOD( MYROW+NPROW-1, NPROW )
      DOWN = MOD( MYROW+1, NPROW )
      LCMRC = ILCM( NPROW, NPCOL )
      IF( ( NPROW.LE.3 ) .OR. ( NPCOL.LE.3 ) ) THEN
         SKIP = .TRUE.
      ELSE
         SKIP = .FALSE.
      END IF
*
*     Determine the number of columns we have so we can check workspace
*
      NQ = NUMROC( N, HBL, MYCOL, JAFIRST, NPCOL )
      JJ = N / HBL
      IF( JJ*HBL.LT.N )
     $   JJ = JJ + 1
      JJ = 7*JJ / LCMRC
      JJ = 3*N+MAX( 2*MAX( LDA, LDZ )+2*NQ, JJ )
      JJ = JJ + MAX( 2*N, (8*LCMRC+2)**2 )
      IF ( LWORK .EQ. -1 ) THEN
         WORK(1) = JJ
         RETURN
      END IF
      IF ( LWORK .LT. JJ ) THEN
         INFO = -14
      END IF
      IF( DESCZ( CTXT_ ).NE.DESCA( CTXT_ ) ) THEN
         INFO = -( 1300+CTXT_ )
      END IF
      IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN
         INFO = -( 700+NB_ )
      END IF
      IF( DESCZ( MB_ ).NE.DESCZ( NB_ ) ) THEN
         INFO = -( 1300+NB_ )
      END IF
      IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN
         INFO = -( 1300+MB_ )
      END IF
      IF( ( DESCA( RSRC_ ).NE.0 ) .OR. ( DESCA( CSRC_ ).NE.0 ) ) THEN
         INFO = -( 700+RSRC_ )
      END IF
      IF( ( DESCZ( RSRC_ ).NE.0 ) .OR. ( DESCZ( CSRC_ ).NE.0 ) ) THEN
         INFO = -( 1300+RSRC_ )
      END IF
      IF( ( ILO.GT.N ) .OR. ( ILO.LT.1 ) ) THEN
         INFO = -4
      END IF
      IF( ( IHI.GT.N ) .OR. ( IHI.LT.1 ) ) THEN
         INFO = -5
      END IF
      IF( HBL.LT.5 ) THEN
         INFO = -( 700+MB_ )
      END IF
      CALL IGAMN2D( CONTXT, 'ALL', ' ', 1, 1, INFO, 1, ITMP1, ITMP2, -1,
     $              -1, -1 )
      IF( INFO.LT.0 ) THEN
         CALL PXERBLA( CONTXT, 'PZLAHQR', -INFO )
         RETURN
      END IF
*
*     Set work array indices
*
      VECSIDX = 0
      IDIA = 3*N
      ISUB = 3*N
      ISUP = 3*N
      IRBUF = 3*N
      ICBUF = 3*N
      IZBUF = 5*N
*
*     Find a value for ROTN
*
      ROTN = HBL / 3
      ROTN = MIN( ROTN, HBL-2 )
      ROTN = MAX( ROTN, 1 )
*
      IF( ILO.EQ.IHI ) THEN
         CALL INFOG2L( ILO, ILO, DESCA, NPROW, NPCOL, MYROW, MYCOL,
     $                 IROW, ICOL, II, JJ )
         IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN
            W( ILO ) = A( ( ICOL-1 )*LDA+IROW )
         ELSE 
            W( ILO ) = ZERO
         END IF
         RETURN
      END IF
*
      NH = IHI - ILO + 1
      NZ = IHIZ - ILOZ + 1
*
      CALL INFOG1L( ILOZ, HBL, NPROW, MYROW, IAFIRST, LILOZ, LIHIZ )
      LIHIZ = NUMROC( IHIZ, HBL, MYROW, IAFIRST, NPROW )
*
*     Set machine-dependent constants for the stopping criterion.
*     If NORM(H) <= SQRT(OVFL), overflow should not occur.
*
      UNFL = PDLAMCH( CONTXT, 'SAFE MINIMUM' )
      OVFL = RONE / UNFL
      CALL PDLABAD( CONTXT, UNFL, OVFL )
      ULP = PDLAMCH( CONTXT, 'PRECISION' )
      SMLNUM = UNFL*( NH / ULP )
*
*     I1 and I2 are the indices of the first row and last column of H
*     to which transformations must be applied. If eigenvalues only are
*     being computed, I1 and I2 are set inside the main loop.
*
      IF( WANTT ) THEN
         I1 = 1
         I2 = N
      END IF
*
*     ITN is the total number of QR iterations allowed.
*
      ITN = ITERMAX
*
*     The main loop begins here. I is the loop index and decreases from
*     IHI to ILO in steps of our schur block size (<=2*IBLK). Each
*     iteration of the loop works  with the active submatrix in rows
*     and columns L to I.   Eigenvalues I+1 to IHI have already
*     converged. Either L = ILO or the global A(L,L-1) is negligible
*     so that the matrix splits.
*
      I = IHI
   10 CONTINUE
      L = ILO
      IF( I.LT.ILO )
     $   GO TO 670
*
*     Perform QR iterations on rows and columns ILO to I until a
*     submatrix of order 1 or 2 splits off at the bottom because a
*     subdiagonal element has become negligible.
*
      DO 640 ITS = 0, ITN
*
*        Look for a single small subdiagonal element.
*
         CALL PZLASMSUB( A, DESCA, I, L, K, SMLNUM, WORK( IRBUF+1 ),
     $                   LWORK-IRBUF )
         L = K
*
         IF( L.GT.ILO ) THEN
*
*           H(L,L-1) is negligible
*
            CALL INFOG2L( L, L-1, DESCA, NPROW, NPCOL, MYROW, MYCOL,
     $                    IROW, ICOL, ITMP1, ITMP2 )
            IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN
               A( ( ICOL-1 )*LDA+IROW ) = ZERO
            END IF
            WORK( ISUB+L-1 ) = ZERO
         END IF
*
*        Exit from loop if a submatrix of order 1 or 2 has split off.
*
         IF ( WANTT ) THEN
*           For Schur form, use 2x2 blocks
            IF ( L .GE. I-1 ) THEN
               GO TO 650
            END IF
         ELSE 
*           If we don't want the Schur form, use bigger blocks.
            IF ( L .GE. I-( 2*IBLK-1) ) THEN
               GO TO 650
            END IF
         END IF
*
*        Now the active submatrix is in rows and columns L to I. If
*        eigenvalues only are being computed, only the active submatrix
*        need be transformed.
*
         IF( .NOT.WANTT ) THEN
            I1 = L
            I2 = I
         END IF
*
*        Copy submatrix of size 2*JBLK and prepare to do generalized
*           Wilkinson shift or an exceptional shift
*
         JBLK = MIN( IBLK, ( ( I-L+1 ) / 2 )-1 )
         IF( JBLK.GT.LCMRC ) THEN
*
*           Make sure it's divisible by LCM (we want even workloads!)
*
            JBLK = JBLK - MOD( JBLK, LCMRC )
         END IF
         JBLK = MIN( JBLK, 2*LCMRC )
         JBLK = MAX( JBLK, 1 )
*
         CALL PZLACP3( 2*JBLK, I-2*JBLK+1, A, DESCA, S1, 2*IBLK, -1, -1,
     $                 0 )
         IF( (ITS.EQ.20 .OR. ITS.EQ.40) .AND. (JBLK.GT.1) ) THEN
*
*           Exceptional shift.
*
            DO 20 II = 2*JBLK, 1, -1
               S1( II, II ) = CONST*( CABS1( S1( II, II ) )+
     $                        CABS1( S1( II, II-1 ) ) )
               S1( II, II-1 ) = ZERO
               S1( II-1, II ) = ZERO
   20       CONTINUE
         ELSE
            CALL ZLAHQR( .FALSE., .FALSE., 2*JBLK, 1, 2*JBLK, S1, 2*
     $                  IBLK, WORK( IRBUF+1 ), 1, 2*JBLK, Z, LDZ, IERR )
*
*           Prepare to use Wilkinson's double shift
*
            H44 = S1( 2*JBLK, 2*JBLK )
            H33 = S1( 2*JBLK-1, 2*JBLK-1 )
            H43H34 = S1( 2*JBLK-1, 2*JBLK )*S1( 2*JBLK, 2*JBLK-1 )
*
         END IF
*
*        Look for two consecutive small subdiagonal elements:
*           PZLACONSB is the routine that does this.
*
         CALL PZLACONSB( A, DESCA, I, L, M, H44, H33, H43H34,
     $                   WORK( IRBUF+1 ), LWORK-IRBUF )
*
*        Double-shift QR step
*
*        NBULGE is the number of bulges that will be attempted
*
         ISTOP = MIN( M+ROTN-1-MOD( M-( M / HBL )*HBL-1, ROTN ), I-2 )
         ISTOP = MIN( ISTOP, M+HBL-3-MOD( M-1, HBL ) )
         ISTOP = MIN( ISTOP, I2-2 )
         ISTOP = MAX( ISTOP, M )
         NBULGE = ( I-1-ISTOP ) / HBL
*
*        Do not exceed maximum determined.
*
         NBULGE = MIN( NBULGE, JBLK )
         IF( NBULGE.GT.LCMRC ) THEN
*
*           Make sure it's divisible by LCM (we want even workloads!)
*
            NBULGE = NBULGE - MOD( NBULGE, LCMRC )
         END IF
         NBULGE = MAX( NBULGE, 1 )
*
*        If we are starting in the middle because of consecutive small
*           subdiagonal elements, we need to see how many bulges we 
*           can send through without breaking the consecutive small 
*           subdiagonal property.
*
         IF( ( NBULGE.GT.1 ) .AND. ( M.GT.L ) ) THEN
*
*           Copy a chunk of elements from global A(M-1:,M-1:)
*
            CALL INFOG2L( M+2, M+2, DESCA, NPROW, NPCOL, MYROW, MYCOL,
     $                    IROW1, ICOL1, ITMP1, ITMP2 )
            II = MIN( 4*NBULGE+2, N-M+2 )
            CALL PZLACP3( II, M-1, A, DESCA, WORK( IRBUF+1 ), II, ITMP1,
     $                    ITMP2, 0 )
            IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN
*
*              Find a new NBULGE based on the bulges we have.
*
               CALL ZLAMSH( S1, 2*IBLK, NBULGE, JBLK, WORK( IRBUF+1 ),
     $                      II, II, ULP )
               IF( NUM.GT.1 ) THEN
                  CALL IGEBS2D( CONTXT, 'ALL', ' ', 1, 1, NBULGE, 1 )
               END IF
            ELSE
*
*              Everyone needs to receive the new NBULGE
*
               CALL IGEBR2D( CONTXT, 'ALL', ' ', 1, 1, NBULGE, 1, ITMP1,
     $                       ITMP2 )
            END IF
         END IF
*
*        IBULGE is the number of bulges going so far
*
         IBULGE = 1
*
*        "A" row defs : main row transforms from LOCALK to LOCALI2
*
         CALL INFOG1L( M, HBL, NPCOL, MYCOL, JAFIRST, ITMP1, LOCALK )
         LOCALK = NQ
         CALL INFOG1L( 1, HBL, NPCOL, MYCOL, JAFIRST, ICOL1, LOCALI2 )
         LOCALI2 = NUMROC( I2, HBL, MYCOL, JAFIRST, NPCOL )
*
*        "A" col defs : main col transforms from LOCALI1 to LOCALM
*
         CALL INFOG1L( I1, HBL, NPROW, MYROW, IAFIRST, LOCALI1, 
     $                 ICOL1 )
         CALL INFOG1L( 1, HBL, NPROW, MYROW, IAFIRST, LOCALM, ICOL1 )
         ICOL1 = NUMROC( MIN( M+3, I ), HBL, MYROW, IAFIRST, NPROW )
*
*        Which row & column will start the bulges
*
         ISTARTROW = MOD( ( M+1 ) / HBL, NPROW ) + IAFIRST
         ISTARTCOL = MOD( ( M+1 ) / HBL, NPCOL ) + JAFIRST
*
         CALL INFOG1L( M, HBL, NPROW, MYROW, IAFIRST, II, ITMP2 )
         CALL INFOG1L( M, HBL, NPCOL, MYCOL, JAFIRST, JJ, ITMP2 )
         CALL INFOG1L( 1, HBL, NPROW, MYROW, IAFIRST, ISTOP, 
     $                 KP2ROW( 1 ) )
         KP2ROW( 1 ) = NUMROC( M+2, HBL, MYROW, IAFIRST, NPROW )
         CALL INFOG1L( 1, HBL, NPCOL, MYCOL, JAFIRST, ISTOP, 
     $                 KP2COL( 1 ) )
         KP2COL( 1 ) = NUMROC( M+2, HBL, MYCOL, JAFIRST, NPCOL )
*
*        Set all values for bulges.  All bulges are stored in
*          intermediate steps as loops over KI.  Their current "task"
*          over the global M to I-1 values is always K1(KI) to K2(KI).
*          However, because there are many bulges, K1(KI) & K2(KI) might
*          go past that range while later bulges (KI+1,KI+2,etc..) are
*          finishing up.  Even if ROTN=1, in order to minimize border
*          communication sometimes K1(KI)=HBL-2 & K2(KI)=HBL-1 so both
*          border messages can be handled at once.
*
*        Rules:
*              If MOD(K1(KI)-1,HBL) < HBL-2 then MOD(K2(KI)-1,HBL)<HBL-2
*              If MOD(K1(KI)-1,HBL) = HBL-1 then MOD(K2(KI)-1,HBL)=HBL-1
*              K2(KI)-K1(KI) <= ROTN
*
*        We first hit a border when MOD(K1(KI)-1,HBL)=HBL-2 and we hit
*        it again when MOD(K1(KI)-1,HBL)=HBL-1.
*
         DO 30 KI = 1, NBULGE
            K1( KI ) = M
            ISTOP = MIN( M+ROTN-1-MOD( M-( M / HBL )*HBL-1, ROTN ),
     $              I-2 )
            ISTOP = MIN( ISTOP, M+HBL-3-MOD( M-1, HBL ) )
            ISTOP = MIN( ISTOP, I2-2 )
            ISTOP = MAX( ISTOP, M )
            IF( ( MOD( M-1, HBL ).EQ.HBL-2 ) .AND.
     $          ( ISTOP.LT.MIN( I-2, I2-2 ) ) ) THEN
               ISTOP = ISTOP + 1
            END IF
            K2( KI ) = ISTOP
            ICURROW( KI ) = ISTARTROW
            ICURCOL( KI ) = ISTARTCOL
            KROW( KI ) = II
            KCOL( KI ) = JJ
            IF( KI.GT.1 )
     $         KP2ROW( KI ) = KP2ROW( 1 )
            IF( KI.GT.1 )
     $         KP2COL( KI ) = KP2COL( 1 )
   30    CONTINUE
*
*        Get first transform on node who owns M+2,M+2
*
         ITMP1 = ISTARTROW
         ITMP2 = ISTARTCOL
         CALL PZLAWIL( ITMP1, ITMP2, M, A, DESCA, H44, H33, H43H34,
     $                 VCOPY )
         V1SAVE = VCOPY( 1 )
         V2SAVE = VCOPY( 2 )
         V3SAVE = VCOPY( 3 )
*
*        The main implicit shift Francis loops over the bulges starts 
*           here!
*
         IF( K2( IBULGE ).LE.I-1 ) THEN
   40       CONTINUE
            IF( ( K1( IBULGE ).GE.M+5 ) .AND. ( IBULGE.LT.NBULGE ) )
     $           THEN
               IF( ( MOD( K2( IBULGE )+2, HBL ).EQ.MOD( K2( IBULGE+1 )+
     $             2, HBL ) ) .AND. ( K1( 1 ).LE.I-1 ) ) THEN
                  H44 = S1( 2*JBLK-2*IBULGE, 2*JBLK-2*IBULGE )
                  H33 = S1( 2*JBLK-2*IBULGE-1, 2*JBLK-2*IBULGE-1 )
                  H43H34 = S1( 2*JBLK-2*IBULGE-1, 2*JBLK-2*IBULGE )*
     $                     S1( 2*JBLK-2*IBULGE, 2*JBLK-2*IBULGE-1 )
                  ITMP1 = ISTARTROW
                  ITMP2 = ISTARTCOL
                  CALL PZLAWIL( ITMP1, ITMP2, M, A, DESCA, H44, H33,
     $                          H43H34, VCOPY )
                  V1SAVE = VCOPY( 1 )
                  V2SAVE = VCOPY( 2 )
                  V3SAVE = VCOPY( 3 )
                  IBULGE = IBULGE + 1
               END IF
            END IF
*
*        When we hit a border, there are row and column transforms that
*          overlap over several processors and the code gets very
*          "congested."  As a remedy, when we first hit a border, a 6x6
*          *local* matrix is generated on one node (called SMALLA) and
*          work is done on that.  At the end of the border, the data is
*          passed back and everything stays a lot simpler.
*
            DO 120 KI = 1, IBULGE
*
               ISTART = MAX( K1( KI ), M )
               ISTOP = MIN( K2( KI ), I-1 )
               K = ISTART
               MODKM1 = MOD( K-1, HBL )
               IF( MODKM1.GE.HBL-2 ) THEN
                  IF( ( MODKM1.EQ.HBL-2 ) .AND. ( K.LT.I-1 ) ) THEN
*
*                 Copy 6 elements from global A(K-1:K+4,K-1:K+4)
*
                     ITMP1 = ICURROW( KI )
                     ITMP2 = ICURCOL( KI )
                     CALL PZLACP3( MIN( 6, N-K+2 ), K-1, A, DESCA,
     $                             SMALLA( 1, 1, KI ), 6, ITMP1, ITMP2,
     $                             0 )
                  END IF
                  IF( ( MODKM1.EQ.HBL-1 ) .AND. ( K.EQ.M ) ) THEN
*
*                 Copy 6 elements from global A(K-2:K+3,K-2:K+3)
*
                     CALL INFOG2L( K+1, K+1, DESCA, NPROW, NPCOL, MYROW,
     $                             MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
                     CALL PZLACP3( MIN( 6, N-K+3 ), K-2, A, DESCA,
     $                             SMALLA( 1, 1, KI ), 6, ITMP1, ITMP2,
     $                             0 )
                  END IF
               END IF
*
*
*           ZLAHQR used to have a single row application and a single
*              column application to H.  Here we do something a little
*              more clever.  We break each transformation down into 3
*              parts:
*                  1.) The minimum amount of work it takes to determine
*                        a group of ROTN transformations (this is on
*                        the critical path.) (Loops 50-120)
*                  (the data is broadcast now: loops 180-240)
*                  2.) The small work it takes so that each of the rows
*                        and columns is at the same place.  For example,
*                        all ROTN row transforms are all complete
*                        through some column TMP.  (Loops 250-260)
*                  3.) The majority of the row and column transforms
*                        are then applied in a block fashion.
*                        (row transforms are in loops 280-380)
*                        (col transforms are in loops 400-540)
*
*           Each of these three parts are further subdivided into 3
*           parts:
*               A.) Work at the start of a border when
*                       MOD(ISTART-1,HBL) = HBL-2
*               B.) Work at the end of a border when
*                       MOD(ISTART-1,HBL) = HBL-1
*               C.) Work in the middle of the block when
*                       MOD(ISTART-1,HBL) < HBL-2
*
*           Further optimization is met with the boolean SKIP.  A border 
*              communication can be broken into several parts for efficient
*              parallelism:
*                 Loop over all the bulges, just sending the data out
*                 Loop over all the bulges, just doing the work
*                 Loop over all the bulges, just sending the data back.
*              
*
               IF( ( MYROW.EQ.ICURROW( KI ) ) .AND.
     $             ( MYCOL.EQ.ICURCOL( KI ) ) .AND.
     $             ( MODKM1.EQ.HBL-2 ) .AND.
     $             ( ISTART.LT.MIN( I-1, ISTOP+1 ) ) ) THEN
                  K = ISTART
                  NR = MIN( 3, I-K+1 )
                  IF( K.GT.M ) THEN
                     CALL ZCOPY( NR, SMALLA( 2, 1, KI ), 1, VCOPY, 1 )
                  ELSE
                     VCOPY( 1 ) = V1SAVE
                     VCOPY( 2 ) = V2SAVE
                     VCOPY( 3 ) = V3SAVE
                  END IF
                  CALL ZLARFG( NR, VCOPY( 1 ), VCOPY( 2 ), 1, T1COPY )
                  IF( K.GT.M ) THEN
                     SMALLA( 2, 1, KI ) = VCOPY( 1 )
                     SMALLA( 3, 1, KI ) = ZERO
                     IF( K.LT.I-1 )
     $                  SMALLA( 4, 1, KI ) = ZERO
                  ELSE IF( M.GT.L ) THEN
*
*                 Following differs in comparison to pdlahqr. 
*
                      SMALLA( 2, 1, KI ) = SMALLA( 2, 1, KI ) - 
     $                    DCONJG( T1COPY )*SMALLA( 2, 1, KI )
                  END IF
                  V2 = VCOPY( 2 )
                  T2 = T1COPY*V2
                  WORK( VECSIDX+( K-1 )*3+1 ) = VCOPY( 2 )
                  WORK( VECSIDX+( K-1 )*3+2 ) = VCOPY( 3 )
                  WORK( VECSIDX+( K-1 )*3+3 ) = T1COPY
                  IF( NR.EQ.3 ) THEN
*
*                    Do some work so next step is ready...
*
                     T1 = T1COPY
                     V3 = VCOPY( 3 )
                     T3 = T1*V3
                     ITMP1 = MIN( 6, I2+2-K )
                     ITMP2 = MAX( I1-K+2, 1 )
                     DO 50 J = 2, ITMP1
                        SUM = DCONJG( T1 )*SMALLA( 2, J, KI ) +
     $                        DCONJG( T2 )*SMALLA( 3, J, KI ) +
     $                        DCONJG( T3 )*SMALLA( 4, J, KI )
                        SMALLA( 2, J, KI ) = SMALLA( 2, J, KI ) - SUM
                        SMALLA( 3, J, KI ) = SMALLA( 3, J, KI ) - SUM*V2
                        SMALLA( 4, J, KI ) = SMALLA( 4, J, KI ) - SUM*V3
   50                CONTINUE
                     DO 60 J = ITMP2, 5
                        SUM = T1*SMALLA( J, 2, KI ) +
     $                        T2*SMALLA( J, 3, KI ) +
     $                        T3*SMALLA( J, 4, KI )
                        SMALLA( J, 2, KI ) = SMALLA( J, 2, KI ) - SUM
                        SMALLA( J, 3, KI ) = SMALLA( J, 3, KI ) - SUM*
     $                                       DCONJG( V2 )
                        SMALLA( J, 4, KI ) = SMALLA( J, 4, KI ) - SUM*
     $                                       DCONJG( V3 )
   60                CONTINUE
                  END IF
               END IF
*
               IF( ( MOD( ISTOP-1, HBL ).EQ.HBL-1 ) .AND.
     $             ( MYROW.EQ.ICURROW( KI ) ) .AND.
     $             ( MYCOL.EQ.ICURCOL( KI ) ) .AND.
     $             ( ISTART.LE.MIN( I, ISTOP ) ) ) THEN
                  K = ISTOP
                  NR = MIN( 3, I-K+1 )
                  IF( K.GT.M ) THEN
                     CALL ZCOPY( NR, SMALLA( 3, 2, KI ), 1, VCOPY, 1 )
                  ELSE
                     VCOPY( 1 ) = V1SAVE
                     VCOPY( 2 ) = V2SAVE
                     VCOPY( 3 ) = V3SAVE
                  END IF
                  CALL ZLARFG( NR, VCOPY( 1 ), VCOPY( 2 ), 1, T1COPY )
                  IF( K.GT.M ) THEN
                     SMALLA( 3, 2, KI ) = VCOPY( 1 )
                     SMALLA( 4, 2, KI ) = ZERO
                     IF( K.LT.I-1 )
     $                  SMALLA( 5, 2, KI ) = ZERO
*
*                    Set a subdiagonal to zero now if it's possible
*
                     IF( ( K-2.GT.M ) .AND. ( MOD( K-1, HBL ).GT.1 ) )
     $                    THEN
                        H11 = SMALLA( 1, 1, KI )
                        H10 = SMALLA( 2, 1, KI )
                        H22 = SMALLA( 2, 2, KI )
                        S = CABS1( H11 ) + CABS1( H22 )
                        IF( CABS1( H10 ).LE.MAX( ULP*S, SMLNUM ) ) THEN
                           SMALLA( 2, 1, KI ) = ZERO
                        END IF
                     END IF
                  ELSE IF( M.GT.L ) THEN
*
*                 Following differs in comparison to pdlahqr. 
*
                      SMALLA( 3, 2, KI ) = SMALLA( 3, 2, KI ) - 
     $                   DCONJG( T1COPY )*SMALLA( 3, 2, KI )
                  END IF
                  V2 = VCOPY( 2 )
                  T2 = T1COPY*V2
                  WORK( VECSIDX+( K-1 )*3+1 ) = VCOPY( 2 )
                  WORK( VECSIDX+( K-1 )*3+2 ) = VCOPY( 3 )
                  WORK( VECSIDX+( K-1 )*3+3 ) = T1COPY
                  IF( NR.EQ.3 ) THEN
*
*                    Do some work so next step is ready...
*
                     T1 = T1COPY
                     V3 = VCOPY( 3 )
                     T3 = T1*V3
                     ITMP1 = MIN( 6, I2-K+3 )
                     ITMP2 = MAX( I1-K+3, 1 )
                     DO 70 J = 3, ITMP1
                        SUM = DCONJG( T1 )*SMALLA( 3, J, KI ) +
     $                        DCONJG( T2 )*SMALLA( 4, J, KI ) +
     $                        DCONJG( T3 )*SMALLA( 5, J, KI )
                        SMALLA( 3, J, KI ) = SMALLA( 3, J, KI ) - SUM
                        SMALLA( 4, J, KI ) = SMALLA( 4, J, KI ) - SUM*V2
                        SMALLA( 5, J, KI ) = SMALLA( 5, J, KI ) - SUM*V3
   70                CONTINUE
                     DO 80 J = ITMP2, 6
                        SUM = T1*SMALLA( J, 3, KI ) +
     $                        T2*SMALLA( J, 4, KI ) +
     $                        T3*SMALLA( J, 5, KI )
                        SMALLA( J, 3, KI ) = SMALLA( J, 3, KI ) - SUM
                        SMALLA( J, 4, KI ) = SMALLA( J, 4, KI ) - SUM*
     $                                       DCONJG( V2 )
                        SMALLA( J, 5, KI ) = SMALLA( J, 5, KI ) - SUM*
     $                                       DCONJG( V3 )
   80                CONTINUE
                  END IF
               END IF
*
               IF( ( MODKM1.EQ.0 ) .AND. ( ISTART.LE.I-1 ) .AND.
     $             ( MYROW.EQ.ICURROW( KI ) ) .AND.
     $             ( RIGHT.EQ.ICURCOL( KI ) ) ) THEN
*
*              (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART)
*
                  IROW1 = KROW( KI )
                  ICOL1 = KCOL( KI )
*
*                 The ELSE part of this IF needs updated VCOPY, this
*                 was not necessary in PDLAHQR.
*
                  IF( ISTART.GT.M ) THEN
                     VCOPY( 1 ) = SMALLA( 4, 3, KI )
                     VCOPY( 2 ) = SMALLA( 5, 3, KI )
                     VCOPY( 3 ) = SMALLA( 6, 3, KI )
                     NR = MIN( 3, I-ISTART+1 )
                     CALL ZLARFG( NR, VCOPY( 1 ), VCOPY( 2 ), 1,
     $                            T1COPY )
                     A( ( ICOL1-2 )*LDA+IROW1 ) = VCOPY( 1 )
                     A( ( ICOL1-2 )*LDA+IROW1+1 ) = ZERO
                     IF( ISTART.LT.I-1 ) THEN
                        A( ( ICOL1-2 )*LDA+IROW1+2 ) = ZERO
                     END IF
                  ELSE
*
*                    If NPCOL.NE.1 THEN we need updated VCOPY.
*
                     NR = MIN( 3, I-ISTART+1 )
                     IF( NPCOL.EQ.1 ) THEN
                        VCOPY( 1 ) = V1SAVE
                        VCOPY( 2 ) = V2SAVE
                        VCOPY( 3 ) = V3SAVE
                     ELSE
*
*                    Get updated VCOPY from RIGHT
*
                        CALL ZGERV2D( CONTXT, 3, 1, VCOPY, 
     $                                1, MYROW, RIGHT )
                     ENDIF
                     CALL ZLARFG( NR, VCOPY( 1 ), VCOPY( 2 ), 1,T1COPY)
                     IF( M.GT.L ) THEN
*
*                    Following differs in comparison to pdlahqr. 
*
                         A( ( ICOL1-2 )*LDA+IROW1 ) = A( ( ICOL1-2 )*
     $                      LDA+IROW1 )*DCONJG( ONE-T1COPY )
                     END IF
                  END IF
               END IF
*
               IF( ( MYROW.EQ.ICURROW( KI ) ) .AND.
     $             ( MYCOL.EQ.ICURCOL( KI ) ) .AND.
     $             ( ( ( MODKM1.EQ.HBL-2 ) .AND. ( ISTART.EQ.I-
     $             1 ) ) .OR. ( ( MODKM1.LT.HBL-2 ) .AND. ( ISTART.LE.I-
     $             1 ) ) ) ) THEN
*
*              (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART)
*
                  IROW1 = KROW( KI )
                  ICOL1 = KCOL( KI )
                  DO 110 K = ISTART, ISTOP
*
*                    Create and do these transforms
*
                     NR = MIN( 3, I-K+1 )
                     IF( K.GT.M ) THEN
                        IF( MOD( K-1, HBL ).EQ.0 ) THEN
                           VCOPY( 1 ) = SMALLA( 4, 3, KI )
                           VCOPY( 2 ) = SMALLA( 5, 3, KI )
                           VCOPY( 3 ) = SMALLA( 6, 3, KI )
                        ELSE
                           VCOPY( 1 ) = A( ( ICOL1-2 )*LDA+IROW1 )
                           VCOPY( 2 ) = A( ( ICOL1-2 )*LDA+IROW1+1 )
                           IF( NR.EQ.3 ) THEN
                              VCOPY( 3 ) = A( ( ICOL1-2 )*LDA+IROW1+2 )
                           END IF
                        END IF
                     ELSE
                        VCOPY( 1 ) = V1SAVE
                        VCOPY( 2 ) = V2SAVE
                        VCOPY( 3 ) = V3SAVE
                     END IF
*
*                    Must send uptodate copy of VCOPY to left.
*
                     IF( NPCOL.GT.1 .AND. ISTART.LE.M .AND. 
     $                     MOD( K-1, HBL ).EQ.0 ) THEN
                        CALL ZGESD2D( CONTXT, 3, 1, VCOPY, 
     $                                1, MYROW, LEFT )
                     ENDIF
                     CALL ZLARFG( NR, VCOPY( 1 ), VCOPY( 2 ), 1,
     $                            T1COPY )
                     IF( K.GT.M ) THEN
                        IF( MOD( K-1, HBL ).GT.0 ) THEN
                           A( ( ICOL1-2 )*LDA+IROW1 ) = VCOPY( 1 )
                           A( ( ICOL1-2 )*LDA+IROW1+1 ) = ZERO
                           IF( K.LT.I-1 ) THEN
                              A( ( ICOL1-2 )*LDA+IROW1+2 ) = ZERO
                           END IF
*
*                       Set a subdiagonal to zero now if it's possible
*
                           IF( ( IROW1.GT.2 ) .AND. ( ICOL1.GT.2 ) .AND.
     $                         ( K-2.GT.M ) .AND. ( MOD( K-1,
     $                         HBL ).GT.1 ) ) THEN
                              H11 = A( ( ICOL1-3 )*LDA+IROW1-2 )
                              H10 = A( ( ICOL1-3 )*LDA+IROW1-1 )
                              H22 = A( ( ICOL1-2 )*LDA+IROW1-1 )
                              S = CABS1( H11 ) + CABS1( H22 )
                              IF( CABS1(H10).LE.MAX( ULP*S, SMLNUM ) )
     $                             THEN
                                 A( ( ICOL1-3 )*LDA+IROW1-1 ) = ZERO
                              END IF
                           END IF
                        END IF
                     ELSE IF( M.GT.L ) THEN
                        IF( MOD( K-1, HBL ).GT.0 ) THEN
*
*                       Following differs in comparison to pdlahqr. 
*
                            A( ( ICOL1-2 )*LDA+IROW1 ) = A( ( ICOL1-2 )*
     $                         LDA+IROW1 )*DCONJG( ONE-T1COPY )
                        END IF
                     END IF
                     V2 = VCOPY( 2 )
                     T2 = T1COPY*V2
                     WORK( VECSIDX+( K-1 )*3+1 ) = VCOPY( 2 )
                     WORK( VECSIDX+( K-1 )*3+2 ) = VCOPY( 3 )
                     WORK( VECSIDX+( K-1 )*3+3 ) = T1COPY
                     T1 = T1COPY
                     IF( K.LT.ISTOP ) THEN
*
*                       Do some work so next step is ready...
*
                        V3 = VCOPY( 3 )
                        T3 = T1*V3
                        DO 90 J = ( ICOL1-1 )*LDA + IROW1,
     $                          ( MIN( K2( KI )+1, I-1 )+ICOL1-K-1 )*
     $                          LDA + IROW1, LDA
                           SUM = DCONJG( T1 )*A( J ) + DCONJG( T2 )*
     $                           A( J+1 ) + DCONJG( T3 )*A( J+2 )
                           A( J ) = A( J ) - SUM
                           A( J+1 ) = A( J+1 ) - SUM*V2
                           A( J+2 ) = A( J+2 ) - SUM*V3
   90                   CONTINUE
                        DO 100 J = IROW1 + 1, IROW1 + 3
                           SUM = T1*A( ( ICOL1-1 )*LDA+J ) + 
     $                           T2*A( ICOL1*LDA+J ) + 
     $                           T3*A( ( ICOL1+1 )*LDA+J )
                           A( ( ICOL1-1 )*LDA+J ) = A( ( ICOL1-1 )*
     $                                          LDA+J ) - SUM
                           A( ICOL1*LDA+J ) = A( ICOL1*LDA+J ) - 
     $                                          SUM*DCONJG( V2 )
                           A( ( ICOL1+1 )*LDA+J ) = A( ( ICOL1+1 )*LDA
     $                                          +J ) - SUM*DCONJG( V3 )
  100                   CONTINUE
                     END IF
                     IROW1 = IROW1 + 1
                     ICOL1 = ICOL1 + 1
  110             CONTINUE
               END IF
  120       CONTINUE
*
*           First part of applying the transforms is complete.
*           Broadcasts of the Householder data is done here.
*
            DO 180 KI = 1, IBULGE
*
               ISTART = MAX( K1( KI ), M )
               ISTOP = MIN( K2( KI ), I-1 )
*
*              Broadcast Householder information from the block
*
               IF( ( MYROW.EQ.ICURROW( KI ) ) .AND. ( NPCOL.GT.1 ) .AND.
     $             ( ISTART.LE.ISTOP ) ) THEN
                  IF( MYCOL.NE.ICURCOL( KI ) ) THEN
                     CALL ZGEBR2D( CONTXT, 'ROW', ' ',
     $                             3*( ISTOP-ISTART+1 ), 1,
     $                             WORK( VECSIDX+( ISTART-1 )*3+1 ),
     $                             3*( ISTOP-ISTART+1 ), MYROW,
     $                             ICURCOL( KI ) )
                  ELSE
                     CALL ZGEBS2D( CONTXT, 'ROW', ' ',
     $                             3*( ISTOP-ISTART+1 ), 1,
     $                             WORK( VECSIDX+( ISTART-1 )*3+1 ),
     $                             3*( ISTOP-ISTART+1 ) )
                  END IF
               END IF
  180       CONTINUE
*
*           Now do column transforms and finish work
*
            DO 240 KI = 1, IBULGE
*
               ISTART = MAX( K1( KI ), M )
               ISTOP = MIN( K2( KI ), I-1 )
*
               IF( ( MYCOL.EQ.ICURCOL( KI ) ) .AND. ( NPROW.GT.1 ) .AND.
     $             ( ISTART.LE.ISTOP ) ) THEN
                  IF( MYROW.NE.ICURROW( KI ) ) THEN
                     CALL ZGEBR2D( CONTXT, 'COL', ' ',
     $                             3*( ISTOP-ISTART+1 ), 1,
     $                             WORK( VECSIDX+( ISTART-1 )*3+1 ),
     $                             3*( ISTOP-ISTART+1 ), ICURROW( KI ),
     $                             MYCOL )
                  ELSE
                     CALL ZGEBS2D( CONTXT, 'COL', ' ',
     $                             3*( ISTOP-ISTART+1 ), 1,
     $                             WORK( VECSIDX+( ISTART-1 )*3+1 ),
     $                             3*( ISTOP-ISTART+1 ) )
                  END IF
               END IF
  240       CONTINUE
*
*
*           Now do make up work to have things in block fashion
*
            DO 260 KI = 1, IBULGE
               ISTART = MAX( K1( KI ), M )
               ISTOP = MIN( K2( KI ), I-1 )
*
               MODKM1 = MOD( ISTART-1, HBL )
               IF( ( MYROW.EQ.ICURROW( KI ) ) .AND.
     $             ( MYCOL.EQ.ICURCOL( KI ) ) .AND.
     $             ( ( ( MODKM1.EQ.HBL-2 ) .AND. ( ISTART.EQ.I-
     $             1 ) ) .OR. ( ( MODKM1.LT.HBL-2 ) .AND. ( ISTART.LE.I-
     $             1 ) ) ) ) THEN
*
*                 (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART)
*
                  IROW1 = KROW( KI )
                  ICOL1 = KCOL( KI )
                  DO 250 K = ISTART, ISTOP
*
*              Catch up on column & border work
*
                     NR = MIN( 3, I-K+1 )
                     V2 = WORK( VECSIDX+( K-1 )*3+1 )
                     V3 = WORK( VECSIDX+( K-1 )*3+2 )
                     T1 = WORK( VECSIDX+( K-1 )*3+3 )
                     T2 = T1*V2
                     IF( K.LT.ISTOP ) THEN
*
*                 Do some work so next step is ready...
*
                        T3 = T1*V3
                        CALL ZLAREF( 'Col', A, LDA, .FALSE., Z, LDZ,
     $                               .FALSE., ICOL1, ICOL1, ISTART,
     $                               ISTOP, MIN( ISTART+1, I )-K+IROW1,
     $                               IROW1, LILOZ, LIHIZ,
     $                               WORK( VECSIDX+1 ), V2, V3, T1, T2,
     $                               T3 )
                        IROW1 = IROW1 + 1
                        ICOL1 = ICOL1 + 1
                     ELSE
                        IF( ( NR.EQ.3 ) .AND. ( MOD( K-1,
     $                      HBL ).LT.HBL-2 ) ) THEN
                           T3 = T1*V3
                           CALL ZLAREF( 'Row', A, LDA, .FALSE., Z, LDZ,
     $                                  .FALSE., IROW1, IROW1, ISTART,
     $                                  ISTOP, ICOL1, MIN( MIN( K2( KI )
     $                                  +1, I-1 ), I2 )-K+ICOL1, LILOZ,
     $                                  LIHIZ, WORK( VECSIDX+1 ), V2,
     $                                  V3, T1, T2, T3 )
                        END IF
                     END IF
  250             CONTINUE
               END IF
*
*           Send SMALLA back again.
*
               K = ISTART
               MODKM1 = MOD( K-1, HBL )
               IF( ( MODKM1.GE.HBL-2 ) .AND. ( K.LE.I-1 ) ) THEN
                  IF( ( MODKM1.EQ.HBL-2 ) .AND. ( K.LT.I-1 ) ) THEN
*
*                    Copy 6 elements from global A(K-1:K+4,K-1:K+4)
*
                     ITMP1 = ICURROW(KI)
                     ITMP2 = ICURCOL(KI)
                     CALL PZLACP3( MIN( 6, N-K+2 ), K-1, A, DESCA,
     $                             SMALLA( 1, 1, KI ), 6, ITMP1, ITMP2,
     $                             1 )
*
                  END IF
                  IF( MODKM1.EQ.HBL-1 ) THEN
*
*                    Copy 6 elements from global A(K-2:K+3,K-2:K+3)
*
                     ITMP1 = ICURROW( KI )
                     ITMP2 = ICURCOL( KI )
                     CALL PZLACP3( MIN( 6, N-K+3 ), K-2, A, DESCA,
     $                             SMALLA( 1, 1, KI ), 6, ITMP1, ITMP2,
     $                             1 )
                  END IF
               END IF
*
  260       CONTINUE
*
  270       CONTINUE
*
*           Now start major set of block ROW reflections
*
            DO 280 KI = 1, IBULGE
               IF( ( MYROW.NE.ICURROW( KI ) ) .AND.
     $             ( DOWN.NE.ICURROW( KI ) ) )GO TO 280
               ISTART = MAX( K1( KI ), M )
               ISTOP = MIN( K2( KI ), I-1 )
*
               IF( ( ISTOP.GT.ISTART ) .AND.
     $             ( MOD( ISTART-1, HBL ).LT.HBL-2 ) .AND.
     $             ( ICURROW( KI ).EQ.MYROW ) ) THEN
                  IROW1 = MIN( K2( KI )+1, I-1 ) + 1
                  CALL INFOG1L( IROW1, HBL, NPCOL, MYCOL, JAFIRST, 
     $                          ITMP1, ITMP2 )
                  ITMP2 = LOCALI2
                  II = KROW( KI )
                  CALL ZLAREF( 'Row', A, LDA, WANTZ, Z, LDZ, .TRUE., II,
     $                         II, ISTART, ISTOP, ITMP1, ITMP2, LILOZ,
     $                         LIHIZ, WORK( VECSIDX+1 ), V2, V3, T1, T2,
     $                         T3 )
               END IF
  280       CONTINUE
*
            DO 320 KI = 1, IBULGE
               IF( KROW( KI ).GT.KP2ROW( KI ) )
     $            GO TO 320
               IF( ( MYROW.NE.ICURROW( KI ) ) .AND.
     $             ( DOWN.NE.ICURROW( KI ) ) )GO TO 320
               ISTART = MAX( K1( KI ), M )
               ISTOP = MIN( K2( KI ), I-1 )
               IF( ( ISTART.EQ.ISTOP ) .OR.
     $             ( MOD( ISTART-1, HBL ).GE.HBL-2 ) .OR.
     $             ( ICURROW( KI ).NE.MYROW ) ) THEN
                  DO 310 K = ISTART, ISTOP
                     V2 = WORK( VECSIDX+( K-1 )*3+1 )
                     V3 = WORK( VECSIDX+( K-1 )*3+2 )
                     T1 = WORK( VECSIDX+( K-1 )*3+3 )
                     NR = MIN( 3, I-K+1 )
                     T2 = T1*V2
                     IF( ( NR.EQ.3 ) .AND. ( KROW( KI ).LE.
     $                   KP2ROW( KI ) ) ) THEN
                        T3 = T1*V3
                        IF( ( K.LT.ISTOP ) .AND.
     $                      ( MOD( K-1, HBL ).LT.HBL-2 ) ) THEN
                           ITMP1 = MIN( K2( KI )+1, I-1 ) + 1
                        ELSE
                           IF( MOD( K-1, HBL ).LT.HBL-2 ) THEN
                              ITMP1 = MIN( K2( KI )+1, I-1 ) + 1
                           END IF
                           IF( MOD( K-1, HBL ).EQ.HBL-2 ) THEN
                              ITMP1 = MIN( K+4, I2 ) + 1
                           END IF
                           IF( MOD( K-1, HBL ).EQ.HBL-1 ) THEN
                              ITMP1 = MIN( K+3, I2 ) + 1
                           END IF
                        END IF
*
*                    Find local coor of rows K through K+2
*
                        IROW1 = KROW( KI )
                        IROW2 = KP2ROW( KI )
                        IF( ( K.GT.ISTART ) .AND.
     $                      ( MOD( K-1, HBL ).GE.HBL-2 ) ) THEN
                           IF( DOWN.EQ.ICURROW( KI ) ) THEN
                              IROW1 = IROW1 + 1
                           END IF
                           IF( MYROW.EQ.ICURROW( KI ) ) THEN
                              IROW2 = IROW2 + 1
                           END IF
                        END IF
                        CALL INFOG1L( ITMP1, HBL, NPCOL, MYCOL, 
     $                                JAFIRST, ICOL1, ICOL2 )
                        ICOL2 = LOCALI2
                        IF( ( MOD( K-1, HBL ).LT.HBL-2 ) .OR.
     $                      ( NPROW.EQ.1 ) ) THEN
                           CALL ZLAREF( 'Row', A, LDA, WANTZ, Z, LDZ,
     $                                  .FALSE., IROW1, IROW1, ISTART,
     $                                  ISTOP, ICOL1, ICOL2, LILOZ,
     $                                  LIHIZ, WORK( VECSIDX+1 ), V2,
     $                                  V3, T1, T2, T3 )
                        END IF
                        IF( ( MOD( K-1, HBL ).EQ.HBL-2 ) .AND.
     $                      ( NPROW.GT.1 ) ) THEN
                           IF( IROW1.NE.IROW2 ) THEN
                              CALL ZGESD2D( CONTXT, 2, ICOL2-ICOL1+1,
     $                                      A( ( ICOL1-1 )*LDA+IROW1 ),
     $                                      LDA, DOWN, MYCOL )
                              IF( SKIP .AND. ( ISTART.EQ.ISTOP ) ) THEN
                                 CALL ZGERV2D( CONTXT, 2, ICOL2-ICOL1+1,
     $                                         A( ( ICOL1-1 )*LDA+
     $                                         IROW1 ), LDA, DOWN,
     $                                         MYCOL )
                              END IF
                           ELSE IF( SKIP ) THEN
                              CALL ZGERV2D( CONTXT, 2, ICOL2-ICOL1+1,
     $                                      WORK( IRBUF+1 ), 2, UP,
     $                                      MYCOL )
                              DO 290 J = ICOL1, ICOL2
                                 SUM = DCONJG( T1 )*WORK( IRBUF+2*( 
     $                                 J-ICOL1 )+1 ) + DCONJG( T2 )*
     $                                 WORK( IRBUF+2*( J-ICOL1 )+2 )
     $                                 + DCONJG( T3 )*A( ( J-1 )*LDA+
     $                                 IROW1 )
                                 WORK( IRBUF+2*( J-ICOL1 )+1 )
     $                              = WORK( IRBUF+2*( J-ICOL1 )+1 ) -
     $                              SUM
                                 WORK( IRBUF+2*( J-ICOL1 )+2 )
     $                              = WORK( IRBUF+2*( J-ICOL1 )+2 ) -
     $                              SUM*V2
                                 A( ( J-1 )*LDA+IROW1 ) = A( ( J-1 )*
     $                              LDA+IROW1 ) - SUM*V3
  290                         CONTINUE
                              IF( ISTART.EQ.ISTOP ) THEN
                                 CALL ZGESD2D( CONTXT, 2, ICOL2-ICOL1+1,
     $                                         WORK( IRBUF+1 ), 2, UP,
     $                                         MYCOL )
                              END IF
                           END IF
                        END IF
                        IF( ( MOD( K-1, HBL ).EQ.HBL-1 ) .AND.
     $                      ( NPROW.GT.1 ) ) THEN
                           IF( IROW1.EQ.IROW2 ) THEN
                              IF( ISTART.EQ.ISTOP ) THEN
                                 CALL ZGESD2D( CONTXT, 2, ICOL2-ICOL1+1,
     $                                         A( ( ICOL1-1 )*LDA+IROW1-
     $                                         1 ), LDA, DOWN, MYCOL )
                              END IF
                              IF( SKIP ) THEN
                                 CALL ZGERV2D( CONTXT, 2, ICOL2-ICOL1+1,
     $                                         A( ( ICOL1-1 )*LDA+IROW1-
     $                                         1 ), LDA, DOWN, MYCOL )
                              END IF
                           ELSE IF( SKIP ) THEN
                              IF( ISTART.EQ.ISTOP ) THEN
                                 CALL ZGERV2D( CONTXT, 2, ICOL2-ICOL1+1,
     $                                         WORK( IRBUF+1 ), 2, UP,
     $                                         MYCOL )
                              END IF
                              DO 300 J = ICOL1, ICOL2
                                 SUM = DCONJG( T1 )*WORK( IRBUF+2*( 
     $                                 J-ICOL1 )+2 ) + DCONJG( T2 )*
     $                                 A( ( J-1 )*LDA+IROW1 ) +
     $                                 DCONJG( T3 )*A( ( J-1 )*LDA+
     $                                 IROW1+1 )
                                 WORK( IRBUF+2*( J-ICOL1 )+2 )
     $                              = WORK( IRBUF+2*( J-ICOL1 )+2 ) -
     $                              SUM
                                 A( ( J-1 )*LDA+IROW1 ) = A( ( J-1 )*
     $                              LDA+IROW1 ) - SUM*V2
                                 A( ( J-1 )*LDA+IROW1+1 ) = A( ( J-1 )*
     $                              LDA+IROW1+1 ) - SUM*V3
  300                         CONTINUE
                              CALL ZGESD2D( CONTXT, 2, ICOL2-ICOL1+1,
     $                                      WORK( IRBUF+1 ), 2, UP,
     $                                      MYCOL )
*
                           END IF
                        END IF
                     END IF
  310             CONTINUE
               END IF
  320       CONTINUE
*
            IF( SKIP )
     $         GO TO 390
*
            DO 360 KI = 1, IBULGE
               IF( KROW( KI ).GT.KP2ROW( KI ) )
     $            GO TO 360
               IF( ( MYROW.NE.ICURROW( KI ) ) .AND.
     $             ( DOWN.NE.ICURROW( KI ) ) )GO TO 360
               ISTART = MAX( K1( KI ), M )
               ISTOP = MIN( K2( KI ), I-1 )
               IF( ( ISTART.EQ.ISTOP ) .OR.
     $             ( MOD( ISTART-1, HBL ).GE.HBL-2 ) .OR.
     $             ( ICURROW( KI ).NE.MYROW ) ) THEN
                  DO 350 K = ISTART, ISTOP
                     V2 = WORK( VECSIDX+( K-1 )*3+1 )
                     V3 = WORK( VECSIDX+( K-1 )*3+2 )
                     T1 = WORK( VECSIDX+( K-1 )*3+3 )
                     NR = MIN( 3, I-K+1 )
                     T2 = T1*V2
                     IF( ( NR.EQ.3 ) .AND. ( KROW( KI ).LE.
     $                   KP2ROW( KI ) ) ) THEN
                        T3 = T1*V3
                        IF( ( K.LT.ISTOP ) .AND.
     $                      ( MOD( K-1, HBL ).LT.HBL-2 ) ) THEN
                           ITMP1 = MIN( K2( KI )+1, I-1 ) + 1
                        ELSE
                           IF( MOD( K-1, HBL ).LT.HBL-2 ) THEN
                              ITMP1 = MIN( K2( KI )+1, I-1 ) + 1
                           END IF
                           IF( MOD( K-1, HBL ).EQ.HBL-2 ) THEN
                              ITMP1 = MIN( K+4, I2 ) + 1
                           END IF
                           IF( MOD( K-1, HBL ).EQ.HBL-1 ) THEN
                              ITMP1 = MIN( K+3, I2 ) + 1
                           END IF
                        END IF
*
*                    Find local coor of rows K through K+2
*
                        IROW1 = KROW( KI )
                        IROW2 = KP2ROW( KI )
                        IF( ( K.GT.ISTART ) .AND.
     $                      ( MOD( K-1, HBL ).GE.HBL-2 ) ) THEN
                           IF( DOWN.EQ.ICURROW( KI ) ) THEN
                              IROW1 = IROW1 + 1
                           END IF
                           IF( MYROW.EQ.ICURROW( KI ) ) THEN
                              IROW2 = IROW2 + 1
                           END IF
                        END IF
                        CALL INFOG1L( ITMP1, HBL, NPCOL, MYCOL, 
     $                                JAFIRST, ICOL1, ICOL2 )
                        ICOL2 = LOCALI2
                        IF( ( MOD( K-1, HBL ).EQ.HBL-2 ) .AND.
     $                      ( NPROW.GT.1 ) ) THEN
                           IF( IROW1.EQ.IROW2 ) THEN
                              CALL ZGERV2D( CONTXT, 2, ICOL2-ICOL1+1,
     $                                      WORK( IRBUF+1 ), 2, UP,
     $                                      MYCOL )
                              DO 330 J = ICOL1, ICOL2
                                 SUM = DCONJG( T1 )*WORK( IRBUF+2*( 
     $                                 J-ICOL1 )+1 ) + DCONJG( T2 )*
     $                                 WORK( IRBUF+2*( J-ICOL1 )+2 )
     $                                 + DCONJG( T3 )*A( ( J-1 )*LDA+
     $                                 IROW1 )
                                 WORK( IRBUF+2*( J-ICOL1 )+1 )
     $                              = WORK( IRBUF+2*( J-ICOL1 )+1 ) -
     $                              SUM
                                 WORK( IRBUF+2*( J-ICOL1 )+2 )
     $                              = WORK( IRBUF+2*( J-ICOL1 )+2 ) -
     $                              SUM*V2
                                 A( ( J-1 )*LDA+IROW1 ) = A( ( J-1 )*
     $                              LDA+IROW1 ) - SUM*V3
  330                         CONTINUE
                              IF( ISTART.EQ.ISTOP ) THEN
                                 CALL ZGESD2D( CONTXT, 2, ICOL2-ICOL1+1,
     $                                         WORK( IRBUF+1 ), 2, UP,
     $                                         MYCOL )
                              END IF
                           END IF
                        END IF
                        IF( ( MOD( K-1, HBL ).EQ.HBL-1 ) .AND.
     $                      ( NPROW.GT.1 ) ) THEN
                           IF( IROW1.NE.IROW2 ) THEN
                              IF( ISTART.EQ.ISTOP ) THEN
                                 CALL ZGERV2D( CONTXT, 2, ICOL2-ICOL1+1,
     $                                         WORK( IRBUF+1 ), 2, UP,
     $                                         MYCOL )
                              END IF
                              DO 340 J = ICOL1, ICOL2
                                 SUM = DCONJG( T1 )*WORK( IRBUF+2*( 
     $                                 J-ICOL1 )+2 ) + DCONJG( T2 )*
     $                                 A( ( J-1 )*LDA+IROW1 ) +
     $                                 DCONJG( T3 )*A( ( J-1 )*LDA+
     $                                 IROW1+1 )
                                 WORK( IRBUF+2*( J-ICOL1 )+2 )
     $                              = WORK( IRBUF+2*( J-ICOL1 )+2 ) -
     $                              SUM
                                 A( ( J-1 )*LDA+IROW1 ) = A( ( J-1 )*
     $                              LDA+IROW1 ) - SUM*V2
                                 A( ( J-1 )*LDA+IROW1+1 ) = A( ( J-1 )*
     $                              LDA+IROW1+1 ) - SUM*V3
  340                         CONTINUE
                              CALL ZGESD2D( CONTXT, 2, ICOL2-ICOL1+1,
     $                                      WORK( IRBUF+1 ), 2, UP,
     $                                      MYCOL )
                           END IF
                        END IF
                     END IF
  350             CONTINUE
               END IF
  360       CONTINUE
*
            DO 380 KI = 1, IBULGE
               IF( KROW( KI ).GT.KP2ROW( KI ) )
     $            GO TO 380
               IF( ( MYROW.NE.ICURROW( KI ) ) .AND.
     $             ( DOWN.NE.ICURROW( KI ) ) )GO TO 380
               ISTART = MAX( K1( KI ), M )
               ISTOP = MIN( K2( KI ), I-1 )
               IF( ( ISTART.EQ.ISTOP ) .OR.
     $             ( MOD( ISTART-1, HBL ).GE.HBL-2 ) .OR.
     $             ( ICURROW( KI ).NE.MYROW ) ) THEN
                  DO 370 K = ISTART, ISTOP
                     V2 = WORK( VECSIDX+( K-1 )*3+1 )
                     V3 = WORK( VECSIDX+( K-1 )*3+2 )
                     T1 = WORK( VECSIDX+( K-1 )*3+3 )
                     NR = MIN( 3, I-K+1 )
                     T2 = T1*V2
                     IF( ( NR.EQ.3 ) .AND. ( KROW( KI ).LE.
     $                   KP2ROW( KI ) ) ) THEN
                        T3 = T1*V3
                        IF( ( K.LT.ISTOP ) .AND.
     $                      ( MOD( K-1, HBL ).LT.HBL-2 ) ) THEN
                           ITMP1 = MIN( K2( KI )+1, I-1 ) + 1
                        ELSE
                           IF( MOD( K-1, HBL ).LT.HBL-2 ) THEN
                              ITMP1 = MIN( K2( KI )+1, I-1 ) + 1
                           END IF
                           IF( MOD( K-1, HBL ).EQ.HBL-2 ) THEN
                              ITMP1 = MIN( K+4, I2 ) + 1
                           END IF
                           IF( MOD( K-1, HBL ).EQ.HBL-1 ) THEN
                              ITMP1 = MIN( K+3, I2 ) + 1
                           END IF
                        END IF
*
*                    Find local coor of rows K through K+2
*
                        IROW1 = KROW( KI )
                        IROW2 = KP2ROW( KI )
                        IF( ( K.GT.ISTART ) .AND.
     $                      ( MOD( K-1, HBL ).GE.HBL-2 ) ) THEN
                           IF( DOWN.EQ.ICURROW( KI ) ) THEN
                              IROW1 = IROW1 + 1
                           END IF
                           IF( MYROW.EQ.ICURROW( KI ) ) THEN
                              IROW2 = IROW2 + 1
                           END IF
                        END IF
                        CALL INFOG1L( ITMP1, HBL, NPCOL, MYCOL,
     $                                JAFIRST, ICOL1, ICOL2 )
                        ICOL2 = LOCALI2
                        IF( ( MOD( K-1, HBL ).EQ.HBL-2 ) .AND.
     $                      ( NPROW.GT.1 ) ) THEN
                           IF( IROW1.NE.IROW2 ) THEN
                              IF( ISTART.EQ.ISTOP ) THEN
                                 CALL ZGERV2D( CONTXT, 2, ICOL2-ICOL1+1,
     $                                         A( ( ICOL1-1 )*LDA+
     $                                         IROW1 ), LDA, DOWN,
     $                                         MYCOL )
                              END IF
                           END IF
                        END IF
                        IF( ( MOD( K-1, HBL ).EQ.HBL-1 ) .AND.
     $                      ( NPROW.GT.1 ) ) THEN
                           IF( IROW1.EQ.IROW2 ) THEN
                              CALL ZGERV2D( CONTXT, 2, ICOL2-ICOL1+1,
     $                                      A( ( ICOL1-1 )*LDA+IROW1-
     $                                      1 ), LDA, DOWN, MYCOL )
                           END IF
                        END IF
                     END IF
  370             CONTINUE
               END IF
  380       CONTINUE
*
  390       CONTINUE
*
*           Now start major set of block COL reflections
*
            DO 400 KI = 1, IBULGE
               IF( ( MYCOL.NE.ICURCOL( KI ) ) .AND.
     $             ( RIGHT.NE.ICURCOL( KI ) ) )GO TO 400
               ISTART = MAX( K1( KI ), M )
               ISTOP = MIN( K2( KI ), I-1 )
*
               IF( ( ( MOD( ISTART-1, HBL ).LT.HBL-2 ) .OR. ( NPCOL.EQ.
     $             1 ) ) .AND. ( ICURCOL( KI ).EQ.MYCOL ) .AND.
     $             ( I-ISTOP+1.GE.3 ) ) THEN
                  K = ISTART
                  IF( ( K.LT.ISTOP ) .AND. ( MOD( K-1,
     $                HBL ).LT.HBL-2 ) ) THEN
                     ITMP1 = MIN( ISTART+1, I ) - 1
                  ELSE
                     IF( MOD( K-1, HBL ).LT.HBL-2 ) THEN
                        ITMP1 = MIN( K+3, I )
                     END IF
                     IF( MOD( K-1, HBL ).EQ.HBL-2 ) THEN
                        ITMP1 = MAX( I1, K-1 ) - 1
                     END IF
                     IF( MOD( K-1, HBL ).EQ.HBL-1 ) THEN
                        ITMP1 = MAX( I1, K-2 ) - 1
                     END IF
                  END IF
*
                  ICOL1 = KCOL( KI )
                  CALL INFOG1L( I1, HBL, NPROW, MYROW, IAFIRST, 
     $                          IROW1, IROW2 )
                  IROW2 = NUMROC( ITMP1, HBL, MYROW, IAFIRST, NPROW )
                  IF( IROW1.LE.IROW2 ) THEN
                     ITMP2 = IROW2
                  ELSE
                     ITMP2 = -1
                  END IF
                  CALL ZLAREF( 'Col', A, LDA, WANTZ, Z, LDZ, .TRUE.,
     $                         ICOL1, ICOL1, ISTART, ISTOP, IROW1,
     $                         IROW2, LILOZ, LIHIZ, WORK( VECSIDX+1 ),
     $                         V2, V3, T1, T2, T3 )
                  K = ISTOP
                  IF( MOD( K-1, HBL ).LT.HBL-2 ) THEN
*
*                 Do from ITMP1+1 to MIN(K+3,I)
*
                     IF( MOD( K-1, HBL ).LT.HBL-3 ) THEN
                        IROW1 = ITMP2 + 1
                        IF( MOD( ( ITMP1 / HBL ), NPROW ).EQ.MYROW )
     $                       THEN
                           IF( ITMP2.GT.0 ) THEN
                              IROW2 = ITMP2 + MIN( K+3, I ) - ITMP1
                           ELSE
                              IROW2 = IROW1 - 1
                           END IF
                        ELSE
                           IROW2 = IROW1 - 1
                        END IF
                     ELSE
                        CALL INFOG1L( ITMP1+1, HBL, NPROW, MYROW, 
     $                                IAFIRST, IROW1, IROW2 )
                        IROW2 = NUMROC( MIN( K+3, I ), HBL, MYROW, 
     $                                IAFIRST, NPROW )
                     END IF
                     V2 = WORK( VECSIDX+( K-1 )*3+1 )
                     V3 = WORK( VECSIDX+( K-1 )*3+2 )
                     T1 = WORK( VECSIDX+( K-1 )*3+3 )
                     T2 = T1*V2
                     T3 = T1*V3
                     ICOL1 = KCOL( KI ) + ISTOP - ISTART
                     CALL ZLAREF( 'Col', A, LDA, .FALSE., Z, LDZ,
     $                            .FALSE., ICOL1, ICOL1, ISTART, ISTOP,
     $                            IROW1, IROW2, LILOZ, LIHIZ,
     $                            WORK( VECSIDX+1 ), V2, V3, T1, T2,
     $                            T3 )
                  END IF
               END IF
  400       CONTINUE
*
            DO 460 KI = 1, IBULGE
               IF( KCOL( KI ).GT.KP2COL( KI ) )
     $            GO TO 460
               IF( ( MYCOL.NE.ICURCOL( KI ) ) .AND.
     $             ( RIGHT.NE.ICURCOL( KI ) ) )GO TO 460
               ISTART = MAX( K1( KI ), M )
               ISTOP = MIN( K2( KI ), I-1 )
               IF( MOD( ISTART-1, HBL ).GE.HBL-2 ) THEN
*
*              INFO is found in a buffer
*
                  ISPEC = 1
               ELSE
*
*              All INFO is local
*
                  ISPEC = 0
               END IF
               DO 450 K = ISTART, ISTOP
*
                  V2 = WORK( VECSIDX+( K-1 )*3+1 )
                  V3 = WORK( VECSIDX+( K-1 )*3+2 )
                  T1 = WORK( VECSIDX+( K-1 )*3+3 )
                  NR = MIN( 3, I-K+1 )
                  T2 = T1*V2
                  IF( ( NR.EQ.3 ) .AND. ( KCOL( KI ).LE.KP2COL( KI ) ) )
     $                 THEN
                     T3 = T1*V3
*
                     IF( ( K.LT.ISTOP ) .AND.
     $                   ( MOD( K-1, HBL ).LT.HBL-2 ) ) THEN
                        ITMP1 = MIN( ISTART+1, I ) - 1
                     ELSE
                        IF( MOD( K-1, HBL ).LT.HBL-2 ) THEN
                           ITMP1 = MIN( K+3, I )
                        END IF
                        IF( MOD( K-1, HBL ).EQ.HBL-2 ) THEN
                           ITMP1 = MAX( I1, K-1 ) - 1
                        END IF
                        IF( MOD( K-1, HBL ).EQ.HBL-1 ) THEN
                           ITMP1 = MAX( I1, K-2 ) - 1
                        END IF
                     END IF
                     IF( MOD( K-1, HBL ).LT.HBL-2 ) THEN
                        ICOL1 = KCOL( KI ) + K - ISTART
                        ICOL2 = KP2COL( KI ) + K - ISTART
                     ELSE
                        ICOL1 = KCOL( KI )
                        ICOL2 = KP2COL( KI )
                        IF( K.GT.ISTART ) THEN
                           IF( RIGHT.EQ.ICURCOL( KI ) ) THEN
                              ICOL1 = ICOL1 + 1
                           END IF
                           IF( MYCOL.EQ.ICURCOL( KI ) ) THEN
                              ICOL2 = ICOL2 + 1
                           END IF
                        END IF
                     END IF
                     CALL INFOG1L( I1, HBL, NPROW, MYROW, IAFIRST, 
     $                             IROW1, IROW2 )
                     IROW2 = NUMROC( ITMP1, HBL, MYROW, IAFIRST, 
     $                               NPROW )
                     IF( ( MOD( K-1, HBL ).EQ.HBL-2 ) .AND.
     $                   ( NPCOL.GT.1 ) ) THEN
                        IF( ICOL1.NE.ICOL2 ) THEN
                           CALL ZGESD2D( CONTXT, IROW2-IROW1+1, 2,
     $                                   A( ( ICOL1-1 )*LDA+IROW1 ),
     $                                   LDA, MYROW, RIGHT )
                           IF( ( ISTART.EQ.ISTOP ) .AND. SKIP ) THEN
                              CALL ZGERV2D( CONTXT, IROW2-IROW1+1, 2,
     $                                      A( ( ICOL1-1 )*LDA+IROW1 ),
     $                                      LDA, MYROW, RIGHT )
                           END IF
                        ELSE IF( SKIP ) THEN
                           CALL ZGERV2D( CONTXT, IROW2-IROW1+1, 2,
     $                                   WORK( ICBUF+1 ), IROW2-IROW1+1,
     $                                   MYROW, LEFT )
                           II = ICBUF - IROW1 + 1
                           JJ = ICBUF + IROW2 - 2*IROW1 + 2
                           DO 410 J = IROW1, IROW2
                              SUM = T1*WORK( II+J ) + T2*WORK( JJ+J ) +
     $                              T3*A( ( ICOL1-1 )*LDA+J )
                              WORK( II+J ) = WORK( II+J ) - SUM
                              WORK( JJ+J ) = WORK( JJ+J ) - SUM*
     $                                      DCONJG( V2 )
                              A( ( ICOL1-1 )*LDA+J ) = A( ( ICOL1-1 )*
     $                                      LDA+J ) - SUM*DCONJG( V3 )
  410                      CONTINUE
                           IF( ISTART.EQ.ISTOP ) THEN
                              CALL ZGESD2D( CONTXT, IROW2-IROW1+1, 2,
     $                                      WORK( ICBUF+1 ),
     $                                      IROW2-IROW1+1, MYROW, LEFT )
                           END IF
                        END IF
                     END IF
                     IF( ( MOD( K-1, HBL ).EQ.HBL-1 ) .AND.
     $                   ( NPCOL.GT.1 ) ) THEN
                        IF( ICOL1.EQ.ICOL2 ) THEN
                           IF( ISTART.EQ.ISTOP ) THEN
                              CALL ZGESD2D( CONTXT, IROW2-IROW1+1, 2,
     $                                      A( ( ICOL1-2 )*LDA+IROW1 ),
     $                                      LDA, MYROW, RIGHT )
                           END IF
                           IF( SKIP ) THEN
                              CALL ZGERV2D( CONTXT, IROW2-IROW1+1, 2,
     $                                      A( ( ICOL1-2 )*LDA+IROW1 ),
     $                                      LDA, MYROW, RIGHT )
                           END IF
                        ELSE IF( SKIP ) THEN
                           IF( ISTART.EQ.ISTOP ) THEN
                              CALL ZGERV2D( CONTXT, IROW2-IROW1+1, 2,
     $                                      WORK( ICBUF+1 ),
     $                                      IROW2-IROW1+1, MYROW, LEFT )
                           END IF
                           II = ICBUF + IROW2 - 2*IROW1 + 2
                           DO 420 J = IROW1, IROW2
                              SUM = T1*WORK( J+II ) +
     $                              T2*A( ( ICOL1-1 )*LDA+J ) +
     $                              T3*A( ICOL1*LDA+J )
                              WORK( J+II ) = WORK( J+II ) - SUM
                              A( ( ICOL1-1 )*LDA+J ) = A( ( ICOL1-1 )*
     $                           LDA+J ) - SUM*DCONJG( V2 )
                              A( ICOL1*LDA+J ) = A( ICOL1*LDA+J ) -
     $                                           SUM*DCONJG( V3 )
  420                      CONTINUE
                           CALL ZGESD2D( CONTXT, IROW2-IROW1+1, 2,
     $                                   WORK( ICBUF+1 ), IROW2-IROW1+1,
     $                                   MYROW, LEFT )
                        END IF
                     END IF
*
*                    If we want Z and we haven't already done any Z
*
                     IF( ( WANTZ ) .AND. ( MOD( K-1,
     $                   HBL ).GE.HBL-2 ) .AND. ( NPCOL.GT.1 ) ) THEN
*
*                       Accumulate transformations in the matrix Z
*
                        IROW1 = LILOZ
                        IROW2 = LIHIZ
                        IF( MOD( K-1, HBL ).EQ.HBL-2 ) THEN
                           IF( ICOL1.NE.ICOL2 ) THEN
                              CALL ZGESD2D( CONTXT, IROW2-IROW1+1, 2,
     $                                      Z( ( ICOL1-1 )*LDZ+IROW1 ),
     $                                      LDZ, MYROW, RIGHT )
                              IF( ( ISTART.EQ.ISTOP ) .AND. SKIP ) THEN
                                 CALL ZGERV2D( CONTXT, IROW2-IROW1+1, 2,
     $                                         Z( ( ICOL1-1 )*LDZ+
     $                                         IROW1 ), LDZ, MYROW,
     $                                         RIGHT )
                              END IF
                           ELSE IF( SKIP ) THEN
                              CALL ZGERV2D( CONTXT, IROW2-IROW1+1, 2,
     $                                      WORK( IZBUF+1 ),
     $                                      IROW2-IROW1+1, MYROW, LEFT )
                              ICOL1 = ( ICOL1-1 )*LDZ
                              II = IZBUF - IROW1 + 1
                              JJ = IZBUF + IROW2 - 2*IROW1 + 2
                              DO 430 J = IROW1, IROW2
                                 SUM = T1*WORK( II+J ) + T2*WORK( JJ+J )
     $                                 + T3*Z( ICOL1+J )
                                 WORK( II+J ) = WORK( II+J ) - SUM
                                 WORK( JJ+J ) = WORK( JJ+J ) - SUM*
     $                                                     DCONJG( V2 )
                                 Z( ICOL1+J ) = Z( ICOL1+J ) - SUM*
     $                                                     DCONJG( V3 )
  430                         CONTINUE
                              IF( ISTART.EQ.ISTOP ) THEN
                                 CALL ZGESD2D( CONTXT, IROW2-IROW1+1, 2,
     $                                         WORK( IZBUF+1 ),
     $                                         IROW2-IROW1+1, MYROW,
     $                                         LEFT )
                              END IF
                           END IF
                        END IF
                        IF( MOD( K-1, HBL ).EQ.HBL-1 ) THEN
                           IF( ICOL1.EQ.ICOL2 ) THEN
                              IF( ISTART.EQ.ISTOP ) THEN
                                 CALL ZGESD2D( CONTXT, IROW2-IROW1+1, 2,
     $                                         Z( ( ICOL1-2 )*LDZ+
     $                                         IROW1 ), LDZ, MYROW,
     $                                         RIGHT )
                              END IF
                              IF( SKIP ) THEN
                                 CALL ZGERV2D( CONTXT, IROW2-IROW1+1, 2,
     $                                         Z( ( ICOL1-2 )*LDZ+
     $                                         IROW1 ), LDZ, MYROW,
     $                                         RIGHT )
                              END IF
                           ELSE IF( SKIP ) THEN
                              IF( ISTART.EQ.ISTOP ) THEN
                                 CALL ZGERV2D( CONTXT, IROW2-IROW1+1, 2,
     $                                         WORK( IZBUF+1 ),
     $                                         IROW2-IROW1+1, MYROW,
     $                                         LEFT )
                              END IF
                              ICOL1 = ( ICOL1-1 )*LDZ
                              II = IZBUF + IROW2 - 2*IROW1 + 2
                              DO 440 J = IROW1, IROW2
                                 SUM = T1*WORK( II+J ) + T2*Z( J+ICOL1 )
     $                                 + T3*Z( J+ICOL1+LDZ )
                                 WORK( II+J ) = WORK( II+J ) - SUM
                                 Z( J+ICOL1 ) = Z( J+ICOL1 ) - SUM*
     $                                               DCONJG( V2 )
                                 Z( J+ICOL1+LDZ ) = Z( J+ICOL1+LDZ ) -
     $                                              SUM*DCONJG( V3 )
  440                         CONTINUE
                              CALL ZGESD2D( CONTXT, IROW2-IROW1+1, 2,
     $                                      WORK( IZBUF+1 ),
     $                                      IROW2-IROW1+1, MYROW, LEFT )
                           END IF
                        END IF
                     END IF
                  END IF
  450          CONTINUE
  460       CONTINUE
*
            IF( SKIP )
     $         GO TO 550
*
            DO 520 KI = 1, IBULGE
               IF( KCOL( KI ).GT.KP2COL( KI ) )
     $            GO TO 520
               IF( ( MYCOL.NE.ICURCOL( KI ) ) .AND.
     $             ( RIGHT.NE.ICURCOL( KI ) ) )GO TO 520
               ISTART = MAX( K1( KI ), M )
               ISTOP = MIN( K2( KI ), I-1 )
               IF( MOD( ISTART-1, HBL ).GE.HBL-2 ) THEN
*
*                 INFO is found in a buffer
*
                  ISPEC = 1
               ELSE
*
*                 All INFO is local
*
                  ISPEC = 0
               END IF
               DO 510 K = ISTART, ISTOP
*
                  V2 = WORK( VECSIDX+( K-1 )*3+1 )
                  V3 = WORK( VECSIDX+( K-1 )*3+2 )
                  T1 = WORK( VECSIDX+( K-1 )*3+3 )
                  NR = MIN( 3, I-K+1 )
                  T2 = T1*V2
                  IF( ( NR.EQ.3 ) .AND. ( KCOL( KI ).LE.KP2COL( KI ) ) )
     $                 THEN
                     T3 = T1*V3
*
                     IF( ( K.LT.ISTOP ) .AND.
     $                   ( MOD( K-1, HBL ).LT.HBL-2 ) ) THEN
                        ITMP1 = MIN( ISTART+1, I ) - 1
                     ELSE
                        IF( MOD( K-1, HBL ).LT.HBL-2 ) THEN
                           ITMP1 = MIN( K+3, I )
                        END IF
                        IF( MOD( K-1, HBL ).EQ.HBL-2 ) THEN
                           ITMP1 = MAX( I1, K-1 ) - 1
                        END IF
                        IF( MOD( K-1, HBL ).EQ.HBL-1 ) THEN
                           ITMP1 = MAX( I1, K-2 ) - 1
                        END IF
                     END IF
                     IF( MOD( K-1, HBL ).LT.HBL-2 ) THEN
                        ICOL1 = KCOL( KI ) + K - ISTART
                        ICOL2 = KP2COL( KI ) + K - ISTART
                     ELSE
                        ICOL1 = KCOL( KI )
                        ICOL2 = KP2COL( KI )
                        IF( K.GT.ISTART ) THEN
                           IF( RIGHT.EQ.ICURCOL( KI ) ) THEN
                              ICOL1 = ICOL1 + 1
                           END IF
                           IF( MYCOL.EQ.ICURCOL( KI ) ) THEN
                              ICOL2 = ICOL2 + 1
                           END IF
                        END IF
                     END IF
                     CALL INFOG1L( I1, HBL, NPROW, MYROW, IAFIRST, 
     $                             IROW1, IROW2 )
                     IROW2 = NUMROC( ITMP1, HBL, MYROW, IAFIRST, 
     $                               NPROW )
                     IF( ( MOD( K-1, HBL ).EQ.HBL-2 ) .AND.
     $                   ( NPCOL.GT.1 ) ) THEN
                        IF( ICOL1.EQ.ICOL2 ) THEN
                           CALL ZGERV2D( CONTXT, IROW2-IROW1+1, 2,
     $                                   WORK( ICBUF+1 ), IROW2-IROW1+1,
     $                                   MYROW, LEFT )
                           II = ICBUF - IROW1 + 1
                           JJ = ICBUF + IROW2 - 2*IROW1 + 2
                           DO 470 J = IROW1, IROW2
                              SUM = T1*WORK( II+J ) + T2*WORK( JJ+J )
     $                              + T3*A( ( ICOL1-1 )*LDA+J )
                              WORK( II+J ) = WORK( II+J ) - SUM
                              WORK( JJ+J ) = WORK( JJ+J ) - SUM*
     $                                  DCONJG( V2 )
                              A( ( ICOL1-1 )*LDA+J ) = A( ( ICOL1-1 )*
     $                                  LDA+J ) - SUM*DCONJG( V3 )
  470                      CONTINUE
                           IF( ISTART.EQ.ISTOP ) THEN
                              CALL ZGESD2D( CONTXT, IROW2-IROW1+1, 2,
     $                                      WORK( ICBUF+1 ),
     $                                      IROW2-IROW1+1, MYROW, LEFT )
                           END IF
                        END IF
                     END IF
                     IF( ( MOD( K-1, HBL ).EQ.HBL-1 ) .AND.
     $                   ( NPCOL.GT.1 ) ) THEN
                        IF( ICOL1.NE.ICOL2 ) THEN
                           IF( ISTART.EQ.ISTOP ) THEN
                              CALL ZGERV2D( CONTXT, IROW2-IROW1+1, 2,
     $                                      WORK( ICBUF+1 ),
     $                                      IROW2-IROW1+1, MYROW, LEFT )
                           END IF
                           II = ICBUF + IROW2 - 2*IROW1 + 2
                           DO 480 J = IROW1, IROW2
                              SUM = T1*WORK( J+II ) +
     $                              T2*A( ( ICOL1-1 )*LDA+J ) +
     $                              T3*A( ICOL1*LDA+J )
                              WORK( J+II ) = WORK( J+II ) - SUM
                              A( ( ICOL1-1 )*LDA+J ) = A( ( ICOL1-1 )*
     $                           LDA+J ) - SUM*DCONJG( V2 )
                              A( ICOL1*LDA+J ) = A( ICOL1*LDA+J ) -
     $                                           SUM*DCONJG( V3 )
  480                      CONTINUE
                           CALL ZGESD2D( CONTXT, IROW2-IROW1+1, 2,
     $                                   WORK( ICBUF+1 ), IROW2-IROW1+1,
     $                                   MYROW, LEFT )
                        END IF
                     END IF
*
*
*                 If we want Z and we haven't already done any Z
                     IF( ( WANTZ ) .AND. ( MOD( K-1,
     $                   HBL ).GE.HBL-2 ) .AND. ( NPCOL.GT.1 ) ) THEN
*
*                    Accumulate transformations in the matrix Z
*
                        IROW1 = LILOZ
                        IROW2 = LIHIZ
                        IF( MOD( K-1, HBL ).EQ.HBL-2 ) THEN
                           IF( ICOL1.EQ.ICOL2 ) THEN
                              CALL ZGERV2D( CONTXT, IROW2-IROW1+1, 2,
     $                                      WORK( IZBUF+1 ),
     $                                      IROW2-IROW1+1, MYROW, LEFT )
                              ICOL1 = ( ICOL1-1 )*LDZ
                              II = IZBUF - IROW1 + 1
                              JJ = IZBUF + IROW2 - 2*IROW1 + 2
                              DO 490 J = IROW1, IROW2
                                 SUM = T1*WORK( II+J ) + T2*WORK( JJ+J )
     $                                 + T3*Z( ICOL1+J )
                                 WORK( II+J ) = WORK( II+J ) - SUM
                                 WORK( JJ+J ) = WORK( JJ+J ) - SUM*
     $                                                    DCONJG( V2 )
                                 Z( ICOL1+J ) = Z( ICOL1+J ) - SUM*
     $                                                    DCONJG( V3 )
  490                         CONTINUE
                              IF( ISTART.EQ.ISTOP ) THEN
                                 CALL ZGESD2D( CONTXT, IROW2-IROW1+1, 2,
     $                                         WORK( IZBUF+1 ),
     $                                         IROW2-IROW1+1, MYROW,
     $                                         LEFT )
                              END IF
                           END IF
                        END IF
                        IF( MOD( K-1, HBL ).EQ.HBL-1 ) THEN
                           IF( ICOL1.NE.ICOL2 ) THEN
                              IF( ISTART.EQ.ISTOP ) THEN
                                 CALL ZGERV2D( CONTXT, IROW2-IROW1+1, 2,
     $                                         WORK( IZBUF+1 ),
     $                                         IROW2-IROW1+1, MYROW,
     $                                         LEFT )
                              END IF
                              ICOL1 = ( ICOL1-1 )*LDZ
                              II = IZBUF + IROW2 - 2*IROW1 + 2
                              DO 500 J = IROW1, IROW2
                                 SUM = T1*WORK( II+J ) + T2*Z( J+ICOL1 )
     $                                 + T3*Z( J+ICOL1+LDZ )
                                 WORK( II+J ) = WORK( II+J ) - SUM
                                 Z( J+ICOL1 ) = Z( J+ICOL1 ) - SUM*
     $                                                  DCONJG( V2 )
                                 Z( J+ICOL1+LDZ ) = Z( J+ICOL1+LDZ ) -
     $                                              SUM*DCONJG( V3 )
  500                         CONTINUE
                              CALL ZGESD2D( CONTXT, IROW2-IROW1+1, 2,
     $                                      WORK( IZBUF+1 ),
     $                                      IROW2-IROW1+1, MYROW, LEFT )
                           END IF
                        END IF
                     END IF
                  END IF
  510          CONTINUE
  520       CONTINUE

            DO 540 KI = 1, IBULGE
               IF( KCOL( KI ).GT.KP2COL( KI ) )
     $            GO TO 540
               IF( ( MYCOL.NE.ICURCOL( KI ) ) .AND.
     $             ( RIGHT.NE.ICURCOL( KI ) ) )GO TO 540
               ISTART = MAX( K1( KI ), M )
               ISTOP = MIN( K2( KI ), I-1 )
               IF( MOD( ISTART-1, HBL ).GE.HBL-2 ) THEN
*
*              INFO is found in a buffer
*
                  ISPEC = 1
               ELSE
*
*              All INFO is local
*
                  ISPEC = 0
               END IF
               DO 530 K = ISTART, ISTOP
*
                  V2 = WORK( VECSIDX+( K-1 )*3+1 )
                  V3 = WORK( VECSIDX+( K-1 )*3+2 )
                  T1 = WORK( VECSIDX+( K-1 )*3+3 )
                  NR = MIN( 3, I-K+1 )
                  T2 = T1*V2
                  IF( ( NR.EQ.3 ) .AND. ( KCOL( KI ).LE.KP2COL( KI ) ) )
     $                 THEN
                     T3 = T1*V3
*
                     IF( ( K.LT.ISTOP ) .AND.
     $                   ( MOD( K-1, HBL ).LT.HBL-2 ) ) THEN
                        ITMP1 = MIN( ISTART+1, I ) - 1
                     ELSE
                        IF( MOD( K-1, HBL ).LT.HBL-2 ) THEN
                           ITMP1 = MIN( K+3, I )
                        END IF
                        IF( MOD( K-1, HBL ).EQ.HBL-2 ) THEN
                           ITMP1 = MAX( I1, K-1 ) - 1
                        END IF
                        IF( MOD( K-1, HBL ).EQ.HBL-1 ) THEN
                           ITMP1 = MAX( I1, K-2 ) - 1
                        END IF
                     END IF
                     IF( MOD( K-1, HBL ).LT.HBL-2 ) THEN
                        ICOL1 = KCOL( KI ) + K - ISTART
                        ICOL2 = KP2COL( KI ) + K - ISTART
                     ELSE
                        ICOL1 = KCOL( KI )
                        ICOL2 = KP2COL( KI )
                        IF( K.GT.ISTART ) THEN
                           IF( RIGHT.EQ.ICURCOL( KI ) ) THEN
                              ICOL1 = ICOL1 + 1
                           END IF
                           IF( MYCOL.EQ.ICURCOL( KI ) ) THEN
                              ICOL2 = ICOL2 + 1
                           END IF
                        END IF
                     END IF
                     CALL INFOG1L( I1, HBL, NPROW, MYROW, IAFIRST,
     $                             IROW1, IROW2 )
                     IROW2 = NUMROC( ITMP1, HBL, MYROW, IAFIRST, 
     $                               NPROW )
                     IF( ( MOD( K-1, HBL ).EQ.HBL-2 ) .AND.
     $                   ( NPCOL.GT.1 ) ) THEN
                        IF( ICOL1.NE.ICOL2 ) THEN
                           IF( ISTART.EQ.ISTOP ) THEN
                              CALL ZGERV2D( CONTXT, IROW2-IROW1+1, 2,
     $                                      A( ( ICOL1-1 )*LDA+IROW1 ),
     $                                      LDA, MYROW, RIGHT )
                           END IF
                        END IF
                     END IF
                     IF( ( MOD( K-1, HBL ).EQ.HBL-1 ) .AND.
     $                   ( NPCOL.GT.1 ) ) THEN
                        IF( ICOL1.EQ.ICOL2 ) THEN
                           CALL ZGERV2D( CONTXT, IROW2-IROW1+1, 2,
     $                                   A( ( ICOL1-2 )*LDA+IROW1 ),
     $                                   LDA, MYROW, RIGHT )
                        END IF
                     END IF
*
*                    If we want Z and we haven't already done any Z
*
                     IF( ( WANTZ ) .AND. ( MOD( K-1,
     $                   HBL ).GE.HBL-2 ) .AND. ( NPCOL.GT.1 ) ) THEN
*
*                       Accumulate transformations in the matrix Z
*
                        IROW1 = LILOZ
                        IROW2 = LIHIZ
                        IF( MOD( K-1, HBL ).EQ.HBL-2 ) THEN
                           IF( ICOL1.NE.ICOL2 ) THEN
                              IF( ISTART.EQ.ISTOP ) THEN
                                 CALL ZGERV2D( CONTXT, IROW2-IROW1+1, 2,
     $                                         Z( ( ICOL1-1 )*LDZ+
     $                                         IROW1 ), LDZ, MYROW,
     $                                         RIGHT )
                              END IF
                           END IF
                        END IF
                        IF( MOD( K-1, HBL ).EQ.HBL-1 ) THEN
                           IF( ICOL1.EQ.ICOL2 ) THEN
                              CALL ZGERV2D( CONTXT, IROW2-IROW1+1, 2,
     $                                      Z( ( ICOL1-2 )*LDZ+IROW1 ),
     $                                      LDZ, MYROW, RIGHT )
                           END IF
                        END IF
                     END IF
                  END IF
  530          CONTINUE
  540       CONTINUE
*
*           Column work done
*
  550       CONTINUE
*
*           Now do NR=2 work
*
            DO 630 KI = 1, IBULGE
               ISTART = MAX( K1( KI ), M )
               ISTOP = MIN( K2( KI ), I-1 )
               IF( MOD( ISTART-1, HBL ).GE.HBL-2 ) THEN
*
*                 INFO is found in a buffer
*
                  ISPEC = 1
               ELSE
*
*                 All INFO is local
*
                  ISPEC = 0
               END IF
*
               DO 620 K = ISTART, ISTOP
*
                  V2 = WORK( VECSIDX+( K-1 )*3+1 )
                  V3 = WORK( VECSIDX+( K-1 )*3+2 )
                  T1 = WORK( VECSIDX+( K-1 )*3+3 )
                  NR = MIN( 3, I-K+1 )
                  T2 = T1*V2
                  IF( NR.EQ.2 ) THEN
*
*              Apply G from the left to transform the rows of the matrix
*              in columns K to I2.
*
                     CALL INFOG1L( K, HBL, NPCOL, MYCOL, JAFIRST, 
     $                             LILOH, LIHIH )
                     LIHIH = LOCALI2
                     CALL INFOG1L( 1, HBL, NPROW, MYROW, IAFIRST, ITMP2,
     $                             ITMP1 )
                     ITMP1 = NUMROC( K+1, HBL, MYROW, IAFIRST, NPROW )
                     IF( ICURROW( KI ).EQ.MYROW ) THEN
                        IF( ( ISPEC.EQ.0 ) .OR. ( NPROW.EQ.1 ) .OR.
     $                      ( MOD( K-1, HBL ).EQ.HBL-2 ) ) THEN
                           ITMP1 = ITMP1 - 1
                           DO 560 J = ( LILOH-1 )*LDA,
     $                             ( LIHIH-1 )*LDA, LDA
                              SUM = DCONJG( T1 )*A( ITMP1+J ) +
     $                              DCONJG( T2 )*A( ITMP1+1+J )
                              A( ITMP1+J ) = A( ITMP1+J ) - SUM
                              A( ITMP1+1+J ) = A( ITMP1+1+J ) - SUM*V2
  560                      CONTINUE
                        ELSE
                           IF( MOD( K-1, HBL ).EQ.HBL-1 ) THEN
                              CALL ZGERV2D( CONTXT, 1, LIHIH-LILOH+1,
     $                                      WORK( IRBUF+1 ), 1, UP,
     $                                      MYCOL )
                              DO 570 J = LILOH, LIHIH
                                 SUM = DCONJG( T1 )*WORK( 
     $                                 IRBUF+J-LILOH+1 ) + DCONJG( T2 )
     $                                 *A( ( J-1 )*LDA+ITMP1 )
                                 WORK( IRBUF+J-LILOH+1 ) = WORK( IRBUF+
     $                              J-LILOH+1 ) - SUM
                                 A( ( J-1 )*LDA+ITMP1 ) = A( ( J-1 )*
     $                              LDA+ITMP1 ) - SUM*V2
  570                         CONTINUE
                              CALL ZGESD2D( CONTXT, 1, LIHIH-LILOH+1,
     $                                      WORK( IRBUF+1 ), 1, UP,
     $                                      MYCOL )
                           END IF
                        END IF
                     ELSE
                        IF( ( MOD( K-1, HBL ).EQ.HBL-1 ) .AND.
     $                      ( ICURROW( KI ).EQ.DOWN ) ) THEN
                           CALL ZGESD2D( CONTXT, 1, LIHIH-LILOH+1,
     $                                   A( ( LILOH-1 )*LDA+ITMP1 ),
     $                                   LDA, DOWN, MYCOL )
                           CALL ZGERV2D( CONTXT, 1, LIHIH-LILOH+1,
     $                                   A( ( LILOH-1 )*LDA+ITMP1 ),
     $                                   LDA, DOWN, MYCOL )
                        END IF
                     END IF
*
*              Apply G from the right to transform the columns of the
*              matrix in rows I1 to MIN(K+3,I).
*
                     CALL INFOG1L( I1, HBL, NPROW, MYROW, IAFIRST, 
     $                             LILOH, LIHIH )
                     LIHIH = NUMROC( I, HBL, MYROW, IAFIRST, NPROW )
*
                     IF( ICURCOL( KI ).EQ.MYCOL ) THEN
*                       LOCAL A(LILOZ:LIHIZ,KCOL:KCOL+2)
                        IF( ( ISPEC.EQ.0 ) .OR. ( NPCOL.EQ.1 ) .OR.
     $                      ( MOD( K-1, HBL ).EQ.HBL-2 ) ) THEN
                           CALL INFOG1L( K, HBL, NPCOL, MYCOL, JAFIRST,
     $                                   ITMP1, ITMP2 )
                           ITMP2 = NUMROC( K+1, HBL, MYCOL, JAFIRST, 
     $                                     NPCOL )
                           DO 580 J = LILOH, LIHIH
                              SUM = T1*A( ( ITMP1-1 )*LDA+J ) +
     $                              T2*A( ITMP1*LDA+J )
                              A( ( ITMP1-1 )*LDA+J ) = A( ( ITMP1-1 )*
     $                           LDA+J ) - SUM
                              A( ITMP1*LDA+J ) = A( ITMP1*LDA+J ) -
     $                                           SUM*DCONJG( V2 )
  580                      CONTINUE
                        ELSE
                           ITMP1 = KCOL( KI )
                           IF( MOD( K-1, HBL ).EQ.HBL-1 ) THEN
                              CALL ZGERV2D( CONTXT, LIHIH-LILOH+1, 1,
     $                                      WORK( ICBUF+1 ),
     $                                      LIHIH-LILOH+1, MYROW, LEFT )
                              DO 590 J = LILOH, LIHIH
                                 SUM = T1*WORK( ICBUF+J ) +
     $                                 T2*A( ( ITMP1-1 )*LDA+J )
                                 WORK( ICBUF+J ) = WORK( ICBUF+J ) -
     $                                             SUM
                                 A( ( ITMP1-1 )*LDA+J )
     $                              = A( ( ITMP1-1 )*LDA+J ) - 
     $                                 SUM*DCONJG( V2 )
  590                         CONTINUE
                              CALL ZGESD2D( CONTXT, LIHIH-LILOH+1, 1,
     $                                      WORK( ICBUF+1 ),
     $                                      LIHIH-LILOH+1, MYROW, LEFT )
                           END IF
                        END IF
                     ELSE
                        IF( ( MOD( K-1, HBL ).EQ.HBL-1 ) .AND.
     $                      ( ICURCOL( KI ).EQ.RIGHT ) ) THEN
                           ITMP1 = KCOL( KI )
                           CALL ZGESD2D( CONTXT, LIHIH-LILOH+1, 1,
     $                                   A( ( ITMP1-1 )*LDA+LILOH ),
     $                                   LDA, MYROW, RIGHT )
                           CALL INFOG1L( K, HBL, NPCOL, MYCOL, JAFIRST,
     $                                   ITMP1, ITMP2 )
                           ITMP2 = NUMROC( K+1, HBL, MYCOL, JAFIRST, 
     $                                     NPCOL )
                           CALL ZGERV2D( CONTXT, LIHIH-LILOH+1, 1,
     $                                   A( ( ITMP1-1 )*LDA+LILOH ),
     $                                   LDA, MYROW, RIGHT )
                        END IF
                     END IF
*
                     IF( WANTZ ) THEN
*
*                       Accumulate transformations in the matrix Z
*
                        IF( ICURCOL( KI ).EQ.MYCOL ) THEN
*                          LOCAL Z(LILOZ:LIHIZ,KCOL:KCOL+2)
                           IF( ( ISPEC.EQ.0 ) .OR. ( NPCOL.EQ.1 ) .OR.
     $                         ( MOD( K-1, HBL ).EQ.HBL-2 ) ) THEN
                              ITMP1 = KCOL( KI ) + K - ISTART
                              ITMP1 = ( ITMP1-1 )*LDZ
                              DO 600 J = LILOZ, LIHIZ
                                 SUM = T1*Z( J+ITMP1 ) +
     $                                 T2*Z( J+ITMP1+LDZ )
                                 Z( J+ITMP1 ) = Z( J+ITMP1 ) - SUM
                                 Z( J+ITMP1+LDZ ) = Z( J+ITMP1+LDZ ) -
     $                                              SUM*DCONJG( V2 )
  600                         CONTINUE
                           ELSE
                              ITMP1 = KCOL( KI )
*                             IF WE ACTUALLY OWN COLUMN K
                              IF( MOD( K-1, HBL ).EQ.HBL-1 ) THEN
                                 CALL ZGERV2D( CONTXT, LIHIZ-LILOZ+1, 1,
     $                                         WORK( IZBUF+1 ), LDZ,
     $                                         MYROW, LEFT )
                                 ITMP1 = ( ITMP1-1 )*LDZ
                                 DO 610 J = LILOZ, LIHIZ
                                    SUM = T1*WORK( IZBUF+J ) +
     $                                    T2*Z( J+ITMP1 )
                                    WORK( IZBUF+J ) = WORK( IZBUF+J ) -
     $                                 SUM
                                    Z( J+ITMP1 ) = Z( J+ITMP1 ) - 
     $                                            SUM*DCONJG( V2 )
  610                            CONTINUE
                                 CALL ZGESD2D( CONTXT, LIHIZ-LILOZ+1, 1,
     $                                         WORK( IZBUF+1 ), LDZ,
     $                                         MYROW, LEFT )
                              END IF
                           END IF
                        ELSE
*
*                          NO WORK BUT NEED TO UPDATE ANYWAY????
*
                           IF( ( MOD( K-1, HBL ).EQ.HBL-1 ) .AND.
     $                         ( ICURCOL( KI ).EQ.RIGHT ) ) THEN
                              ITMP1 = KCOL( KI )
                              ITMP1 = ( ITMP1-1 )*LDZ
                              CALL ZGESD2D( CONTXT, LIHIZ-LILOZ+1, 1,
     $                                      Z( LILOZ+ITMP1 ), LDZ,
     $                                      MYROW, RIGHT )
                              CALL ZGERV2D( CONTXT, LIHIZ-LILOZ+1, 1,
     $                                      Z( LILOZ+ITMP1 ), LDZ,
     $                                      MYROW, RIGHT )
                           END IF
                        END IF
                     END IF
                  END IF
  620          CONTINUE
*
*        Adjust local information for this bulge
*
               IF( NPROW.EQ.1 ) THEN
                  KROW( KI ) = KROW( KI ) + K2( KI ) - K1( KI ) + 1
                  KP2ROW( KI ) = KP2ROW( KI ) + K2( KI ) - K1( KI ) + 1
               END IF
               IF( ( MOD( K1( KI )-1, HBL ).LT.HBL-2 ) .AND.
     $             ( ICURROW( KI ).EQ.MYROW ) .AND. ( NPROW.GT.1 ) )
     $              THEN
                  KROW( KI ) = KROW( KI ) + K2( KI ) - K1( KI ) + 1
               END IF
               IF( ( MOD( K2( KI ), HBL ).LT.HBL-2 ) .AND.
     $             ( ICURROW( KI ).EQ.MYROW ) .AND. ( NPROW.GT.1 ) )
     $              THEN
                  KP2ROW( KI ) = KP2ROW( KI ) + K2( KI ) - K1( KI ) + 1
               END IF
               IF( ( MOD( K1( KI )-1, HBL ).GE.HBL-2 ) .AND.
     $             ( ( MYROW.EQ.ICURROW( KI ) ) .OR. ( DOWN.EQ.
     $             ICURROW( KI ) ) ) .AND. ( NPROW.GT.1 ) ) THEN
                  CALL INFOG1L( K2( KI )+1, HBL, NPROW, MYROW, IAFIRST,
     $                          KROW( KI ), ITMP2 )
               END IF
               IF( ( MOD( K2( KI ), HBL ).GE.HBL-2 ) .AND.
     $             ( ( MYROW.EQ.ICURROW( KI ) ) .OR. ( UP.EQ.
     $             ICURROW( KI ) ) ) .AND. ( NPROW.GT.1 ) ) THEN
                  KP2ROW( KI ) = NUMROC( K2( KI )+3, HBL, MYROW, 
     $                                   IAFIRST, NPROW )
               END IF
               IF( NPCOL.EQ.1 ) THEN
                  KCOL( KI ) = KCOL( KI ) + K2( KI ) - K1( KI ) + 1
                  KP2COL( KI ) = KP2COL( KI ) + K2( KI ) - K1( KI ) + 1
               END IF
               IF( ( MOD( K1( KI )-1, HBL ).LT.HBL-2 ) .AND.
     $             ( ICURCOL( KI ).EQ.MYCOL ) .AND. ( NPCOL.GT.1 ) )
     $              THEN
                  KCOL( KI ) = KCOL( KI ) + K2( KI ) - K1( KI ) + 1
               END IF
               IF( ( MOD( K2( KI ), HBL ).LT.HBL-2 ) .AND.
     $             ( ICURCOL( KI ).EQ.MYCOL ) .AND. ( NPCOL.GT.1 ) )
     $              THEN
                  KP2COL( KI ) = KP2COL( KI ) + K2( KI ) - K1( KI ) + 1
               END IF
               IF( ( MOD( K1( KI )-1, HBL ).GE.HBL-2 ) .AND.
     $             ( ( MYCOL.EQ.ICURCOL( KI ) ) .OR. ( RIGHT.EQ.
     $             ICURCOL( KI ) ) ) .AND. ( NPCOL.GT.1 ) ) THEN
                  CALL INFOG1L( K2( KI )+1, HBL, NPCOL, MYCOL, JAFIRST,
     $                          KCOL( KI ), ITMP2 )
               END IF
               IF( ( MOD( K2( KI ), HBL ).GE.HBL-2 ) .AND.
     $             ( ( MYCOL.EQ.ICURCOL( KI ) ) .OR. ( LEFT.EQ.
     $             ICURCOL( KI ) ) ) .AND. ( NPCOL.GT.1 ) ) THEN
                  KP2COL( KI ) = NUMROC( K2( KI )+3, HBL, MYCOL, 
     $                                   JAFIRST, NPCOL )
               END IF
               K1( KI ) = K2( KI ) + 1
               ISTOP = MIN( K1( KI )+ROTN-1, I-2 )
               ISTOP = MIN( ISTOP, K1( KI )+HBL-3-
     $                 MOD( K1( KI )-1, HBL ) )
               ISTOP = MIN( ISTOP, I2-2 )
               ISTOP = MAX( ISTOP, K1( KI ) )
               IF( ( MOD( K1( KI )-1, HBL ).EQ.HBL-2 ) .AND.
     $             ( ISTOP.LT.MIN( I-2, I2-2 ) ) ) THEN
                  ISTOP = ISTOP + 1
               END IF
               K2( KI ) = ISTOP
               IF( K1( KI ).LE.ISTOP ) THEN
                  IF( ( MOD( K1( KI )-1, HBL ).EQ.HBL-2 ) .AND.
     $                ( I-K1( KI ).GT.1 ) ) THEN
*
*                    Next step switches rows & cols
*
                     ICURROW( KI ) = MOD( ICURROW( KI )+1, NPROW )
                     ICURCOL( KI ) = MOD( ICURCOL( KI )+1, NPCOL )
                  END IF
               END IF
  630       CONTINUE
*
            IF( K2( IBULGE ).LE.I-1 )
     $         GO TO 40
         END IF
*
  640 CONTINUE
*
*     Failure to converge in remaining number of iterations
*
      INFO = I
      RETURN
*
  650 CONTINUE
*
      IF( L.EQ.I ) THEN
*
*        H(I,I-1) is negligible: one eigenvalue has converged.
*
         CALL INFOG2L( I, I, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW,
     $                 ICOL, ITMP1, ITMP2 )
         IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN
            W( I ) = A( ( ICOL-1 )*LDA+IROW )
         ELSE
            W( I ) = ZERO
         END IF
      ELSE IF( L.EQ.I-1 ) THEN
*
*        H(I-1,I-2) is negligible: a pair of eigenvalues have converged.
*
         CALL PZLACP3( 2, I-1, A, DESCA, S1, 2*IBLK, -1, -1, 0 )
         CALL ZLANV2( S1( 1, 1 ), S1( 1, 2 ), S1( 2, 1 ), S1( 2, 2 ),
     $                W( I-1 ), W( I ), CS, SN )
         CALL PZLACP3( 2, I-1, A, DESCA, S1, 2*IBLK, 0, 0, 1 )
*
         IF( NODE.NE.0 ) THEN
*           Erase the eigenvalues other eigenvalues
            W( I-1 ) = ZERO
            W( I ) = ZERO
         END IF
*
         IF( WANTT ) THEN
*
*           Apply the transformation to A.
*
            IF( I2.GT.I ) THEN
               CALL PZROT( I2-I, A, I-1, I+1, DESCA, N, A, I, I+1,
     $                    DESCA, N, CS, SN )
            END IF
            CALL PZROT( I-I1-1, A, I1, I-1, DESCA, 1, A, I1, I, DESCA,
     $                 1, CS, DCONJG( SN ) )
         END IF
         IF( WANTZ ) THEN
*
*           Apply the transformation to Z.
*
            CALL PZROT( NZ, Z, ILOZ, I-1, DESCZ, 1, Z, ILOZ, I, DESCZ,
     $                 1, CS, DCONJG( SN ) )
         END IF
*
      ELSE
*
*        Find the eigenvalues in H(L:I,L:I), L < I-1
*
         JBLK = I - L + 1
         IF( JBLK.LE.2*IBLK ) THEN
            CALL PZLACP3( I-L+1, L, A, DESCA, S1, 2*IBLK, 0, 0, 0 )
            CALL ZLAHQR( .FALSE., .FALSE., JBLK, 1, JBLK, S1, 2*IBLK,
     $                   W( L ), 1, JBLK, Z, LDZ, IERR )
            IF( NODE.NE.0 ) THEN
*
*              Erase the eigenvalues
*
               DO 660 K = L, I
                  W( K ) = ZERO
  660          CONTINUE
            END IF
         END IF
      END IF
*
*     Decrement number of remaining iterations, and return to start of
*     the main loop with new value of I.
*
      ITN = ITN - ITS
      I = L - 1
      GO TO 10
*
  670 CONTINUE
      CALL ZGSUM2D( CONTXT, 'All', ' ', N, 1, W, N, -1, -1 )
      RETURN
*
*     END OF PZLAHQR
*
      END
SHAR_EOF
fi # end of overwriting check
if test -f 'pzlasmsub.f'
then
	echo shar: will not over-write existing file "'pzlasmsub.f'"
else
cat << "SHAR_EOF" > 'pzlasmsub.f'
      SUBROUTINE PZLASMSUB( A, DESCA, I, L, K, SMLNUM, BUF, LWORK )
*
*  Mark R. Fahey
*  May 28, 1999
*
*     .. Scalar Arguments ..
      INTEGER            I, K, L, LWORK
      DOUBLE PRECISION   SMLNUM
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * )
      COMPLEX*16         A( * ), BUF( * )
*     ..
*
*  Purpose
*  =======
*
*  PZLASMSUB looks for a small subdiagonal element from the bottom
*     of the matrix that it can safely set to zero.
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclicly distributed array.
*  Such a global array has an associated description vector DESCA.
*  In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
*                                 DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) DESCA( M_ )    The number of rows in the global
*                                 array A.
*  N_A    (global) DESCA( N_ )    The number of columns in the global
*                                 array A.
*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
*                                 the rows of the array.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
*                                 the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Arguments
*  =========
*
*  A       (global input) COMPLEX*16 array, dimension (DESCA(LLD_),*)
*          On entry, the Hessenberg matrix whose tridiagonal part is
*          being scanned.
*          Unchanged on exit.
*
*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix A.
*
*  I       (global input) INTEGER
*          The global location of the bottom of the unreduced
*          submatrix of A.
*          Unchanged on exit.
*
*  L       (global input) INTEGER
*          The global location of the top of the unreduced submatrix
*          of A.
*          Unchanged on exit.
*
*  K       (global output) INTEGER
*          On exit, this yields the bottom portion of the unreduced
*          submatrix.  This will satisfy: L <= M  <= I-1.
*
*  SMLNUM  (global input) DOUBLE PRECISION
*          On entry, a "small number" for the given matrix.
*          Unchanged on exit.
*
*  BUF     (local output) COMPLEX*16 array of size LWORK.
*
*  LWORK   (global input) INTEGER
*          On exit, LWORK is the size of the work buffer.
*          This must be at least 2*Ceil( Ceil( (I-L)/HBL ) /
*                                        LCM(NPROW,NPCOL) )
*          Here LCM is least common multiple, and NPROWxNPCOL is the
*          logical grid size.
*
*   Notes:
*
*     This routine does a global maximum and must be called by all
*     processes.
*
*     This code is basically a parallelization of the following snip
*     of LAPACK code from ZLAHQR:
*
*        Look for a single small subdiagonal element.
*
*        DO 20 K = I, L + 1, -1
*           TST1 = CABS1( H( K-1, K-1 ) ) + CABS1( H( K, K ) )
*           IF( TST1.EQ.ZERO )
*    $         TST1 = ZLANHS( '1', I-L+1, H( L, L ), LDH, WORK )
*           IF( CABS1( H( K, K-1 ) ).LE.MAX( ULP*TST1, SMLNUM ) )
*    $         GO TO 30
*  20    CONTINUE
*  30    CONTINUE
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
     $                   LLD_, MB_, M_, NB_, N_, RSRC_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
      DOUBLE PRECISION   ZERO
      PARAMETER          ( ZERO = 0.0D+0 )
*     ..
*     .. Local Scalars ..
      INTEGER            CONTXT, DOWN, HBL, IBUF1, IBUF2, ICOL1, ICOL2,
     $                   II, III, IRCV1, IRCV2, IROW1, IROW2, ISRC,
     $                   ISTR1, ISTR2, ITMP1, ITMP2, JJ, JJJ, JSRC, LDA,
     $                   LEFT, MODKM1, MYCOL, MYROW, NPCOL, NPROW, NUM,
     $                   RIGHT, UP
      DOUBLE PRECISION   TST1, ULP
      COMPLEX*16         CDUM, H10, H11, H22
*     ..
*     .. External Functions ..
      INTEGER            ILCM, NUMROC
      DOUBLE PRECISION   PDLAMCH
      EXTERNAL           ILCM, NUMROC, PDLAMCH
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, IGAMX2D, INFOG1L, INFOG2L,
     $                   ZGERV2D, ZGESD2D
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          ABS, DBLE, DIMAG, MAX, MOD
*     ..
*     .. Statement Functions ..
      DOUBLE PRECISION   CABS1
*     ..
*     .. Statement Function definitions ..
      CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )
*     ..
*     .. Executable Statements ..
*
      HBL = DESCA( MB_ )
      CONTXT = DESCA( CTXT_ )
      LDA = DESCA( LLD_ )
      ULP = PDLAMCH( CONTXT, 'PRECISION' )
      CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL )
      LEFT = MOD( MYCOL+NPCOL-1, NPCOL )
      RIGHT = MOD( MYCOL+1, NPCOL )
      UP = MOD( MYROW+NPROW-1, NPROW )
      DOWN = MOD( MYROW+1, NPROW )
      NUM = NPROW*NPCOL
*
*     BUFFER1 STARTS AT BUF(ISTR1+1) AND WILL CONTAINS IBUF1 ELEMENTS
*     BUFFER2 STARTS AT BUF(ISTR2+1) AND WILL CONTAINS IBUF2 ELEMENTS
*
      ISTR1 = 0
      ISTR2 = ( ( I-L ) / HBL )
      IF( ISTR2*HBL.LT.( I-L ) )
     $   ISTR2 = ISTR2 + 1
      II = ISTR2 / ILCM( NPROW, NPCOL )
      IF( II*ILCM( NPROW, NPCOL ).LT.ISTR2 ) THEN
         ISTR2 = II + 1
      ELSE
         ISTR2 = II
      END IF
      IF( LWORK.LT.2*ISTR2 ) THEN
*
*        Error!
*
         RETURN
      END IF
      CALL INFOG2L( I, I, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW1,
     $              ICOL1, II, JJ )
      MODKM1 = MOD( I-1+HBL, HBL )
*
*     COPY OUR RELEVANT PIECES OF TRIADIAGONAL THAT WE OWE INTO
*     2 BUFFERS TO SEND TO WHOMEVER OWNS H(K,K) AS K MOVES DIAGONALLY
*     UP THE TRIDIAGONAL
*
      IBUF1 = 0
      IBUF2 = 0
      IRCV1 = 0
      IRCV2 = 0
      DO 10 K = I, L + 1, -1
         IF( ( MODKM1.EQ.0 ) .AND. ( DOWN.EQ.II ) .AND.
     $       ( RIGHT.EQ.JJ ) ) THEN
*
*           WE MUST PACK H(K-1,K-1) AND SEND IT DIAGONAL DOWN
*
            IF( ( DOWN.NE.MYROW ) .OR. ( RIGHT.NE.MYCOL ) ) THEN
               CALL INFOG2L( K-1, K-1, DESCA, NPROW, NPCOL, MYROW,
     $                       MYCOL, IROW1, ICOL1, ISRC, JSRC )
               IBUF1 = IBUF1 + 1
               BUF( ISTR1+IBUF1 ) = A( ( ICOL1-1 )*LDA+IROW1 )
            END IF
         END IF
         IF( ( MODKM1.EQ.0 ) .AND. ( MYROW.EQ.II ) .AND.
     $       ( RIGHT.EQ.JJ ) ) THEN
*
*           WE MUST PACK H(K  ,K-1) AND SEND IT RIGHT
*
            IF( NPCOL.GT.1 ) THEN
               CALL INFOG2L( K, K-1, DESCA, NPROW, NPCOL, MYROW, MYCOL,
     $                       IROW1, ICOL1, ISRC, JSRC )
               IBUF2 = IBUF2 + 1
               BUF( ISTR2+IBUF2 ) = A( ( ICOL1-1 )*LDA+IROW1 )
            END IF
         END IF
*
*        ADD UP THE RECEIVES
*
         IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN
            IF( ( MODKM1.EQ.0 ) .AND. ( ( NPROW.GT.1 ) .OR. ( NPCOL.GT.
     $          1 ) ) ) THEN
*
*              WE MUST RECEIVE H(K-1,K-1) FROM DIAGONAL UP
*
               IRCV1 = IRCV1 + 1
            END IF
            IF( ( MODKM1.EQ.0 ) .AND. ( NPCOL.GT.1 ) ) THEN
*
*              WE MUST RECEIVE H(K  ,K-1) FROM LEFT
*
               IRCV2 = IRCV2 + 1
            END IF
         END IF
*
*        POSSIBLY CHANGE OWNERS (OCCURS ONLY WHEN MOD(K-1,HBL) = 0)
*
         IF( MODKM1.EQ.0 ) THEN
            II = II - 1
            JJ = JJ - 1
            IF( II.LT.0 )
     $         II = NPROW - 1
            IF( JJ.LT.0 )
     $         JJ = NPCOL - 1
         END IF
         MODKM1 = MODKM1 - 1
         IF( MODKM1.LT.0 )
     $      MODKM1 = HBL - 1
   10 CONTINUE
*
*     SEND DATA ON TO THE APPROPRIATE NODE IF THERE IS ANY DATA TO SEND
*
      IF( IBUF1.GT.0 ) THEN
         CALL ZGESD2D( CONTXT, IBUF1, 1, BUF( ISTR1+1 ), IBUF1, DOWN,
     $                 RIGHT )
      END IF
      IF( IBUF2.GT.0 ) THEN
         CALL ZGESD2D( CONTXT, IBUF2, 1, BUF( ISTR2+1 ), IBUF2, MYROW,
     $                 RIGHT )
      END IF
*
*     RECEIVE APPROPRIATE DATA IF THERE IS ANY
*
      IF( IRCV1.GT.0 ) THEN
         CALL ZGERV2D( CONTXT, IRCV1, 1, BUF( ISTR1+1 ), IRCV1, UP,
     $                 LEFT )
      END IF
      IF( IRCV2.GT.0 ) THEN
         CALL ZGERV2D( CONTXT, IRCV2, 1, BUF( ISTR2+1 ), IRCV2, MYROW,
     $                 LEFT )
      END IF
*
*     START MAIN LOOP
*
      IBUF1 = 0
      IBUF2 = 0
      CALL INFOG2L( I, I, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW1,
     $              ICOL1, II, JJ )
      MODKM1 = MOD( I-1+HBL, HBL )
*
*        LOOK FOR A SINGLE SMALL SUBDIAGONAL ELEMENT.
*
*        Start loop for subdiagonal search
*
      DO 40 K = I, L + 1, -1
         IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN
            IF( MODKM1.EQ.0 ) THEN
*
*                 Grab information from WORK array
*
               IF( NUM.GT.1 ) THEN
                  IBUF1 = IBUF1 + 1
                  H11 = BUF( ISTR1+IBUF1 )
               ELSE
                  H11 = A( ( ICOL1-2 )*LDA+IROW1-1 )
               END IF
               IF( NPCOL.GT.1 ) THEN
                  IBUF2 = IBUF2 + 1
                  H10 = BUF( ISTR2+IBUF2 )
               ELSE
                  H10 = A( ( ICOL1-2 )*LDA+IROW1 )
               END IF
            ELSE
*
*                 Information is local
*
               H11 = A( ( ICOL1-2 )*LDA+IROW1-1 )
               H10 = A( ( ICOL1-2 )*LDA+IROW1 )
            END IF
            H22 = A( ( ICOL1-1 )*LDA+IROW1 )
            TST1 = CABS1( H11 ) + CABS1( H22 )
            IF( TST1.EQ.ZERO ) THEN
*
*                 FIND SOME NORM OF THE LOCAL H(L:I,L:I)
*
               CALL INFOG1L( L, HBL, NPROW, MYROW, 0, IROW1, III )
               IROW2 = NUMROC( I, HBL, MYROW, 0, NPROW )
               CALL INFOG1L( L, HBL, NPCOL, MYCOL, 0, ICOL1, III )
               ICOL2 = NUMROC( I, HBL, MYCOL, 0, NPCOL )
               DO 30 III = IROW1, IROW2
                  DO 20 JJJ = ICOL1, ICOL2
                     TST1 = TST1 + CABS1( A( ( JJJ-1 )*LDA+III ) )
   20             CONTINUE
   30          CONTINUE
            END IF
            IF( CABS1( H10 ).LE.MAX( ULP*TST1, SMLNUM ) )
     $         GO TO 50
            IROW1 = IROW1 - 1
            ICOL1 = ICOL1 - 1
         END IF
         MODKM1 = MODKM1 - 1
         IF( MODKM1.LT.0 )
     $      MODKM1 = HBL - 1
         IF( ( MODKM1.EQ.HBL-1 ) .AND. ( K.GT.2 ) ) THEN
            II = MOD( II+NPROW-1, NPROW )
            JJ = MOD( JJ+NPCOL-1, NPCOL )
            CALL INFOG2L( K-1, K-1, DESCA, NPROW, NPCOL, MYROW, MYCOL,
     $                    IROW1, ICOL1, ITMP1, ITMP2 )
         END IF
   40 CONTINUE
   50 CONTINUE
      CALL IGAMX2D( CONTXT, 'ALL', ' ', 1, 1, K, 1, ITMP1, ITMP2, -1,
     $              -1, -1 )
      RETURN
*
*     End of PZLASMSUB
*
      END
SHAR_EOF
fi # end of overwriting check
if test -f 'pzlatrs.f'
then
	echo shar: will not over-write existing file "'pzlatrs.f'"
else
cat << "SHAR_EOF" > 'pzlatrs.f'
      SUBROUTINE PZLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, IA, JA, 
     $                    DESCA, X, IX, JX, DESCX, SCALE, CNORM, INFO )
*
*  Mark R. Fahey
*  August 2000
*  This is very slow relative to PZTRSV.  This should only be used
*  when scaling is necessary to control overflow, or when it is modified
*  to scale better.
*
*     .. Scalar Arguments ..
      CHARACTER          DIAG, NORMIN, TRANS, UPLO
      INTEGER            IA, INFO, IX, JA, JX, N
      DOUBLE PRECISION   SCALE
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * ), DESCX( * )
      DOUBLE PRECISION   CNORM( * )
      COMPLEX*16         A( * ), X( * )
*     ..
*
*  Purpose
*  =======
*
*  PZLATRS solves one of the triangular systems
*
*     A * x = s*b,  A**T * x = s*b,  or  A**H * x = s*b,
*
*  with scaling to prevent overflow.  Here A is an upper or lower
*  triangular matrix, A**T denotes the transpose of A, A**H denotes the
*  conjugate transpose of A, x and b are n-element vectors, and s is a
*  scaling factor, usually less than or equal to 1, chosen so that the
*  components of x will be less than the overflow threshold.  If the
*  unscaled problem will not cause overflow, the Level 2 PBLAS routine
*  PZTRSV is called. If the matrix A is singular (A(j,j) = 0 for some j),
*  then s is set to 0 and a non-trivial solution to A*x = 0 is returned.
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclicly distributed array.
*  Such a global array has an associated description vector DESCA.
*  In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
*                                 DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) DESCA( M_ )    The number of rows in the global
*                                 array A.
*  N_A    (global) DESCA( N_ )    The number of columns in the global
*                                 array A.
*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
*                                 the rows of the array.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
*                                 the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension r x c.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the r processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the c processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Arguments
*  =========
*
*  UPLO    (global input) CHARACTER*1
*          Specifies whether the matrix A is upper or lower triangular.
*          = 'U':  Upper triangular
*          = 'L':  Lower triangular
*
*  TRANS   (global input) CHARACTER*1
*          Specifies the operation applied to A.
*          = 'N':  Solve A * x = s*b     (No transpose)
*          = 'T':  Solve A**T * x = s*b  (Transpose)
*          = 'C':  Solve A**H * x = s*b  (Conjugate transpose)
*
*  DIAG    (global input) CHARACTER*1
*          Specifies whether or not the matrix A is unit triangular.
*          = 'N':  Non-unit triangular
*          = 'U':  Unit triangular
*
*  NORMIN  (global input) CHARACTER*1
*          Specifies whether CNORM has been set or not.
*          = 'Y':  CNORM contains the column norms on entry
*          = 'N':  CNORM is not set on entry.  On exit, the norms will
*                  be computed and stored in CNORM.
*
*  N       (global input) INTEGER
*          The order of the matrix A.  N >= 0.
*
*  A       (local input) COMPLEX*16 array, dimension (DESCA(LLD_),*)
*          The triangular matrix A.  If UPLO = 'U', the leading n by n
*          upper triangular part of the array A contains the upper
*          triangular matrix, and the strictly lower triangular part of
*          A is not referenced.  If UPLO = 'L', the leading n by n lower
*          triangular part of the array A contains the lower triangular
*          matrix, and the strictly upper triangular part of A is not
*          referenced.  If DIAG = 'U', the diagonal elements of A are
*          also not referenced and are assumed to be 1.
*
*  IA      (global input) pointer to INTEGER
*          The global row index of the submatrix of the distributed
*          matrix A to operate on.
*
*  JA      (global input) pointer to INTEGER
*          The global column index of the submatrix of the distributed
*          matrix A to operate on.
*
*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix A.
*
*  X       (local input/output) COMPLEX*16 array, 
*                                             dimension (DESCX(LLD_),*)
*          On entry, the right hand side b of the triangular system.
*          On exit, X is overwritten by the solution vector x.
*
*  IX      (global input) pointer to INTEGER
*          The global row index of the submatrix of the distributed
*          matrix X to operate on.
*
*  JX      (global input) pointer to INTEGER
*          The global column index of the submatrix of the distributed
*          matrix X to operate on.
*
*  DESCX   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix X.
*
*  SCALE   (global output) DOUBLE PRECISION
*          The scaling factor s for the triangular system
*             A * x = s*b,  A**T * x = s*b,  or  A**H * x = s*b.
*          If SCALE = 0, the matrix A is singular or badly scaled, and
*          the vector x is an exact or approximate solution to A*x = 0.
*
*  CNORM   (global input or global output) DOUBLE PRECISION array, 
*                                                       dimension (N)
*          If NORMIN = 'Y', CNORM is an input argument and CNORM(j)
*          contains the norm of the off-diagonal part of the j-th column
*          of A.  If TRANS = 'N', CNORM(j) must be greater than or equal
*          to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)
*          must be greater than or equal to the 1-norm.
*
*          If NORMIN = 'N', CNORM is an output argument and CNORM(j)
*          returns the 1-norm of the offdiagonal part of the j-th column
*          of A.
*
*  INFO    (global output) INTEGER
*          = 0:  successful exit
*          < 0:  if INFO = -k, the k-th argument had an illegal value
*
*  Further Details
*  ======= =======
*
*  A rough bound on x is computed; if that is less than overflow, PZTRSV
*  is called, otherwise, specific code is used which checks for possible
*  overflow or divide-by-zero at every operation.
*
*  A columnwise scheme is used for solving A*x = b.  The basic algorithm
*  if A is lower triangular is
*
*       x[1:n] := b[1:n]
*       for j = 1, ..., n
*            x(j) := x(j) / A(j,j)
*            x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j]
*       end
*
*  Define bounds on the components of x after j iterations of the loop:
*     M(j) = bound on x[1:j]
*     G(j) = bound on x[j+1:n]
*  Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}.
*
*  Then for iteration j+1 we have
*     M(j+1) <= G(j) / | A(j+1,j+1) |
*     G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] |
*            <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | )
*
*  where CNORM(j+1) is greater than or equal to the infinity-norm of
*  column j+1 of A, not counting the diagonal.  Hence
*
*     G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | )
*                  1<=i<=j
*  and
*
*     |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| )
*                                   1<=i< j
*
*  Since |x(j)| <= M(j), we use the Level 2 PBLAS routine PZTRSV if the
*  reciprocal of the largest M(j), j=1,..,n, is larger than
*  max(underflow, 1/overflow).
*
*  The bound on x(j) is also used to determine when a step in the
*  columnwise method can be performed without fear of overflow.  If
*  the computed bound is greater than a large constant, x is scaled to
*  prevent overflow, but if the bound overflows, x is set to 0, x(j) to
*  1, and scale to 0, and a non-trivial solution to A*x = 0 is found.
*
*  Similarly, a row-wise scheme is used to solve A**T *x = b  or
*  A**H *x = b.  The basic algorithm for A upper triangular is
*
*       for j = 1, ..., n
*            x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j)
*       end
*
*  We simultaneously compute two bounds
*       G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j
*       M(j) = bound on x(i), 1<=i<=j
*
*  The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we
*  add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1.
*  Then the bound on x(j) is
*
*       M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) |
*
*            <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| )
*                      1<=i<=j
*
*  and we can safely call PZTRSV if 1/M(n) and 1/G(n) are both greater
*  than max(underflow, 1/overflow).
*
*  =====================================================================
*
*     .. Parameters ..
      DOUBLE PRECISION   ZERO, HALF, ONE, TWO
      PARAMETER          ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0,
     $                     TWO = 2.0D+0 )
      COMPLEX*16         CZERO, CONE
      PARAMETER          ( CZERO = ( 0.0D+0, 0.0D+0 ), 
     $                     CONE = ( 1.0D+0, 0.0D+0 ) )
      INTEGER            BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
     $                   MB_, NB_, RSRC_, CSRC_, LLD_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
     $                   CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                   RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
*     ..
*     .. Local Scalars ..
      LOGICAL            NOTRAN, NOUNIT, UPPER
      INTEGER            CONTXT, CSRC, I, ICOL, ICOLX, IMAX, IROW, IROWX, 
     $                   ITMP1, ITMP1X, ITMP2, ITMP2X, J, JFIRST, JINC, 
     $                   JLAST, LDA, LDX, MB, MYROW, MYCOL, NB, NPCOL,
     $                   NPROW, RSRC
      DOUBLE PRECISION   BIGNUM, GROW, REC, SMLNUM, TJJ, TMAX, TSCAL,
     $                   XBND, XJ, XMAX
      COMPLEX*16         CSUMJ, TJJS, USCAL, XJTMP, ZDUM
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            IDAMAX
      DOUBLE PRECISION   PDLAMCH, PDZASUM
      COMPLEX*16         ZLADIV
      EXTERNAL           IDAMAX, LSAME, PDLAMCH, PDZASUM, ZLADIV
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, DSCAL, DGSUM2D, INFOG2L, 
     $                   PDLABAD, PXERBLA, PZAMAX, PZAXPY, PZDOTC,
     $                   PZDOTU, PZDSCAL, PZLASET, PZSCAL, PZTRSV,
     $                   ZGEBS2D, ZGEBR2D
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, MIN
*     ..
*     .. Statement Functions ..
      DOUBLE PRECISION   CABS1, CABS2
*     ..
*     .. Statement Function definitions ..
      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
      CABS2( ZDUM ) = ABS( DBLE( ZDUM ) / 2.D0 ) +
     $                ABS( DIMAG( ZDUM ) / 2.D0 )
*     ..
*     .. Executable Statements ..
*
      INFO = 0
      UPPER = LSAME( UPLO, 'U' )
      NOTRAN = LSAME( TRANS, 'N' )
      NOUNIT = LSAME( DIAG, 'N' )
*
      CONTXT = DESCA( CTXT_ )
      RSRC = DESCA( RSRC_ )
      CSRC = DESCA( CSRC_ )
      MB = DESCA( MB_ )
      NB = DESCA( NB_ )
      LDA = DESCA( LLD_ )
      LDX = DESCX( LLD_ )
*
*     Test the input parameters.
*
      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
         INFO = -1
      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
     $         LSAME( TRANS, 'C' ) ) THEN
         INFO = -2
      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
         INFO = -3
      ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT.
     $         LSAME( NORMIN, 'N' ) ) THEN
         INFO = -4
      ELSE IF( N.LT.0 ) THEN
         INFO = -5
      END IF
*
      CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL )
*
      IF( INFO.NE.0 ) THEN
         CALL PXERBLA( CONTXT, 'PZLATRS', -INFO )
         RETURN
      END IF
*
*     Quick return if possible
*
      IF( N.EQ.0 )
     $   RETURN
*
*     Determine machine dependent parameters to control overflow.
*
      SMLNUM = PDLAMCH( CONTXT, 'Safe minimum' )
      BIGNUM = ONE / SMLNUM
      CALL PDLABAD( CONTXT, SMLNUM, BIGNUM )
      SMLNUM = SMLNUM / PDLAMCH( CONTXT, 'Precision' )
      BIGNUM = ONE / SMLNUM
      SCALE = ONE

*
      IF( LSAME( NORMIN, 'N' ) ) THEN
*
*        Compute the 1-norm of each column, not including the diagonal.
*
         IF( UPPER ) THEN
*
*           A is upper triangular.
*
            CNORM( 1 ) = ZERO
            DO 10 J = 2, N
               CNORM( J ) = PDZASUM( J-1, A, IA, JA+J-1, DESCA, 1 )
   10       CONTINUE
         ELSE
*
*           A is lower triangular.
*
            DO 20 J = 1, N - 1
               CNORM( J ) = PDZASUM( N-J, A, IA+J, JA+J-1, DESCA, 1 )
   20       CONTINUE
            CNORM( N ) = ZERO
         END IF
         CALL DGSUM2D( CONTXT, 'Row', ' ', N, 1, CNORM, 1, -1, -1 )
      END IF
*
*     Scale the column norms by TSCAL if the maximum element in CNORM is
*     greater than BIGNUM/2.
*
      IMAX = IDAMAX( N, CNORM, 1 )
      TMAX = CNORM( IMAX )
      IF( TMAX.LE.BIGNUM*HALF ) THEN
         TSCAL = ONE
      ELSE
         TSCAL = HALF / ( SMLNUM*TMAX )
         CALL DSCAL( N, TSCAL, CNORM, 1 )
      END IF
*
*     Compute a bound on the computed solution vector to see if the
*     Level 2 PBLAS routine PZTRSV can be used.
*
      XMAX = ZERO
      CALL PZAMAX( N, ZDUM, IMAX, X, IX, JX, DESCX, 1 )
      XMAX = CABS2( ZDUM )
      CALL DGSUM2D( CONTXT, 'Row', ' ', 1, 1, XMAX, 1, -1, -1 )
      XBND = XMAX
*
      IF( NOTRAN ) THEN
*
*        Compute the growth in A * x = b.
*
         IF( UPPER ) THEN
            JFIRST = N
            JLAST = 1
            JINC = -1
         ELSE
            JFIRST = 1
            JLAST = N
            JINC = 1
         END IF
*
         IF( TSCAL.NE.ONE ) THEN
            GROW = ZERO
            GO TO 60
         END IF
*
         IF( NOUNIT ) THEN
*
*           A is non-unit triangular.
*
*           Compute GROW = 1/G(j) and XBND = 1/M(j).
*           Initially, G(0) = max{x(i), i=1,...,n}.
*
            GROW = HALF / MAX( XBND, SMLNUM )
            XBND = GROW
            DO 40 J = JFIRST, JLAST, JINC
*
*              Exit the loop if the growth factor is too small.
*
               IF( GROW.LE.SMLNUM )
     $            GO TO 60
*
C              TJJS = A( J, J )
               CALL INFOG2L( IA+J-1, JA+J-1, DESCA, NPROW, NPCOL, 
     $                      MYROW, MYCOL, IROW, ICOL, ITMP1, ITMP2 )
               IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN
                  TJJS = A( (ICOL-1)*LDA+IROW )
                  CALL ZGEBS2D( CONTXT, 'All', ' ', 1, 1, TJJS, 1 )
               ELSE
                  CALL ZGEBR2D( CONTXT, 'All', ' ', 1, 1, TJJS, 1, 
     $                          ITMP1, ITMP2 )
               END IF
               TJJ = CABS1( TJJS )
*
               IF( TJJ.GE.SMLNUM ) THEN
*
*                 M(j) = G(j-1) / abs(A(j,j))
*
                  XBND = MIN( XBND, MIN( ONE, TJJ )*GROW )
               ELSE
*
*                 M(j) could overflow, set XBND to 0.
*
                  XBND = ZERO
               END IF
*
               IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN
*
*                 G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) )
*
                  GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) )
               ELSE
*
*                 G(j) could overflow, set GROW to 0.
*
                  GROW = ZERO
               END IF
   40       CONTINUE
            GROW = XBND
         ELSE
*
*           A is unit triangular.
*
*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
*
            GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) )
            DO 50 J = JFIRST, JLAST, JINC
*
*              Exit the loop if the growth factor is too small.
*
               IF( GROW.LE.SMLNUM )
     $            GO TO 60
*
*              G(j) = G(j-1)*( 1 + CNORM(j) )
*
               GROW = GROW*( ONE / ( ONE+CNORM( J ) ) )
   50       CONTINUE
         END IF
   60    CONTINUE
*
      ELSE
*
*        Compute the growth in A**T * x = b  or  A**H * x = b.
*
         IF( UPPER ) THEN
            JFIRST = 1
            JLAST = N
            JINC = 1
         ELSE
            JFIRST = N
            JLAST = 1
            JINC = -1
         END IF
*
         IF( TSCAL.NE.ONE ) THEN
            GROW = ZERO
            GO TO 90
         END IF
*
         IF( NOUNIT ) THEN
*
*           A is non-unit triangular.
*
*           Compute GROW = 1/G(j) and XBND = 1/M(j).
*           Initially, M(0) = max{x(i), i=1,...,n}.
*
            GROW = HALF / MAX( XBND, SMLNUM )
            XBND = GROW
            DO 70 J = JFIRST, JLAST, JINC
*
*              Exit the loop if the growth factor is too small.
*
               IF( GROW.LE.SMLNUM )
     $            GO TO 90
*
*              G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) )
*
               XJ = ONE + CNORM( J )
               GROW = MIN( GROW, XBND / XJ )
*
C              TJJS = A( J, J )
               CALL INFOG2L( IA+J-1, JA+J-1, DESCA, NPROW, NPCOL, 
     $                       MYROW, MYCOL, IROW, ICOL, ITMP1, ITMP2 )
               IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN
                  TJJS = A( (ICOL-1)*LDA+IROW )
                  CALL ZGEBS2D( CONTXT, 'All', ' ', 1, 1, TJJS, 1 )
               ELSE
                  CALL ZGEBR2D( CONTXT, 'All', ' ', 1, 1, TJJS, 1, 
     $                          ITMP1, ITMP2 )
               END IF
               TJJ = CABS1( TJJS )
*
               IF( TJJ.GE.SMLNUM ) THEN
*
*                 M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j))
*
                  IF( XJ.GT.TJJ )
     $               XBND = XBND*( TJJ / XJ )
               ELSE
*
*                 M(j) could overflow, set XBND to 0.
*
                  XBND = ZERO
               END IF
   70       CONTINUE
            GROW = MIN( GROW, XBND )
         ELSE
*
*           A is unit triangular.
*
*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
*
            GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) )
            DO 80 J = JFIRST, JLAST, JINC
*
*              Exit the loop if the growth factor is too small.
*
               IF( GROW.LE.SMLNUM )
     $            GO TO 90
*
*              G(j) = ( 1 + CNORM(j) )*G(j-1)
*
               XJ = ONE + CNORM( J )
               GROW = GROW / XJ
   80       CONTINUE
         END IF
   90    CONTINUE
      END IF
*
      IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN
*
*        Use the Level 2 PBLAS solve if the reciprocal of the bound on
*        elements of X is not too small.
*
         CALL PZTRSV( UPLO, TRANS, DIAG, N, A, IA, JA, DESCA, X, IX, 
     $                JX, DESCX, 1 )
      ELSE
*
*        Use a Level 1 PBLAS solve, scaling intermediate results.
*
         IF( XMAX.GT.BIGNUM*HALF ) THEN
*
*           Scale X so that its components are less than or equal to
*           BIGNUM in absolute value.
*
            SCALE = ( BIGNUM*HALF ) / XMAX
            CALL PZDSCAL( N, SCALE, X, IX, JX, DESCX, 1 )
            XMAX = BIGNUM
         ELSE
            XMAX = XMAX*TWO
         END IF
*
         IF( NOTRAN ) THEN
*
*           Solve A * x = b
*
            DO 120 J = JFIRST, JLAST, JINC
*
*              Compute x(j) = b(j) / A(j,j), scaling x if necessary.
*
C              XJ = CABS1( X( J ) )
               CALL INFOG2L( IX+J-1, JX, DESCX, NPROW, NPCOL, MYROW, 
     $                      MYCOL, IROWX, ICOLX, ITMP1X, ITMP2X )
               IF(( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X )) THEN
                  XJTMP = X( IROWX )
                  CALL ZGEBS2D( CONTXT, 'All', ' ', 1, 1, XJTMP, 1 )
               ELSE
                  CALL ZGEBR2D( CONTXT, 'All', ' ', 1, 1, XJTMP, 1, 
     $                          ITMP1X, ITMP2X )
               END IF
               XJ = CABS1( XJTMP )
               IF( NOUNIT ) THEN
C                 TJJS = A( J, J )*TSCAL
                  CALL INFOG2L( IA+J-1, JA+J-1, DESCA, NPROW, NPCOL, 
     $                         MYROW, MYCOL, IROW, ICOL, ITMP1, ITMP2 )
                  IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) 
     $               THEN
                     TJJS = A( (ICOL-1)*LDA+IROW )*TSCAL
                     CALL ZGEBS2D( CONTXT, 'All', ' ', 1, 1, TJJS, 1 )
                  ELSE
                     CALL ZGEBR2D( CONTXT, 'All', ' ', 1, 1, TJJS, 1, 
     $                             ITMP1, ITMP2 )
                  END IF
               ELSE
                  TJJS = TSCAL
                  IF( TSCAL.EQ.ONE )
     $               GO TO 110
               END IF
               TJJ = CABS1( TJJS )
               IF( TJJ.GT.SMLNUM ) THEN
*
*                    abs(A(j,j)) > SMLNUM:
*
                  IF( TJJ.LT.ONE ) THEN
                     IF( XJ.GT.TJJ*BIGNUM ) THEN
*
*                          Scale x by 1/b(j).
*
                        REC = ONE / XJ
                        CALL PZDSCAL( N, REC, X, IX, JX, DESCX, 1 )
                        XJTMP = XJTMP*REC
                        SCALE = SCALE*REC
                        XMAX = XMAX*REC
                     END IF
                  END IF
C                 X( J ) = ZLADIV( X( J ), TJJS )
C                 XJ = CABS1( X( J ) )
                  XJTMP = ZLADIV( XJTMP, TJJS )
                  XJ = CABS1( XJTMP )
                  IF(( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X )) THEN
                     X( IROWX ) = XJTMP
                  END IF
               ELSE IF( TJJ.GT.ZERO ) THEN
*
*                    0 < abs(A(j,j)) <= SMLNUM:
*
                  IF( XJ.GT.TJJ*BIGNUM ) THEN
*
*                       Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM
*                       to avoid overflow when dividing by A(j,j).
*
                     REC = ( TJJ*BIGNUM ) / XJ
                     IF( CNORM( J ).GT.ONE ) THEN
*
*                          Scale by 1/CNORM(j) to avoid overflow when
*                          multiplying x(j) times column j.
*
                        REC = REC / CNORM( J )
                     END IF
                     CALL PZDSCAL( N, REC, X, IX, JX, DESCX, 1 )
                     XJTMP = XJTMP*REC
                     SCALE = SCALE*REC
                     XMAX = XMAX*REC
                  END IF
C                 X( J ) = ZLADIV( X( J ), TJJS )
C                 XJ = CABS1( X( J ) )
                  XJTMP = ZLADIV( XJTMP, TJJS )
                  XJ = CABS1( XJTMP )
                  IF(( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X )) THEN
                     X( IROWX ) = XJTMP
                  END IF
               ELSE
*
*                    A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
*                    scale = 0, and compute a solution to A*x = 0.
*
                  CALL PZLASET( ' ', N, 1, CZERO, CZERO, X, IX, JX, 
     $                          DESCX )
                  IF(( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X )) THEN
                     X( IROWX ) = CONE
                  ENDIF
                  XJTMP = CONE
                  XJ = ONE
                  SCALE = ZERO
                  XMAX = ZERO
               END IF
  110          CONTINUE
*
*              Scale x if necessary to avoid overflow when adding a
*              multiple of column j of A.
*
               IF( XJ.GT.ONE ) THEN
                  REC = ONE / XJ
                  IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN
*
*                    Scale x by 1/(2*abs(x(j))).
*
                     REC = REC*HALF
                     CALL PZDSCAL( N, REC, X, IX, JX, DESCX, 1 )
                     XJTMP = XJTMP*REC
                     SCALE = SCALE*REC
                  END IF
               ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN
*
*                 Scale x by 1/2.
*
                  CALL PZDSCAL( N, HALF, X, IX, JX, DESCX, 1 )
                  XJTMP = XJTMP*HALF
                  SCALE = SCALE*HALF
               END IF
*
               IF( UPPER ) THEN
                  IF( J.GT.1 ) THEN
*
*                    Compute the update
*                       x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j)
*
                     ZDUM = -XJTMP*TSCAL
                     CALL PZAXPY( J-1, ZDUM, A, IA, JA+J-1, DESCA, 1, X,
     $                            IX, JX, DESCX, 1 )
                     CALL PZAMAX( J-1, ZDUM, IMAX, X, IX, JX, DESCX, 1 )
                     XMAX = CABS1( ZDUM )
                     CALL DGSUM2D( CONTXT, 'Row', ' ', 1, 1, XMAX, 1, 
     $                             -1, -1 )
                  END IF
               ELSE
                  IF( J.LT.N ) THEN
*
*                    Compute the update
*                       x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j)
*
                     ZDUM = -XJTMP*TSCAL
                     CALL PZAXPY( N-J, ZDUM, A, IA+J, JA+J-1, DESCA, 1,
     $                            X, IX+J, JX, DESCX, 1 )
                     CALL PZAMAX( N-J, ZDUM, I, X, IX+J, JX, DESCX, 1 )
                     XMAX = CABS1( ZDUM )
                     CALL DGSUM2D( CONTXT, 'Row', ' ', 1, 1, XMAX, 1, 
     $                             -1, -1 )
                  END IF
               END IF
  120       CONTINUE
*
         ELSE IF( LSAME( TRANS, 'T' ) ) THEN
*
*           Solve A**T * x = b
*
            DO 170 J = JFIRST, JLAST, JINC
*
*              Compute x(j) = b(j) - sum A(k,j)*x(k).
*                                    k<>j
*
C              XJ = CABS1( X( J ) )
               CALL INFOG2L( IX+J-1, JX, DESCX, NPROW, NPCOL, MYROW, 
     $                       MYCOL, IROWX, ICOLX, ITMP1X, ITMP2X )
               IF(( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X )) THEN
                  XJTMP = X( IROWX )
                  CALL ZGEBS2D( CONTXT, 'All', ' ', 1, 1, XJTMP, 1 )
               ELSE
                  CALL ZGEBR2D( CONTXT, 'All', ' ', 1, 1, XJTMP, 1, 
     $                          ITMP1X, ITMP2X )
               END IF
               XJ = CABS1( XJTMP )
               USCAL = DCMPLX( TSCAL )
               REC = ONE / MAX( XMAX, ONE )
               IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN
*
*                 If x(j) could overflow, scale x by 1/(2*XMAX).
*
                  REC = REC*HALF
                  IF( NOUNIT ) THEN
C                    TJJS = A( J, J )*TSCAL
                     CALL INFOG2L( IA+J-1, JA+J-1, DESCA, NPROW, NPCOL, 
     $                         MYROW, MYCOL, IROW, ICOL, ITMP1, ITMP2 )
                     IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) 
     $                  THEN
                        TJJS = A( (ICOL-1)*LDA+IROW )*TSCAL
                        CALL ZGEBS2D( CONTXT, 'All', ' ', 1, 1,TJJS,1)
                     ELSE
                        CALL ZGEBR2D( CONTXT, 'All', ' ', 1, 1,TJJS,1, 
     $                                ITMP1, ITMP2 )
                     END IF
                  ELSE
                     TJJS = TSCAL
                  END IF
                  TJJ = CABS1( TJJS )
                  IF( TJJ.GT.ONE ) THEN
*
*                       Divide by A(j,j) when scaling x if A(j,j) > 1.
*
                     REC = MIN( ONE, REC*TJJ )
                     USCAL = ZLADIV( USCAL, TJJS )
                  END IF
                  IF( REC.LT.ONE ) THEN
                     CALL PZDSCAL( N, REC, X, IX, JX, DESCX, 1 )
                     XJTMP = XJTMP*REC
                     SCALE = SCALE*REC
                     XMAX = XMAX*REC
                  END IF
               END IF
*
               CSUMJ = CZERO
               IF( USCAL.EQ.CONE ) THEN
*
*                 If the scaling needed for A in the dot product is 1,
*                 call PZDOTU to perform the dot product.
*
                  IF( UPPER ) THEN
                     CALL PZDOTU( J-1, CSUMJ, A, IA, JA+J-1, DESCA, 1,
     $                            X, IX, JX, DESCX, 1 )
                  ELSE IF( J.LT.N ) THEN
                     CALL PZDOTU( N-J, CSUMJ, A, IA+J, JA+J-1, DESCA, 1,
     $                            X, IX+J, JX, DESCX, 1 )
                  END IF
                  IF( MYCOL.EQ.ITMP2X ) THEN
                     CALL ZGEBS2D( CONTXT, 'Row', ' ', 1, 1, CSUMJ, 1 )
                  ELSE
                     CALL ZGEBR2D( CONTXT, 'Row', ' ', 1, 1, CSUMJ, 1, 
     $                             MYROW, ITMP2X )
                  END IF
               ELSE
*
*                 Otherwise, scale column of A by USCAL before dot product.
*                 Below is not the best way to do it.
*
                  IF( UPPER ) THEN
C                    DO 130 I = 1, J - 1
C                       CSUMJ = CSUMJ + ( A( I, J )*USCAL )*X( I )
C 130                CONTINUE
                     ZDUM = DCONJG( USCAL )
                     CALL PZSCAL( J-1, ZDUM, A, IA, JA+J-1, DESCA, 1 )
                     CALL PZDOTU( J-1, CSUMJ, A, IA, JA+J-1, DESCA, 1,
     $                            X, IX, JX, DESCX, 1 )
                     ZDUM = ZLADIV( ZDUM, USCAL )
                     CALL PZSCAL( J-1, ZDUM, A, IA, JA+J-1, DESCA, 1 )
                  ELSE IF( J.LT.N ) THEN
C                    DO 140 I = J + 1, N
C                       CSUMJ = CSUMJ + ( A( I, J )*USCAL )*X( I )
C  140               CONTINUE
                     ZDUM = DCONJG( USCAL )
                     CALL PZSCAL( N-J, ZDUM, A, IA+J, JA+J-1, DESCA, 1)
                     CALL PZDOTU( N-J, CSUMJ, A, IA+J, JA+J-1, DESCA, 1,
     $                            X, IX+J, JX, DESCX, 1 )
                     ZDUM = ZLADIV( ZDUM, USCAL )
                     CALL PZSCAL( N-J, ZDUM, A, IA+J, JA+J-1, DESCA, 1 )
                  END IF
                  IF( MYCOL.EQ.ITMP2X ) THEN
                     CALL ZGEBS2D( CONTXT, 'Row', ' ', 1, 1, CSUMJ, 1 )
                  ELSE
                     CALL ZGEBR2D( CONTXT, 'Row', ' ', 1, 1, CSUMJ, 1, 
     $                             MYROW, ITMP2X )
                  END IF
               END IF
*
               IF( USCAL.EQ.DCMPLX( TSCAL ) ) THEN
*
*                 Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j)
*                 was not used to scale the dotproduct.
*
C                 X( J ) = X( J ) - CSUMJ
C                 XJ = CABS1( X( J ) )
                  XJTMP = XJTMP - CSUMJ
                  XJ = CABS1( XJTMP )
C                  IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) )
C     $               X( IROWX ) = XJTMP
                  IF( NOUNIT ) THEN
C                    TJJS = A( J, J )*TSCAL
                     CALL INFOG2L( IA+J-1, JA+J-1, DESCA, NPROW, NPCOL, 
     $                         MYROW, MYCOL, IROW, ICOL, ITMP1, ITMP2 )
                     IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) 
     $                  THEN
                        TJJS = A( (ICOL-1)*LDA+IROW )*TSCAL
                        CALL ZGEBS2D( CONTXT, 'All', ' ', 1, 1,TJJS, 1)
                     ELSE
                        CALL ZGEBR2D( CONTXT, 'All', ' ', 1, 1,TJJS, 1, 
     $                                ITMP1, ITMP2 )
                     END IF
                  ELSE
                     TJJS = TSCAL
                     IF( TSCAL.EQ.ONE )
     $                  GO TO 160
                  END IF
*
*                    Compute x(j) = x(j) / A(j,j), scaling if necessary.
*
                  TJJ = CABS1( TJJS )
                  IF( TJJ.GT.SMLNUM ) THEN
*
*                       abs(A(j,j)) > SMLNUM:
*
                     IF( TJJ.LT.ONE ) THEN
                        IF( XJ.GT.TJJ*BIGNUM ) THEN
*
*                             Scale X by 1/abs(x(j)).
*
                           REC = ONE / XJ
                           CALL PZDSCAL( N, REC, X, IX, JX, DESCX, 1 )
                           XJTMP = XJTMP*REC
                           SCALE = SCALE*REC
                           XMAX = XMAX*REC
                        END IF
                     END IF
C                    X( J ) = ZLADIV( X( J ), TJJS )
                     XJTMP = ZLADIV( XJTMP, TJJS )
                     IF(( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X )) 
     $                  THEN
                        X( IROWX ) = XJTMP
                     END IF
                  ELSE IF( TJJ.GT.ZERO ) THEN
*
*                       0 < abs(A(j,j)) <= SMLNUM:
*
                     IF( XJ.GT.TJJ*BIGNUM ) THEN
*
*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM.
*
                        REC = ( TJJ*BIGNUM ) / XJ
                        CALL PZDSCAL( N, REC, X, IX, JX, DESCX, 1 )
                        XJTMP = XJTMP*REC
                        SCALE = SCALE*REC
                        XMAX = XMAX*REC
                     END IF
C                    X( J ) = ZLADIV( X( J ), TJJS )
                     XJTMP = ZLADIV( XJTMP, TJJS )
                     IF(( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X )) 
     $                  THEN
                        X( IROWX ) = XJTMP
                     END IF
                  ELSE
*
*                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
*                       scale = 0 and compute a solution to A**T *x = 0.
*
                     CALL PZLASET( ' ', N, 1, CZERO, CZERO, X, IX, JX, 
     $                             DESCX )
                     IF(( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X )) 
     $                  THEN
                        X( IROWX ) = CONE
                     ENDIF
                     XJTMP = CONE
                     SCALE = ZERO
                     XMAX = ZERO
                  END IF
  160             CONTINUE
               ELSE
*
*                 Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot
*                 product has already been divided by 1/A(j,j).
*
C                 X( J ) = ZLADIV( X( J ), TJJS ) - CSUMJ
                  XJTMP = ZLADIV( XJTMP, TJJS ) - CSUMJ
                  IF(( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X )) 
     $                  THEN
                     X( IROWX ) = XJTMP
                  END IF
               END IF
               XMAX = MAX( XMAX, CABS1( XJTMP ) )
  170       CONTINUE
*
         ELSE
*
*           Solve A**H * x = b
*
            DO 220 J = JFIRST, JLAST, JINC
*
*              Compute x(j) = b(j) - sum A(k,j)*x(k).
*                                    k<>j
*
               CALL INFOG2L( IX+J-1, JX, DESCX, NPROW, NPCOL, MYROW, 
     $                       MYCOL, IROWX, ICOLX, ITMP1X, ITMP2X )
               IF(( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X )) THEN
                  XJTMP = X( IROWX )
                  CALL ZGEBS2D( CONTXT, 'All', ' ', 1, 1, XJTMP, 1 )
               ELSE
                  CALL ZGEBR2D( CONTXT, 'All', ' ', 1, 1, XJTMP, 1, 
     $                          ITMP1X, ITMP2X )
               END IF
               XJ = CABS1( XJTMP )
               USCAL = TSCAL
               REC = ONE / MAX( XMAX, ONE )
               IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN
*
*                 If x(j) could overflow, scale x by 1/(2*XMAX).
*
                  REC = REC*HALF
                  IF( NOUNIT ) THEN
C                    TJJS = DCONJG( A( J, J ) )*TSCAL
                     CALL INFOG2L( IA+J-1, JA+J-1, DESCA, NPROW, NPCOL, 
     $                         MYROW, MYCOL, IROW, ICOL, ITMP1, ITMP2 )
                     IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) 
     $                  THEN
                        TJJS = DCONJG( A( (ICOL-1)*LDA+IROW ) )*TSCAL
                        CALL ZGEBS2D( CONTXT, 'All', ' ', 1, 1, TJJS,1)
                     ELSE
                        CALL ZGEBR2D( CONTXT, 'All', ' ', 1, 1, TJJS,1, 
     $                                ITMP1, ITMP2 )
                     END IF
                  ELSE
                     TJJS = TSCAL
                  END IF
                  TJJ = CABS1( TJJS )
                  IF( TJJ.GT.ONE ) THEN
*
*                       Divide by A(j,j) when scaling x if A(j,j) > 1.
*
                     REC = MIN( ONE, REC*TJJ )
                     USCAL = ZLADIV( USCAL, TJJS )
                  END IF
                  IF( REC.LT.ONE ) THEN
                     CALL PZDSCAL( N, REC, X, IX, JX, DESCX, 1 )
                     XJTMP = XJTMP*REC
                     SCALE = SCALE*REC
                     XMAX = XMAX*REC
                  END IF
               END IF
*
               CSUMJ = CZERO
               IF( USCAL.EQ.CONE ) THEN
*
*                 If the scaling needed for A in the dot product is 1,
*                 call PZDOTC to perform the dot product.
*
                  IF( UPPER ) THEN
                     CALL PZDOTC( J-1, CSUMJ, A, IA, JA+J-1, DESCA, 1,
     $                            X, IX, JX, DESCX, 1 )
                  ELSE IF( J.LT.N ) THEN
                     CALL PZDOTC( N-J, CSUMJ, A, IA+J, JA+J-1, DESCA, 1,
     $                            X, IX+J, JX, DESCX, 1 )
                  END IF
                  IF( MYCOL.EQ.ITMP2X ) THEN
                     CALL ZGEBS2D( CONTXT, 'Row', ' ', 1, 1, CSUMJ, 1 )
                  ELSE
                     CALL ZGEBR2D( CONTXT, 'Row', ' ', 1, 1, CSUMJ, 1, 
     $                             MYROW, ITMP2X )
                  END IF
               ELSE
*
*                 Otherwise, scale column of A by USCAL before dot product.
*                 Below is not the best way to do it.
*
                  IF( UPPER ) THEN
C                    DO 180 I = 1, J - 1
C                       CSUMJ = CSUMJ + ( DCONJG( A( I, J ) )*USCAL )*
C    $                          X( I )
C 180                CONTINUE
                     ZDUM = DCONJG( USCAL )
                     CALL PZSCAL( J-1, ZDUM, A, IA, JA+J-1, DESCA, 1 )
                     CALL PZDOTC( J-1, CSUMJ, A, IA, JA+J-1, DESCA, 1,
     $                            X, IX, JX, DESCX, 1 )
                     ZDUM = ZLADIV( CONE, ZDUM )
                     CALL PZSCAL( J-1, ZDUM, A, IA, JA+J-1, DESCA, 1 )
                  ELSE IF( J.LT.N ) THEN
C                    DO 190 I = J + 1, N
C                       CSUMJ = CSUMJ + ( DCONJG( A( I, J ) )*USCAL )*
C    $                          X( I )
C 190                CONTINUE
                     ZDUM = DCONJG( USCAL )
                     CALL PZSCAL( N-J, ZDUM, A, IA+J, JA+J-1, DESCA, 1)
                     CALL PZDOTC( N-J, CSUMJ, A, IA+J, JA+J-1, DESCA, 1,
     $                            X, IX+J, JX, DESCX, 1 )
                     ZDUM = ZLADIV( CONE, ZDUM )
                     CALL PZSCAL( N-J, ZDUM, A, IA+J, JA+J-1, DESCA, 1 )
                  END IF
                  IF( MYCOL.EQ.ITMP2X ) THEN
                     CALL ZGEBS2D( CONTXT, 'Row', ' ', 1, 1, CSUMJ, 1 )
                  ELSE
                     CALL ZGEBR2D( CONTXT, 'Row', ' ', 1, 1, CSUMJ, 1, 
     $                             MYROW, ITMP2X )
                  END IF
               END IF
*
               IF( USCAL.EQ.DCMPLX( TSCAL ) ) THEN
*
*                 Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j)
*                 was not used to scale the dotproduct.
*
C                 X( J ) = X( J ) - CSUMJ
C                 XJ = CABS1( X( J ) )
                  XJTMP = XJTMP - CSUMJ
                  XJ = CABS1( XJTMP )
C                  IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) )
C     $               X( IROWX ) = XJTMP
                  IF( NOUNIT ) THEN
C                    TJJS = DCONJG( A( J, J ) )*TSCAL
                     CALL INFOG2L( IA+J-1, JA+J-1, DESCA, NPROW, NPCOL, 
     $                         MYROW, MYCOL, IROW, ICOL, ITMP1, ITMP2 )
                     IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) 
     $                  THEN
                        TJJS = DCONJG( A( (ICOL-1)*LDA+IROW ) )*TSCAL
                        CALL ZGEBS2D( CONTXT, 'All', ' ', 1, 1, TJJS,1)
                     ELSE
                        CALL ZGEBR2D( CONTXT, 'All', ' ', 1, 1, TJJS,1, 
     $                                ITMP1, ITMP2 )
                     END IF
                  ELSE
                     TJJS = TSCAL
                     IF( TSCAL.EQ.ONE )
     $                  GO TO 210
                  END IF
*
*                    Compute x(j) = x(j) / A(j,j), scaling if necessary.
*
                  TJJ = CABS1( TJJS )
                  IF( TJJ.GT.SMLNUM ) THEN
*
*                       abs(A(j,j)) > SMLNUM:
*
                     IF( TJJ.LT.ONE ) THEN
                        IF( XJ.GT.TJJ*BIGNUM ) THEN
*
*                             Scale X by 1/abs(x(j)).
*
                           REC = ONE / XJ
                           CALL PZDSCAL( N, REC, X, IX, JX, DESCX, 1 )
                           XJTMP = XJTMP*REC
                           SCALE = SCALE*REC
                           XMAX = XMAX*REC
                        END IF
                     END IF
C                    X( J ) = ZLADIV( X( J ), TJJS )
                     XJTMP = ZLADIV( XJTMP, TJJS )
                     IF(( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X )) 
     $                  X( IROWX ) = XJTMP
                  ELSE IF( TJJ.GT.ZERO ) THEN
*
*                       0 < abs(A(j,j)) <= SMLNUM:
*
                     IF( XJ.GT.TJJ*BIGNUM ) THEN
*
*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM.
*
                        REC = ( TJJ*BIGNUM ) / XJ
                        CALL PZDSCAL( N, REC, X, IX, JX, DESCX, 1 )
                        XJTMP = XJTMP*REC
                        SCALE = SCALE*REC
                        XMAX = XMAX*REC
                     END IF
C                    X( J ) = ZLADIV( X( J ), TJJS )
                     XJTMP = ZLADIV( XJTMP, TJJS )
                     IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) 
     $                  X( IROWX ) = XJTMP
                  ELSE
*
*                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
*                       scale = 0 and compute a solution to A**H *x = 0.
*
                     CALL PZLASET( ' ', N, 1, CZERO, CZERO, X, IX, JX, 
     $                             DESCX )
                     IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) ) 
     $                  X( IROWX ) = CONE
                     XJTMP = CONE
                     SCALE = ZERO
                     XMAX = ZERO
                  END IF
  210             CONTINUE
               ELSE
*
*                 Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot
*                 product has already been divided by 1/A(j,j).
*
C                 X( J ) = ZLADIV( X( J ), TJJS ) - CSUMJ
                  XJTMP = ZLADIV( XJTMP, TJJS ) - CSUMJ
                  IF( ( MYROW.EQ.ITMP1X ) .AND. ( MYCOL.EQ.ITMP2X ) )
     $               X( IROWX ) = XJTMP
               END IF
               XMAX = MAX( XMAX, CABS1( XJTMP ) )
  220       CONTINUE
         END IF
         SCALE = SCALE / TSCAL
      END IF
*
*     Scale the column norms by 1/TSCAL for return.
*
      IF( TSCAL.NE.ONE ) THEN
         CALL DSCAL( N, ONE / TSCAL, CNORM, 1 )
      END IF
*
      RETURN
*
*     End of PZLATRS
*
      END
SHAR_EOF
fi # end of overwriting check
if test -f 'pzlawil.f'
then
	echo shar: will not over-write existing file "'pzlawil.f'"
else
cat << "SHAR_EOF" > 'pzlawil.f'
      SUBROUTINE PZLAWIL( II, JJ, M, A, DESCA, H44, H33, H43H34, V )
*
*  Mark R. Fahey
*  May 28, 1999
*
*     .. Scalar Arguments ..
      INTEGER            II, JJ, M
      COMPLEX*16         H33, H43H34, H44
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * )
      COMPLEX*16         A( * ), V( * )
*     ..
*
*  Purpose
*  =======
*
*  PZLAWIL gets the transform given by H44,H33, & H43H34 into V
*     starting at row M.
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclicly distributed array.
*  Such a global array has an associated description vector DESCA.
*  In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
*                                 DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) DESCA( M_ )    The number of rows in the global
*                                 array A.
*  N_A    (global) DESCA( N_ )    The number of columns in the global
*                                 array A.
*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
*                                 the rows of the array.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
*                                 the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Arguments
*  =========
*
*  II      (global input) INTEGER
*          Row owner of H(M+2,M+2)
*
*  JJ      (global input) INTEGER
*          Column owner of H(M+2,M+2)
*
*  M       (global input) INTEGER
*          On entry, this is where the transform starts (row M.)
*          Unchanged on exit.
*
*  A       (global input) COMPLEX*16 array, dimension
*          (DESCA(LLD_),*)
*          On entry, the Hessenberg matrix.
*          Unchanged on exit.
*
*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix A.
*          Unchanged on exit.
*
*  H44
*  H33
*  H43H34  (global input) COMPLEX*16
*          These three values are for the double shift QR iteration.
*          Unchanged on exit.
*
*  V       (global output) COMPLEX*16 array of size 3.
*          Contains the transform on ouput.
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
     $                   LLD_, MB_, M_, NB_, N_, RSRC_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
*     ..
*     .. Local Scalars ..
      INTEGER            CONTXT, DOWN, HBL, ICOL, IROW, JSRC, LDA, LEFT,
     $                   MODKM1, MYCOL, MYROW, NPCOL, NPROW, NUM, RIGHT,
     $                   RSRC, UP
      DOUBLE PRECISION   S 
      COMPLEX*16         CDUM, H11, H12, H21, H22, H33S, H44S, V1, V2, 
     $                   V3
*     ..
*     .. Local Arrays ..
      COMPLEX*16         BUF( 4 )
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, INFOG2L, ZGERV2D, ZGESD2D
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          ABS, DBLE, DIMAG, MOD
*     ..
*     .. Statement Functions ..
      DOUBLE PRECISION   CABS1
*     ..
*     .. Statement Function definitions ..
      CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )
*     ..
*     .. Executable Statements ..
*
      HBL = DESCA( MB_ )
      CONTXT = DESCA( CTXT_ )
      LDA = DESCA( LLD_ )
      CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL )
      LEFT = MOD( MYCOL+NPCOL-1, NPCOL )
      RIGHT = MOD( MYCOL+1, NPCOL )
      UP = MOD( MYROW+NPROW-1, NPROW )
      DOWN = MOD( MYROW+1, NPROW )
      NUM = NPROW*NPCOL
*
*     On node (II,JJ) collect all DIA,SUP,SUB info from M, M+1
*
      MODKM1 = MOD( M+1, HBL )
      IF( MODKM1.EQ.0 ) THEN
         IF( ( MYROW.EQ.II ) .AND. ( RIGHT.EQ.JJ ) .AND.
     $       ( NPCOL.GT.1 ) ) THEN
            CALL INFOG2L( M+2, M+1, DESCA, NPROW, NPCOL, MYROW, MYCOL,
     $                    IROW, ICOL, RSRC, JSRC )
            BUF( 1 ) = A( ( ICOL-1 )*LDA+IROW )
            CALL ZGESD2D( CONTXT, 1, 1, BUF, 1, II, JJ )
         END IF
         IF( ( DOWN.EQ.II ) .AND. ( RIGHT.EQ.JJ ) .AND. ( NUM.GT.1 ) )
     $        THEN
            CALL INFOG2L( M, M, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW,
     $                    ICOL, RSRC, JSRC )
            BUF( 1 ) = A( ( ICOL-1 )*LDA+IROW )
            BUF( 2 ) = A( ( ICOL-1 )*LDA+IROW+1 )
            BUF( 3 ) = A( ICOL*LDA+IROW )
            BUF( 4 ) = A( ICOL*LDA+IROW+1 )
            CALL ZGESD2D( CONTXT, 4, 1, BUF, 4, II, JJ )
         END IF
         IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN
            CALL INFOG2L( M+2, M+2, DESCA, NPROW, NPCOL, MYROW, MYCOL,
     $                    IROW, ICOL, RSRC, JSRC )
            IF( NPCOL.GT.1 ) THEN
               CALL ZGERV2D( CONTXT, 1, 1, V3, 1, MYROW, LEFT )
            ELSE
               V3 = A( ( ICOL-2 )*LDA+IROW )
            END IF
            IF( NUM.GT.1 ) THEN
               CALL ZGERV2D( CONTXT, 4, 1, BUF, 4, UP, LEFT )
               H11 = BUF( 1 )
               H21 = BUF( 2 )
               H12 = BUF( 3 )
               H22 = BUF( 4 )
            ELSE
               H11 = A( ( ICOL-3 )*LDA+IROW-2 )
               H21 = A( ( ICOL-3 )*LDA+IROW-1 )
               H12 = A( ( ICOL-2 )*LDA+IROW-2 )
               H22 = A( ( ICOL-2 )*LDA+IROW-1 )
            END IF
         END IF
      END IF
      IF( MODKM1.EQ.1 ) THEN
         IF( ( DOWN.EQ.II ) .AND. ( RIGHT.EQ.JJ ) .AND. ( NUM.GT.1 ) )
     $        THEN
            CALL INFOG2L( M, M, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW,
     $                    ICOL, RSRC, JSRC )
            CALL ZGESD2D( CONTXT, 1, 1, A( ( ICOL-1 )*LDA+IROW ), 1, II,
     $                    JJ )
         END IF
         IF( ( DOWN.EQ.II ) .AND. ( MYCOL.EQ.JJ ) .AND. ( NPROW.GT.1 ) )
     $        THEN
            CALL INFOG2L( M, M+1, DESCA, NPROW, NPCOL, MYROW, MYCOL,
     $                    IROW, ICOL, RSRC, JSRC )
            CALL ZGESD2D( CONTXT, 1, 1, A( ( ICOL-1 )*LDA+IROW ), 1, II,
     $                    JJ )
         END IF
         IF( ( MYROW.EQ.II ) .AND. ( RIGHT.EQ.JJ ) .AND.
     $       ( NPCOL.GT.1 ) ) THEN
            CALL INFOG2L( M+1, M, DESCA, NPROW, NPCOL, MYROW, MYCOL,
     $                    IROW, ICOL, RSRC, JSRC )
            CALL ZGESD2D( CONTXT, 1, 1, A( ( ICOL-1 )*LDA+IROW ), 1, II,
     $                    JJ )
         END IF
         IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN
            CALL INFOG2L( M+2, M+2, DESCA, NPROW, NPCOL, MYROW, MYCOL,
     $                    IROW, ICOL, RSRC, JSRC )
            IF( NUM.GT.1 ) THEN
               CALL ZGERV2D( CONTXT, 1, 1, H11, 1, UP, LEFT )
            ELSE
               H11 = A( ( ICOL-3 )*LDA+IROW-2 )
            END IF
            IF( NPROW.GT.1 ) THEN
               CALL ZGERV2D( CONTXT, 1, 1, H12, 1, UP, MYCOL )
            ELSE
               H12 = A( ( ICOL-2 )*LDA+IROW-2 )
            END IF
            IF( NPCOL.GT.1 ) THEN
               CALL ZGERV2D( CONTXT, 1, 1, H21, 1, MYROW, LEFT )
            ELSE
               H21 = A( ( ICOL-3 )*LDA+IROW-1 )
            END IF
            H22 = A( ( ICOL-2 )*LDA+IROW-1 )
            V3 = A( ( ICOL-2 )*LDA+IROW )
         END IF
      END IF
      IF( ( MYROW.NE.II ) .OR. ( MYCOL.NE.JJ ) )
     $   RETURN
*
      IF( MODKM1.GT.1 ) THEN
         CALL INFOG2L( M+2, M+2, DESCA, NPROW, NPCOL, MYROW, MYCOL,
     $                 IROW, ICOL, RSRC, JSRC )
         H11 = A( ( ICOL-3 )*LDA+IROW-2 )
         H21 = A( ( ICOL-3 )*LDA+IROW-1 )
         H12 = A( ( ICOL-2 )*LDA+IROW-2 )
         H22 = A( ( ICOL-2 )*LDA+IROW-1 )
         V3 = A( ( ICOL-2 )*LDA+IROW )
      END IF
*
      H44S = H44 - H11
      H33S = H33 - H11
      V1 = ( H33S*H44S-H43H34 ) / H21 + H12
      V2 = H22 - H11 - H33S - H44S
      S = CABS1( V1 ) + CABS1( V2 ) + CABS1( V3 )
      V1 = V1 / S
      V2 = V2 / S
      V3 = V3 / S
      V( 1 ) = V1
      V( 2 ) = V2
      V( 3 ) = V3
*
      RETURN
*
*     End of PZLAWIL
*
      END
SHAR_EOF
fi # end of overwriting check
if test -f 'pzrot.c'
then
	echo shar: will not over-write existing file "'pzrot.c'"
else
cat << "SHAR_EOF" > 'pzrot.c'
/* ---------------------------------------------------------------------
*
*  -- Mark R. Fahey
*     June 28, 2000
*
*  ---------------------------------------------------------------------
*/
/*
*  Include files
*/
#include "pblas.h"

void pzrot_( n, X, ix, jx, desc_X, incx, Y, iy, jy, desc_Y, incy, c, s )
/*
*  Mark Fahey
*  June 22, 2000
*/
/*
*  .. Scalar Arguments ..
*/
   int         * incx, * incy, * ix, * iy, * jx, * jy, * n;
   double      * c;
   complex16   * s;
/*
*  ..
*  .. Array Arguments ..
*/
   int         desc_X[], desc_Y[];
   complex16   X[], Y[];
{
/*
*  Purpose
*  =======
*
*  PZROT applies a plane rotation, where the cos (C) is real and the
*  sin (S) is complex, and the vectors CX and CY are complex, i.e.,
*
*     [ sub( X ) ] := [  C          S  ] [ sub( X ) ]
*     [ sub( Y ) ] := [ -conjg(S)   C  ] [ sub( Y ) ]
*
*  where sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X,
*                         X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X,
*
*        sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y,
*                         Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y,
*
*  and where C*C + S*CONJG(S) = 1.0.
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclicly distributed array.
*  Such a global array has an associated description vector DESCA.
*  In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DT_A   (global) descA[ DT_ ]   The descriptor type.  In this case,
*                                 DT_A = 1.
*  CTXT_A (global) descA[ CTXT_ ] The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) descA[ M_ ]    The number of rows in the global
*                                 array A.
*  N_A    (global) descA[ N_ ]    The number of columns in the global
*                                 array A.
*  MB_A   (global) descA[ MB_ ]   The blocking factor used to distribu-
*                                 te the rows of the array.
*  NB_A   (global) descA[ NB_ ]   The blocking factor used to distribu-
*                                 te the columns of the array.
*  RSRC_A (global) descA[ RSRC_ ] The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) descA[ CSRC_ ] The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  descA[ LLD_ ]  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Because vectors may be seen as particular matrices, a distributed
*  vector is considered to be a distributed matrix.
*
*  If INCX = M_X and INCY = M_Y, NB_X must be equal to NB_Y, and the
*  process column having the first entries of sub( Y ) must also contain
*  the first entries of sub( X ). Moreover, the quantity
*  MOD( JX-1, NB_X ) must be equal to MOD( JY-1, NB_Y ).
*
*  If INCX = M_X, INCY = 1 and INCY <> M_Y, NB_X must be equal to MB_Y.
*  Moreover, the quantity MOD( JX-1, NB_X ) must be equal to
*  MOD( IY-1, MB_Y ).
*
*  If INCX = 1, INCX <> M_X and INCY = M_Y, MB_X must be equal to NB_Y.
*  Moreover, the quantity MOD( IX-1, MB_X ) must be equal to
*  MOD( JY-1, NB_Y ).
*
*  If INCX = 1, INCX <> M_X, INCY = 1 and INCY <> M_Y, MB_X must be
*  equal to MB_Y, and the process row having the first entries of
*  sub( Y ) must also contain the first entries of sub( X ). Moreover,
*  the quantity MOD( IX-1, MB_X ) must be equal to MOD( IY-1, MB_Y ).
*
*  Arguments
*  =========
*
*  N       (input) INTEGER
*          The number of elements in the vectors CX and CY.
*
*  X       (local input) COMPLEX array containing the local
*          pieces of a distributed matrix of dimension of at least
*              ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) )
*          This array contains the entries of the distributed vector
*          sub( X ).
*          On output, CX is overwritten with C*X + S*Y.
*
*  IX      (global input) pointer to INTEGER
*          The global row index of the submatrix of the distributed
*          matrix X to operate on.
*
*  JX      (global input) pointer to INTEGER
*          The global column index of the submatrix of the distributed
*          matrix X to operate on.
*
*  DESCX   (global and local input) INTEGER array of dimension 8.
*          The array descriptor of the distributed matrix X.
*
*  INCX    (global input) pointer to INTEGER
*          The global increment for the elements of X. Only two values
*          of INCX are supported in this version, namely 1 and M_X.
*
*  Y       (local input) COMPLEX array containing the local
*          pieces of a distributed matrix of dimension of at least
*              ( (JY-1)*M_Y + IY + ( N - 1 )*abs( INCY ) )
*          This array contains the entries of the distributed vector
*          sub( Y ).
*          On output, CY is overwritten with -CONJG(S)*X + C*Y.
*
*  IY      (global input) pointer to INTEGER
*          The global row index of the submatrix of the distributed
*          matrix Y to operate on.
*
*  JY      (global input) pointer to INTEGER
*          The global column index of the submatrix of the distributed
*          matrix Y to operate on.
*
*  DESCY   (global and local input) INTEGER array of dimension 8.
*          The array descriptor of the distributed matrix Y.
*
*  INCY    (global input) pointer to INTEGER
*          The global increment for the elements of Y. Only two values
*          of INCY are supported in this version, namely 1 and M_Y.
*
*  C       (input) pointer to DOUBLE 
*  S       (input) pointer COMPLEX
*          C and S define a rotation
*             [  C          S  ]
*             [ -conjg(S)   C  ]
*          where C*C + S*CONJG(S) = 1.0.
*
* =====================================================================
*
*  .. Local Scalars ..
*/
   int         ictxt, iix, iiy, info, ixcol, ixrow, iycol, iyrow, jjx,
               jjy, lcm, lcmp, mycol, myrow, nn, np, np0,
               nprow, npcol, nq, nz, ione=1, tmp1, wksz;
   complex16   xwork[1], ywork[1], zero;
/* ..
*  .. PBLAS Buffer ..
*/
   complex16   * buff;
/* ..
*  .. External Functions ..
*/
   void        blacs_gridinfo_();
   void        zgerv2d_();
   void        zgesd2d_();
   void        pbchkvect();
   void        pbchkmat();
   void        pberror_();
   char        * getpbbuf();
   F_INTG_FCT  pbztrnv_();
   F_INTG_FCT  zrot_();
   F_INTG_FCT  ilcm_();
/* ..
*  .. Executable Statements ..
*
*  Get grid parameters
*/
   ictxt = desc_X[CTXT_];
   blacs_gridinfo_( &ictxt, &nprow, &npcol, &myrow, &mycol );
/*
*  Test the input parameters
*/
   info = 0;
   if( nprow == -1 )
      info = -(500+CTXT_+1);
   else
   {
      pbchkvect( *n, 1, *ix, *jx, desc_X, *incx, 5, &iix, &jjx,
                 &ixrow, &ixcol, nprow, npcol, myrow, mycol, &info );
      pbchkvect( *n, 1, *iy, *jy, desc_Y, *incy, 10, &iiy, &jjy,
                 &iyrow, &iycol, nprow, npcol, myrow, mycol, &info );

      if( info == 0 )
      {
         if( *n != 1 )
         {
            if( *incx == desc_X[M_] )
            {                 /* X is distributed along a process row */
               if( *incy == desc_Y[M_] )
               {               /* Y is distributed over a process row */
                  if( ( ixcol != iycol ) ||
                      ( ( (*jx-1) % desc_X[NB_] ) !=
                        ( (*jy-1) % desc_Y[NB_] ) ) )
                     info = -9;
                  else if( desc_Y[NB_] != desc_X[NB_] )
                     info = -(1000+NB_+1);
               }
               else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
               {            /* Y is distributed over a process column */
                  if( ( (*jx-1) % desc_X[NB_] ) != ( (*iy-1) % desc_Y[MB_] ) )
                     info = -8;
                  else if( desc_Y[MB_] != desc_X[NB_] )
                     info = -(1000+MB_+1);
               }
               else
               {
                  info = -11;
               }
            }
            else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) )
            {              /* X is distributed along a process column */
               if( *incy == desc_Y[M_] )
               {                  /* Y is distributed over a process row */
                  if( ( (*ix-1) % desc_X[MB_] ) != ( (*jy-1) % desc_Y[NB_] ) )
                     info = -9;
                  else if( desc_Y[NB_] != desc_X[MB_] )
                     info = -(1000+NB_+1);
               }
               else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
               {            /* Y is distributed over a process column */
                  if( ( ixrow != iyrow ) ||
                      ( ( (*ix-1) % desc_X[MB_] ) !=
                        ( (*iy-1) % desc_Y[MB_] ) ) )
                     info = -8;
                  else if( desc_Y[MB_] != desc_X[MB_] )
                     info = -(1000+MB_+1);
               }
               else
               {
                  info = -11;
               }
            }
            else
            {
               info = -6;
            }
         }
         if( ictxt != desc_Y[CTXT_] )
            info = -(1000+CTXT_+1);
      }
   }
   if( info )
   {
      pberror_( &ictxt, "PZROT", &info );
      return;
   }
/*
*  Quick return if possible.
*/
   zero.re  = ZERO;
   zero.im  = ZERO;
   if( *n == 0 ) return;
/*
*  rotation
*/
   if( *n == 1 )
   {
      if( ( myrow == ixrow ) && ( mycol == ixcol ) )
      {
         buff = &X[iix-1+(jjx-1)*desc_X[LLD_]];
         if( ( myrow != iyrow ) || ( mycol != iycol ) )
         {
            zgesd2d_( &ictxt, n, n, buff, n, &iyrow, &iycol );
            zgerv2d_( &ictxt, n, n, ywork, n, &iyrow, &iycol );
         }
         else
            *ywork = Y[iiy-1+(jjy-1)*desc_Y[LLD_]];
         zrot_( n, buff, n, ywork, n, c, s );
         X[iix-1+(jjx-1)*desc_X[LLD_]] = *buff;
         if( ( myrow == iyrow ) && ( mycol == iycol ) )
            Y[iiy-1+(jjy-1)*desc_Y[LLD_]] = *ywork;
      }
      else if( ( myrow == iyrow ) && ( mycol == iycol ) )
      {
         zgesd2d_( &ictxt, n, n, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], n,
                   &ixrow, &ixcol );
         zgerv2d_( &ictxt, n, n, xwork, n, &ixrow, &ixcol );
         zrot_( n, xwork, n, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], n, c, s );
      }
      return;
   }
 
   if( ( *incx == desc_X[M_] ) && ( *incy == desc_Y[M_] ) )
   {               /* X and Y are both distributed over a process row */
      nz = (*jx-1) % desc_Y[NB_];
      nn = *n + nz;
      nq = numroc_( &nn, &desc_X[NB_], &mycol, &ixcol, &npcol );
      if( mycol == ixcol )
         nq -= nz;
      if( ixrow == iyrow )
      {
         if( myrow == ixrow )
         {
            zrot_( &nq, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
                        &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], c, s );
         }
      }
      else
      {
         if( myrow == ixrow )
         {
            zgesd2d_( &ictxt, &ione, &nq,
                      &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
                      &iyrow, &mycol );
            buff = (complex16 *)getpbbuf( "PZROT", nq*sizeof(complex16) );
            zgerv2d_( &ictxt, &nq, &ione, buff, &ione, &iyrow, &mycol );
            zrot_( &nq, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
                        buff, &ione, c, s );
         }
         else if( myrow == iyrow )
         {
            zgesd2d_( &ictxt, &ione, &nq,
                      &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_],
                      &ixrow, &mycol );
            buff = (complex16 *)getpbbuf( "PZROT", nq*sizeof(complex16) );
            zgerv2d_( &ictxt, &nq, &ione, buff, &ione, &ixrow, &mycol );
            zrot_( &nq, buff, &ione,
                        &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], c, s );
         }
      }
   }
   else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) &&
            ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
   {            /* X and Y are both distributed over a process column */
      nz = (*ix-1) % desc_X[MB_];
      nn = *n + nz;
      np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow );
      if( myrow == ixrow )
         np -= nz;
      if( ixcol == iycol )
      {
         if( mycol == ixcol )
         {
            zrot_( &np, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx,
                        &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy, c, s );
         }
      }
      else
      {
         if( mycol == ixcol )
         {
            zgesd2d_( &ictxt, &np, &ione,
                      &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
                      &myrow, &iycol );
            buff = (complex16 *)getpbbuf( "PZROT", np*sizeof(complex16) );
            zgerv2d_( &ictxt, &np, &ione, buff, &ione, &myrow, &iycol );
            zrot_( &np, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx,
                        buff, &ione, c, s );
         }
         else if( mycol == iycol )
         {
            zgesd2d_( &ictxt, &np, &ione,
                      &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_],
                      &myrow, &ixcol );
            buff = (complex16 *)getpbbuf( "PZROT", np*sizeof(complex16) );
            zgerv2d_( &ictxt, &np, &ione, buff, &ione, &myrow, &ixcol );
            zrot_( &np, buff, &ione,
                        &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy, c, s );
         }
      }
   }
   else       /* X and Y are not distributed along the same direction */
   {
      lcm = ilcm_( &nprow, &npcol );
      if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) )
      {                     /* X is distributed over a process column */
         lcmp = lcm / nprow;
         nz = (*jy-1) % desc_Y[NB_];
         nn = *n + nz;
         tmp1 = nn / desc_Y[MB_];
         np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow );
         np0 = MYROC0( tmp1, nn, desc_X[MB_], nprow );
         tmp1 = np0 / desc_X[MB_];
         wksz = MYROC0( tmp1, np0, desc_X[MB_], lcmp );
         wksz = np + wksz;
 
         buff = (complex16 *)getpbbuf( "PZROT", wksz*sizeof(complex16) );
 
         if( mycol == iycol )
            jjy -= nz;
         if( myrow == ixrow )
            np -= nz;
         pbztrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n,
                   &desc_Y[NB_], &nz, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]],
                   &desc_Y[LLD_], &zero, buff, &ione, &iyrow, &iycol,
                   &ixrow, &ixcol, buff+np );
         if( mycol == ixcol )
         {
            zrot_( &np, &X[iix-1+(jjx-1)*desc_X[LLD_]],
                     incx, buff, &ione, c, s );
         }
         pbztrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n,
                   &desc_Y[NB_], &nz, buff, &ione, &zero, 
                   &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], 
                   &ixrow, &ixcol,  &iyrow, &iycol, buff+np );
      }
      else                  /* Y is distributed over a process column */
      {
         lcmp = lcm / nprow;
         nz = (*jx-1) % desc_X[NB_];
         nn = *n + nz;
         tmp1 = nn / desc_X[MB_];
         np = numroc_( &nn, desc_Y+MB_, &myrow, &iyrow, &nprow );
         np0 = MYROC0( tmp1, nn, desc_Y[MB_], nprow );
         tmp1 = np0 / desc_Y[MB_];
         wksz = MYROC0( tmp1, np0, desc_Y[MB_], lcmp );
         wksz = np + wksz;
 
         buff = (complex16 *)getpbbuf( "PZROT", wksz*sizeof(complex16) );
 
         if( myrow == iyrow )
            np -= nz;
         pbztrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n,
                   &desc_X[NB_], &nz, &X[iix-1+(jjx-1)*desc_X[LLD_]],
                   &desc_X[LLD_], &zero, buff, &ione, &ixrow, &ixcol,
                   &iyrow, &iycol, buff+np );
         if( mycol == iycol )
         {
            zrot_( &np, buff, &ione,
                     &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy, c, s );
         }
         pbztrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n,
                   &desc_X[NB_], &nz, buff, &ione, &zero,
                   &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], 
                   &iyrow, &iycol, &ixrow, &ixcol, buff+np );
      }
   }
}
SHAR_EOF
fi # end of overwriting check
if test -f 'pztrevc.f'
then
	echo shar: will not over-write existing file "'pztrevc.f'"
else
cat << "SHAR_EOF" > 'pztrevc.f'
      SUBROUTINE PZTREVC( SIDE, HOWMNY, SELECT, N, T, DESCT, VL, DESCVL, 
     $                    VR, DESCVR, MM, M, WORK, RWORK, INFO )
*
*  Mark R. Fahey
*  June 20, 2000
*
*     .. Scalar Arguments ..
      CHARACTER          HOWMNY, SIDE
      INTEGER            INFO, M, MM, N
*     ..
*     .. Array Arguments ..
      LOGICAL            SELECT( * )
      INTEGER            DESCT( * ), DESCVL( * ), DESCVR( * )
      DOUBLE PRECISION   RWORK( * )
      COMPLEX*16         T( * ), VL( * ), VR( * ), WORK( * )
*     ..
*
*  Purpose
*  =======
*
*  PZTREVC computes some or all of the right and/or left eigenvectors of
*  a complex upper triangular matrix T in parallel.
*
*  The right eigenvector x and the left eigenvector y of T corresponding
*  to an eigenvalue w are defined by:
*
*               T*x = w*x,     y'*T = w*y'
*
*  where y' denotes the conjugate transpose of the vector y.
*
*  If all eigenvectors are requested, the routine may either return the
*  matrices X and/or Y of right or left eigenvectors of T, or the
*  products Q*X and/or Q*Y, where Q is an input unitary
*  matrix. If T was obtained from the Schur factorization of an
*  original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of
*  right or left eigenvectors of A.
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclicly distributed array.
*  Such a global array has an associated description vector DESCA.
*  In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
*                                 DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) DESCA( M_ )    The number of rows in the global
*                                 array A.
*  N_A    (global) DESCA( N_ )    The number of columns in the global
*                                 array A.
*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
*                                 the rows of the array.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
*                                 the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension r x c.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the r processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the c processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Arguments
*  =========
*
*  SIDE    (global input) CHARACTER*1
*          = 'R':  compute right eigenvectors only;
*          = 'L':  compute left eigenvectors only;
*          = 'B':  compute both right and left eigenvectors.
*
*  HOWMNY  (global input) CHARACTER*1
*          = 'A':  compute all right and/or left eigenvectors;
*          = 'B':  compute all right and/or left eigenvectors,
*                  and backtransform them using the input matrices
*                  supplied in VR and/or VL;
*          = 'S':  compute selected right and/or left eigenvectors,
*                  specified by the logical array SELECT.
*
*  SELECT  (global input) LOGICAL array, dimension (N)
*          If HOWMNY = 'S', SELECT specifies the eigenvectors to be
*          computed.
*          If HOWMNY = 'A' or 'B', SELECT is not referenced.
*          To select the eigenvector corresponding to the j-th
*          eigenvalue, SELECT(j) must be set to .TRUE..
*
*  N       (global input) INTEGER
*          The order of the matrix T. N >= 0.
*
*  T       (global input/output) COMPLEX*16 array, dimension 
*          (DESCT(LLD_),*)
*          The upper triangular matrix T.  T is modified, but restored
*          on exit.
*
*  DESCT   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix T.
*
*  VL      (global input/output) COMPLEX*16 array, dimension 
*          (DESCVL(LLD_),MM)
*          On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must
*          contain an N-by-N matrix Q (usually the unitary matrix Q of
*          Schur vectors returned by ZHSEQR).
*          On exit, if SIDE = 'L' or 'B', VL contains:
*          if HOWMNY = 'A', the matrix Y of left eigenvectors of T;
*          if HOWMNY = 'B', the matrix Q*Y;
*          if HOWMNY = 'S', the left eigenvectors of T specified by
*                           SELECT, stored consecutively in the columns
*                           of VL, in the same order as their
*                           eigenvalues.
*          If SIDE = 'R', VL is not referenced.
*
*  DESCVL  (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix VL.
*
*  VR      (global input/output) COMPLEX*16 array, dimension 
*          (DESCVR(LLD_),MM)
*          On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must
*          contain an N-by-N matrix Q (usually the unitary matrix Q of
*          Schur vectors returned by ZHSEQR).
*          On exit, if SIDE = 'R' or 'B', VR contains:
*          if HOWMNY = 'A', the matrix X of right eigenvectors of T;
*          if HOWMNY = 'B', the matrix Q*X;
*          if HOWMNY = 'S', the right eigenvectors of T specified by
*                           SELECT, stored consecutively in the columns
*                           of VR, in the same order as their
*                           eigenvalues.
*          If SIDE = 'L', VR is not referenced.
*
*  DESCVR  (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix VR.
*
*  MM      (global input) INTEGER
*          The number of columns in the arrays VL and/or VR. MM >= M.
*
*  M       (global output) INTEGER
*          The number of columns in the arrays VL and/or VR actually
*          used to store the eigenvectors.  If HOWMNY = 'A' or 'B', M
*          is set to N.  Each selected eigenvector occupies one
*          column.
*
*  WORK    (local workspace) COMPLEX*16 array, dimension ( 2*DESCT(LLD_) )
*          Additional workspace may be required if PZLATRS is updated
*          to use WORK.
*
*  RWORK   (local workspace) DOUBLE PRECISION array, 
*                                          dimension ( DESCT(LLD_) )
*
*  INFO    (global output) INTEGER
*          = 0:  successful exit
*          < 0:  if INFO = -i, the i-th argument had an illegal value
*
*  Further Details
*  ===============
*
*  The algorithm used in this program is basically backward (forward)
*  substitution.  It is the hope that scaling would be used to make the 
*  the code robust against possible overflow.  But scaling has not yet
*  been implemented in PZLATRS which is called by this routine to solve
*  the triangular systems.  PZLATRS just calls PZTRSV.
*
*  Each eigenvector is normalized so that the element of largest
*  magnitude has magnitude 1; here the magnitude of a complex number
*  (x,y) is taken to be |x| + |y|.
*
*  =====================================================================
*
*     .. Parameters ..
      DOUBLE PRECISION   ZERO, ONE
      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
      COMPLEX*16         CZERO, CONE
      PARAMETER          ( CZERO = ( 0.0D+0, 0.0D+0 ),
     $                     CONE = ( 1.0D+0, 0.0D+0 ) )
      INTEGER            BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
     $                   MB_, NB_, RSRC_, CSRC_, LLD_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
     $                   CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                   RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
*     ..
*     .. Local Scalars ..
      LOGICAL            ALLV, BOTHV, LEFTV, OVER, RIGHTV, SOMEV
      INTEGER            CONTXT, CSRC, I, II, IS, J, K, KI, LDT, LDW, 
     $                   MB, NB, NPCOL, NPROW, RSRC, LDVL, LDVR
      DOUBLE PRECISION   OVFL, REMAXD, SCALE, SMIN, SMLNUM, ULP, UNFL
      COMPLEX*16         CDUM, REMAXC, SHIFT, TEMP
*     ..
*     .. Local Arrays ..
      INTEGER            DESCW( DLEN_ )
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            ICEIL, NUMROC
      DOUBLE PRECISION   PDLAMCH
      EXTERNAL           ICEIL, LSAME, NUMROC, PDLAMCH
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, IGAMN2D, INFOG2L, DGSUM2D,
     $                   PDLABAD, PDZASUM, PXERBLA, PZAMAX, PZCOPY, 
     $                   PZDSCAL, PZGEMV, PZLASET, PZLATRS, ZGSUM2D
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, MIN, MOD
*     ..
*     .. Statement Functions ..
      DOUBLE PRECISION   CABS1, pzlantr
*     ..
*     .. Statement Function definitions ..
      CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )
*     ..
*     .. Executable Statements ..
*
*       This is just to keep ftnchek happy
      IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_*
     $    RSRC_.LT.0 )RETURN
*
      CONTXT = DESCT( CTXT_ )
      RSRC = DESCT( RSRC_ )
      CSRC = DESCT( CSRC_ )
      MB = DESCT( MB_ )
      NB = DESCT( NB_ )
      LDT = DESCT( LLD_ )
      LDW = LDT
      LDVR = DESCVR( LLD_ )
      LDVL = DESCVL( LLD_ )
*
      CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL )
      SELF = MYROW*NPCOL + MYCOL
*
*     Decode and test the input parameters
*
      BOTHV = LSAME( SIDE, 'B' )
      RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV
      LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV
*
      ALLV = LSAME( HOWMNY, 'A' )
      OVER = LSAME( HOWMNY, 'B' ) .OR. LSAME( HOWMNY, 'O' )
      SOMEV = LSAME( HOWMNY, 'S' )
*
*     Set M to the number of columns required to store the selected
*     eigenvectors.
*
      IF( SOMEV ) THEN
         M = 0
         DO 10 J = 1, N
            IF( SELECT( J ) )
     $         M = M + 1
   10    CONTINUE
      ELSE
         M = N
      END IF
*
      INFO = 0
      IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN
         INFO = -1
      ELSE IF( .NOT.ALLV .AND. .NOT.OVER .AND. .NOT.SOMEV ) THEN
         INFO = -2
      ELSE IF( N.LT.0 ) THEN
         INFO = -4
      ELSE IF( MM.LT.M ) THEN
         INFO = -11
      END IF
      CALL IGAMN2D( CONTXT, 'ALL', ' ', 1, 1, INFO, 1, ITMP1, ITMP2, -1,
     $              -1, -1 )
      IF( INFO.LT.0 ) THEN
         CALL PXERBLA( CONTXT, 'PZTREVC', -INFO )
         RETURN
      END IF
*
*     Quick return if possible.
*
      IF( N.EQ.0 )
     $   RETURN
*
*     Set the constants to control overflow.
*
      UNFL = PDLAMCH( CONTXT, 'Safe minimum' )
      OVFL = ONE / UNFL
      CALL PDLABAD( CONTXT, UNFL, OVFL )
      ULP = PDLAMCH( CONTXT, 'Precision' )
      SMLNUM = UNFL*( N / ULP )
*
*     Store the diagonal elements of T in working array WORK( LDW+1 ).
*
      DO 20 I = 1, N
         CALL INFOG2L( I, I, DESCT, NPROW, NPCOL, MYROW, MYCOL,
     $                 IROW, ICOL, ITMP1, ITMP2 )
         IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN
            WORK( LDW+IROW ) = T( (ICOL-1)*LDT+IROW )
         END IF
   20 CONTINUE
*
*     Compute 1-norm of each column of strictly upper triangular
*     part of T to control overflow in triangular solver.  Computed,
*     but not used.  For use in PZLATRS.
*
      RWORK( 1 ) = ZERO
      DO 30 J = 2, N
         CALL PDZASUM( J-1, RWORK( J ), T, 1, J, DESCT, 1 )
   30 CONTINUE
*     I replicate the norms in RWORK.  Should they be distributed
*     over the process rows?
      CALL DGSUM2D( CONTXT, 'Row', ' ', N, 1, RWORK, 1, -1, -1 )
*
      IF( RIGHTV ) THEN
*
*        Compute right eigenvectors.
*
*        Need to set the distribution pattern of WORK
*
         CALL DESCINIT( DESCW, N, 1, NB, 1, RSRC, CSRC, CONTXT, LDW, 
     $                  INFO )
*
         IS = M
         DO 80 KI = N, 1, -1
*
            IF( SOMEV ) THEN
               IF( .NOT.SELECT( KI ) )
     $            GO TO 80
            END IF
*
            SMIN = ZERO
            SHIFT = CZERO
            CALL INFOG2L( KI, KI, DESCT, NPROW, NPCOL, MYROW, MYCOL,
     $                    IROW, ICOL, ITMP1, ITMP2 )
            IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN
               SHIFT = T( (ICOL-1)*LDT+IROW )
               SMIN = MAX( ULP*( CABS1( SHIFT ) ), SMLNUM )
            END IF
            CALL DGSUM2D( CONTXT, 'ALL', ' ', 1, 1, SMIN, 1, -1, -1 )
            CALL ZGSUM2D( CONTXT, 'ALL', ' ', 1, 1, SHIFT, 1, -1, -1 )
*
            CALL INFOG2L( 1, 1, DESCW, NPROW, NPCOL, MYROW, MYCOL,
     $                    IROW, ICOL, ITMP1, ITMP2 )
            IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN
               WORK( 1 ) = CONE
            END IF
*
*           Form right-hand side.  Distribute rhs onto first column
*           of processor grid.
*
            IF( KI.GT.1 ) THEN
               CALL PZCOPY( KI-1, T, 1, KI, DESCT, 1, WORK, 1, 
     $                      1, DESCW, 1 )
            ENDIF
            DO 40 K = 1, KI-1
               CALL INFOG2L( K, 1, DESCW, NPROW, NPCOL, MYROW, MYCOL,
     $                       IROW, ICOL, ITMP1, ITMP2 )
               IF( MYROW.EQ.ITMP1 .AND. MYCOL.EQ.ITMP2 ) THEN
                  WORK( IROW ) = -WORK( IROW )
               END IF
   40       CONTINUE
*
*           Solve the triangular system:
*              (T(1:KI-1,1:KI-1) - T(KI,KI))*X = SCALE*WORK.
*
            DO 50 K = 1, KI - 1
               CALL INFOG2L( K, K, DESCT, NPROW, NPCOL, MYROW, MYCOL,
     $                       IROW, ICOL, ITMP1, ITMP2 )
               IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN
                  T( (ICOL-1)*LDT+IROW ) = T( (ICOL-1)*LDT+IROW ) - 
     $                                     SHIFT
                  IF( CABS1( T( (ICOL-1)*LDT+IROW ) ).LT.SMIN ) THEN
                     T( (ICOL-1)*LDT+IROW ) = DCMPLX( SMIN )
                  END IF
               END IF
   50       CONTINUE
*
            IF( KI.GT.1 ) THEN
               CALL PZLATRS( 'Upper', 'No transpose', 'Non-unit', 'Y',
     $                      KI-1, T, 1, 1, DESCT, WORK, 1, 1, DESCW,
     $                      SCALE, RWORK, INFO )
               CALL INFOG2L( KI, 1, DESCW, NPROW, NPCOL, MYROW, MYCOL,
     $                       IROW, ICOL, ITMP1, ITMP2 )
               IF( MYROW.EQ.ITMP1 .AND. MYCOL.EQ.ITMP2 ) THEN
                  WORK( IROW ) = DCMPLX( SCALE )
               END IF
            END IF
*
*           Copy the vector x or Q*x to VR and normalize.
*
            IF( .NOT.OVER ) THEN
               CALL PZCOPY( KI, WORK, 1, 1, DESCW, 1, VR, 1, IS, 
     $                      DESCVR, 1 )
*
               CALL PZAMAX( KI, REMAXC, II, VR, 1, IS, DESCVR, 1 )
               REMAXD = ONE / MAX( CABS1( REMAXC ), UNFL )
               CALL PZDSCAL( KI, REMAXD, VR, 1, IS, DESCVR, 1 )
*
               CALL PZLASET( ' ', N-KI, 1, CZERO, CZERO, VR, KI+1, 
     $                       IS, DESCVR )
            ELSE
               IF( KI.GT.1 )
     $            CALL PZGEMV( 'N', N, KI-1, CONE, VR, 1, 1, DESCVR,
     $                         WORK, 1, 1, DESCW, 1, DCMPLX( SCALE ), 
     $                         VR, 1, KI, DESCVR, 1 )
*
               CALL PZAMAX( N, REMAXC, II, VR, 1, KI, DESCVR, 1 )
               REMAXD = ONE / MAX( CABS1( REMAXC ), UNFL )
               CALL PZDSCAL( N, REMAXD, VR, 1, KI, DESCVR, 1 )
            END IF
*
*           Set back the original diagonal elements of T.
*
            DO 70 K = 1, KI - 1
               CALL INFOG2L( K, K, DESCT, NPROW, NPCOL, MYROW, MYCOL,
     $                       IROW, ICOL, ITMP1, ITMP2 )
               IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN
                  T( (ICOL-1)*LDT+IROW ) = WORK( LDW+IROW )
               END IF
   70       CONTINUE
*
            IS = IS - 1
   80    CONTINUE
      END IF
*
      IF( LEFTV ) THEN
*
*        Compute left eigenvectors.
*
*        Need to set the distribution pattern of WORK
*
         CALL DESCINIT( DESCW, N, 1, MB, 1, RSRC, CSRC, CONTXT, LDW, 
     $                  INFO )
*
         IS = 1
         DO 130 KI = 1, N
*
            IF( SOMEV ) THEN
               IF( .NOT.SELECT( KI ) )
     $            GO TO 130
            END IF
*
            SMIN = ZERO
            SHIFT = CZERO
            CALL INFOG2L( KI, KI, DESCT, NPROW, NPCOL, MYROW, MYCOL,
     $                    IROW, ICOL, ITMP1, ITMP2 )
            IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN
               SHIFT = T( (ICOL-1)*LDT+IROW )
               SMIN = MAX( ULP*( CABS1( SHIFT ) ), SMLNUM )
            END IF
            CALL DGSUM2D( CONTXT, 'ALL', ' ', 1, 1, SMIN, 1, -1, -1 )
            CALL ZGSUM2D( CONTXT, 'ALL', ' ', 1, 1, SHIFT, 1, -1, -1 )
*
            CALL INFOG2L( N, 1, DESCW, NPROW, NPCOL, MYROW, MYCOL,
     $                    IROW, ICOL, ITMP1, ITMP2 )
            IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN
               WORK( IROW ) = CONE
            END IF
*
*           Form right-hand side.
*
            IF( KI.LT.N ) THEN
               CALL PZCOPY( N-KI, T, KI, KI+1, DESCT, N, WORK, KI+1, 
     $                      1, DESCW, 1 )
            ENDIF
            DO 90 K = KI + 1, N
               CALL INFOG2L( K, 1, DESCW, NPROW, NPCOL, MYROW, MYCOL,
     $                       IROW, ICOL, ITMP1, ITMP2 )
               IF( MYROW.EQ.ITMP1 .AND. MYCOL.EQ.ITMP2 ) THEN
                  WORK( IROW ) = -DCONJG( WORK( IROW ) )
               END IF
   90       CONTINUE
*
*           Solve the triangular system:
*              (T(KI+1:N,KI+1:N) - T(KI,KI))'*X = SCALE*WORK.
*
            DO 100 K = KI + 1, N
               CALL INFOG2L( K, K, DESCT, NPROW, NPCOL, MYROW, MYCOL,
     $                       IROW, ICOL, ITMP1, ITMP2 )
               IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN
                  T( (ICOL-1)*LDT+IROW ) = T( (ICOL-1)*LDT+IROW ) - 
     $                                     SHIFT
                  IF( CABS1( T( (ICOL-1)*LDT+IROW ) ).LT.SMIN )
     $               T( (ICOL-1)*LDT+IROW ) = DCMPLX( SMIN )
               END IF
  100       CONTINUE
*
            IF( KI.LT.N ) THEN
               CALL PZLATRS( 'Upper', 'Conjugate transpose', 'Nonunit',
     $                     'Y', N-KI, T, KI+1, KI+1, DESCT, WORK, KI+1, 
     $                      1, DESCW, SCALE, RWORK, INFO )
               CALL INFOG2L( KI, 1, DESCW, NPROW, NPCOL, MYROW, MYCOL,
     $                       IROW, ICOL, ITMP1, ITMP2 )
               IF( MYROW.EQ.ITMP1 .AND. MYCOL.EQ.ITMP2 ) THEN
                  WORK( IROW ) = DCMPLX( SCALE )
               END IF
            END IF
*
*           Copy the vector x or Q*x to VL and normalize.
*
            IF( .NOT.OVER ) THEN
               CALL PZCOPY( N-KI+1, WORK, KI, 1, DESCW, 1, VL, KI, 
     $                      IS, DESCVL, 1 )
*
               CALL PZAMAX( N-KI+1, REMAXC, II, VL, KI, IS, DESCVL, 1 )
               REMAXD = ONE / MAX( CABS1( REMAXC ), UNFL )
               CALL PZDSCAL( N-KI+1, REMAXD, VL, KI, IS, DESCVL, 1 )
*
               CALL PZLASET( ' ', KI-1, 1, CZERO, CZERO, VL, 1, 
     $                       IS, DESCVL )
            ELSE
               IF( KI.LT.N )
     $            CALL PZGEMV( 'N', N, N-KI, CONE, VL, 1, KI+1, 
     $                        DESCVL, WORK, KI+1, 1, DESCW, 1, 
     $                        DCMPLX( SCALE ), VL, 1, KI, DESCVL, 1 )
*
               CALL PZAMAX( N, REMAXC, II, VL, 1, KI, DESCVL, 1 )
               REMAXD = ONE / MAX( CABS1( REMAXC ), UNFL )
               CALL PZDSCAL( N, REMAXD, VL, 1, KI, DESCVL, 1 )
            END IF
*
*           Set back the original diagonal elements of T.
*
            DO 120 K = KI + 1, N
               CALL INFOG2L( K, K, DESCT, NPROW, NPCOL, MYROW, MYCOL,
     $                       IROW, ICOL, ITMP1, ITMP2 )
               IF( ( MYROW.EQ.ITMP1 ) .AND. ( MYCOL.EQ.ITMP2 ) ) THEN
                  T( (ICOL-1)*LDT+IROW ) = WORK( LDW+IROW )
               END IF
  120       CONTINUE
*
            IS = IS + 1
  130    CONTINUE
      END IF
*
      RETURN
*
*     End of PZTREVC
*
      END
SHAR_EOF
fi # end of overwriting check
if test -f 'zlahqr.f'
then
	echo shar: will not over-write existing file "'zlahqr.f'"
else
cat << "SHAR_EOF" > 'zlahqr.f'
      SUBROUTINE ZLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ,
     $                   IHIZ, Z, LDZ, INFO )
*
*  Mark R. Fahey
*  June 22, 2000
*
*     .. Scalar Arguments ..
      LOGICAL            WANTT, WANTZ
      INTEGER            IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, N
*     ..
*     .. Array Arguments ..
      COMPLEX*16         H( LDH, * ), W( * ), Z( LDZ, * )
*     ..
*
*  Purpose
*  =======
*
*  ZLAHQR is an auxiliary routine called by ZHSEQR to update the
*    eigenvalues and Schur decomposition already computed by ZHSEQR, by
*    dealing with the Hessenberg submatrix in rows and columns ILO to IHI.
*  This version of ZLAHQR (not the standard LAPACK version) uses a
*    double-shift algorithm (like LAPACK's DLAHQR).
*  Unlike the standard LAPACK convention, this does not assume the 
*    subdiagonal is real, nor does it work to preserve this quality if 
*    given.
*
*  Arguments
*  =========
*
*  WANTT   (input) LOGICAL
*          = .TRUE. : the full Schur form T is required;
*          = .FALSE.: only eigenvalues are required.
*
*  WANTZ   (input) LOGICAL
*          = .TRUE. : the matrix of Schur vectors Z is required;
*          = .FALSE.: Schur vectors are not required.
*
*  N       (input) INTEGER
*          The order of the matrix H.  N >= 0.
*
*  ILO     (input) INTEGER
*  IHI     (input) INTEGER
*          It is assumed that H is already upper triangular in rows and
*          columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless ILO = 1).
*          ZLAHQR works primarily with the Hessenberg submatrix in rows
*          and columns ILO to IHI, but applies transformations to all of
*          H if WANTT is .TRUE..
*          1 <= ILO <= max(1,IHI); IHI <= N.
*
*  H       (input/output) COMPLEX*16 array, dimension (LDH,N)
*          On entry, the upper Hessenberg matrix H.
*          On exit, if WANTT is .TRUE., H is upper triangular in rows
*          and columns ILO:IHI.  If WANTT is .FALSE., the contents of H 
*          are unspecified on exit.
*
*  LDH     (input) INTEGER
*          The leading dimension of the array H. LDH >= max(1,N).
*
*  W       (output) COMPLEX*16 array, dimension (N)
*          The computed eigenvalues ILO to IHI are stored in the
*          corresponding elements of W. If WANTT is .TRUE., the
*          eigenvalues are stored in the same order as on the diagonal
*          of the Schur form returned in H, with W(i) = H(i,i).
*
*  ILOZ    (input) INTEGER
*  IHIZ    (input) INTEGER
*          Specify the rows of Z to which transformations must be
*          applied if WANTZ is .TRUE..
*          1 <= ILOZ <= ILO; IHI <= IHIZ <= N.
*
*  Z       (input/output) COMPLEX*16 array, dimension (LDZ,N)
*          If WANTZ is .TRUE., on entry Z must contain the current
*          matrix Z of transformations, and on exit Z has been updated; 
*          transformations are applied only to the submatrix 
*          Z(ILOZ:IHIZ,ILO:IHI).  If WANTZ is .FALSE., Z is not 
*          referenced.
*
*  LDZ     (input) INTEGER
*          The leading dimension of the array Z. LDZ >= max(1,N).
*
*  INFO    (output) INTEGER
*          = 0: successful exit
*          > 0: if INFO = i, ZLAHQR failed to compute all the
*               eigenvalues ILO to IHI in a total of 30*(IHI-ILO+1)
*               iterations; elements i+1:ihi of W contain those
*               eigenvalues which have been successfully computed.
*
*  =====================================================================
*
*     .. Parameters ..
      COMPLEX*16         ZERO, ONE
      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ),
     $                   ONE = ( 1.0D+0, 0.0D+0 ) )
      DOUBLE PRECISION   RZERO, RONE, HALF
      PARAMETER          ( RZERO = 0.0D+0, RONE = 1.0D+0,
     $                   HALF = 0.5D+0 )
      DOUBLE PRECISION   DAT1, DAT2
      PARAMETER          ( DAT1 = 0.75D+0, DAT2 = -0.4375D+0 )
*     ..
*     .. Local Scalars ..
      INTEGER            I, I1, I2, ITN, ITS, J, K, L, M, NH, NR, NZ
      DOUBLE PRECISION   CS, OVFL, RTEMP, S, SMLNUM, TST1, ULP, UNFL
      COMPLEX*16         CDUM, H00, H10, H11, H11S, H12, H21, H22, H33,
     $                   H33S, H43H34, H44, H44S, SN, SUM, TEMP, T1, T2,
     $                   T3, V1, V2, V3
*     ..
*     .. Local Arrays ..
      DOUBLE PRECISION   RWORK( 1 )
      COMPLEX*16         V( 3 )
*     ..
*     .. External Functions ..
      DOUBLE PRECISION   DLAMCH, DLAPY2, ZLANHS
      COMPLEX*16         ZLADIV
      EXTERNAL           DLAMCH, DLAPY2, ZLANHS, ZLADIV
*     ..
*     .. External Subroutines ..
      EXTERNAL           DLABAD, ZCOPY, ZLANV2, ZLARFG, ZSCAL
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          ABS, DBLE, DCONJG, DIMAG, MAX, MIN, SQRT
*     ..
*     .. Statement Functions ..
      DOUBLE PRECISION   CABS1
*     ..
*     .. Statement Function definitions ..
      CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )
*     ..
*     .. Executable Statements ..
*
      INFO = 0
*
*     Quick return if possible
*
      IF( N.EQ.0 )
     $   RETURN
      IF( ILO.EQ.IHI ) THEN
         W( ILO ) = H( ILO, ILO )
         RETURN
      END IF
*
      NH = IHI - ILO + 1
      NZ = IHIZ - ILOZ + 1
*
*     Set machine-dependent constants for the stopping criterion.
*     If norm(H) <= sqrt(OVFL), overflow should not occur.
*
      UNFL = DLAMCH( 'Safe minimum' )
      OVFL = RONE / UNFL
      CALL DLABAD( UNFL, OVFL )
      ULP = DLAMCH( 'Precision' )
      SMLNUM = UNFL*( NH / ULP )
*
*     I1 and I2 are the indices of the first row and last column of H
*     to which transformations must be applied. If eigenvalues only are
*     being computed, I1 and I2 are set inside the main loop.
*
      IF( WANTT ) THEN
         I1 = 1
         I2 = N
      END IF
*
*     ITN is the total number of QR iterations allowed.
*
      ITN = 30*NH
*
*     The main loop begins here. I is the loop index and decreases from
*     IHI to ILO in steps of 1 or 2. Each iteration of the loop works
*     with the active submatrix in rows and columns L to I.
*     Eigenvalues I+1 to IHI have already converged. Either L = ILO, or
*     H(L,L-1) is negligible so that the matrix splits.
*
      I = IHI
   10 CONTINUE
      L = ILO
      IF( I.LT.ILO )
     $   GO TO 130
*
*     Perform QR iterations on rows and columns ILO to I until a
*     submatrix of order 1 or 2 splits off at the bottom because a
*     subdiagonal element has become negligible.
*
      DO 110 ITS = 0, ITN
*
*        Look for a single small subdiagonal element.
*
         DO 20 K = I, L + 1, -1
            TST1 = CABS1( H( K-1, K-1 ) ) + CABS1( H( K, K ) )
            IF( TST1.EQ.RZERO )
     $         TST1 = ZLANHS( '1', I-L+1, H( L, L ), LDH, RWORK )
            IF( CABS1( H( K, K-1 ) ).LE.MAX( ULP*TST1, SMLNUM ) )
     $         GO TO 30
   20    CONTINUE
   30    CONTINUE
         L = K
         IF( L.GT.ILO ) THEN
*
*           H(L,L-1) is negligible
*
            H( L, L-1 ) = ZERO
         END IF
*
*        Exit from loop if a submatrix of order 1 or 2 has split off.
*
         IF( L.GE.I-1 )
     $      GO TO 120
*
*        Now the active submatrix is in rows and columns L to I. If
*        eigenvalues only are being computed, only the active submatrix
*        need be transformed.
*
         IF( .NOT.WANTT ) THEN
            I1 = L
            I2 = I
         END IF
*
         IF( ITS.EQ.10 .OR. ITS.EQ.20 ) THEN
*
*           Exceptional shift.
*
C            S = ABS( DBLE( H( I,I-1 ) ) ) + ABS( DBLE( H( I-1,I-2 ) ) )
            S = CABS1( H( I,I-1 ) ) + CABS1( H( I-1,I-2 ) )
            H44 = DAT1*S
            H33 = H44
            H43H34 = DAT2*S*S
         ELSE
*
*           Prepare to use Wilkinson's shift.
*
            H44 = H( I, I )
            H33 = H( I-1, I-1 )
            H43H34 = H( I, I-1 )*H( I-1, I )
         END IF
*
*        Look for two consecutive small subdiagonal elements.
*
         DO 40 M = I - 2, L, -1
*
*           Determine the effect of starting the double-shift QR
*           iteration at row M, and see if this would make H(M,M-1)
*           negligible.
*
            H11 = H( M, M )
            H22 = H( M+1, M+1 )
            H21 = H( M+1, M )
            H12 = H( M, M+1 )
            H44S = H44 - H11
            H33S = H33 - H11
            V1 = ( H33S*H44S-H43H34 ) / H21 + H12
            V2 = H22 - H11 - H33S - H44S
            V3 = H( M+2, M+1 )
            S = CABS1( V1 ) + CABS1( V2 ) + ABS( V3 )
            V1 = V1 / S
            V2 = V2 / S
            V3 = V3 / S
            V( 1 ) = V1
            V( 2 ) = V2
            V( 3 ) = V3
            IF( M.EQ.L )
     $         GO TO 50
            H00 = H( M-1, M-1 )
            H10 = H( M, M-1 )
            TST1 = CABS1( V1 )*( CABS1( H00 )+CABS1( H11 )+
     $             CABS1( H22 ) )
            IF( CABS1( H10 )*( CABS1( V2 )+CABS1( V3 ) ).LE.ULP*TST1 )
     $         GO TO 50
   40    CONTINUE
   50    CONTINUE
*
*        Double-shift QR step
*
         DO 100 K = M, I - 1
*
*           The first iteration of this loop determines a reflection G
*           from the vector V and applies it from left and right to H,
*           thus creating a nonzero bulge below the subdiagonal.
*
*           Each subsequent iteration determines a reflection G to
*           restore the Hessenberg form in the (K-1)th column, and thus
*           chases the bulge one step toward the bottom of the active
*           submatrix.  NR is the order of G
*
            NR = MIN( 3, I-K+1 )
            IF( K.GT.M )
     $         CALL ZCOPY( NR, H( K, K-1 ), 1, V, 1 )
            CALL ZLARFG( NR, V( 1 ), V( 2 ), 1, T1 )
            IF( K.GT.M ) THEN
               H( K, K-1 ) = V( 1 )
               H( K+1, K-1 ) = ZERO
               IF( K.LT.I-1 )
     $            H( K+2, K-1 ) = ZERO
            ELSE IF( M.GT.L ) THEN
*              The real double-shift code uses H( K, K-1 ) = -H( K, K-1 )
*              instead of the following.
               H( K, K-1 ) = H( K, K-1 ) - DCONJG( T1 )*H( K, K-1 )
            END IF
            V2 = V( 2 )
            T2 = T1*V2
            IF( NR.EQ.3 ) THEN
               V3 = V( 3 )
               T3 = T1*V3
*
*              Apply G from the left to transform the rows of the matrix
*              in columns K to I2.
*
               DO 60 J = K, I2
                  SUM = DCONJG( T1 )*H( K, J ) + 
     $               DCONJG( T2 )*H( K+1, J ) + DCONJG( T3 )*H( K+2, J )
                  H( K, J ) = H( K, J ) - SUM
                  H( K+1, J ) = H( K+1, J ) - SUM*V2
                  H( K+2, J ) = H( K+2, J ) - SUM*V3
   60          CONTINUE
*
*              Apply G from the right to transform the columns of the
*              matrix in rows I1 to min(K+3,I).
*
               DO 70 J = I1, MIN( K+3, I )
                  SUM = T1*H( J, K ) + T2*H( J, K+1 ) + T3*H( J, K+2 )
                  H( J, K ) = H( J, K ) - SUM
                  H( J, K+1 ) = H( J, K+1 ) - SUM*DCONJG( V2 )
                  H( J, K+2 ) = H( J, K+2 ) - SUM*DCONJG( V3 )
   70          CONTINUE
*
               IF( WANTZ ) THEN
*
*              Accumulate transformations in the matrix Z
*
                  DO 80 J = ILOZ, IHIZ
                     SUM = T1*Z( J, K ) + T2*Z( J, K+1 ) + 
     $                                    T3*Z( J, K+2 )
                     Z( J, K ) = Z( J, K ) - SUM
                     Z( J, K+1 ) = Z( J, K+1 ) - SUM*DCONJG( V2 )
                     Z( J, K+2 ) = Z( J, K+2 ) - SUM*DCONJG( V3 )
   80             CONTINUE
               END IF
            ELSE IF( NR.EQ.2 ) THEN
*
*              Apply G from the left to transform the rows of the matrix
*              in columns K to I2.
*
               DO 61 J = K, I2
                  SUM = DCONJG( T1 )*H( K, J ) + DCONJG(T2)*H( K+1, J )
                  H( K, J ) = H( K, J ) - SUM
                  H( K+1, J ) = H( K+1, J ) - SUM*V2
   61          CONTINUE
*
*              Apply G from the right to transform the columns of the
*              matrix in rows I1 to min(K+2,I).
*
               DO 71 J = I1, MIN( K+2, I )
                  SUM = T1*H( J, K ) + T2*H( J, K+1 )
                  H( J, K ) = H( J, K ) - SUM
                  H( J, K+1 ) = H( J, K+1 ) - SUM*DCONJG( V2 )
   71          CONTINUE
*
               IF( WANTZ ) THEN
*
*                 Accumulate transformations in the matrix Z
*
                  DO 81 J = ILOZ, IHIZ
                     SUM = T1*Z( J, K ) + T2*Z( J, K+1 )
                     Z( J, K ) = Z( J, K ) - SUM
                     Z( J, K+1 ) = Z( J, K+1 ) - SUM*DCONJG( V2 )
   81             CONTINUE
               END IF
            END IF
*
*           Since at the start of the QR step we have for M > L
*              H( K, K-1 ) = H( K, K-1 ) - DCONJG( T1 )*H( K, K-1 )
*           then we don't need to do the following
*           IF( K.EQ.M .AND. M.GT.L ) THEN
*              If the QR step was started at row M > L because two
*              consecutive small subdiagonals were found, then H(M,M-1)
*              must also be updated by a factor of (1-T1).
*              TEMP = ONE - T1
*              H( m, m-1 ) = H( m, m-1 )*DCONJG( TEMP )
*           END IF
  100    CONTINUE
*
*        Ensure that H(I,I-1) is real.
*
  110 CONTINUE
*
*     Failure to converge in remaining number of iterations
*
      INFO = I
      RETURN
*
  120 CONTINUE
*
      IF( L.EQ.I ) THEN
*
*        H(I,I-1) is negligible: one eigenvalue has converged.
*
         W( I ) = H( I, I )
*
      ELSE IF( L.EQ.I-1 ) THEN
*
*        H(I-1,I-2) is negligible: a pair of eigenvalues have converged.
*
*        Transform the 2-by-2 submatrix to standard Schur form,
*        and compute and store the eigenvalues.
*
         CALL ZLANV2( H( I-1, I-1 ), H( I-1, I ), H( I, I-1 ), 
     $                H( I, I ), W( I-1 ), W( I ), CS, SN )
*
         IF( WANTT ) THEN
*
*           Apply the transformation to the rest of H.
*
            IF( I2.GT.I )
     $         CALL ZROT( I2-I, H( I-1, I+1 ), LDH, H( I, I+1 ), LDH, 
     $                    CS, SN )
            CALL ZROT( I-I1-1, H( I1, I-1 ), 1, H( I1, I ), 1, CS, 
     $                 DCONJG(SN) )
         END IF
         IF( WANTZ ) THEN
*
*           Apply the transformation to Z.
*
            CALL ZROT( NZ, Z( ILOZ, I-1 ), 1, Z( ILOZ, I ), 1, CS, 
     $                 DCONJG(SN) )
         END IF
*
      END IF
*
*     Decrement number of remaining iterations, and return to start of
*     the main loop with new value of I.
*
      ITN = ITN - ITS
      I = L - 1
      GO TO 10
*
  130 CONTINUE
      RETURN
*
*     End of ZLAHQR
*
      END


SHAR_EOF
fi # end of overwriting check
if test -f 'zlamsh.f'
then
	echo shar: will not over-write existing file "'zlamsh.f'"
else
cat << "SHAR_EOF" > 'zlamsh.f'
      SUBROUTINE ZLAMSH ( S, LDS, NBULGE, JBLK, H, LDH, N, ULP )
*
*  Mark R. Fahey
*  May 28, 1999
*
*     .. Scalar Arguments ..
      INTEGER            LDS, NBULGE, JBLK, LDH, N
      DOUBLE PRECISION   ULP
*     ..
*     .. Array Arguments ..
      COMPLEX*16         S( LDS,* ), H( LDH,* )
*     ..
*
*  Purpose
*  =======
*
*  ZLAMSH sends multiple shifts through a small (single node) matrix to
*     see how consecutive small subdiagonal elements are modified by 
*     subsequent shifts in an effort to maximize the number of bulges 
*     that can be sent through.
*  ZLAMSH should only be called when there are multiple shifts/bulges 
*     (NBULGE > 1) and the first shift is starting in the middle of an
*     unreduced Hessenberg matrix because of two or more consecutive small
*     subdiagonal elements.
*
*  Arguments
*  =========
*
*  S       (local input/output) COMPLEX*16 array, ( LDS,* )
*          On entry, the matrix of shifts.  Only the 2x2 diagonal of S is
*             referenced.  It is assumed that S has JBLK double shifts
*             (size 2).
*          On exit, the data is rearranged in the best order for 
*             applying.
*
*  LDS     (local input) INTEGER
*          On entry, the leading dimension of S.  Unchanged on exit.
*              1 < NBULGE <= JBLK <= LDS/2
*
*  NBULGE  (local input/output) INTEGER
*          On entry, the number of bulges to send through H ( >1 ).
*              NBULGE should be less than the maximum determined (JBLK).
*              1 < NBULGE <= JBLK <= LDS/2
*          On exit, the maximum number of bulges that can be sent 
*              through.
*
*  JBLK    (local input) INTEGER
*          On entry, the number of shifts determined for S.
*          Unchanged on exit.
*
*  H       (local input/output) COMPLEX*16 array ( LDH,N )
*          On entry, the local matrix to apply the shifts on.
*              H should be aligned so that the starting row is 2.
*          On exit, the data is destroyed.
*
*  LDH     (local input) INTEGER
*          On entry, the leading dimension of H.  Unchanged on exit.
*
*  N       (local input) INTEGER
*          On entry, the size of H.  If all the bulges are expected to
*              go through, N should be at least 4*NBULGE+2.
*              Otherwise, NBULGE may be reduced by this routine.
*
*  ULP     (local input) DOUBLE PRECISION
*          On entry, machine precision
*          Unchanged on exit.
*
*  =====================================================================
*
*     .. Parameters ..
      DOUBLE PRECISION RONE, TEN
      PARAMETER        ( RONE = 1.0D+0, TEN = 10.0D+0 )
      COMPLEX*16       ZERO, ONE
      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ), 
     $                   ONE = ( 1.0D+0, 0.0D+0 ) )
*     ..
*     .. Local Scalars ..
      INTEGER          I, IBULGE, IVAL, J, K, M, NR
      DOUBLE PRECISION DVAL, S1, TST1
      COMPLEX*16       CDUM, H44, H33, H43H34, H00, H10, H11, H12, H21, 
     $                 H22, H44S, H33S, SUM, T1, T2, T3, V1, V2, V3
*     ..
*     .. Local Arrays ..
      COMPLEX*16       V( 3 )
*     ..
*     .. External Subroutines ..
      EXTERNAL         ZCOPY, ZLARFG
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC        ABS, DBLE, DCONJG, DIMAG, MAX
*     ..
*     .. Statement Functions ..
      DOUBLE PRECISION   CABS1
*     ..
*     .. Statement Function definitions ..
      CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )
*     ..
*     .. Executable Statements ..
*
      M = 2
      DO 10 IBULGE = 1, NBULGE
         H44 = S(2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+2)
         H33 = S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+1)
         H43H34 = S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+2)*
     $            S(2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+1)
         H11 = H( M, M )
         H22 = H( M+1, M+1 )
         H21 = H( M+1, M )
         H12 = H( M, M+1 )
         H44S = H44 - H11
         H33S = H33 - H11
         V1 = ( H33S*H44S-H43H34 ) / H21 + H12
         V2 = H22 - H11 - H33S - H44S
         V3 = H( M+2, M+1 )
         S1 = CABS1( V1 ) + CABS1( V2 ) + CABS1( V3 )
         V1 = V1 / S1
         V2 = V2 / S1
         V3 = V3 / S1
         V( 1 ) = V1
         V( 2 ) = V2
         V( 3 ) = V3
         H00 = H( M-1, M-1 )
         H10 = H( M, M-1 )
         TST1 = CABS1( V1 )*( CABS1( H00 )+CABS1( H11 )+CABS1( H22 ) )
         IF( CABS1( H10 )*( CABS1( V2 )+CABS1( V3 ) ).GT.ULP*TST1 ) THEN
*           Find minimum
            DVAL = (CABS1(H10)*(CABS1(V2)+CABS1(V3))) / (ULP*TST1)   
            IVAL = IBULGE
            DO 15 I = IBULGE+1, NBULGE
               H44 = S(2*JBLK-2*I+2, 2*JBLK-2*I+2)
               H33 = S(2*JBLK-2*I+1,2*JBLK-2*I+1)
               H43H34 = S(2*JBLK-2*I+1,2*JBLK-2*I+2)*
     $                  S(2*JBLK-2*I+2, 2*JBLK-2*I+1)
               H11 = H( M, M )
               H22 = H( M+1, M+1 )
               H21 = H( M+1, M )
               H12 = H( M, M+1 )
               H44S = H44 - H11
               H33S = H33 - H11
               V1 = ( H33S*H44S-H43H34 ) / H21 + H12
               V2 = H22 - H11 - H33S - H44S
               V3 = H( M+2, M+1 )
               S1 = CABS1( V1 ) + CABS1( V2 ) + CABS1( V3 )
               V1 = V1 / S1
               V2 = V2 / S1
               V3 = V3 / S1
               V( 1 ) = V1
               V( 2 ) = V2
               V( 3 ) = V3
               H00 = H( M-1, M-1 )
               H10 = H( M, M-1 )
               TST1 = CABS1( V1 )*( CABS1( H00 )+CABS1( H11 )+
     $                     CABS1( H22 ) )
               IF ( (DVAL.GT.(CABS1(H10)*(CABS1(V2)+CABS1(V3)))/
     $             (ULP*TST1)) .AND. ( DVAL .GT. RONE ) ) THEN
                  DVAL = (CABS1(H10)*(CABS1(V2)+CABS1(V3)))/(ULP*TST1)   
                  IVAL = I
               END IF
  15        CONTINUE
            IF ( (DVAL .LT. TEN) .AND. (IVAL .NE. IBULGE) ) THEN
               H44 = S(2*JBLK-2*IVAL+2, 2*JBLK-2*IVAL+2)
               H33 = S(2*JBLK-2*IVAL+1,2*JBLK-2*IVAL+1)
               H43H34 = S(2*JBLK-2*IVAL+1,2*JBLK-2*IVAL+2)
               H10 = S(2*JBLK-2*IVAL+2, 2*JBLK-2*IVAL+1)
               S(2*JBLK-2*IVAL+2,2*JBLK-2*IVAL+2) = 
     $              S(2*JBLK-2*IBULGE+2,2*JBLK-2*IBULGE+2)
               S(2*JBLK-2*IVAL+1,2*JBLK-2*IVAL+1) =
     $              S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+1)
               S(2*JBLK-2*IVAL+1,2*JBLK-2*IVAL+2) =
     $              S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+2) 
               S(2*JBLK-2*IVAL+2, 2*JBLK-2*IVAL+1) =
     $              S(2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+1) 
               S(2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+2) = H44
               S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+1) = H33
               S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+2) = H43H34
               S(2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+1) = H10
            END IF
            H44 = S(2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+2)
            H33 = S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+1)
            H43H34 = S(2*JBLK-2*IBULGE+1,2*JBLK-2*IBULGE+2)*
     $               S(2*JBLK-2*IBULGE+2, 2*JBLK-2*IBULGE+1)
            H11 = H( M, M )
            H22 = H( M+1, M+1 )
            H21 = H( M+1, M )
            H12 = H( M, M+1 )
            H44S = H44 - H11
            H33S = H33 - H11
            V1 = ( H33S*H44S-H43H34 ) / H21 + H12
            V2 = H22 - H11 - H33S - H44S
            V3 = H( M+2, M+1 )
            S1 = CABS1( V1 ) + CABS1( V2 ) + CABS1( V3 )
            V1 = V1 / S1
            V2 = V2 / S1
            V3 = V3 / S1
            V( 1 ) = V1
            V( 2 ) = V2
            V( 3 ) = V3
            H00 = H( M-1, M-1 )
            H10 = H( M, M-1 )
            TST1 = CABS1( V1 )*( CABS1( H00 )+CABS1( H11 )+
     $                  CABS1( H22 ) )
         END IF
         IF( CABS1( H10 )*( CABS1( V2 )+CABS1(V3) ).GT.TEN*ULP*TST1 ) 
     $   THEN
*           IBULGE better not be 1 here or we have a bug!
            NBULGE = MAX(IBULGE -1,1)
            RETURN
         END IF
         DO 120 K = M, N - 1
            NR = MIN( 3, N-K+1 )
            IF( K.GT.M )
     $         CALL ZCOPY( NR, H( K, K-1 ), 1, V, 1 )
            CALL ZLARFG( NR, V( 1 ), V( 2 ), 1, T1 )
            IF( K.GT.M ) THEN
               H( K, K-1 ) = V( 1 )
               H( K+1, K-1 ) = ZERO
               IF( K.LT.N-1 )
     $            H( K+2, K-1 ) = ZERO
            ELSE
*              H(m,m-1) must be updated, 
*              
               H( K, K-1 ) = H( K, K-1 ) - DCONJG( T1 )*H( K, K-1 )
            END IF
            V2 = V( 2 )
            T2 = T1*V2
            IF( NR.EQ.3 ) THEN
               V3 = V( 3 )
               T3 = T1*V3
               DO 60 J = K, N
                  SUM = DCONJG(T1)*H( K, J ) + DCONJG(T2)*H( K+1, J ) 
     $                    + DCONJG( T3 )*H( K+2, J )
                  H( K, J ) = H( K, J ) - SUM
                  H( K+1, J ) = H( K+1, J ) - SUM*V2
                  H( K+2, J ) = H( K+2, J ) - SUM*V3
   60          CONTINUE
               DO 70 J = 1, MIN( K+3, N )
                  SUM = T1*H( J, K ) + T2*H( J, K+1 ) + T3*H( J, K+2 )
                  H( J, K ) = H( J, K ) - SUM
                  H( J, K+1 ) = H( J, K+1 ) - SUM*DCONJG( V2 )
                  H( J, K+2 ) = H( J, K+2 ) - SUM*DCONJG( V3 )
   70          CONTINUE
            END IF
  120    CONTINUE
   10 CONTINUE
* 
      RETURN
      END
SHAR_EOF
fi # end of overwriting check
if test -f 'zlanv2.f'
then
	echo shar: will not over-write existing file "'zlanv2.f'"
else
cat << "SHAR_EOF" > 'zlanv2.f'
      SUBROUTINE ZLANV2( A, B, C, D, RT1, RT2, CS, SN )
*
*  Mark R. Fahey
*  May 28, 1999
*
*     .. Scalar Arguments ..
      DOUBLE PRECISION   CS
      COMPLEX*16         A, B, C, D, RT1, RT2, SN
*     ..
*
*  Purpose
*  =======
*
*  ZLANV2 computes the Schur factorization of a complex 2-by-2 nonhermitian
*  matrix in standard form:
*
*       [ A  B ] = [ CS -SN ] [ AA  BB ] [ CS  SN ]
*       [ C  D ]   [ SN  CS ] [  0  DD ] [-SN  CS ]
*
*  Arguments
*  =========
*
*  A       (input/output) COMPLEX*16
*  B       (input/output) COMPLEX*16
*  C       (input/output) COMPLEX*16
*  D       (input/output) COMPLEX*16
*          On entry, the elements of the input matrix.
*          On exit, they are overwritten by the elements of the
*          standardised Schur form.
*
*  RT1     (output) COMPLEX*16
*  RT2     (output) COMPLEX*16
*          The two eigenvalues. 
*
*  CS      (output) DOUBLE PRECISION
*  SN      (output) COMPLEX*16
*          Parameters of the rotation matrix.
*
*  =====================================================================
*
*     .. Parameters ..
      DOUBLE PRECISION   RZERO, HALF, RONE
      PARAMETER          ( RZERO = 0.0D+0, HALF = 0.5D+0, 
     $                     RONE = 1.0D+0 )
      COMPLEX*16         ZERO, ONE
      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ), 
     $                     ONE = ( 1.0D+0, 0.0D+0 ) )
*     ..
*     .. Local Scalars ..
      COMPLEX*16         AA, BB, DD, TEMP, T, U, X, Y
*     ..
*     .. External Functions ..
      COMPLEX*16         ZLADIV
      EXTERNAL           ZLADIV
*     ..
*     .. External Subroutines ..
      EXTERNAL           ZLARTG
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          DBLE, DCONJG, DIMAG, SQRT
*     ..
*     .. Executable Statements ..
*
*     Initialize CS and SN
*
      CS = RONE
      SN = ZERO
*
      IF( C.EQ.ZERO ) THEN
         GO TO 10
*
      ELSE IF( B.EQ.ZERO ) THEN
*
*        Swap rows and columns
*
         CS = RZERO
         SN = ONE
         TEMP = D
         D = A
         A = TEMP
         B = -C 
         C = ZERO
         GO TO 10
      ELSE IF( (A-D).EQ.ZERO ) THEN
         TEMP = SQRT( B*C )
         A = A + TEMP
         D = D - TEMP
         IF( (B+C).EQ.ZERO ) THEN
             CS = SQRT( HALF )
             SN = ( RZERO, RONE )*CS
         ELSE
            TEMP = SQRT( B + C )
            CS = ZLADIV( SQRT( B ), TEMP )
            SN = ZLADIV( SQRT( C ), TEMP )
         END IF
         B = B - C
         C = ZERO
         GO TO 10
      ELSE
*
*        Compute eigenvalue closest to D
*
         T = D
         U = B*C
         X = HALF*( A-T )
         Y = SQRT( X*X+U )
         IF( DBLE( X )*DBLE( Y )+DIMAG( X )*DIMAG( Y ).LT.RZERO )
     $         Y = -Y
         T = T - ZLADIV( U, ( X+Y ) )
*
*        Do one QR step with exact shift T - resulting 2 x 2 in
*        triangular form.
*
         CALL ZLARTG( A-T, C, CS, SN, AA )
*
         D = D - T
         BB = CS*B + SN*D
         DD = -DCONJG( SN )*B + CS*D

         A = AA*CS + BB*DCONJG( SN ) + T
         B = -AA*SN + BB*CS
         C = ZERO
         D = T
*
      END IF
*
   10 CONTINUE
*
*     Store eigenvalues in RT1 and RT2.
*
      RT1 = A
      RT2 = D
      RETURN
*
*     End of ZLANV2
*
      END
SHAR_EOF
fi # end of overwriting check
if test -f 'zlaref.f'
then
	echo shar: will not over-write existing file "'zlaref.f'"
else
cat << "SHAR_EOF" > 'zlaref.f'
      SUBROUTINE ZLAREF( TYPE, A, LDA, WANTZ, Z, LDZ, BLOCK, IROW1,
     $                    ICOL1, ISTART, ISTOP, ITMP1, ITMP2, LILOZ,
     $                    LIHIZ, VECS, V2, V3, T1, T2, T3 )
*
*  Mark R Fahey
*  May 28, 1999
*
*     .. Scalar Arguments ..
      LOGICAL            BLOCK, WANTZ
      CHARACTER          TYPE
      INTEGER            ICOL1, IROW1, ISTART, ISTOP, ITMP1, ITMP2, LDA,
     $                   LDZ, LIHIZ, LILOZ
      COMPLEX*16         T1, T2, T3, V2, V3
*     ..
*     .. Array Arguments ..
      COMPLEX*16         A( LDA, * ), VECS( * ), Z( LDZ, * )
*     ..
*
*  Purpose
*  =======
*
*  ZLAREF applies one or several Householder reflectors of size 3
*     to one or two matrices (if column is specified) on either their
*     rows or columns.
*
*  Arguments
*  =========
*
*  TYPE    (global input) CHARACTER*1
*          If 'R': Apply reflectors to the rows of the matrix
*              (apply from left)
*          Otherwise: Apply reflectors to the columns of the matrix
*          Unchanged on exit.
*
*  A       (global input/output) COMPLEX*16 array, (LDA,*)
*          On entry, the matrix to receive the reflections.
*          The updated matrix on exit.
*
*  LDA     (local input) INTEGER
*          On entry, the leading dimension of A.  Unchanged on exit.
*
*  WANTZ   (global input) LOGICAL
*          If .TRUE., then apply any column reflections to Z as well.
*          If .FALSE., then do no additional work on Z.
*
*  Z       (global input/output) COMPLEX*16 array, (LDZ,*)
*          On entry, the second matrix to receive column reflections.
*          This is changed only if WANTZ is set.
*
*  LDZ     (local input) INTEGER
*          On entry, the leading dimension of Z.  Unchanged on exit.
*
*  BLOCK   (global input) LOGICAL
*          If .TRUE., then apply several reflectors at once and read
*             their data from the VECS array.
*          If .FALSE., apply the single reflector given by V2, V3,
*             T1, T2, and T3.
*
*  IROW1   (local input/output) INTEGER
*          On entry, the local row element of A.
*          Undefined on output.
*
*
*  ICOL1   (local input/output) INTEGER
*          On entry, the local column element of A.
*          Undefined on output.
*
*  ISTART  (global input) INTEGER
*          Specifies the "number" of the first reflector.  This is
*              used as an index into VECS if BLOCK is set.
*              ISTART is ignored if BLOCK is .FALSE..
*
*  ISTOP   (global input) INTEGER
*          Specifies the "number" of the last reflector.  This is
*              used as an index into VECS if BLOCK is set.
*              ISTOP is ignored if BLOCK is .FALSE..
*
*  ITMP1   (local input) INTEGER
*          Starting range into A.  For rows, this is the local
*              first column.  For columns, this is the local first row.
*
*  ITMP2   (local input) INTEGER
*          Ending range into A.  For rows, this is the local last
*              column.  For columns, this is the local last row.
*
*  LILOZ
*  LIHIZ   (local input) INTEGER
*          These serve the same purpose as ITMP1,ITMP2 but for Z
*              when WANTZ is set.
*
*  VECS    (global input) COMPLEX*16 array of size 3*N (matrix size)
*          This holds the size 3 reflectors one after another and this
*              is only accessed when BLOCK is .TRUE.
*
*  V2
*  V3
*  T1
*  T2
*  T3      (global input/output) COMPLEX*16
*          This holds information on a single size 3 Householder
*              reflector and is read when BLOCK is .FALSE., and
*              overwritten when BLOCK is .TRUE.
*
*  =====================================================================
*
*     .. Local Scalars ..
      INTEGER            J, K
      COMPLEX*16         A1, A11, A2, A22, A3, A4, A5, B1, B2, B3, B4,
     $                   B5, H11, H22, SUM, SUM1, SUM2, SUM3, T12, T13,
     $                   T22, T23, T32, T33, TMP1, TMP2, TMP3, V22, V23,
     $                   V32, V33
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      EXTERNAL           LSAME
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          DCONJG, MOD
*     ..
*     .. Executable Statements ..
*
      IF( LSAME( TYPE, 'R' ) ) THEN
         IF( BLOCK ) THEN
            DO 30 K = ISTART, ISTOP - MOD( ISTOP-ISTART+1, 3 ), 3
               V2 = VECS( ( K-1 )*3+1 )
               V3 = VECS( ( K-1 )*3+2 )
               T1 = VECS( ( K-1 )*3+3 )
               V22 = VECS( ( K-1 )*3+4 )
               V32 = VECS( ( K-1 )*3+5 )
               T12 = VECS( ( K-1 )*3+6 )
               V23 = VECS( ( K-1 )*3+7 )
               V33 = VECS( ( K-1 )*3+8 )
               T13 = VECS( ( K-1 )*3+9 )
               T2 = T1*V2
               T3 = T1*V3
               T22 = T12*V22
               T32 = T12*V32
               T23 = T13*V23
               T33 = T13*V33
               DO 10 J = ITMP1, ITMP2-MOD(ITMP2-ITMP1+1,2), 2
                  A1 = A ( IROW1  , J   )
                  A2 = A ( IROW1+1, J   ) 
                  A3 = A ( IROW1+2, J   ) 
                  A4 = A ( IROW1+3, J   ) 
                  A5 = A ( IROW1+4, J   ) 
                  B1 = A ( IROW1  , J+1 )
                  B2 = A ( IROW1+1, J+1 ) 
                  B3 = A ( IROW1+2, J+1 ) 
                  B4 = A ( IROW1+3, J+1 ) 
                  B5 = A ( IROW1+4, J+1 ) 
                  SUM1 = DCONJG( T1 )*A1 + DCONJG( T2 )*A2 + 
     $                   DCONJG( T3 )*A3
                  A( IROW1  , J   ) = A1 - SUM1
                  H11               = A2 - SUM1 * V2
                  H22               = A3 - SUM1 * V3
                  TMP1 = DCONJG( T1 )*B1 + DCONJG( T2 )*B2 + 
     $                   DCONJG( T3 )*B3
                  A( IROW1  , J+1 ) = B1 - TMP1
                  A11               = B2 - TMP1 * V2
                  A22               = B3 - TMP1 * V3
                  SUM2 = DCONJG( T12 )*H11 + DCONJG( T22 )*H22 + 
     $                   DCONJG( T32 )*A4
                  A( IROW1+1, J   ) = H11 - SUM2
                  H11               = H22 - SUM2 * V22
                  H22               = A4  - SUM2 * V32
                  TMP2 = DCONJG( T12 )*A11 + DCONJG( T22 )*A22 + 
     $                   DCONJG( T32 )*B4
                  A( IROW1+1, J+1 ) = A11 - TMP2
                  A11               = A22 - TMP2 * V22
                  A22               = B4  - TMP2 * V32
                  SUM3 = DCONJG( T13 )*H11 + DCONJG( T23 )*H22 + 
     $                   DCONJG( T33 )*A5
                  A( IROW1+2, J   ) = H11 - SUM3
                  A( IROW1+3, J   ) = H22 - SUM3 * V23
                  A( IROW1+4, J   ) = A5  - SUM3 * V33
                  TMP3 = DCONJG( T13 )*A11 + DCONJG( T23 )*A22 + 
     $                   DCONJG( T33 )*B5
                  A( IROW1+2, J+1 ) = A11 - TMP3
                  A( IROW1+3, J+1 ) = A22 - TMP3 * V23
                  A( IROW1+4, J+1 ) = B5  - TMP3 * V33
   10          CONTINUE
               DO 20 J = ITMP2-MOD(ITMP2-ITMP1+1,2)+1, ITMP2
                  SUM = DCONJG( T1 )*A( IROW1, J ) + DCONJG( T2 )*
     $                  A( IROW1+1, J ) + DCONJG( T3 )*A( IROW1+2, J )
                  A( IROW1, J ) = A( IROW1, J ) - SUM
                  H11 = A( IROW1+1, J ) - SUM*V2
                  H22 = A( IROW1+2, J ) - SUM*V3
                  SUM = DCONJG( T12 )*H11 + DCONJG( T22 )*H22 + 
     $                  DCONJG( T32 )*A( IROW1+3, J )
                  A( IROW1+1, J ) = H11 - SUM
                  H11 = H22 - SUM*V22
                  H22 = A( IROW1+3, J ) - SUM*V32
                  SUM = DCONJG( T13 )*H11 + DCONJG( T23 )*H22 + 
     $                  DCONJG( T33 )*A( IROW1+4, J )
                  A( IROW1+2, J ) = H11 - SUM
                  A( IROW1+3, J ) = H22 - SUM*V23
                  A( IROW1+4, J ) = A( IROW1+4, J ) - SUM*V33
   20          CONTINUE
               IROW1 = IROW1 + 3
   30       CONTINUE
            DO 50 K = ISTOP - MOD( ISTOP-ISTART+1, 3 ) + 1, ISTOP
               V2 = VECS( ( K-1 )*3+1 )
               V3 = VECS( ( K-1 )*3+2 )
               T1 = VECS( ( K-1 )*3+3 )
               T2 = T1*V2
               T3 = T1*V3
               DO 40 J = ITMP1, ITMP2
                  SUM = DCONJG( T1 )*A( IROW1, J ) + DCONJG( T2 )*
     $                  A( IROW1+1, J ) + DCONJG( T3 )*A( IROW1+2, J )
                  A( IROW1, J ) = A( IROW1, J ) - SUM
                  A( IROW1+1, J ) = A( IROW1+1, J ) - SUM*V2
                  A( IROW1+2, J ) = A( IROW1+2, J ) - SUM*V3
   40          CONTINUE
               IROW1 = IROW1 + 1
   50       CONTINUE
         ELSE
            DO 60 J = ITMP1, ITMP2
               SUM = DCONJG( T1 )*A( IROW1, J ) + DCONJG( T2 )*
     $               A( IROW1+1, J ) + DCONJG( T3 )*A( IROW1+2, J )
               A( IROW1, J ) = A( IROW1, J ) - SUM
               A( IROW1+1, J ) = A( IROW1+1, J ) - SUM*V2
               A( IROW1+2, J ) = A( IROW1+2, J ) - SUM*V3
   60       CONTINUE
         END IF
      ELSE
*
*        Do column transforms
*
         IF( BLOCK ) THEN
            DO 90 K = ISTART, ISTOP - MOD( ISTOP-ISTART+1, 3 ), 3
               V2 = VECS( ( K-1 )*3+1 )
               V3 = VECS( ( K-1 )*3+2 )
               T1 = VECS( ( K-1 )*3+3 )
               V22 = VECS( ( K-1 )*3+4 )
               V32 = VECS( ( K-1 )*3+5 )
               T12 = VECS( ( K-1 )*3+6 )
               V23 = VECS( ( K-1 )*3+7 )
               V33 = VECS( ( K-1 )*3+8 )
               T13 = VECS( ( K-1 )*3+9 )
               T2 = T1*V2
               T3 = T1*V3
               T22 = T12*V22
               T32 = T12*V32
               T23 = T13*V23
               T33 = T13*V33
               DO 70 J = ITMP1, ITMP2
                  SUM = T1*A( J, ICOL1 ) + T2*A( J, ICOL1+1 ) +
     $                  T3*A( J, ICOL1+2 )
                  A( J, ICOL1 ) = A( J, ICOL1 ) - SUM
                  H11 = A( J, ICOL1+1 ) - SUM*DCONJG( V2 )
                  H22 = A( J, ICOL1+2 ) - SUM*DCONJG( V3 )
                  SUM = T12*H11 + T22*H22 + T32*A( J, ICOL1+3 )
                  A( J, ICOL1+1 ) = H11 - SUM
                  H11 = H22 - SUM*DCONJG( V22 )
                  H22 = A( J, ICOL1+3 ) - SUM*DCONJG( V32 )
                  SUM = T13*H11 + T23*H22 + T33*A( J, ICOL1+4 )
                  A( J, ICOL1+2 ) = H11 - SUM
                  A( J, ICOL1+3 ) = H22 - SUM*DCONJG( V23 )
                  A( J, ICOL1+4 ) = A( J, ICOL1+4 ) - SUM*DCONJG( V33 )
   70          CONTINUE
               IF( WANTZ ) THEN
                  DO 80 J = LILOZ, LIHIZ
                     SUM = T1*Z( J, ICOL1 ) + T2*Z( J, ICOL1+1 ) +
     $                     T3*Z( J, ICOL1+2 )
                     Z( J, ICOL1 ) = Z( J, ICOL1 ) - SUM
                     H11 = Z( J, ICOL1+1 ) - SUM*DCONJG( V2 )
                     H22 = Z( J, ICOL1+2 ) - SUM*DCONJG( V3 )
                     SUM = T12*H11 + T22*H22 + T32*Z( J, ICOL1+3 )
                     Z( J, ICOL1+1 ) = H11 - SUM
                     H11 = H22 - SUM*DCONJG( V22 )
                     H22 = Z( J, ICOL1+3 ) - SUM*DCONJG( V32 )
                     SUM = T13*H11 + T23*H22 + T33*Z( J, ICOL1+4 )
                     Z( J, ICOL1+2 ) = H11 - SUM
                     Z( J, ICOL1+3 ) = H22 - SUM*DCONJG( V23 )
                     Z( J, ICOL1+4 ) = Z( J, ICOL1+4 ) - SUM*DCONJG(V33)
   80             CONTINUE
               END IF
               ICOL1 = ICOL1 + 3
   90       CONTINUE
            DO 120 K = ISTOP - MOD( ISTOP-ISTART+1, 3 ) + 1, ISTOP
               V2 = VECS( ( K-1 )*3+1 )
               V3 = VECS( ( K-1 )*3+2 )
               T1 = VECS( ( K-1 )*3+3 )
               T2 = T1*V2
               T3 = T1*V3
               DO 100 J = ITMP1, ITMP2
                  SUM = T1*A( J, ICOL1 ) + T2*A( J, ICOL1+1 ) +
     $                  T3*A( J, ICOL1+2 )
                  A( J, ICOL1 ) = A( J, ICOL1 ) - SUM
                  A( J, ICOL1+1 ) = A( J, ICOL1+1 ) - SUM*DCONJG( V2 )
                  A( J, ICOL1+2 ) = A( J, ICOL1+2 ) - SUM*DCONJG( V3 )
  100          CONTINUE
               IF( WANTZ ) THEN
                  DO 110 J = LILOZ, LIHIZ
                     SUM = T1*Z( J, ICOL1 ) + T2*Z( J, ICOL1+1 ) +
     $                     T3*Z( J, ICOL1+2 )
                     Z( J, ICOL1 ) = Z( J, ICOL1 ) - SUM
                     Z( J, ICOL1+1 ) = Z( J, ICOL1+1 ) - SUM*DCONJG(V2)
                     Z( J, ICOL1+2 ) = Z( J, ICOL1+2 ) - SUM*DCONJG(V3)
  110             CONTINUE
               END IF
               ICOL1 = ICOL1 + 1
  120       CONTINUE
         ELSE
            DO 130 J = ITMP1, ITMP2
               SUM = T1*A( J, ICOL1 ) + T2*A( J, ICOL1+1 ) +
     $               T3*A( J, ICOL1+2 )
               A( J, ICOL1 ) = A( J, ICOL1 ) - SUM
               A( J, ICOL1+1 ) = A( J, ICOL1+1 ) - SUM*DCONJG( V2 )
               A( J, ICOL1+2 ) = A( J, ICOL1+2 ) - SUM*DCONJG( V3 )
  130       CONTINUE
         END IF
      END IF
      RETURN
*
*     End of ZLAREF
*
      END
SHAR_EOF
fi # end of overwriting check
cd ..
if test ! -d 'Testing'
then
	mkdir 'Testing'
fi
cd 'Testing'
if test -f 'EVC.dat'
then
	echo shar: will not over-write existing file "'EVC.dat'"
else
cat << "SHAR_EOF" > 'EVC.dat'
'SCALAPACK NEP (Nonsymmetric Eigenvalue Problem) input file'
'MPI Machine'
'EVC.out'               output file name (if any)
6                       device out
4                       number of problems sizes
200 400 600 800         Problem sizes, if >1500, may need to use new PZLATRS
1                       number of NB's
64                      values of NB
1                       number of process grids (ordered pairs of P & Q)
3 2 2 1 4 2 1           values of P
3 2 1 4 1 4 8           values of Q
20.0                    threshold
SHAR_EOF
fi # end of overwriting check
if test -f 'Makefile'
then
	echo shar: will not over-write existing file "'Makefile'"
else
cat << "SHAR_EOF" > 'Makefile'
############################################################################
#
#  Program:         ScaLAPACK
#
#  Module:          Makefile
#
#  Purpose:         Eigenroutine Testing Makefile
#
#  Creation date:   March 20, 1995
#
#  Modified:
#
#  Send bug reports, comments or suggestions to scalapack@cs.utk.edu
#
############################################################################

include ../../SLmake.inc

F77 = f90
F77LOADER = f90

znepexe = xznep
zevcexe = xzevc

zmatgen = pzmatgen.o pmatgeninc.o

znep =  pznepdriver.o pznepinfo.o pznepfchk.o $(zmatgen)
zevc =  pzevcdriver.o pzevcinfo.o pzget22.o $(zmatgen) 

all : complex16

complex: $(cnepexe) $(cevcexe)
complex16: $(znepexe) $(zevcexe)

$(cnepexe) : $(cnep)
	$(F77LOADER) $(F77LOADFLAGS) -o $(cnepexe) $(cnep) $(PARLIBS)

$(cevcexe) : $(cevc)
	$(F77LOADER) $(F77LOADFLAGS) -o $(cevcexe) $(cevc) $(PARLIBS)

$(znepexe) : $(znep)
	$(F77LOADER) $(F77LOADFLAGS) -o $(znepexe) $(znep) $(PARLIBS)

$(zevcexe) : $(zevc)
	$(F77LOADER) -g $(F77LOADFLAGS) -o $(zevcexe) $(zevc) $(PARLIBS)

$(cnep): $(FRC)
$(cevc): $(FRC)
$(znep): $(FRC)
$(zevc): $(FRC)

FRC:
	@FRC=$(FRC)

clean :
	rm -f *.o

.c.o:
	$(CC) $(CDEFS) $(OPTS) -c $*.c

.f.o: 
	$(F77) -c $(OPTS) $*.f
SHAR_EOF
fi # end of overwriting check
if test -f 'NEP.dat'
then
	echo shar: will not over-write existing file "'NEP.dat'"
else
cat << "SHAR_EOF" > 'NEP.dat'
'SCALAPACK NEP (Nonsymmetric Eigenvalue Problem) input file'
'MPI Machine'
'NEP.out'               output file name (if any)
6                       device out
4                       number of problems sizes
200 400 600 1000        Problem sizes
1                       number of NB's
100                     values of NB
1                       number of process grids (ordered pairs of P & Q)
1 2 1 1 4 2 1           values of P
1 2 1 4 1 4 8           values of Q
20.0                    threshold
SHAR_EOF
fi # end of overwriting check
if test -f 'README'
then
	echo shar: will not over-write existing file "'README'"
else
cat << "SHAR_EOF" > 'README'
Files in this directory
=======================
EVC.dat         NEP.dat         pzevcdriver.f   pzget22.f       pznepfchk.f
Makefile        README          pzevcinfo.f     pznepdriver.f   pznepinfo.f


Notes
=====
When using pzevcdriver to test large matrices (>1500), one should use
the pzlatrs routine provided in the SRC directory.  This will control
scaling problems when pztrevc is calculating eigenvectors.  It is,
however, slow in comparison to PZTRSV which it is replacing.

Note also that pzlatrs uses pzdotu, pzdotc, and pzaxpy.  I found a bug
in each of these routines that were included in ScaLAPACK 1.0.  Thus,
up-to-date versions are included in the SRC directory.  

See the README file in the SRC directory for more information.
SHAR_EOF
fi # end of overwriting check
if test -f 'pzevcdriver.f'
then
	echo shar: will not over-write existing file "'pzevcdriver.f'"
else
cat << "SHAR_EOF" > 'pzevcdriver.f'
      PROGRAM PZEVCDRIVER
*
*     Mark R. Fahey
*     June 2000
*
*  Purpose
*  =======
*
*  PZEVCDRIVER is the main test program for the COMPLEX*16 
*  SCALAPACK PZTREVC routine.  This test driver performs a right and
*  left eigenvector calculation of a triangular matrix followed by
*  a residual checks of the calcuated eigenvectors.
*
*  The program must be driven by a short data file and uses the same
*  input file as the PZNEPDRIVER.  An annotated example of a data file 
*  can be obtained by deleting the first 3 characters from the following 
*  18 lines:
*  'SCALAPACK, Version 1.4, NEP (Nonsymmetric EigenProblem) input file'
*  'Intel iPSC/860 hypercube, gamma model.'
*  'NEP.out'            output file name (if any)
*  6                    device out
*  8                    number of problems sizes
*  1 2 3 4 6 10 100 200 vales of N
*  3                    number of NB's
*  6 20 40              values of NB
*  4                    number of process grids (ordered pairs of P & Q)
*  1 2 1 4              values of P
*  1 2 4 1              values of Q
*  20.0                 threshold
*
*  Internal Parameters
*  ===================
*
*  TOTMEM   INTEGER, default = 4000000
*           TOTMEM is a machine-specific parameter indicating the
*           maximum amount of available memory in bytes.
*           The user should customize TOTMEM to his platform.  Remember
*           to leave room in memory for the operating system, the BLACS
*           buffer, etc.  For example, on a system with 8 MB of memory
*           per process (e.g., one processor on an Intel iPSC/860), the
*           parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
*           code, BLACS buffer, etc).  However, for PVM, we usually set
*           TOTMEM = 2000000.  Some experimenting with the maximum value
*           of TOTMEM may be required.
*
*  DBLESZ   INTEGER, default = 8 bytes.
*           DBLESZ indicate the length in bytes on the given platform
*           for a double precision real.
*  MEM      COMPLEX*16 array, dimension ( TOTMEM / DBLESZ )
*
*           All arrays used by SCALAPACK routines are allocated from
*           this array and referenced by pointers.  The integer IPA,
*           for example, is a pointer to the starting element of MEM for
*           the matrix A.
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DT_,
     $                   LLD_, MB_, M_, NB_, N_, RSRC_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1,
     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
      INTEGER            DBLESZ, TOTMEM, MEMSIZ, NTESTS
      COMPLEX*16         PADVAL, ZERO, ONE
      PARAMETER          ( DBLESZ = 16, TOTMEM = 220000000,
     $                   MEMSIZ = TOTMEM / DBLESZ, NTESTS = 20 )
      PARAMETER          ( PADVAL = ( -9923.0D+0, -9923.0D+0 ),
     $                     ZERO = ( 0.0D+0, 0.0D+0 ),
     $                     ONE = ( 1.0D+0, 0.0D+0 ) )
*     ..
*     .. Local Scalars ..
      LOGICAL            CHECK
      CHARACTER*6        PASSED
      CHARACTER*80       OUTFILE
      INTEGER            I, IAM, IASEED, ICTXT, III, IMIDPAD, INFO, IPA,
     $                   IPOSTPAD, IPREPAD, IPW, IPWR, IPZ, IPVL, J, K,
     $                   KFAIL, KPASS, KSKIP, KTESTS, LDA, LDZ, LWORK,
     $                   M,MYCOL, MYROW, N, NB, NGRIDS, NMAT, NNB, NOUT,
     $                   NP, NPCOL, NPROCS, NPROW, NQ, WORKSIZ
      REAL               THRESH
      DOUBLE PRECISION   ANORM, FRESID, NOPS, QRESID, TMFLOPS, ZNORM
*     ..
*     .. Local Arrays ..
      LOGICAL            SELECT ( 1 )
      INTEGER            DESCA( DLEN_ ), DESCZ( DLEN_ ), IERR( 2 ),
     $                   IDUM( 1 ), NBVAL( NTESTS ), NVAL( NTESTS ),
     $                   PVAL( NTESTS ), QVAL( NTESTS )
      DOUBLE PRECISION   CTIME( 2 ), WTIME( 2 ), RWORK( 5000 ),
     $                   RESULT( 2 )
      COMPLEX*16         MEM( MEMSIZ )
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_BARRIER, BLACS_EXIT, BLACS_GET,
     $                   BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT,
     $                   BLACS_PINFO, DESCINIT, IGSUM2D, PZCHEKPAD,
     $                   PZFILLPAD, PZGEMM, PZLAHQR, PZLASET, PZMATGEN,
     $                   PZGET22, PZEVCINFO, SLBOOT, SLCOMBINE,
     $                   SLTIMER
*     ..
*     .. External Functions ..
      INTEGER            ILCM, NUMROC
      DOUBLE PRECISION   PDLAMCH, PZLANGE, PZLANHS
      EXTERNAL           ILCM, NUMROC, PDLAMCH, PZLANGE, PZLANHS
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          DBLE, MAX, MIN
*     ..
*     .. Data statements ..
      DATA               KFAIL, KPASS, KSKIP, KTESTS / 4*0 /
*     ..
*     .. Executable Statements ..
*
*     Get starting information
*
      CALL BLACS_PINFO( IAM, NPROCS )
      IASEED = 100
      CALL PZEVCINFO( OUTFILE, NOUT, NMAT, NVAL, NTESTS, NNB, NBVAL,
     $                NTESTS, NGRIDS, PVAL, NTESTS, QVAL, NTESTS,
     $                THRESH, MEM, IAM, NPROCS )
      CHECK = ( THRESH.GE.0.0E+0 )
*
*     Print headings
*
      IF( IAM.EQ.0 ) THEN
         WRITE( NOUT, FMT = * )
         WRITE( NOUT, FMT = 9995 )
         WRITE( NOUT, FMT = 9994 )
         WRITE( NOUT, FMT = * )
      END IF
*
*     Loop over different process grids
*
      DO 30 I = 1, NGRIDS
*
         NPROW = PVAL( I )
         NPCOL = QVAL( I )
*
*        Make sure grid information is correct
*
         IERR( 1 ) = 0
         IF( NPROW.LT.1 ) THEN
            IF( IAM.EQ.0 )
     $         WRITE( NOUT, FMT = 9999 )'GRID', 'nprow', NPROW
            IERR( 1 ) = 1
         ELSE IF( NPCOL.LT.1 ) THEN
            IF( IAM.EQ.0 )
     $         WRITE( NOUT, FMT = 9999 )'GRID', 'npcol', NPCOL
            IERR( 1 ) = 1
         ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN
            IF( IAM.EQ.0 )
     $         WRITE( NOUT, FMT = 9998 )NPROW*NPCOL, NPROCS
            IERR( 1 ) = 1
         END IF
*
         IF( IERR( 1 ).GT.0 ) THEN
            IF( IAM.EQ.0 )
     $         WRITE( NOUT, FMT = 9997 )'grid'
            KSKIP = KSKIP + 1
            GO TO 30
         END IF
*
*        Define process grid
*
         CALL BLACS_GET( -1, 0, ICTXT )
         CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL )
         CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*
*        Go to bottom of process grid loop if this case doesn't use my
*        process
*
         IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL )
     $      GO TO 30
*
         DO 20 J = 1, NMAT
*
            N = NVAL( J )
*
*           Make sure matrix information is correct
*
            IERR( 1 ) = 0
            IF( N.LT.1 ) THEN
               IF( IAM.EQ.0 )
     $            WRITE( NOUT, FMT = 9999 )'MATRIX', 'N', N
               IERR( 1 ) = 1
            END IF
*
*           Check all processes for an error
*
            CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 )
*
            IF( IERR( 1 ).GT.0 ) THEN
               IF( IAM.EQ.0 )
     $            WRITE( NOUT, FMT = 9997 )'matrix'
               KSKIP = KSKIP + 1
               GO TO 20
            END IF
*
            DO 10 K = 1, NNB
*
               NB = NBVAL( K )
*
*              Make sure nb is legal
*
               IERR( 1 ) = 0
               IF( NB.LT.6 ) THEN
                  IERR( 1 ) = 1
                  IF( IAM.EQ.0 )
     $               WRITE( NOUT, FMT = 9999 )'NB', 'NB', NB
               END IF
*
*              Check all processes for an error
*
               CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 )
*
               IF( IERR( 1 ).GT.0 ) THEN
                  IF( IAM.EQ.0 )
     $               WRITE( NOUT, FMT = 9997 )'NB'
                  KSKIP = KSKIP + 1
                  GO TO 10
               END IF
*
*              Padding constants
*
               NP = NUMROC( N, NB, MYROW, 0, NPROW )
               NQ = NUMROC( N, NB, MYCOL, 0, NPCOL )
               IF( CHECK ) THEN
                  IPREPAD = MAX( NB, NP )
                  IMIDPAD = NB
                  IPOSTPAD = MAX( NB, NQ )
                  IPREPAD = IPREPAD + 1000
                  IMIDPAD = IMIDPAD + 1000
                  IPOSTPAD = IPOSTPAD + 1000                  
               ELSE
                  IPREPAD = 0
                  IMIDPAD = 0
                  IPOSTPAD = 0
               END IF
*
*              Initialize the array descriptor for the matrix A
*
               CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT,
     $                        MAX( 1, NP )+IMIDPAD, IERR( 1 ) )
*
*              Initialize the array descriptor for the matrix Z
*
               CALL DESCINIT( DESCZ, N, N, NB, NB, 0, 0, ICTXT,
     $                        MAX( 1, NP )+IMIDPAD, IERR( 2 ) )
*
               LDA = DESCA( LLD_ )
               LDZ = DESCZ( LLD_ )
*
*              Check all processes for an error
*
               CALL IGSUM2D( ICTXT, 'All', ' ', 2, 1, IERR, 2, -1, 0 )
*
               IF( IERR( 1 ).LT.0 .OR. IERR( 2 ).LT.0 ) THEN
                  IF( IAM.EQ.0 )
     $               WRITE( NOUT, FMT = 9997 )'descriptor'
                  KSKIP = KSKIP + 1
                  GO TO 10
               END IF
*
*              Assign pointers into MEM for SCALAPACK arrays, A is
*              allocated starting at position MEM( IPREPAD+1 )
*
               IPA = IPREPAD + 1
               IPZ = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD
               IPWR = IPZ + DESCZ( LLD_ )*NQ + IPOSTPAD + IPREPAD
               IPVL = IPWR + N + IPOSTPAD + IPREPAD
               IPVR = IPVL + DESCZ( LLD_ )*NQ + IPOSTPAD + IPREPAD
               IPW = IPVR + DESCZ( LLD_ )*NQ + IPOSTPAD + IPREPAD
               III = N / NB
               IF( III*NB.LT.N )
     $            III = III + 1
               III = 7*III / ILCM( NPROW, NPCOL )
*
*
               LWORK = 3*N + MAX( 2*MAX( LDA, LDZ )+2*NQ, III )
               LWORK = LWORK + MAX(2*N, (8*ILCM(NPROW,NPCOL)+2)**2 )
*
               IF( CHECK ) THEN
*
*                 Figure the amount of workspace required by the
*                 checking routines PZEVCFCHK and PZLANHS
*
                  WORKSIZ = LWORK + MAX( NP*DESCA( NB_ ),
     $                      DESCA( MB_ )*NQ ) + IPOSTPAD
*
               ELSE
*
                  WORKSIZ = LWORK + IPOSTPAD
*
               END IF
*
*              Check for adequate memory for problem size
*
               IERR( 1 ) = 0
               IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN
                  IF( IAM.EQ.0 )
     $               WRITE( NOUT, FMT = 9996 )'Schur reduction',
     $               ( IPW+WORKSIZ )*DBLESZ
                  IERR( 1 ) = 1
               END IF
*
*              Check all processes for an error
*
               CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 )
*
               IF( IERR( 1 ).GT.0 ) THEN
                  IF( IAM.EQ.0 )
     $               WRITE( NOUT, FMT = 9997 )'MEMORY'
                  KSKIP = KSKIP + 1
                  GO TO 10
               END IF
*
*              Generate matrix Z = In
*
               CALL PZLASET( 'All', N, N, ZERO, ONE, MEM( IPZ ), 1, 1,
     $                       DESCZ )
               CALL PZLASET( 'All', N, N, ZERO, ZERO, MEM( IPVR ),1,1,
     $                       DESCZ )
               CALL PZLASET( 'All', N, N, ZERO, ZERO, MEM( IPVL ),1,1,
     $                       DESCZ )
*
*              Generate matrix A upper Hessenberg
*
               CALL PZMATGEN( ICTXT, 'No transpose', 'No transpose',
     $                        DESCA( M_ ), DESCA( N_ ), DESCA( MB_ ),
     $                        DESCA( NB_ ), MEM( IPA ), DESCA( LLD_ ),
     $                        DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, 0,
     $                        NP, 0, NQ, MYROW, MYCOL, NPROW, NPCOL )
               CALL PZLASET( 'Lower', MAX( 0, N-1 ), MAX( 0, N-1 ),
     $                       ZERO, ZERO, MEM( IPA ), MIN( N, 2 ), 1,
     $                       DESCA )
*
*              Calculate inf-norm of A for residual error-checking
*
               IF( CHECK ) THEN
                  CALL PZFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ),
     $                            DESCA( LLD_ ), IPREPAD, IPOSTPAD,
     $                            PADVAL )
                  CALL PZFILLPAD( ICTXT, NP, NQ, MEM( IPVR-IPREPAD ),
     $                            DESCZ( LLD_ ), IPREPAD, IPOSTPAD,
     $                            PADVAL )
                  CALL PZFILLPAD( ICTXT, NP, NQ, MEM( IPVL-IPREPAD ),
     $                            DESCZ( LLD_ ), IPREPAD, IPOSTPAD,
     $                            PADVAL )
                  CALL PZFILLPAD( ICTXT, NP, NQ, MEM( IPZ-IPREPAD ),
     $                            DESCZ( LLD_ ), IPREPAD, IPOSTPAD,
     $                            PADVAL )
                  CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1,
     $                            MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD,
     $                            IPREPAD, IPOSTPAD, PADVAL )
                  ANORM = PZLANHS( 'I', N, MEM( IPA ), 1, 1, DESCA,
     $                    MEM( IPW ) )
                  CALL PZCHEKPAD( ICTXT, 'PZLANHS', NP, NQ,
     $                            MEM( IPA-IPREPAD ), DESCA( LLD_ ),
     $                            IPREPAD, IPOSTPAD, PADVAL )
                  CALL PZCHEKPAD( ICTXT, 'PZLANHS', WORKSIZ-IPOSTPAD, 1,
     $                            MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD,
     $                            IPREPAD, IPOSTPAD, PADVAL )
*
                  CALL PZFILLPAD( ICTXT, N, 1, MEM( IPWR-IPREPAD ), N,
     $                            IPREPAD, IPOSTPAD, PADVAL )
                  CALL PZFILLPAD( ICTXT, LWORK, 1, MEM( IPW-IPREPAD ),
     $                            LWORK, IPREPAD, IPOSTPAD, PADVAL )
*
               END IF
*
*              Set eigenvalues from diagonal
*
               DO JJJ = 1, N
                  CALL INFOG2L( JJJ, JJJ, DESCZ, NPROW, NPCOL, MYROW,
     $                       MYCOL, IROW, ICOL, II, JJ )
                  IF( MYROW.EQ.II .AND. MYCOL.EQ.JJ ) THEN
                     MEM( IPWR-1+JJJ ) = MEM( IPA-1+(ICOL-1)*LDA+IROW )
                  ELSE 
                     MEM( IPWR-1+JJJ ) = ZERO
                  ENDIF
               END DO
               CALL ZGSUM2D( CONTXT, 'All', ' ', N, 1, MEM( IPWR ), 1, 
     $                      -1, -1 )
*
               CALL SLBOOT( )
               CALL BLACS_BARRIER( ICTXT, 'All' )
               CALL SLTIMER( 1 )
*
*              Perform eigenvector calculation 
*
               CALL PZTREVC( 'B', 'A', SELECT, N, MEM( IPA ), DESCA,
     $                       MEM( IPVL ), DESCZ, MEM( IPVR ), DESCZ, N,
     $                       M, MEM( IPW ), RWORK, INFO )
*
               CALL SLTIMER( 1 )
*
               IF( INFO.NE.0 ) THEN
                  IF( IAM.EQ.0 )
     $               WRITE( NOUT, FMT = * )'PZTREVC INFO=', INFO
                  KFAIL = KFAIL + 1
                  GO TO 10
               END IF
*
               IF( CHECK ) THEN
*
*                 Check for memory overwrite in NEP factorization
*
                  CALL PZCHEKPAD( ICTXT, 'PZTREVC (A)', NP, NQ,
     $                            MEM( IPA-IPREPAD ), DESCA( LLD_ ),
     $                            IPREPAD, IPOSTPAD, PADVAL )
                  CALL PZCHEKPAD( ICTXT, 'PZTREVC (VR)', NP, NQ,
     $                            MEM( IPVR-IPREPAD ), DESCZ( LLD_ ),
     $                            IPREPAD, IPOSTPAD, PADVAL )
                  CALL PZCHEKPAD( ICTXT, 'PZTREVC (VL)', NP, NQ,
     $                            MEM( IPVL-IPREPAD ), DESCZ( LLD_ ),
     $                            IPREPAD, IPOSTPAD, PADVAL )
                  CALL PZCHEKPAD( ICTXT, 'PZTREVC (WR)', N, 1,
     $                            MEM( IPWR-IPREPAD ), N, IPREPAD,
     $                            IPOSTPAD, PADVAL )
                  CALL PZCHEKPAD( ICTXT, 'PZTREVC (WORK)', LWORK, 1,
     $                            MEM( IPW-IPREPAD ), LWORK, IPREPAD,
     $                            IPOSTPAD, PADVAL )
*
                  CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1,
     $                            MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD,
     $                            IPREPAD, IPOSTPAD, PADVAL )
*
*                 Compute || T * Z - Z * D || / ( N*|| T ||*EPS )
*
                  FRESID = 0.0D+0
                  QRESID = 0.0D+0
                  CALL PZGET22( 'N', 'N', 'N', N, MEM( IPA ), DESCA, 
     $                          MEM( IPVR ), DESCZ, MEM( IPWR ), 
     $                          MEM( IPZ ), DESCZ, RWORK, RESULT )
                  FRESID = RESULT( 1 )
                  QRESID = RESULT( 2 )
*
*                 Compute || T^H * L - L * D^H || / ( N*|| T ||*EPS )
*
                  CALL PZGET22( 'C', 'N', 'C', N, MEM( IPA ), DESCA, 
     $                          MEM( IPVL ), DESCZ, MEM( IPWR ), 
     $                          MEM( IPZ ), DESCZ, RWORK, RESULT )
                  FRESID = MAX( FRESID, RESULT( 1 ) )
                  QRESID = MAX( QRESID, RESULT( 2 ) )
*
                  CALL PZCHEKPAD( ICTXT, 'PZGET22 (A)', NP, NQ,
     $                            MEM( IPA-IPREPAD ), DESCA( LLD_ ),
     $                            IPREPAD, IPOSTPAD, PADVAL )
                  CALL PZCHEKPAD( ICTXT, 'PZGET22 (VR)', NP, NQ,
     $                            MEM( IPVR-IPREPAD ), DESCZ( LLD_ ),
     $                            IPREPAD, IPOSTPAD, PADVAL )
                  CALL PZCHEKPAD( ICTXT, 'PZGET22 (VL)', NP, NQ,
     $                            MEM( IPVL-IPREPAD ), DESCZ( LLD_ ),
     $                            IPREPAD, IPOSTPAD, PADVAL )
                  CALL PZCHEKPAD( ICTXT, 'PZGET22 (Z)', NP, NQ,
     $                            MEM( IPZ-IPREPAD ), DESCZ( LLD_ ),
     $                            IPREPAD, IPOSTPAD, PADVAL )
*
*                 Test residual and detect NaN result
*
                  IF( ( FRESID.LE.THRESH ) .AND.
     $                ( ( FRESID-FRESID ).EQ.0.0D+0 ) .AND.
     $                ( QRESID.LE.THRESH ) .AND.
     $                ( ( QRESID-QRESID ).EQ.0.0D+0 ) ) THEN
                     KPASS = KPASS + 1
                     PASSED = 'PASSED'
                  ELSE
                     KFAIL = KFAIL + 1
                     PASSED = 'FAILED'
                     IF( IAM.EQ.0 ) THEN
                        WRITE( NOUT, FMT = 9986 )FRESID
                        WRITE( NOUT, FMT = 9985 )QRESID
                     END IF
                  END IF
*
               ELSE
*
*                 Don't perform the checking, only timing
*
                  KPASS = KPASS + 1
                  FRESID = FRESID - FRESID
                  QRESID = QRESID - QRESID
                  PASSED = 'BYPASS'
*
               END IF
*
*              Gather maximum of all CPU and WALL clock timings
*
               CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME )
               CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME )
*
*              Print results
*
               IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN
*
*                 2 N^2 flops for PxTREVC
*
                  NOPS = 2.0D+0*DBLE( N )**2
*
*                 Calculate total megaflops -- eigenvector calc only,
*                 -- for WALL and CPU time, and print output
*
*                 Print WALL time if machine supports it
*
                  IF( WTIME( 1 ).GT.0.0D+0 ) THEN
                     TMFLOPS = NOPS / ( WTIME( 1 )*1.0D+6 )
                  ELSE
                     TMFLOPS = 0.0D+0
                  END IF
                  IF( WTIME( 1 ).GE.0.0D+0 )
     $               WRITE( NOUT, FMT = 9993 )'WALL', N, NB, NPROW,
     $               NPCOL, WTIME( 1 ), TMFLOPS, PASSED
*
*                 Print CPU time if machine supports it
*
                  IF( CTIME( 1 ).GT.0.0D+0 ) THEN
                     TMFLOPS = NOPS / ( CTIME( 1 )*1.0D+6 )
                  ELSE
                     TMFLOPS = 0.0D+0
                  END IF
*
                  IF( CTIME( 1 ).GE.0.0D+0 )
     $               WRITE( NOUT, FMT = 9993 )'CPU ', N, NB, NPROW,
     $               NPCOL, CTIME( 1 ), TMFLOPS, PASSED
               END IF
*
   10       CONTINUE
*
   20    CONTINUE
*
         CALL BLACS_GRIDEXIT( ICTXT )
*
   30 CONTINUE
*
*     Print ending messages and close output file
*
      IF( IAM.EQ.0 ) THEN
         KTESTS = KPASS + KFAIL + KSKIP
         WRITE( NOUT, FMT = * )
         WRITE( NOUT, FMT = 9992 )KTESTS
         IF( CHECK ) THEN
            WRITE( NOUT, FMT = 9991 )KPASS
            WRITE( NOUT, FMT = 9989 )KFAIL
         ELSE
            WRITE( NOUT, FMT = 9990 )KPASS
         END IF
         WRITE( NOUT, FMT = 9988 )KSKIP
         WRITE( NOUT, FMT = * )
         WRITE( NOUT, FMT = * )
         WRITE( NOUT, FMT = 9987 )
         IF( NOUT.NE.6 .AND. NOUT.NE.0 )
     $      CLOSE ( NOUT )
      END IF
*
      CALL BLACS_EXIT( 0 )
*
 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3,
     $      '; It should be at least 1' )
 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most',
     $      I4 )
 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' )
 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least',
     $      I11 )
 9995 FORMAT( 'TIME     N  NB    P    Q NEP Time   MFLOPS  CHECK' )
 9994 FORMAT( '---- ----- --- ---- ---- -------- -------- ------' )
 9993 FORMAT( A4, 1X, I5, 1X, I3, 1X, I4, 1X, I4, 1X, F8.2, 1X, F8.2,
     $      1X, A6 )
 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' )
 9991 FORMAT( I5, ' tests completed and passed residual checks.' )
 9990 FORMAT( I5, ' tests completed without checking.' )
 9989 FORMAT( I5, ' tests completed and failed residual checks.' )
 9988 FORMAT( I5, ' tests skipped because of illegal input values.' )
 9987 FORMAT( 'END OF TESTS.' )
 9986 FORMAT( '||H*Z - Z*D|| / (||T|| * N * eps) = ', G25.7 )
 9985 FORMAT( 'max_j(max|Z(j)| - 1) / ( N * eps ) ', G25.7 )
*
      STOP
*
*     End of PZEVCDRIVER
*
      END
SHAR_EOF
fi # end of overwriting check
if test -f 'pzevcinfo.f'
then
	echo shar: will not over-write existing file "'pzevcinfo.f'"
else
cat << "SHAR_EOF" > 'pzevcinfo.f'
      SUBROUTINE PZEVCINFO( SUMMRY, NOUT, NMAT, NVAL, LDNVAL, NNB,
     $                      NBVAL, LDNBVAL, NGRIDS, PVAL, LDPVAL, QVAL,
     $                      LDQVAL, THRESH, WORK, IAM, NPROCS )
*
*     Mark R. Fahey
*     March 2000
*
*     .. Scalar Arguments ..
      CHARACTER*( * )    SUMMRY
      INTEGER            IAM, LDNBVAL, LDNVAL, LDPVAL, LDQVAL, NGRIDS,
     $                   NMAT, NNB, NOUT, NPROCS
      REAL               THRESH
*     ..
*     .. Array Arguments ..
      INTEGER            NBVAL( LDNBVAL ), NVAL( LDNVAL ),
     $                   PVAL( LDPVAL ), QVAL( LDQVAL ), WORK( * )
*     ..
*
*  Purpose
*  =======
*
*  PZEVCINFO gets needed startup information for PZTREVC driver
*  and transmits it to all processes.
*
*  Arguments
*  =========
*
*  SUMMRY   (global output) CHARACTER*(*)
*           Name of output (summary) file (if any). Only defined for
*           process 0.
*
*  NOUT     (global output) INTEGER
*           The unit number for output file. NOUT = 6, ouput to screen,
*           NOUT = 0, output to stderr.  Only defined for process 0.
*
*  NMAT     (global output) INTEGER
*           The number of different values that can be used for N.
*
*  NVAL     (global output) INTEGER array, dimension (LDNVAL)
*           The values of N (the order of the matrix) to run the code
*           with.
*
*  LDNVAL   (global input) INTEGER
*           The maximum number of different values that can be used for
*           N, LDNVAL > =  NMAT.
*
*  NNB      (global output) INTEGER
*           The number of different values that can be used for NB.
*
*  NBVAL    (global output) INTEGER array, dimension (LDNBVAL)
*           The values of NB (blocksize) to run the code with.
*
*  LDNBVAL  (global input) INTEGER
*           The maximum number of different values that can be used for
*           NB, LDNBVAL >= NNB.
*
*  NGRIDS   (global output) INTEGER
*           The number of different values that can be used for P & Q.
*
*  PVAL     (global output) INTEGER array, dimension (LDPVAL)
*           The values of P (number of process rows) to run the code
*           with.
*
*  LDPVAL   (global input) INTEGER
*           The maximum number of different values that can be used for
*           P, LDPVAL >= NGRIDS.
*
*  QVAL     (global output) INTEGER array, dimension (LDQVAL)
*           The values of Q (number of process columns) to run the code
*           with.
*
*  LDQVAL   (global input) INTEGER
*           The maximum number of different values that can be used for
*           Q, LDQVAL >= NGRIDS.
*
*  THRESH   (global output) REAL
*           Indicates what error checks shall be run and printed out:
*           < 0 : Perform no error checking
*           > 0 : report all residuals greater than THRESH
*
*  WORK     (local workspace) INTEGER array of dimension >=
*           MAX( 3, LDNVAL+LDNBVAL+LDPVAL+LDQVAL ), used to pack all
*           input arrays in order to send info in one message.
*
*  IAM      (local input) INTEGER
*           My process number.
*
*  NPROCS   (global input) INTEGER
*           The total number of processes.
*
*
*  Implemented by:  M. Fahey, June 2000
*
* ======================================================================
*
* Note: For packing the information we assumed that the length in bytes
* ===== of an integer is equal to the length in bytes of a real single
*       precision.
*
* ======================================================================
*
*     .. Parameters ..
      INTEGER            NIN
      PARAMETER          ( NIN = 11 )
*     ..
*     .. Local Scalars ..
      CHARACTER*79       USRINFO
      INTEGER            I, ICTXT
      DOUBLE PRECISION   EPS
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT,
     $                   BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D,
     $                   IGEBS2D, SGEBR2D, SGEBS2D
*     ..
*     .. External Functions ..
      DOUBLE PRECISION   PDLAMCH
      EXTERNAL           PDLAMCH
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN
*     ..
*     .. Executable Statements ..
*
*     Process 0 reads the input data, broadcasts to other processes and
*     writes needed information to NOUT
*
      IF( IAM.EQ.0 ) THEN
*
*        Open file and skip data file header
*
         OPEN( NIN, FILE = 'EVC.dat', STATUS = 'OLD' )
         READ( NIN, FMT = * )SUMMRY
         SUMMRY = ' '
*
*        Read in user-supplied info about machine type, compiler, etc.
*
         READ( NIN, FMT = 9999 )USRINFO
*
*        Read name and unit number for summary output file
*
         READ( NIN, FMT = * )SUMMRY
         READ( NIN, FMT = * )NOUT
         IF( NOUT.NE.0 .AND. NOUT.NE.6 )
     $      OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' )
*
*        Read and check the parameter values for the tests.
*
*        Get number of matrices and their dimensions
*
         READ( NIN, FMT = * )NMAT
         IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN
            WRITE( NOUT, FMT = 9994 )'N', LDNVAL
            GO TO 30
         END IF
         READ( NIN, FMT = * )( NVAL( I ), I = 1, NMAT )
*
*        Get values of NB
*
         READ( NIN, FMT = * )NNB
         IF( NNB.GT.LDNBVAL ) THEN
            WRITE( NOUT, FMT = 9994 )'NB', LDNBVAL
            GO TO 30
         END IF
         READ( NIN, FMT = * )( NBVAL( I ), I = 1, NNB )
*
         DO 10 I = 1, NNB
            IF( NBVAL( I ).LT.6 ) THEN
               WRITE( NOUT, FMT = 9992 )NBVAL( I )
               GO TO 30
            END IF
   10    CONTINUE
*
*        Get number of grids
*
         READ( NIN, FMT = * )NGRIDS
         IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN
            WRITE( NOUT, FMT = 9994 )'Grids', LDPVAL
            GO TO 30
         ELSE IF( NGRIDS.GT.LDQVAL ) THEN
            WRITE( NOUT, FMT = 9994 )'Grids', LDQVAL
            GO TO 30
         END IF
*
*        Get values of P and Q
*
         READ( NIN, FMT = * )( PVAL( I ), I = 1, NGRIDS )
         READ( NIN, FMT = * )( QVAL( I ), I = 1, NGRIDS )
*
*        Get level of checking
*
         READ( NIN, FMT = * )THRESH
*
*        Close input file
*
         CLOSE ( NIN )
*
*        For pvm only: if virtual machine not set up, allocate it and
*        spawn the correct number of processes.
*
         IF( NPROCS.LT.1 ) THEN
            NPROCS = 0
            DO 20 I = 1, NGRIDS
               NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) )
   20       CONTINUE
            CALL BLACS_SETUP( IAM, NPROCS )
         END IF
*
*        Temporarily define blacs grid to include all processes so
*        information can be broadcast to all processes
*
         CALL BLACS_GET( -1, 0, ICTXT )
         CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS )
*
*        Compute machine epsilon
*
         EPS = PDLAMCH( ICTXT, 'eps' )
*
*        Pack information arrays and broadcast
*
         CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 )
*
         WORK( 1 ) = NMAT
         WORK( 2 ) = NNB
         WORK( 3 ) = NGRIDS
         CALL IGEBS2D( ICTXT, 'All', ' ', 3, 1, WORK, 3 )
*
         I = 1
         CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 )
         I = I + NMAT
         CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 )
         I = I + NNB
         CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 )
         I = I + NGRIDS
         CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 )
         I = I + NGRIDS - 1
         CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I )
*
*        regurgitate input
*
         WRITE( NOUT, FMT = 9999 )
     $      'ScaLAPACK nonsymmetric eigenvector calculation.'
         WRITE( NOUT, FMT = 9999 )USRINFO
         WRITE( NOUT, FMT = * )
         WRITE( NOUT, FMT = 9999 )'Tests of the parallel ' //
     $      'complex double precision eigenvector calculation.'
         WRITE( NOUT, FMT = 9999 )'The following scaled residual ' //
     $      'checks will be computed:'
         WRITE( NOUT, FMT = 9999 )
     $      ' Residual  = max( ||T*R-R*D||/(||H||*eps*N)' //
     $      ' , ||T^H*L-L*D^H||/(||H||*eps*N) )'
         WRITE( NOUT, FMT = 9999 )
     $      ' Normalization residual = max(max_j(max|R(j)|-1),' //
     $      ' max_j(max|L(j)|-1))/(eps*N)'
         WRITE( NOUT, FMT = 9999 )'The matrix A is randomly ' //
     $      'generated for each test.'
         WRITE( NOUT, FMT = * )
         WRITE( NOUT, FMT = 9999 )'An explanation of the input/output '
     $       // 'parameters follows:'
         WRITE( NOUT, FMT = 9999 )
     $      'TIME    : Indicates whether WALL or ' //
     $      'CPU time was used.'
*
         WRITE( NOUT, FMT = 9999 )
     $      'N       : The number of columns in the ' // 'matrix A.'
         WRITE( NOUT, FMT = 9999 )
     $      'NB      : The size of the square blocks the' //
     $      ' matrix A is split into.'
         WRITE( NOUT, FMT = 9999 )
     $      'P       : The number of process rows.'
         WRITE( NOUT, FMT = 9999 )
     $      'Q       : The number of process columns.'
         WRITE( NOUT, FMT = 9999 )
     $      'THRESH  : If a residual value is less than' //
     $      ' THRESH, CHECK is flagged as PASSED'
         WRITE( NOUT, FMT = 9999 )
     $      'NEP time : Time in seconds to decompose the ' // ' matrix'
         WRITE( NOUT, FMT = 9999 )'MFLOPS  : Rate of execution '
         WRITE( NOUT, FMT = * )
         WRITE( NOUT, FMT = 9999 )
     $      'The following parameter values will be used:'
         WRITE( NOUT, FMT = 9996 )'N    ',
     $      ( NVAL( I ), I = 1, MIN( NMAT, 10 ) )
         IF( NMAT.GT.10 )
     $      WRITE( NOUT, FMT = 9997 )( NVAL( I ), I = 11, NMAT )
         WRITE( NOUT, FMT = 9996 )'NB   ',
     $      ( NBVAL( I ), I = 1, MIN( NNB, 10 ) )
         IF( NNB.GT.10 )
     $      WRITE( NOUT, FMT = 9997 )( NBVAL( I ), I = 11, NNB )
         WRITE( NOUT, FMT = 9996 )'P    ',
     $      ( PVAL( I ), I = 1, MIN( NGRIDS, 10 ) )
         IF( NGRIDS.GT.10 )
     $      WRITE( NOUT, FMT = 9997 )( PVAL( I ), I = 11, NGRIDS )
         WRITE( NOUT, FMT = 9996 )'Q    ',
     $      ( QVAL( I ), I = 1, MIN( NGRIDS, 10 ) )
         IF( NGRIDS.GT.10 )
     $      WRITE( NOUT, FMT = 9997 )( QVAL( I ), I = 11, NGRIDS )
         WRITE( NOUT, FMT = * )
         WRITE( NOUT, FMT = 9995 )EPS
         WRITE( NOUT, FMT = 9998 )THRESH
*
      ELSE
*
*        If in pvm, must participate setting up virtual machine
*
         IF( NPROCS.LT.1 )
     $      CALL BLACS_SETUP( IAM, NPROCS )
*
*        Temporarily define blacs grid to include all processes so
*        information can be broadcast to all processes
*
         CALL BLACS_GET( -1, 0, ICTXT )
         CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS )
*
*        Compute machine epsilon
*
         EPS = PDLAMCH( ICTXT, 'eps' )
*
         CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 )
         CALL IGEBR2D( ICTXT, 'All', ' ', 3, 1, WORK, 3, 0, 0 )
         NMAT = WORK( 1 )
         NNB = WORK( 2 )
         NGRIDS = WORK( 3 )
*
         I = NMAT + NNB + 2*NGRIDS
         CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 )
         I = 1
         CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 )
         I = I + NMAT
         CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 )
         I = I + NNB
         CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 )
         I = I + NGRIDS
         CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 )
*
      END IF
*
      CALL BLACS_GRIDEXIT( ICTXT )
*
      RETURN
*
   30 CONTINUE
      WRITE( NOUT, FMT = 9993 )
      CLOSE ( NIN )
      IF( NOUT.NE.6 .AND. NOUT.NE.0 )
     $   CLOSE ( NOUT )
      CALL BLACS_ABORT( ICTXT, 1 )
*
      STOP
*
 9999 FORMAT( A )
 9998 FORMAT( 'Routines pass computational tests if scaled residual ',
     $      'is less than ', G12.5 )
 9997 FORMAT( '                 ', 10I6 )
 9996 FORMAT( 2X, A5, '   :        ', 10I6 )
 9995 FORMAT( 'Relative machine precision (eps) is taken to be ',
     $      E18.6 )
 9994 FORMAT( ' Number of values of ', 5A,
     $      ' is less than 1 or greater ', 'than ', I2 )
 9993 FORMAT( ' Illegal input in file ', 40A, '.  Aborting run.' )
 9992 FORMAT( ' Blocking size too small at ', I2, ' must be >=6.' )
*
*     End of PZEVCINFO
*
      END
SHAR_EOF
fi # end of overwriting check
if test -f 'pzget22.f'
then
	echo shar: will not over-write existing file "'pzget22.f'"
else
cat << "SHAR_EOF" > 'pzget22.f'
      SUBROUTINE PZGET22( TRANSA, TRANSE, TRANSW, N, A, DESCA, E, 
     $                    DESCE, W, WORK, DESCW, RWORK, RESULT )
*
*  Mark R. Fahey
*  June 23, 2000
*
*     .. Scalar Arguments ..
      CHARACTER          TRANSA, TRANSE, TRANSW
      INTEGER            N
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * ), DESCE( * ), DESCW( * )
      DOUBLE PRECISION   RESULT( 2 ), RWORK( * )
      COMPLEX*16         A( * ), E( * ), W( * ), WORK( * )
*     ..
*
*  Purpose
*  =======
*
*  PZGET22 does an eigenvector check.
*
*  The basic test is:
*
*     RESULT(1) = | A E  -  E W | / ( |A| |E| ulp )
*
*  using the 1-norm.  It also tests the normalization of E:
*
*     RESULT(2) = max | m-norm(E(j)) - 1 | / ( n ulp )
*                  j
*
*  where E(j) is the j-th eigenvector, and m-norm is the max-norm of a
*  vector.  The max-norm of a complex n-vector x in this case is the
*  maximum of |re(x(i)| + |im(x(i)| over i = 1, ..., n.
*
*  Arguments
*  ==========
*
*  TRANSA  (input) CHARACTER*1
*          Specifies whether or not A is transposed.
*          = 'N':  No transpose
*          = 'T':  Transpose
*          = 'C':  Conjugate transpose
*
*  TRANSE  (input) CHARACTER*1
*          Specifies whether or not E is transposed.
*          = 'N':  No transpose, eigenvectors are in columns of E
*          = 'T':  Transpose, eigenvectors are in rows of E
*          = 'C':  Conjugate transpose, eigenvectors are in rows of E
*
*  TRANSW  (input) CHARACTER*1
*          Specifies whether or not W is transposed.
*          = 'N':  No transpose
*          = 'T':  Transpose, same as TRANSW = 'N'
*          = 'C':  Conjugate transpose, use -WI(j) instead of WI(j)
*
*  N       (input) INTEGER
*          The order of the matrix A.  N >= 0.
*
*  A       (input) COMPLEX*16 array, dimension (*)
*          The matrix whose eigenvectors are in E.
*
*  DESCA   (input) INTEGER array, dimension(*)
*
*  E       (input) COMPLEX*16 array, dimension (*)
*          The matrix of eigenvectors. If TRANSE = 'N', the eigenvectors
*          are stored in the columns of E, if TRANSE = 'T' or 'C', the
*          eigenvectors are stored in the rows of E.
*
*  DESCE   (input) INTEGER array, dimension(*)
*
*  W       (input) COMPLEX*16 array, dimension (N)
*          The eigenvalues of A.
*
*  WORK    (workspace) COMPLEX*16 array, dimension (*)
*  DESCW   (input) INTEGER array, dimension(*)
*
*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
*
*  RESULT  (output) DOUBLE PRECISION array, dimension (2)
*          RESULT(1) = | A E  -  E W | / ( |A| |E| ulp )
*          RESULT(2) = max | m-norm(E(j)) - 1 | / ( n ulp )
*                       j
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
     $                   MB_, NB_, RSRC_, CSRC_, LLD_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
     $                   CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                   RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
      DOUBLE PRECISION   ZERO, ONE
      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
      COMPLEX*16         CZERO, CONE
      PARAMETER          ( CZERO = ( 0.0D+0, 0.0D+0 ),
     $                   CONE = ( 1.0D+0, 0.0D+0 ) )
*     ..
*     .. Local Scalars ..
      CHARACTER          NORMA, NORME
      INTEGER            ITRNSE, ITRNSW, J, JCOL, JROW, JVEC
      DOUBLE PRECISION   ANORM, ENORM, ENRMAX, ENRMIN, ERRNRM, TEMP1,
     $                   TEMP2, ULP, UNFL
      COMPLEX*16         CDUM, WTEMP
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      DOUBLE PRECISION   PDLAMCH, PZLANGE
      EXTERNAL           LSAME, PDLAMCH, PZLANGE
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, DESCINIT, DGAMX2D, DGAMN2D,
     $                   PZAXPY, PZGEMM, PZLASET
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          ABS, DBLE, DCONJG, DIMAG, MAX, MIN
*     ..
*     .. Statement Functions ..
      DOUBLE PRECISION   CABS1
*     ..
*     .. Statement Function definitions ..
      CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )
*     ..
*     .. Executable Statements ..
*
*     Initialize RESULT (in case N=0)
*
      RESULT( 1 ) = ZERO
      RESULT( 2 ) = ZERO
      IF( N.LE.0 )
     $   RETURN
*
      CONTXT = DESCA( CTXT_ )
      RSRC = DESCA( RSRC_ )
      CSRC = DESCA( CSRC_ )
      NB = DESCA( NB_ )
      MB = DESCA( MB_ )
      LDA = DESCA( LLD_ )
      LDE = DESCE( LLD_ )
      LDW = DESCW( LLD_ )
      CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL )
*
      UNFL = PDLAMCH( CONTXT, 'Safe minimum' )
      ULP = PDLAMCH( CONTXT, 'Precision' )
*
      ITRNSE = 0
      ITRNSW = 0
      NORMA = 'O'
      NORME = 'O'
*
      IF( LSAME( TRANSA, 'T' ) .OR. LSAME( TRANSA, 'C' ) ) THEN
         NORMA = 'I'
      END IF
*
      IF( LSAME( TRANSE, 'T' ) ) THEN
         ITRNSE = 1
         NORME = 'I'
      ELSE IF( LSAME( TRANSE, 'C' ) ) THEN
         ITRNSE = 2
         NORME = 'I'
      END IF
*
      IF( LSAME( TRANSW, 'C' ) ) THEN
         ITRNSW = 1
      END IF
*
*     Normalization of E:
*
      ENRMIN = ONE / ULP
      ENRMAX = ZERO
      IF( ITRNSE.EQ.0 ) THEN
         DO 20 JVEC = 1, N
            TEMP1 = ZERO
            DO 10 J = 1, N
               CALL INFOG2L( J, JVEC, DESCE, NPROW, NPCOL, MYROW, 
     $                       MYCOL, IROW, ICOL, II, JJ )
               IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN
                  TEMP1 = MAX( TEMP1, CABS1( E( (ICOL-1)*LDE+IROW )))
               ENDIF
   10       CONTINUE
            IF( MYCOL.EQ.JJ ) THEN
            CALL DGAMX2D( CONTXT, 'Col', ' ', 1, 1, TEMP1, 1, RA, CA,
     $                   -1, -1, -1 )
            ENRMIN = MIN( ENRMIN, TEMP1 )
            ENRMAX = MAX( ENRMAX, TEMP1 )
            ENDIF
   20    CONTINUE
         CALL DGAMX2D( CONTXT, 'Row', ' ', 1, 1, ENRMAX, 1, RA, CA,
     $                 -1, -1, -1 )
         CALL DGAMN2D( CONTXT, 'Row', ' ', 1, 1, ENRMIN, 1, RA, CA,
     $                 -1, -1, -1 )
      ELSE
         DO 50 J = 1, N
            TEMP1 = ZERO
            DO 40 JVEC = 1, N
               CALL INFOG2L( J, JVEC, DESCE, NPROW, NPCOL, MYROW, 
     $                       MYCOL, IROW, ICOL, II, JJ )
               IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN
                  TEMP1 = MAX( TEMP1, CABS1( E( (ICOL-1)*LDE+IROW )))
               ENDIF
   40       CONTINUE
            IF( MYROW.EQ.II ) THEN
            CALL DGAMX2D( CONTXT, 'Row', ' ', 1, 1, TEMP1, 1, RA, CA,
     $                    -1, -1, -1 )
            ENRMIN = MIN( ENRMIN, TEMP1 )
            ENRMAX = MAX( ENRMAX, TEMP1 )
            ENDIF
   50    CONTINUE
         CALL DGAMX2D( CONTXT, 'Row', ' ', 1, 1, ENRMAX, 1, RA, CA,
     $                 -1, -1, -1 )
         CALL DGAMN2D( CONTXT, 'Row', ' ', 1, 1, ENRMIN, 1, RA, CA,
     $                 -1, -1, -1 )
      END IF
*
*     Norm of A:
*
      ANORM = MAX( PZLANGE( NORMA, N, N, A, 1, 1, DESCA, RWORK ), 
     $             UNFL )
*
*     Norm of E:
*
      ENORM = MAX( PZLANGE( NORME, N, N, E, 1, 1, DESCE, RWORK ), 
     $             ULP )
*
*     Norm of error:
*
*     Error =  AE - EW
*
      CALL PZLASET( 'Full', N, N, CZERO, CZERO, WORK, 1, 1, DESCW )
*
      DO 100 JCOL = 1, N
         IF( ITRNSW.EQ.0 ) THEN
            WTEMP = W( JCOL )
         ELSE
            WTEMP = DCONJG( W( JCOL ) )
         END IF
*
         IF( ITRNSE.EQ.0 ) THEN
            CALL PZAXPY( N, WTEMP, E, 1, JCOL, DESCE, 1, WORK, 1, 
     $                   JCOL, DESCW, 1 )
         ELSE IF( ITRNSE.EQ.1 ) THEN
            CALL PZAXPY( N, WTEMP, E, JCOL, 1, DESCE, N, WORK, 1, 
     $                   JCOL, DESCW, 1 )
         ELSE
            CALL PZAXPY( N, DCONJG( WTEMP ), E, JCOL, 1, DESCE, N, 
     $                   WORK, 1, JCOL, DESCW, 1 )
            DO JROW = 1, N
               CALL INFOG2L( JROW, JCOL, DESCW, NPROW, NPCOL, MYROW, 
     $                       MYCOL, IROW, ICOL, II, JJ )
               IF( ( MYROW.EQ.II ) .AND. ( MYCOL.EQ.JJ ) ) THEN
                  WORK( (JCOL-1)*LDW+JROW ) = DCONJG( WORK( (JCOL-1)
     $                *LDW+JROW ) )
               ENDIF
            END DO
         END IF
  100 CONTINUE
*
      CALL PZGEMM( TRANSA, TRANSE, N, N, N, CONE, A, 1, 1, DESCA, E, 
     $             1, 1, DESCE, -CONE, WORK, 1, 1, DESCW )
*
      ERRNRM = PZLANGE( 'One', N, N, WORK, 1, 1, DESCW, RWORK ) 
     $                 / ENORM
*
*     Compute RESULT(1) (avoiding under/overflow)
*
      IF( ANORM.GT.ERRNRM ) THEN
         RESULT( 1 ) = ( ERRNRM / ANORM ) / ULP
      ELSE
         IF( ANORM.LT.ONE ) THEN
            RESULT( 1 ) = ( MIN( ERRNRM, ANORM ) / ANORM ) / ULP
         ELSE
            RESULT( 1 ) = MIN( ERRNRM / ANORM, ONE ) / ULP
         END IF
      END IF
*
*     Compute RESULT(2) : the normalization error in E.
*
      RESULT( 2 ) = MAX( ABS( ENRMAX-ONE ), ABS( ENRMIN-ONE ) ) /
     $              ( DBLE( N )*ULP )
*
      RETURN
*
*     End of PZGET22
*
      END
SHAR_EOF
fi # end of overwriting check
if test -f 'pznepdriver.f'
then
	echo shar: will not over-write existing file "'pznepdriver.f'"
else
cat << "SHAR_EOF" > 'pznepdriver.f'
      PROGRAM PZNEPDRIVER
*
*     Mark R. Fahey
*     March 2000
*
*  Purpose
*  =======
*
*  PZNEPDRIVER is the main test program for the COMPLEX*16 
*  SCALAPACK NEP routines.  This test driver performs a Schur
*  decomposition followed by residual check of a Hessenberg matrix.
*
*  The program must be driven by a short data file.  An annotated
*  example of a data file can be obtained by deleting the first 3
*  characters from the following 18 lines:
*  'SCALAPACK, Version 1.4, NEP (Nonsymmetric EigenProblem) input file'
*  'Intel iPSC/860 hypercube, gamma model.'
*  'NEP.out'            output file name (if any)
*  6                    device out
*  8                    number of problems sizes
*  1 2 3 4 6 10 100 200 vales of N
*  3                    number of NB's
*  6 20 40              values of NB
*  4                    number of process grids (ordered pairs of P & Q)
*  1 2 1 4              values of P
*  1 2 4 1              values of Q
*  20.0                 threshold
*
*  Internal Parameters
*  ===================
*
*  TOTMEM   INTEGER, default = 4000000
*           TOTMEM is a machine-specific parameter indicating the
*           maximum amount of available memory in bytes.
*           The user should customize TOTMEM to his platform.  Remember
*           to leave room in memory for the operating system, the BLACS
*           buffer, etc.  For example, on a system with 8 MB of memory
*           per process (e.g., one processor on an Intel iPSC/860), the
*           parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
*           code, BLACS buffer, etc).  However, for PVM, we usually set
*           TOTMEM = 2000000.  Some experimenting with the maximum value
*           of TOTMEM may be required.
*
*  DBLESZ   INTEGER, default = 8 bytes.
*           DBLESZ indicate the length in bytes on the given platform
*           for a double precision real.
*  MEM      COMPLEX*16 array, dimension ( TOTMEM / DBLESZ )
*
*           All arrays used by SCALAPACK routines are allocated from
*           this array and referenced by pointers.  The integer IPA,
*           for example, is a pointer to the starting element of MEM for
*           the matrix A.
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DT_,
     $                   LLD_, MB_, M_, NB_, N_, RSRC_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1,
     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
      INTEGER            DBLESZ, TOTMEM, MEMSIZ, NTESTS
      COMPLEX*16         PADVAL, ZERO, ONE
      PARAMETER          ( DBLESZ = 16, TOTMEM = 200000000,
     $                   MEMSIZ = TOTMEM / DBLESZ, NTESTS = 20 )
      PARAMETER          ( PADVAL = ( -9923.0D+0, -9923.0D+0 ),
     $                     ZERO = ( 0.0D+0, 0.0D+0 ),
     $                     ONE = ( 1.0D+0, 0.0D+0 ) )
*     ..
*     .. Local Scalars ..
      LOGICAL            CHECK
      CHARACTER*6        PASSED
      CHARACTER*80       OUTFILE
      INTEGER            I, IAM, IASEED, ICTXT, III, IMIDPAD, INFO, IPA,
     $                   IPOSTPAD, IPREPAD, IPW, IPWR, IPZ, J, K,
     $                   KFAIL, KPASS, KSKIP, KTESTS, LDA, LDZ, LWORK,
     $                   MYCOL, MYROW, N, NB, NGRIDS, NMAT, NNB, NOUT,
     $                   NP, NPCOL, NPROCS, NPROW, NQ, WORKSIZ
      REAL               THRESH
      DOUBLE PRECISION   ANORM, FRESID, NOPS, QRESID, TMFLOPS, ZNORM
*     ..
*     .. Local Arrays ..
      INTEGER            DESCA( DLEN_ ), DESCZ( DLEN_ ), IERR( 2 ),
     $                   IDUM( 1 ), NBVAL( NTESTS ), NVAL( NTESTS ),
     $                   PVAL( NTESTS ), QVAL( NTESTS )
      DOUBLE PRECISION   CTIME( 2 ), WTIME( 2 ), RWORK( 5000 )
      COMPLEX*16         MEM( MEMSIZ )
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_BARRIER, BLACS_EXIT, BLACS_GET,
     $                   BLACS_GRIDEXIT, BLACS_GRIDINFO, BLACS_GRIDINIT,
     $                   BLACS_PINFO, DESCINIT, IGSUM2D, PZCHEKPAD,
     $                   PZFILLPAD, PZGEMM, PZLAHQR, PZLASET, PZMATGEN,
     $                   PZNEPFCHK, PZNEPINFO, SLBOOT, SLCOMBINE,
     $                   SLTIMER
*     ..
*     .. External Functions ..
      INTEGER            ILCM, NUMROC
      DOUBLE PRECISION   PDLAMCH, PZLANGE, PZLANHS
      EXTERNAL           ILCM, NUMROC, PDLAMCH, PZLANGE, PZLANHS
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          DBLE, MAX, MIN
*     ..
*     .. Data statements ..
      DATA               KFAIL, KPASS, KSKIP, KTESTS / 4*0 /
*     ..
*     .. Executable Statements ..
*
*     Get starting information
*
      CALL BLACS_PINFO( IAM, NPROCS )
      IASEED = 100
      CALL PZNEPINFO( OUTFILE, NOUT, NMAT, NVAL, NTESTS, NNB, NBVAL,
     $                NTESTS, NGRIDS, PVAL, NTESTS, QVAL, NTESTS,
     $                THRESH, MEM, IAM, NPROCS )
      CHECK = ( THRESH.GE.0.0E+0 )
*
*     Print headings
*
      IF( IAM.EQ.0 ) THEN
         WRITE( NOUT, FMT = * )
         WRITE( NOUT, FMT = 9995 )
         WRITE( NOUT, FMT = 9994 )
         WRITE( NOUT, FMT = * )
      END IF
*
*     Loop over different process grids
*
      DO 30 I = 1, NGRIDS
*
         NPROW = PVAL( I )
         NPCOL = QVAL( I )
*
*        Make sure grid information is correct
*
         IERR( 1 ) = 0
         IF( NPROW.LT.1 ) THEN
            IF( IAM.EQ.0 )
     $         WRITE( NOUT, FMT = 9999 )'GRID', 'nprow', NPROW
            IERR( 1 ) = 1
         ELSE IF( NPCOL.LT.1 ) THEN
            IF( IAM.EQ.0 )
     $         WRITE( NOUT, FMT = 9999 )'GRID', 'npcol', NPCOL
            IERR( 1 ) = 1
         ELSE IF( NPROW*NPCOL.GT.NPROCS ) THEN
            IF( IAM.EQ.0 )
     $         WRITE( NOUT, FMT = 9998 )NPROW*NPCOL, NPROCS
            IERR( 1 ) = 1
         END IF
*
         IF( IERR( 1 ).GT.0 ) THEN
            IF( IAM.EQ.0 )
     $         WRITE( NOUT, FMT = 9997 )'grid'
            KSKIP = KSKIP + 1
            GO TO 30
         END IF
*
*        Define process grid
*
         CALL BLACS_GET( -1, 0, ICTXT )
         CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL )
         CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*
*        Go to bottom of process grid loop if this case doesn't use my
*        process
*
         IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL )
     $      GO TO 30
*
         DO 20 J = 1, NMAT
*
            N = NVAL( J )
*
*           Make sure matrix information is correct
*
            IERR( 1 ) = 0
            IF( N.LT.1 ) THEN
               IF( IAM.EQ.0 )
     $            WRITE( NOUT, FMT = 9999 )'MATRIX', 'N', N
               IERR( 1 ) = 1
            END IF
*
*           Check all processes for an error
*
            CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 )
*
            IF( IERR( 1 ).GT.0 ) THEN
               IF( IAM.EQ.0 )
     $            WRITE( NOUT, FMT = 9997 )'matrix'
               KSKIP = KSKIP + 1
               GO TO 20
            END IF
*
            DO 10 K = 1, NNB
*
               NB = NBVAL( K )
*
*              Make sure nb is legal
*
               IERR( 1 ) = 0
               IF( NB.LT.6 ) THEN
                  IERR( 1 ) = 1
                  IF( IAM.EQ.0 )
     $               WRITE( NOUT, FMT = 9999 )'NB', 'NB', NB
               END IF
*
*              Check all processes for an error
*
               CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 )
*
               IF( IERR( 1 ).GT.0 ) THEN
                  IF( IAM.EQ.0 )
     $               WRITE( NOUT, FMT = 9997 )'NB'
                  KSKIP = KSKIP + 1
                  GO TO 10
               END IF
*
*              Padding constants
*
               NP = NUMROC( N, NB, MYROW, 0, NPROW )
               NQ = NUMROC( N, NB, MYCOL, 0, NPCOL )
               IF( CHECK ) THEN
                  IPREPAD = MAX( NB, NP )
                  IMIDPAD = NB
                  IPOSTPAD = MAX( NB, NQ )
                  IPREPAD = IPREPAD + 1000
                  IMIDPAD = IMIDPAD + 1000
                  IPOSTPAD = IPOSTPAD + 1000                  
               ELSE
                  IPREPAD = 0
                  IMIDPAD = 0
                  IPOSTPAD = 0
               END IF
*
*              Initialize the array descriptor for the matrix A
*
               CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT,
     $                        MAX( 1, NP )+IMIDPAD, IERR( 1 ) )
*
*              Initialize the array descriptor for the matrix Z
*
               CALL DESCINIT( DESCZ, N, N, NB, NB, 0, 0, ICTXT,
     $                        MAX( 1, NP )+IMIDPAD, IERR( 2 ) )
*
               LDA = DESCA( LLD_ )
               LDZ = DESCZ( LLD_ )
               LDWORK = DESCZ( LLD_ )
*
*              Check all processes for an error
*
               CALL IGSUM2D( ICTXT, 'All', ' ', 2, 1, IERR, 2, -1, 0 )
*
               IF( IERR( 1 ).LT.0 .OR. IERR( 2 ).LT.0 ) THEN
                  IF( IAM.EQ.0 )
     $               WRITE( NOUT, FMT = 9997 )'descriptor'
                  KSKIP = KSKIP + 1
                  GO TO 10
               END IF
*
*              Assign pointers into MEM for SCALAPACK arrays, A is
*              allocated starting at position MEM( IPREPAD+1 )
*
               IPA = IPREPAD + 1
               IPZ = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD
               IPWR = IPZ + DESCZ( LLD_ )*NQ + IPOSTPAD + IPREPAD
               IPW = IPWR + DESCZ( LLD_ )*NQ + IPOSTPAD + IPREPAD
               III = N / NB
               IF( III*NB.LT.N )
     $            III = III + 1
               III = 7*III / ILCM( NPROW, NPCOL )
*
*
               LWORK = 3*N + MAX( 2*MAX( LDA, LDZ )+2*NQ, III )
               LWORK = LWORK + MAX(2*N, (8*ILCM(NPROW,NPCOL)+2)**2 )
*
               IF( CHECK ) THEN
*
*                 Figure the amount of workspace required by the
*                 checking routines PZNEPFCHK and PZLANHS
*
                  WORKSIZ = LWORK + MAX( NP*DESCA( NB_ ),
     $                      DESCA( MB_ )*NQ ) + IPOSTPAD
*
               ELSE
*
                  WORKSIZ = LWORK + IPOSTPAD
*
               END IF
*
*              Check for adequate memory for problem size
*
               IERR( 1 ) = 0
               IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN
                  IF( IAM.EQ.0 )
     $               WRITE( NOUT, FMT = 9996 )'Schur reduction',
     $               ( IPW+WORKSIZ )*DBLESZ
                  IERR( 1 ) = 1
               END IF
*
*              Check all processes for an error
*
               CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, IERR, 1, -1, 0 )
*
               IF( IERR( 1 ).GT.0 ) THEN
                  IF( IAM.EQ.0 )
     $               WRITE( NOUT, FMT = 9997 )'MEMORY'
                  KSKIP = KSKIP + 1
                  GO TO 10
               END IF
*
*              Generate matrix Z = In
*
               CALL PZLASET( 'All', N, N, ZERO, ONE, MEM( IPZ ), 1, 1,
     $                       DESCZ )
*
*              Generate matrix A upper Hessenberg
*
               CALL PZMATGEN( ICTXT, 'No transpose', 'No transpose',
     $                        DESCA( M_ ), DESCA( N_ ), DESCA( MB_ ),
     $                        DESCA( NB_ ), MEM( IPA ), DESCA( LLD_ ),
     $                        DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, 0,
     $                        NP, 0, NQ, MYROW, MYCOL, NPROW, NPCOL )
               CALL PZLASET( 'Lower', MAX( 0, N-2 ), MAX( 0, N-2 ),
     $                       ZERO, ZERO, MEM( IPA ), MIN( N, 3 ), 1,
     $                       DESCA )
*
*              Calculate inf-norm of A for residual error-checking
*
               IF( CHECK ) THEN
                  CALL PZFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ),
     $                            DESCA( LLD_ ), IPREPAD, IPOSTPAD,
     $                            PADVAL )
                  CALL PZFILLPAD( ICTXT, NP, NQ, MEM( IPZ-IPREPAD ),
     $                            DESCZ( LLD_ ), IPREPAD, IPOSTPAD,
     $                            PADVAL )
                  CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1,
     $                            MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD,
     $                            IPREPAD, IPOSTPAD, PADVAL )
                  ANORM = PZLANHS( 'I', N, MEM( IPA ), 1, 1, DESCA,
     $                    MEM( IPW ) )
                  CALL PZCHEKPAD( ICTXT, 'PZLANHS', NP, NQ,
     $                            MEM( IPA-IPREPAD ), DESCA( LLD_ ),
     $                            IPREPAD, IPOSTPAD, PADVAL )
                  CALL PZCHEKPAD( ICTXT, 'PZLANHS', WORKSIZ-IPOSTPAD, 1,
     $                            MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD,
     $                            IPREPAD, IPOSTPAD, PADVAL )
*
                  CALL PZFILLPAD( ICTXT, N, 1, MEM( IPWR-IPREPAD ), N,
     $                            IPREPAD, IPOSTPAD, PADVAL )
                  CALL PZFILLPAD( ICTXT, LWORK, 1, MEM( IPW-IPREPAD ),
     $                            LWORK, IPREPAD, IPOSTPAD, PADVAL )
*
               END IF
*
               CALL SLBOOT( )
               CALL BLACS_BARRIER( ICTXT, 'All' )
               CALL SLTIMER( 1 )
*
*              Perform NEP factorization
*
               CALL PZLAHQR( .TRUE., .TRUE., N, 1, N, MEM( IPA ), DESCA,
     $                       MEM( IPWR ), 1, N, MEM( IPZ ),
     $                       DESCZ, MEM( IPW ), LWORK, IDUM, 0, INFO )
*
               CALL SLTIMER( 1 )
*
               IF( INFO.NE.0 ) THEN
                  IF( IAM.EQ.0 )
     $               WRITE( NOUT, FMT = * )'PZLAHQR INFO=', INFO
                  KFAIL = KFAIL + 1
                  GO TO 10
               END IF
*
               IF( CHECK ) THEN
*
*                 Check for memory overwrite in NEP factorization
*
                  CALL PZCHEKPAD( ICTXT, 'PZLAHQR (A)', NP, NQ,
     $                            MEM( IPA-IPREPAD ), DESCA( LLD_ ),
     $                            IPREPAD, IPOSTPAD, PADVAL )
                  CALL PZCHEKPAD( ICTXT, 'PZLAHQR (Z)', NP, NQ,
     $                            MEM( IPZ-IPREPAD ), DESCZ( LLD_ ),
     $                            IPREPAD, IPOSTPAD, PADVAL )
                  CALL PZCHEKPAD( ICTXT, 'PZLAHQR (WR)', N, 1,
     $                            MEM( IPWR-IPREPAD ), N, IPREPAD,
     $                            IPOSTPAD, PADVAL )
                  CALL PZCHEKPAD( ICTXT, 'PZLAHQR (WORK)', LWORK, 1,
     $                            MEM( IPW-IPREPAD ), LWORK, IPREPAD,
     $                            IPOSTPAD, PADVAL )
*
                  CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1,
     $                            MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD,
     $                            IPREPAD, IPOSTPAD, PADVAL )
*
*                 Compute || Z * H * Z**T - H0 || / ( N*|| H0 ||*EPS )
*
                  CALL PZNEPFCHK( N, MEM( IPA ), 1, 1, DESCA, IASEED,
     $                            MEM( IPZ ), 1, 1, DESCZ, ANORM,
     $                            FRESID, MEM( IPW ) )
*
                  CALL PZCHEKPAD( ICTXT, 'PZNEPFCHK (A)', NP, NQ,
     $                            MEM( IPA-IPREPAD ), DESCA( LLD_ ),
     $                            IPREPAD, IPOSTPAD, PADVAL )
                  CALL PZCHEKPAD( ICTXT, 'PZNEPFCHK (Z)', NP, NQ,
     $                            MEM( IPZ-IPREPAD ), DESCZ( LLD_ ),
     $                            IPREPAD, IPOSTPAD, PADVAL )
                  CALL PZCHEKPAD( ICTXT, 'PZNEPFCHK (WORK)',
     $                            WORKSIZ-IPOSTPAD, 1,
     $                            MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD,
     $                            IPREPAD, IPOSTPAD, PADVAL )
*
*                 Compute || (Z**T)*Z - In ||_1
*
                  CALL PZLASET( 'All', N, N, ZERO, ONE, MEM( IPA ), 1,
     $                          1, DESCA )
                  CALL PZGEMM( 'Cong Tran', 'No transpose', N, N, N,
     $                         -ONE, MEM( IPZ ), 1, 1, DESCZ,
     $                         MEM( IPZ ), 1, 1, DESCZ, ONE, MEM( IPA ),
     $                         1, 1, DESCA )
                  ZNORM = PZLANGE( '1', N, N, MEM( IPA ), 1, 1, DESCA,
     $                    MEM( IPW ) )
                  QRESID = ZNORM / ( DBLE( N )*PDLAMCH( ICTXT, 'P' ) )
*
*                 Test residual and detect NaN result
*
                  IF( ( FRESID.LE.THRESH ) .AND.
     $                ( ( FRESID-FRESID ).EQ.0.0D+0 ) .AND.
     $                ( QRESID.LE.THRESH ) .AND.
     $                ( ( QRESID-QRESID ).EQ.0.0D+0 ) ) THEN
                     KPASS = KPASS + 1
                     PASSED = 'PASSED'
                  ELSE
                     KFAIL = KFAIL + 1
                     PASSED = 'FAILED'
                     IF( IAM.EQ.0 ) THEN
                        WRITE( NOUT, FMT = 9986 )FRESID
                        WRITE( NOUT, FMT = 9985 )QRESID
                     END IF
                  END IF
*
               ELSE
*
*                 Don't perform the checking, only timing
*
                  KPASS = KPASS + 1
                  FRESID = FRESID - FRESID
                  QRESID = QRESID - QRESID
                  PASSED = 'BYPASS'
*
               END IF
*
*              Gather maximum of all CPU and WALL clock timings
*
               CALL SLCOMBINE( ICTXT, 'All', '>', 'W', 1, 1, WTIME )
               CALL SLCOMBINE( ICTXT, 'All', '>', 'C', 1, 1, CTIME )
*
*              Print results
*
               IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN
*
*                 18 N^3 flops for PxLAHQR
*
                  NOPS = 18.0D+0*DBLE( N )**3
*
*                 Calculate total megaflops -- factorization only,
*                 -- for WALL and CPU time, and print output
*
*                 Print WALL time if machine supports it
*
                  IF( WTIME( 1 ).GT.0.0D+0 ) THEN
                     TMFLOPS = NOPS / ( WTIME( 1 )*1.0D+6 )
                  ELSE
                     TMFLOPS = 0.0D+0
                  END IF
                  IF( WTIME( 1 ).GE.0.0D+0 )
     $               WRITE( NOUT, FMT = 9993 )'WALL', N, NB, NPROW,
     $               NPCOL, WTIME( 1 ), TMFLOPS, PASSED
*
*                 Print CPU time if machine supports it
*
                  IF( CTIME( 1 ).GT.0.0D+0 ) THEN
                     TMFLOPS = NOPS / ( CTIME( 1 )*1.0D+6 )
                  ELSE
                     TMFLOPS = 0.0D+0
                  END IF
*
                  IF( CTIME( 1 ).GE.0.0D+0 )
     $               WRITE( NOUT, FMT = 9993 )'CPU ', N, NB, NPROW,
     $               NPCOL, CTIME( 1 ), TMFLOPS, PASSED
               END IF
*
   10       CONTINUE
*
   20    CONTINUE
*
         CALL BLACS_GRIDEXIT( ICTXT )
*
   30 CONTINUE
*
*     Print ending messages and close output file
*
      IF( IAM.EQ.0 ) THEN
         KTESTS = KPASS + KFAIL + KSKIP
         WRITE( NOUT, FMT = * )
         WRITE( NOUT, FMT = 9992 )KTESTS
         IF( CHECK ) THEN
            WRITE( NOUT, FMT = 9991 )KPASS
            WRITE( NOUT, FMT = 9989 )KFAIL
         ELSE
            WRITE( NOUT, FMT = 9990 )KPASS
         END IF
         WRITE( NOUT, FMT = 9988 )KSKIP
         WRITE( NOUT, FMT = * )
         WRITE( NOUT, FMT = * )
         WRITE( NOUT, FMT = 9987 )
         IF( NOUT.NE.6 .AND. NOUT.NE.0 )
     $      CLOSE ( NOUT )
      END IF
*
      CALL BLACS_EXIT( 0 )
*
 9999 FORMAT( 'ILLEGAL ', A6, ': ', A5, ' = ', I3,
     $      '; It should be at least 1' )
 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', I4, '. It can be at most',
     $      I4 )
 9997 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' )
 9996 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least',
     $      I11 )
 9995 FORMAT( 'TIME     N  NB    P    Q NEP Time   MFLOPS  CHECK' )
 9994 FORMAT( '---- ----- --- ---- ---- -------- -------- ------' )
 9993 FORMAT( A4, 1X, I5, 1X, I3, 1X, I4, 1X, I4, 1X, F8.2, 1X, F8.2,
     $      1X, A6 )
 9992 FORMAT( 'Finished ', I6, ' tests, with the following results:' )
 9991 FORMAT( I5, ' tests completed and passed residual checks.' )
 9990 FORMAT( I5, ' tests completed without checking.' )
 9989 FORMAT( I5, ' tests completed and failed residual checks.' )
 9988 FORMAT( I5, ' tests skipped because of illegal input values.' )
 9987 FORMAT( 'END OF TESTS.' )
 9986 FORMAT( '||H - Q*S*Q^T|| / (||H|| * N * eps) = ', G25.7 )
 9985 FORMAT( '||Q^T*Q - I|| / ( N * eps ) ', G25.7 )
*
      STOP
*
*     End of PZNEPDRIVER
*
      END
SHAR_EOF
fi # end of overwriting check
if test -f 'pznepfchk.f'
then
	echo shar: will not over-write existing file "'pznepfchk.f'"
else
cat << "SHAR_EOF" > 'pznepfchk.f'
      SUBROUTINE PZNEPFCHK( N, A, IA, JA, DESCA, IASEED, Z, IZ, JZ,
     $                      DESCZ, ANORM, FRESID, WORK )
*
*     Mark R. Fahey
*     March 2000
*
*     .. Scalar Arguments ..
      INTEGER            IA, IASEED, IZ, JA, JZ, N
      DOUBLE PRECISION   ANORM, FRESID
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * ), DESCZ( * )
      COMPLEX*16         A( * ), WORK( * ), Z( * )
*     ..
*
*  Purpose
*  =======
*
*  PZNEPFCHK computes the residual
*  || sub(Z)*sub( A )*sub(Z)**T - sub( Ao ) || / (||sub( Ao )||*eps*N),
*  where Ao will be regenerated by the parallel random matrix generator,
*  sub( A ) = A(IA:IA+M-1,JA:JA+N-1), sub( Z ) = Z(IZ:IZ+N-1,JZ:JZ+N-1)
*  and ||.|| stands for the infinity norm.
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclicly distributed array.
*  Such a global array has an associated description vector DESCA.
*  In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
*                                 DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) DESCA( M_ )    The number of rows in the global
*                                 array A.
*  N_A    (global) DESCA( N_ )    The number of columns in the global
*                                 array A.
*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
*                                 the rows of the array.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
*                                 the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Arguments
*  =========
*
*  N       (global input) INTEGER
*          The order of sub( A ) and sub( Z ). N >= 0.
*
*  A       (local input/local output) COMPLEX*16 pointer into the
*          local memory to an array of dimension (LLD_A,LOCc(JA+N-1)).
*          On entry, this array contains the local pieces of the N-by-N
*          distributed matrix sub( A ) to be checked. On exit, this
*          array contains the local pieces of the difference
*          sub(Z)*sub( A )*sub(Z)**T - sub( Ao ).
*
*  IA      (global input) INTEGER
*          A's global row index, which points to the beginning of the
*          submatrix which is to be operated on.
*
*  JA      (global input) INTEGER
*          A's global column index, which points to the beginning of
*          the submatrix which is to be operated on.
*
*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix A.
*
*  IASEED  (global input) INTEGER
*          The seed number to generate the original matrix Ao.
*
*  Z       (local input) COMPLEX*16 pointer into the local memory
*          to an array of dimension (LLD_Z,LOCc(JZ+N-1)). On entry, this
*          array contains the local pieces of the N-by-N distributed
*          matrix sub( Z ).
*
*  IZ      (global input) INTEGER
*          Z's global row index, which points to the beginning of the
*          submatrix which is to be operated on.
*
*  JZ      (global input) INTEGER
*          Z's global column index, which points to the beginning of
*          the submatrix which is to be operated on.
*
*  DESCZ   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix Z.
*
*  ANORM   (global input) DOUBLE PRECISION
*          The Infinity norm of sub( A ).
*
*  FRESID  (global output) DOUBLE PRECISION
*          The maximum (worst) factorizational error.
*
*  WORK    (local workspace) COMPLEX*16 array, dimension (LWORK).
*          LWORK >= MAX( NpA0 * NB_A, MB_A * NqA0 ) where
*
*          IROFFA = MOD( IA-1, MB_A ),
*          ICOFFA = MOD( JA-1, NB_A ),
*          IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ),
*          IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ),
*          NpA0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ),
*          NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ),
*
*          WORK is used to store a block of rows and a block of columns
*          of sub( A ).
*          INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW,
*          MYCOL, NPROW and NPCOL can be determined by calling the
*          subroutine BLACS_GRIDINFO.
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
     $                   LLD_, MB_, M_, NB_, N_, RSRC_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
      COMPLEX*16         ONE, ZERO
      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ), 
     $                     ZERO = ( 0.0D+0, 0.0D+0 ) )
*     ..
*     .. Local Scalars ..
      INTEGER            I, IACOL, IAROW, IB, ICTXT, IIA, IOFFA, IROFF,
     $                   IW, J, JB, JJA, JN, LDA, LDW, MYCOL, MYROW, NP,
     $                   NPCOL, NPROW
      DOUBLE PRECISION   EPS
*     ..
*     .. Local Arrays ..
      INTEGER            DESCW( DLEN_ )
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, DESCSET, INFOG2L, PZGEMM, 
     $                   PZLACPY, PZLASET, PZMATGEN, ZMATADD
*     ..
*     .. External Functions ..
      INTEGER            ICEIL, NUMROC
      DOUBLE PRECISION   PDLAMCH, PZLANGE
      EXTERNAL           ICEIL, NUMROC, PDLAMCH, PZLANGE
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN, MOD
*     ..
*     .. Executable Statements ..
*
      ICTXT = DESCA( CTXT_ )
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
      EPS = PDLAMCH( ICTXT, 'eps' )
*
      CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA,
     $              IAROW, IACOL )
      IROFF = MOD( IA-1, DESCA( MB_ ) )
      NP = NUMROC( N+IROFF, DESCA( MB_ ), MYROW, IAROW, NPROW )
      IF( MYROW.EQ.IAROW )
     $   NP = NP - IROFF
      LDW = MAX( 1, NP )
*
*     First compute H <- H * Z**T
*
      CALL DESCSET( DESCW, DESCA( MB_ ), N, DESCA( MB_ ), DESCA( NB_ ),
     $              IAROW, IACOL, ICTXT, DESCA( MB_ ) )
*
      DO 10 I = IA, IA + N - 1, DESCA( MB_ )
         IB = MIN( IA+N-I, DESCA( MB_ ) )
*
         CALL PZLACPY( 'All', IB, N, A, I, JA, DESCA, WORK, 1, 1,
     $                 DESCW )
         CALL PZGEMM( 'No transpose', 'Cong Tran', IB, N, N, ONE, WORK,
     $                1, 1, DESCW, Z, IZ, JZ, DESCZ, ZERO, A, I, JA,
     $                DESCA )
*
         DESCW( RSRC_ ) = MOD( DESCW( RSRC_ )+1, NPROW )
*
   10 CONTINUE
*
*     Then compute H <- Z * H = Z * H0 * Z**T
*
      CALL DESCSET( DESCW, N, DESCA( NB_ ), DESCA( MB_ ), DESCA( NB_ ),
     $              IAROW, IACOL, ICTXT, LDW )
*
      DO 20 J = JA, JA + N - 1, DESCA( NB_ )
         JB = MIN( JA+N-J, DESCA( NB_ ) )
*
         CALL PZLACPY( 'All', N, JB, A, IA, J, DESCA, WORK, 1, 1,
     $                 DESCW )
         CALL PZGEMM( 'No transpose', 'No transpose', N, JB, N, ONE, Z,
     $                IZ, JZ, DESCZ, WORK, 1, 1, DESCW, ZERO, A, IA, J,
     $                DESCA )
*
         DESCW( CSRC_ ) = MOD( DESCW( CSRC_ )+1, NPCOL )
*
   20 CONTINUE
*
*     Compute H - H0
*
      JN = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA( NB_ ), JA+N-1 )
      LDA = DESCA( LLD_ )
      IOFFA = IIA + ( JJA-1 )*LDA
      IW = 1
      JB = JN - JA + 1
      DESCW( CSRC_ ) = IACOL
*
*     Handle first block of columns separately
*
      IF( MYCOL.EQ.DESCW( CSRC_ ) ) THEN
         CALL PZMATGEN( ICTXT, 'N', 'N', DESCA( M_ ), DESCA( N_ ),
     $                  DESCA( MB_ ), DESCA( NB_ ), WORK, LDW,
     $                  DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED, IIA-1,
     $                  NP, JJA-1, JB, MYROW, MYCOL, NPROW, NPCOL )
         CALL PZLASET( 'Lower', MAX( 0, N-2 ), JB, ZERO, ZERO, WORK,
     $                 MIN( IW+2, N ), 1, DESCW )
         CALL ZMATADD( NP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ), LDA )
         JJA = JJA + JB
         IOFFA = IOFFA + JB*LDA
      END IF
*
      IW = IW + DESCA( MB_ )
      DESCW( CSRC_ ) = MOD( DESCW( CSRC_ )+1, NPCOL )
*
      DO 30 J = JN + 1, JA + N - 1, DESCA( NB_ )
         JB = MIN( JA+N-J, DESCA( NB_ ) )
*
         IF( MYCOL.EQ.DESCW( CSRC_ ) ) THEN
            CALL PZMATGEN( ICTXT, 'N', 'N', DESCA( M_ ), DESCA( N_ ),
     $                     DESCA( MB_ ), DESCA( NB_ ), WORK, LDW,
     $                     DESCA( RSRC_ ), DESCA( CSRC_ ), IASEED,
     $                     IIA-1, NP, JJA-1, JB, MYROW, MYCOL, NPROW,
     $                     NPCOL )
            CALL PZLASET( 'Lower', MAX( 0, N-IW-1 ), JB, ZERO, ZERO,
     $                    WORK, MIN( N, IW+2 ), 1, DESCW )
            CALL ZMATADD( NP, JB, -ONE, WORK, LDW, ONE, A( IOFFA ),
     $                    LDA )
            JJA = JJA + JB
            IOFFA = IOFFA + JB*LDA
         END IF
         IW = IW + DESCA( MB_ )
         DESCW( CSRC_ ) = MOD( DESCW( CSRC_ )+1, NPCOL )
   30 CONTINUE
*
*     Calculate factor residual
*
      FRESID = PZLANGE( 'I', N, N, A, IA, JA, DESCA, WORK ) /
     $         ( N*EPS*ANORM )
*
      RETURN
*
*     End PZNEPFCHK
*
      END
SHAR_EOF
fi # end of overwriting check
if test -f 'pznepinfo.f'
then
	echo shar: will not over-write existing file "'pznepinfo.f'"
else
cat << "SHAR_EOF" > 'pznepinfo.f'
      SUBROUTINE PZNEPINFO( SUMMRY, NOUT, NMAT, NVAL, LDNVAL, NNB,
     $                      NBVAL, LDNBVAL, NGRIDS, PVAL, LDPVAL, QVAL,
     $                      LDQVAL, THRESH, WORK, IAM, NPROCS )
*
*     Mark R. Fahey
*     March 2000
*
*     .. Scalar Arguments ..
      CHARACTER*( * )    SUMMRY
      INTEGER            IAM, LDNBVAL, LDNVAL, LDPVAL, LDQVAL, NGRIDS,
     $                   NMAT, NNB, NOUT, NPROCS
      REAL               THRESH
*     ..
*     .. Array Arguments ..
      INTEGER            NBVAL( LDNBVAL ), NVAL( LDNVAL ),
     $                   PVAL( LDPVAL ), QVAL( LDQVAL ), WORK( * )
*     ..
*
*  Purpose
*  =======
*
*  PZNEPINFO gets needed startup information for PZHSEQR drivers
*  and transmits it to all processes.
*
*  Arguments
*  =========
*
*  SUMMRY   (global output) CHARACTER*(*)
*           Name of output (summary) file (if any). Only defined for
*           process 0.
*
*  NOUT     (global output) INTEGER
*           The unit number for output file. NOUT = 6, ouput to screen,
*           NOUT = 0, output to stderr.  Only defined for process 0.
*
*  NMAT     (global output) INTEGER
*           The number of different values that can be used for N.
*
*  NVAL     (global output) INTEGER array, dimension (LDNVAL)
*           The values of N (the order of the matrix) to run the code
*           with.
*
*  LDNVAL   (global input) INTEGER
*           The maximum number of different values that can be used for
*           N, LDNVAL > =  NMAT.
*
*  NNB      (global output) INTEGER
*           The number of different values that can be used for NB.
*
*  NBVAL    (global output) INTEGER array, dimension (LDNBVAL)
*           The values of NB (blocksize) to run the code with.
*
*  LDNBVAL  (global input) INTEGER
*           The maximum number of different values that can be used for
*           NB, LDNBVAL >= NNB.
*
*  NGRIDS   (global output) INTEGER
*           The number of different values that can be used for P & Q.
*
*  PVAL     (global output) INTEGER array, dimension (LDPVAL)
*           The values of P (number of process rows) to run the code
*           with.
*
*  LDPVAL   (global input) INTEGER
*           The maximum number of different values that can be used for
*           P, LDPVAL >= NGRIDS.
*
*  QVAL     (global output) INTEGER array, dimension (LDQVAL)
*           The values of Q (number of process columns) to run the code
*           with.
*
*  LDQVAL   (global input) INTEGER
*           The maximum number of different values that can be used for
*           Q, LDQVAL >= NGRIDS.
*
*  THRESH   (global output) REAL
*           Indicates what error checks shall be run and printed out:
*           < 0 : Perform no error checking
*           > 0 : report all residuals greater than THRESH
*
*  WORK     (local workspace) INTEGER array of dimension >=
*           MAX( 3, LDNVAL+LDNBVAL+LDPVAL+LDQVAL ), used to pack all
*           input arrays in order to send info in one message.
*
*  IAM      (local input) INTEGER
*           My process number.
*
*  NPROCS   (global input) INTEGER
*           The total number of processes.
*
*
*  Implemented by:  M. Fahey, June 2000
*
* ======================================================================
*
* Note: For packing the information we assumed that the length in bytes
* ===== of an integer is equal to the length in bytes of a complex single
*       precision.
*
* ======================================================================
*
*     .. Parameters ..
      INTEGER            NIN
      PARAMETER          ( NIN = 11 )
*     ..
*     .. Local Scalars ..
      CHARACTER*79       USRINFO
      INTEGER            I, ICTXT
      DOUBLE PRECISION   EPS
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_ABORT, BLACS_GET, BLACS_GRIDEXIT,
     $                   BLACS_GRIDINIT, BLACS_SETUP, ICOPY, IGEBR2D,
     $                   IGEBS2D, SGEBR2D, SGEBS2D
*     ..
*     .. External Functions ..
      DOUBLE PRECISION   PDLAMCH
      EXTERNAL           PDLAMCH
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN
*     ..
*     .. Executable Statements ..
*
*     Process 0 reads the input data, broadcasts to other processes and
*     writes needed information to NOUT
*
      IF( IAM.EQ.0 ) THEN
*
*        Open file and skip data file header
*
         OPEN( NIN, FILE = 'NEP.dat', STATUS = 'OLD' )
         READ( NIN, FMT = * )SUMMRY
         SUMMRY = ' '
*
*        Read in user-supplied info about machine type, compiler, etc.
*
         READ( NIN, FMT = 9999 )USRINFO
*
*        Read name and unit number for summary output file
*
         READ( NIN, FMT = * )SUMMRY
         READ( NIN, FMT = * )NOUT
         IF( NOUT.NE.0 .AND. NOUT.NE.6 )
     $      OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' )
*
*        Read and check the parameter values for the tests.
*
*        Get number of matrices and their dimensions
*
         READ( NIN, FMT = * )NMAT
         IF( NMAT.LT.1 .OR. NMAT.GT.LDNVAL ) THEN
            WRITE( NOUT, FMT = 9994 )'N', LDNVAL
            GO TO 30
         END IF
         READ( NIN, FMT = * )( NVAL( I ), I = 1, NMAT )
*
*        Get values of NB
*
         READ( NIN, FMT = * )NNB
         IF( NNB.GT.LDNBVAL ) THEN
            WRITE( NOUT, FMT = 9994 )'NB', LDNBVAL
            GO TO 30
         END IF
         READ( NIN, FMT = * )( NBVAL( I ), I = 1, NNB )
*
         DO 10 I = 1, NNB
            IF( NBVAL( I ).LT.6 ) THEN
               WRITE( NOUT, FMT = 9992 )NBVAL( I )
               GO TO 30
            END IF
   10    CONTINUE
*
*        Get number of grids
*
         READ( NIN, FMT = * )NGRIDS
         IF( NGRIDS.LT.1 .OR. NGRIDS.GT.LDPVAL ) THEN
            WRITE( NOUT, FMT = 9994 )'Grids', LDPVAL
            GO TO 30
         ELSE IF( NGRIDS.GT.LDQVAL ) THEN
            WRITE( NOUT, FMT = 9994 )'Grids', LDQVAL
            GO TO 30
         END IF
*
*        Get values of P and Q
*
         READ( NIN, FMT = * )( PVAL( I ), I = 1, NGRIDS )
         READ( NIN, FMT = * )( QVAL( I ), I = 1, NGRIDS )
*
*        Get level of checking
*
         READ( NIN, FMT = * )THRESH
*
*        Close input file
*
         CLOSE ( NIN )
*
*        For pvm only: if virtual machine not set up, allocate it and
*        spawn the correct number of processes.
*
         IF( NPROCS.LT.1 ) THEN
            NPROCS = 0
            DO 20 I = 1, NGRIDS
               NPROCS = MAX( NPROCS, PVAL( I )*QVAL( I ) )
   20       CONTINUE
            CALL BLACS_SETUP( IAM, NPROCS )
         END IF
*
*        Temporarily define blacs grid to include all processes so
*        information can be broadcast to all processes
*
         CALL BLACS_GET( -1, 0, ICTXT )
         CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS )
*
*        Compute machine epsilon
*
         EPS = PDLAMCH( ICTXT, 'eps' )
*
*        Pack information arrays and broadcast
*
         CALL SGEBS2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1 )
*
         WORK( 1 ) = NMAT
         WORK( 2 ) = NNB
         WORK( 3 ) = NGRIDS
         CALL IGEBS2D( ICTXT, 'All', ' ', 3, 1, WORK, 3 )
*
         I = 1
         CALL ICOPY( NMAT, NVAL, 1, WORK( I ), 1 )
         I = I + NMAT
         CALL ICOPY( NNB, NBVAL, 1, WORK( I ), 1 )
         I = I + NNB
         CALL ICOPY( NGRIDS, PVAL, 1, WORK( I ), 1 )
         I = I + NGRIDS
         CALL ICOPY( NGRIDS, QVAL, 1, WORK( I ), 1 )
         I = I + NGRIDS - 1
         CALL IGEBS2D( ICTXT, 'All', ' ', I, 1, WORK, I )
*
*        regurgitate input
*
         WRITE( NOUT, FMT = 9999 )
     $      'ScaLAPACK QSQ^H by Schur Decomposition.'
         WRITE( NOUT, FMT = 9999 )USRINFO
         WRITE( NOUT, FMT = * )
         WRITE( NOUT, FMT = 9999 )'Tests of the parallel ' //
     $      'complex double precision Schur decomposition.'
         WRITE( NOUT, FMT = 9999 )'The following scaled residual ' //
     $      'checks will be computed:'
         WRITE( NOUT, FMT = 9999 )
     $      ' Residual               = ||H-QSQ^H|| / ' //
     $      '(||H|| * eps * N )'
         WRITE( NOUT, FMT = 9999 )
     $      ' Orthogonality residual = ||I - Q^HQ|| / ' // '( eps * N )'
         WRITE( NOUT, FMT = 9999 )'The matrix A is randomly ' //
     $      'generated for each test.'
         WRITE( NOUT, FMT = * )
         WRITE( NOUT, FMT = 9999 )'An explanation of the input/output '
     $       // 'parameters follows:'
         WRITE( NOUT, FMT = 9999 )
     $      'TIME    : Indicates whether WALL or ' //
     $      'CPU time was used.'
*
         WRITE( NOUT, FMT = 9999 )
     $      'N       : The number of columns in the ' // 'matrix A.'
         WRITE( NOUT, FMT = 9999 )
     $      'NB      : The size of the square blocks the' //
     $      ' matrix A is split into.'
         WRITE( NOUT, FMT = 9999 )
     $      'P       : The number of process rows.'
         WRITE( NOUT, FMT = 9999 )
     $      'Q       : The number of process columns.'
         WRITE( NOUT, FMT = 9999 )
     $      'THRESH  : If a residual value is less than' //
     $      ' THRESH, CHECK is flagged as PASSED'
         WRITE( NOUT, FMT = 9999 )
     $      'NEP time : Time in seconds to decompose the ' // ' matrix'
         WRITE( NOUT, FMT = 9999 )'MFLOPS  : Rate of execution '
         WRITE( NOUT, FMT = * )
         WRITE( NOUT, FMT = 9999 )
     $      'The following parameter values will be used:'
         WRITE( NOUT, FMT = 9996 )'N    ',
     $      ( NVAL( I ), I = 1, MIN( NMAT, 10 ) )
         IF( NMAT.GT.10 )
     $      WRITE( NOUT, FMT = 9997 )( NVAL( I ), I = 11, NMAT )
         WRITE( NOUT, FMT = 9996 )'NB   ',
     $      ( NBVAL( I ), I = 1, MIN( NNB, 10 ) )
         IF( NNB.GT.10 )
     $      WRITE( NOUT, FMT = 9997 )( NBVAL( I ), I = 11, NNB )
         WRITE( NOUT, FMT = 9996 )'P    ',
     $      ( PVAL( I ), I = 1, MIN( NGRIDS, 10 ) )
         IF( NGRIDS.GT.10 )
     $      WRITE( NOUT, FMT = 9997 )( PVAL( I ), I = 11, NGRIDS )
         WRITE( NOUT, FMT = 9996 )'Q    ',
     $      ( QVAL( I ), I = 1, MIN( NGRIDS, 10 ) )
         IF( NGRIDS.GT.10 )
     $      WRITE( NOUT, FMT = 9997 )( QVAL( I ), I = 11, NGRIDS )
         WRITE( NOUT, FMT = * )
         WRITE( NOUT, FMT = 9995 )EPS
         WRITE( NOUT, FMT = 9998 )THRESH
*
      ELSE
*
*        If in pvm, must participate setting up virtual machine
*
         IF( NPROCS.LT.1 )
     $      CALL BLACS_SETUP( IAM, NPROCS )
*
*        Temporarily define blacs grid to include all processes so
*        information can be broadcast to all processes
*
         CALL BLACS_GET( -1, 0, ICTXT )
         CALL BLACS_GRIDINIT( ICTXT, 'Row-major', 1, NPROCS )
*
*        Compute machine epsilon
*
         EPS = PDLAMCH( ICTXT, 'eps' )
*
         CALL SGEBR2D( ICTXT, 'All', ' ', 1, 1, THRESH, 1, 0, 0 )
         CALL IGEBR2D( ICTXT, 'All', ' ', 3, 1, WORK, 3, 0, 0 )
         NMAT = WORK( 1 )
         NNB = WORK( 2 )
         NGRIDS = WORK( 3 )
*
         I = NMAT + NNB + 2*NGRIDS
         CALL IGEBR2D( ICTXT, 'All', ' ', I, 1, WORK, I, 0, 0 )
         I = 1
         CALL ICOPY( NMAT, WORK( I ), 1, NVAL, 1 )
         I = I + NMAT
         CALL ICOPY( NNB, WORK( I ), 1, NBVAL, 1 )
         I = I + NNB
         CALL ICOPY( NGRIDS, WORK( I ), 1, PVAL, 1 )
         I = I + NGRIDS
         CALL ICOPY( NGRIDS, WORK( I ), 1, QVAL, 1 )
*
      END IF
*
      CALL BLACS_GRIDEXIT( ICTXT )
*
      RETURN
*
   30 CONTINUE
      WRITE( NOUT, FMT = 9993 )
      CLOSE ( NIN )
      IF( NOUT.NE.6 .AND. NOUT.NE.0 )
     $   CLOSE ( NOUT )
      CALL BLACS_ABORT( ICTXT, 1 )
*
      STOP
*
 9999 FORMAT( A )
 9998 FORMAT( 'Routines pass computational tests if scaled residual ',
     $      'is less than ', G12.5 )
 9997 FORMAT( '                 ', 10I6 )
 9996 FORMAT( 2X, A5, '   :        ', 10I6 )
 9995 FORMAT( 'Relative machine precision (eps) is taken to be ',
     $      E18.6 )
 9994 FORMAT( ' Number of values of ', 5A,
     $      ' is less than 1 or greater ', 'than ', I2 )
 9993 FORMAT( ' Illegal input in file ', 40A, '.  Aborting run.' )
 9992 FORMAT( ' Blocking size too small at ', I2, ' must be >=6.' )
*
*     End of PZNEPINFO
*
      END
SHAR_EOF
fi # end of overwriting check
cd ..
#       End of shell archive
exit 0