ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
psgesvd.f
Go to the documentation of this file.
00001 
00002       SUBROUTINE PSGESVD(JOBU,JOBVT,M,N,A,IA,JA,DESCA,S,U,IU,JU,DESCU,
00003      +                   VT,IVT,JVT,DESCVT,WORK,LWORK,INFO)
00004 *
00005 *  -- ScaLAPACK routine (version 1.7) --
00006 *     Univ. of Tennessee, Oak Ridge National Laboratory
00007 *     and Univ. of California Berkeley.
00008 *     Jan 2006
00009 
00010 *
00011 *     .. Scalar Arguments ..
00012       CHARACTER JOBU,JOBVT
00013       INTEGER IA,INFO,IU,IVT,JA,JU,JVT,LWORK,M,N
00014 *     ..
00015 *     .. Array Arguments ..
00016       INTEGER DESCA(*),DESCU(*),DESCVT(*)
00017       REAL A(*),U(*),VT(*),WORK(*)
00018       REAL S(*)
00019 *     ..
00020 *
00021 *  Purpose
00022 *  =======
00023 *
00024 *  PSGESVD computes the singular value decomposition (SVD) of an
00025 *  M-by-N matrix A, optionally computing the left and/or right
00026 *  singular vectors. The SVD is written as
00027 *
00028 *       A = U * SIGMA * transpose(V)
00029 *
00030 *  where SIGMA is an M-by-N matrix which is zero except for its
00031 *  min(M,N) diagonal elements, U is an M-by-M orthogonal matrix, and
00032 *  V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
00033 *  are the singular values of A and the columns of U and V are the
00034 *  corresponding right and left singular vectors, respectively. The
00035 *  singular values are returned in array S in decreasing order and
00036 *  only the first min(M,N) columns of U and rows of VT = V**T are
00037 *  computed.
00038 *
00039 *  Notes
00040 *  =====
00041 *  Each global data object is described by an associated description
00042 *  vector. This vector stores the information required to establish
00043 *  the mapping between an object element and its corresponding process
00044 *  and memory location.
00045 *
00046 *  Let A be a generic term for any 2D block cyclicly distributed array.
00047 *  Such a global array has an associated description vector DESCA.
00048 *  In the following comments, the character _ should be read as
00049 *  "of the global array".
00050 *
00051 *  NOTATION        STORED IN      EXPLANATION
00052 *  --------------- -------------- --------------------------------------
00053 *  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
00054 *                                 DTYPE_A = 1.
00055 *  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
00056 *                                 the BLACS process grid A is distribu-
00057 *                                 ted over. The context itself is glo-
00058 *                                 bal, but the handle (the integer
00059 *                                 value) may vary.
00060 *  M_A    (global) DESCA( M_ )    The number of rows in the global
00061 *                                 array A.
00062 *  N_A    (global) DESCA( N_ )    The number of columns in the global
00063 *                                 array A.
00064 *  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
00065 *                                 the rows of the array.
00066 *  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
00067 *                                 the columns of the array.
00068 *  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
00069 *                                 row of the array A is distributed.
00070 *  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
00071 *                                 first column of the array A is
00072 *                                 distributed.
00073 *  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
00074 *                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
00075 *
00076 *  Let K be the number of rows or columns of a distributed matrix, and
00077 *  assume that its process grid has dimension r x c. LOCr( K ) denotes
00078 *  the number of elements of K that a process would receive if K were
00079 *  distributed over the r processes of its process column. Similarly,
00080 *  LOCc( K ) denotes the number of elements of K that a process would
00081 *  receive if K were distributed over the c processes of its process
00082 *  row. The values of LOCr() and LOCc() may be determined via a call
00083 *  to the ScaLAPACK tool function, NUMROC:
00084 *          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
00085 *          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
00086 *  An upper bound for these quantities may be computed by:
00087 *          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
00088 *          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
00089 *
00090 *  Arguments
00091 *  =========
00092 *
00093 *          MP = number of local rows in A and U
00094 *          NQ = number of local columns in A and VT
00095 *          SIZE = min( M, N )
00096 *          SIZEQ = number of local columns in U
00097 *          SIZEP = number of local rows in VT
00098 *
00099 *  JOBU    (global input) CHARACTER*1
00100 *          Specifies options for computing U:
00101 *          = 'V':  the first SIZE columns of U (the left singular
00102 *                  vectors) are returned in the array U;
00103 *          = 'N':  no columns of U (no left singular vectors) are
00104 *                  computed.
00105 *
00106 *  JOBVT   (global input) CHARACTER*1
00107 *          Specifies options for computing V**T:
00108 *          = 'V':  the first SIZE rows of V**T (the right singular
00109 *                  vectors) are returned in the array VT;
00110 *          = 'N':  no rows of V**T (no right singular vectors) are
00111 *                  computed.
00112 *
00113 *  M       (global input) INTEGER
00114 *          The number of rows of the input matrix A.  M >= 0.
00115 *
00116 *  N       (global input) INTEGER
00117 *          The number of columns of the input matrix A.  N >= 0.
00118 *
00119 *  A       (local input/workspace) block cyclic REAL
00120 *          array,
00121 *          global dimension (M, N), local dimension (MP, NQ)
00122 *          On exit, the contents of A are destroyed.
00123 *
00124 *  IA      (global input) INTEGER
00125 *          The row index in the global array A indicating the first
00126 *          row of sub( A ).
00127 *
00128 *  JA      (global input) INTEGER
00129 *          The column index in the global array A indicating the
00130 *          first column of sub( A ).
00131 *
00132 *  DESCA   (global input) INTEGER array of dimension DLEN_
00133 *          The array descriptor for the distributed matrix A.
00134 *
00135 *  S       (global output) REAL               array, dimension SIZE
00136 *          The singular values of A, sorted so that S(i) >= S(i+1).
00137 *
00138 *  U       (local output) REAL               array, local dimension
00139 *          (MP, SIZEQ), global dimension (M, SIZE)
00140 *          if JOBU = 'V', U contains the first min(m,n) columns of U
00141 *          if JOBU = 'N', U is not referenced.
00142 *
00143 *  IU      (global input) INTEGER
00144 *          The row index in the global array U indicating the first
00145 *          row of sub( U ).
00146 *
00147 *  JU      (global input) INTEGER
00148 *          The column index in the global array U indicating the
00149 *          first column of sub( U ).
00150 *
00151 *  DESCU   (global input) INTEGER array of dimension DLEN_
00152 *          The array descriptor for the distributed matrix U.
00153 *
00154 *  VT      (local output) REAL               array, local dimension
00155 *          (SIZEP, NQ), global dimension (SIZE, N).
00156 *          If JOBVT = 'V', VT contains the first SIZE rows of
00157 *          V**T. If JOBVT = 'N', VT is not referenced.
00158 *
00159 *  IVT     (global input) INTEGER
00160 *          The row index in the global array VT indicating the first
00161 *          row of sub( VT ).
00162 *
00163 *  JVT     (global input) INTEGER
00164 *          The column index in the global array VT indicating the
00165 *          first column of sub( VT ).
00166 *
00167 *  DESCVT   (global input) INTEGER array of dimension DLEN_
00168 *          The array descriptor for the distributed matrix VT.
00169 *
00170 *  WORK    (local workspace/output) REAL               array, dimension
00171 *          (LWORK)
00172 *          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
00173 *
00174 *  LWORK   (local input) INTEGER
00175 *          The dimension of the array WORK.
00176 *
00177 *          LWORK >= 1 + 6*SIZEB + MAX(WATOBD, WBDTOSVD),
00178 *
00179 *          where SIZEB = MAX(M,N), and WATOBD and WBDTOSVD refer,
00180 *          respectively, to the workspace required to bidiagonalize
00181 *          the matrix A and to go from the bidiagonal matrix to the
00182 *          singular value decomposition U*S*VT.
00183 *
00184 *          For WATOBD, the following holds:
00185 *
00186 *          WATOBD = MAX(MAX(WPSLANGE,WPSGEBRD),
00187 *                       MAX(WPSLARED2D,WP(pre)LARED1D)),
00188 *
00189 *          where WPSLANGE, WPSLARED1D, WPSLARED2D, WPSGEBRD are the
00190 *          workspaces required respectively for the subprograms
00191 *          PSLANGE, PSLARED1D, PSLARED2D, PSGEBRD. Using the
00192 *          standard notation
00193 *
00194 *          MP = NUMROC( M, MB, MYROW, DESCA( CTXT_ ), NPROW),
00195 *          NQ = NUMROC( N, NB, MYCOL, DESCA( LLD_ ), NPCOL),
00196 *
00197 *          the workspaces required for the above subprograms are
00198 *
00199 *          WPSLANGE = MP,
00200 *          WPSLARED1D = NQ0,
00201 *          WPSLARED2D = MP0,
00202 *          WPSGEBRD = NB*(MP + NQ + 1) + NQ,
00203 *
00204 *          where NQ0 and MP0 refer, respectively, to the values obtained
00205 *          at MYCOL = 0 and MYROW = 0. In general, the upper limit for
00206 *          the workspace is given by a workspace required on
00207 *          processor (0,0):
00208 *
00209 *          WATOBD <= NB*(MP0 + NQ0 + 1) + NQ0.
00210 *
00211 *          In case of a homogeneous process grid this upper limit can
00212 *          be used as an estimate of the minimum workspace for every
00213 *          processor.
00214 *
00215 *          For WBDTOSVD, the following holds:
00216 *
00217 *          WBDTOSVD = SIZE*(WANTU*NRU + WANTVT*NCVT) +
00218 *                     MAX(WSBDSQR,
00219 *                         MAX(WANTU*WPSORMBRQLN, WANTVT*WPSORMBRPRT)),
00220 *
00221 *          where
00222 *
00223 *                          1, if left(right) singular vectors are wanted
00224 *          WANTU(WANTVT) =
00225 *                          0, otherwise
00226 *
00227 *          and WSBDSQR, WPSORMBRQLN and WPSORMBRPRT refer respectively
00228 *          to the workspace required for the subprograms SBDSQR,
00229 *          PSORMBR(QLN), and PSORMBR(PRT), where QLN and PRT are the
00230 *          values of the arguments VECT, SIDE, and TRANS in the call
00231 *          to PSORMBR. NRU is equal to the local number of rows of
00232 *          the matrix U when distributed 1-dimensional "column" of
00233 *          processes. Analogously, NCVT is equal to the local number
00234 *          of columns of the matrix VT when distributed across
00235 *          1-dimensional "row" of processes. Calling the LAPACK
00236 *          procedure SBDSQR requires
00237 *
00238 *          WSBDSQR = MAX(1, 4*SIZE )
00239 *
00240 *          on every processor. Finally,
00241 *
00242 *          WPSORMBRQLN = MAX( (NB*(NB-1))/2, (SIZEQ+MP)*NB)+NB*NB,
00243 *          WPSORMBRPRT = MAX( (MB*(MB-1))/2, (SIZEP+NQ)*MB )+MB*MB,
00244 *
00245 *          If LWORK = -1, then LWORK is global input and a workspace
00246 *          query is assumed; the routine only calculates the minimum
00247 *          size for the work array. The required workspace is returned
00248 *          as the first element of WORK and no error message is issued
00249 *          by PXERBLA.
00250 *
00251 *
00252 *  INFO    (output) INTEGER
00253 *          = 0:  successful exit
00254 *          < 0:  if INFO = -i, the i-th argument had an illegal value
00255 
00256 *          > 0:  if SBDSQR did not converge
00257 *                If INFO = MIN(M,N) + 1, then PSGESVD has detected
00258 *                heterogeneity by finding that eigenvalues were not
00259 *                identical across the process grid. In this case, the
00260 *                accuracy of the results from PSGESVD cannot be
00261 *                guaranteed.
00262 *
00263 *  =====================================================================
00264 *
00265 *  The results of PSGEBRD, and therefore PSGESVD, may vary slightly
00266 *  from run to run with the same input data. If repeatability is an
00267 *  issue, call BLACS_SET with the appropriate option after defining
00268 *  the process grid.
00269 *
00270 *  Alignment requirements
00271 *  ======================
00272 *
00273 *  The routine PSGESVD inherits the same alignement requirement as
00274 *  the routine PSGEBRD, namely:
00275 *
00276 *  The distributed submatrix sub( A ) must verify some alignment proper-
00277 *  ties, namely the following expressions should be true:
00278 *  ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA )
00279 *          where NB = MB_A = NB_A,
00280 *          IROFFA = MOD( IA-1, NB ), ICOFFA = MOD( JA-1, NB ),
00281 *
00282 *  =====================================================================
00283 *
00284 *
00285 *     .. Parameters ..
00286       INTEGER BLOCK_CYCLIC_2D,DLEN_,DTYPE_,CTXT_,M_,N_,MB_,NB_,RSRC_,
00287      +        CSRC_,LLD_,ITHVAL
00288       PARAMETER (BLOCK_CYCLIC_2D=1,DLEN_=9,DTYPE_=1,CTXT_=2,M_=3,N_=4,
00289      +          MB_=5,NB_=6,RSRC_=7,CSRC_=8,LLD_=9,ITHVAL=10)
00290       REAL ZERO,ONE
00291       PARAMETER (ZERO= (0.0E+0),ONE= (1.0E+0))
00292 *     ..
00293 *     .. Local Scalars ..
00294       CHARACTER UPLO
00295       INTEGER CONTEXTC,CONTEXTR,I,INDD,INDD2,INDE,INDE2,INDTAUP,INDTAUQ,
00296      +        INDU,INDV,INDWORK,IOFFD,IOFFE,ISCALE,J,K,LDU,LDVT,LLWORK,
00297      +        LWMIN,MAXIM,MB,MP,MYPCOL,MYPCOLC,MYPCOLR,MYPROW,MYPROWC,
00298      +        MYPROWR,NB,NCVT,NPCOL,NPCOLC,NPCOLR,NPROCS,NPROW,NPROWC,
00299      +        NPROWR,NQ,NRU,SIZE,SIZEB,SIZEP,SIZEPOS,SIZEQ,WANTU,WANTVT,
00300      +        WATOBD,WBDTOSVD,WSBDSQR,WPSGEBRD,WPSLANGE,WPSORMBRPRT,
00301      +        WPSORMBRQLN
00302       REAL ANRM,BIGNUM,EPS,RMAX,RMIN,SAFMIN,SIGMA,SMLNUM
00303 *     ..
00304 *     .. Local Arrays ..
00305       INTEGER DESCTU(DLEN_),DESCTVT(DLEN_),IDUM1(3),IDUM2(3)
00306       REAL C(1,1)
00307 *     ..
00308 *     .. External Functions ..
00309       LOGICAL LSAME
00310       INTEGER NUMROC
00311       REAL PSLAMCH,PSLANGE
00312       EXTERNAL LSAME,NUMROC,PDLAMCH,PZLANGE
00313 *     ..
00314 *     .. External Subroutines ..
00315       EXTERNAL BLACS_GET,BLACS_GRIDEXIT,BLACS_GRIDINFO,BLACS_GRIDINIT,
00316      +         CHK1MAT,SBDSQR,DESCINIT,SGAMN2D,SGAMX2D,SSCAL,IGAMX2D,
00317      +         IGEBR2D,IGEBS2D,PCHK1MAT,PSGEBRD,PSGEMR2D,PSLARED1D,
00318      +         PSLARED2D,PSLASCL,PSLASET,PSORMBR,PXERBLA
00319 *     ..
00320 *     .. Intrinsic Functions ..
00321       INTRINSIC MAX,MIN,SQRT,REAL
00322 *     ..
00323 *     .. Executable Statements ..
00324 *     This is just to keep ftnchek happy
00325       IF (BLOCK_CYCLIC_2D*DTYPE_*LLD_*MB_*M_*NB_*N_.LT.0) RETURN
00326 *
00327       CALL BLACS_GRIDINFO(DESCA(CTXT_),NPROW,NPCOL,MYPROW,MYPCOL)
00328       ISCALE = 0
00329       INFO = 0
00330 *
00331       IF (NPROW.EQ.-1) THEN
00332           INFO = - (800+CTXT_)
00333       ELSE
00334 *
00335           SIZE = MIN(M,N)
00336           SIZEB = MAX(M,N)
00337           NPROCS = NPROW*NPCOL
00338           IF (M.GE.N) THEN
00339               IOFFD = JA - 1
00340               IOFFE = IA - 1
00341               SIZEPOS = 1
00342           ELSE
00343               IOFFD = IA - 1
00344               IOFFE = JA - 1
00345               SIZEPOS = 3
00346           END IF
00347 *
00348           IF (LSAME(JOBU,'V')) THEN
00349               WANTU = 1
00350           ELSE
00351               WANTU = 0
00352           END IF
00353           IF (LSAME(JOBVT,'V')) THEN
00354               WANTVT = 1
00355           ELSE
00356               WANTVT = 0
00357           END IF
00358 *
00359           CALL CHK1MAT(M,3,N,4,IA,JA,DESCA,8,INFO)
00360           IF (WANTU.EQ.1) THEN
00361               CALL CHK1MAT(M,3,SIZE,SIZEPOS,IU,JU,DESCU,13,INFO)
00362           END IF
00363           IF (WANTVT.EQ.1) THEN
00364               CALL CHK1MAT(SIZE,SIZEPOS,N,4,IVT,JVT,DESCVT,17,INFO)
00365           END IF
00366           CALL IGAMX2D(DESCA(CTXT_),'A',' ',1,1,INFO,1,1,1,-1,-1,0)
00367 *
00368           IF (INFO.EQ.0) THEN
00369 *
00370 *           Set up pointers into the WORK array.
00371 *
00372               INDD = 2
00373               INDE = INDD + SIZEB + IOFFD
00374               INDD2 = INDE + SIZEB + IOFFE
00375               INDE2 = INDD2 + SIZEB + IOFFD
00376 *
00377               INDTAUQ = INDE2 + SIZEB + IOFFE
00378               INDTAUP = INDTAUQ + SIZEB + JA - 1
00379               INDWORK = INDTAUP + SIZEB + IA - 1
00380               LLWORK = LWORK - INDWORK + 1
00381 *
00382 *           Initialize contexts for "column" and "row" process matrices.
00383 *
00384               CALL BLACS_GET(DESCA(CTXT_),10,CONTEXTC)
00385               CALL BLACS_GRIDINIT(CONTEXTC,'R',NPROCS,1)
00386               CALL BLACS_GRIDINFO(CONTEXTC,NPROWC,NPCOLC,MYPROWC,
00387      +                            MYPCOLC)
00388               CALL BLACS_GET(DESCA(CTXT_),10,CONTEXTR)
00389               CALL BLACS_GRIDINIT(CONTEXTR,'R',1,NPROCS)
00390               CALL BLACS_GRIDINFO(CONTEXTR,NPROWR,NPCOLR,MYPROWR,
00391      +                            MYPCOLR)
00392 *
00393 *           Set local dimensions of matrices (this is for MB=NB=1).
00394 *
00395               NRU = NUMROC(M,1,MYPROWC,0,NPROCS)
00396               NCVT = NUMROC(N,1,MYPCOLR,0,NPROCS)
00397               NB = DESCA(NB_)
00398               MB = DESCA(MB_)
00399               MP = NUMROC(M,MB,MYPROW,DESCA(RSRC_),NPROW)
00400               NQ = NUMROC(N,NB,MYPCOL,DESCA(CSRC_),NPCOL)
00401               IF (WANTVT.EQ.1) THEN
00402                   SIZEP = NUMROC(SIZE,DESCVT(MB_),MYPROW,DESCVT(RSRC_),
00403      +                    NPROW)
00404               ELSE
00405                   SIZEP = 0
00406               END IF
00407               IF (WANTU.EQ.1) THEN
00408                   SIZEQ = NUMROC(SIZE,DESCU(NB_),MYPCOL,DESCU(CSRC_),
00409      +                    NPCOL)
00410               ELSE
00411                   SIZEQ = 0
00412               END IF
00413 *
00414 *           Transmit MAX(NQ0, MP0).
00415 *
00416               IF (MYPROW.EQ.0 .AND. MYPCOL.EQ.0) THEN
00417                   MAXIM = MAX(NQ,MP)
00418                   CALL IGEBS2D(DESCA(CTXT_),'All',' ',1,1,MAXIM,1)
00419               ELSE
00420                   CALL IGEBR2D(DESCA(CTXT_),'All',' ',1,1,MAXIM,1,0,0)
00421               END IF
00422 *
00423               WPSLANGE = MP
00424               WPSGEBRD = NB* (MP+NQ+1) + NQ
00425               WATOBD = MAX(MAX(WPSLANGE,WPSGEBRD),MAXIM)
00426 *
00427               WSBDSQR = MAX(1,4*SIZE)
00428               WPSORMBRQLN = MAX((NB* (NB-1))/2, (SIZEQ+MP)*NB) + NB*NB
00429               WPSORMBRPRT = MAX((MB* (MB-1))/2, (SIZEP+NQ)*MB) + MB*MB
00430               WBDTOSVD = SIZE* (WANTU*NRU+WANTVT*NCVT) +
00431      +                   MAX(WSBDSQR,MAX(WANTU*WPSORMBRQLN,
00432      +                   WANTVT*WPSORMBRPRT))
00433 *
00434 *           Finally, calculate required workspace.
00435 *
00436               LWMIN = 1 + 6*SIZEB + MAX(WATOBD,WBDTOSVD)
00437               WORK(1) = REAL(LWMIN)
00438 *
00439               IF (WANTU.NE.1 .AND. .NOT. (LSAME(JOBU,'N'))) THEN
00440                   INFO = -1
00441               ELSE IF (WANTVT.NE.1 .AND. .NOT. (LSAME(JOBVT,'N'))) THEN
00442                   INFO = -2
00443               ELSE IF (LWORK.LT.LWMIN .AND. LWORK.NE.-1) THEN
00444                   INFO = -19
00445               END IF
00446 *
00447           END IF
00448 *
00449           IDUM1(1) = WANTU
00450           IDUM1(2) = WANTVT
00451           IF (LWORK.EQ.-1) THEN
00452               IDUM1(3) = -1
00453           ELSE
00454               IDUM1(3) = 1
00455           END IF
00456           IDUM2(1) = 1
00457           IDUM2(2) = 2
00458           IDUM2(3) = 19
00459           CALL PCHK1MAT(M,3,N,4,IA,JA,DESCA,8,3,IDUM1,IDUM2,INFO)
00460           IF (INFO.EQ.0) THEN
00461               IF (WANTU.EQ.1) THEN
00462                   CALL PCHK1MAT(M,3,SIZE,4,IU,JU,DESCU,13,0,IDUM1,IDUM2,
00463      +                          INFO)
00464               END IF
00465               IF (WANTVT.EQ.1) THEN
00466                   CALL PCHK1MAT(SIZE,3,N,4,IVT,JVT,DESCVT,17,0,IDUM1,
00467      +                          IDUM2,INFO)
00468               END IF
00469           END IF
00470 *
00471       END IF
00472 *
00473       IF (INFO.NE.0) THEN
00474           CALL PXERBLA(DESCA(CTXT_),'PSGESVD',-INFO)
00475           RETURN
00476       ELSE IF (LWORK.EQ.-1) THEN
00477           GO TO 40
00478       END IF
00479 *
00480 *     Quick return if possible.
00481 *
00482       IF (M.LE.0 .OR. N.LE.0) GO TO 40
00483 *
00484 *     Get machine constants.
00485 *
00486       SAFMIN = PSLAMCH(DESCA(CTXT_),'Safe minimum')
00487       EPS = PSLAMCH(DESCA(CTXT_),'Precision')
00488       SMLNUM = SAFMIN/EPS
00489       BIGNUM = ONE/SMLNUM
00490       RMIN = SQRT(SMLNUM)
00491       RMAX = MIN(SQRT(BIGNUM),ONE/SQRT(SQRT(SAFMIN)))
00492 *
00493 *     Scale matrix to allowable range, if necessary.
00494 *
00495       ANRM = PSLANGE('1',M,N,A,IA,JA,DESCA,WORK(INDWORK))
00496       IF (ANRM.GT.ZERO .AND. ANRM.LT.RMIN) THEN
00497           ISCALE = 1
00498           SIGMA = RMIN/ANRM
00499       ELSE IF (ANRM.GT.RMAX) THEN
00500           ISCALE = 1
00501           SIGMA = RMAX/ANRM
00502       END IF
00503 *
00504       IF (ISCALE.EQ.1) THEN
00505           CALL PSLASCL('G',ONE,SIGMA,M,N,A,IA,JA,DESCA,INFO)
00506       END IF
00507 *
00508       CALL PSGEBRD(M,N,A,IA,JA,DESCA,WORK(INDD),WORK(INDE),
00509      +             WORK(INDTAUQ),WORK(INDTAUP),WORK(INDWORK),LLWORK,
00510      +             INFO)
00511 *
00512 *     Copy D and E to all processes.
00513 *     Array D is in local array of dimension:
00514 *     LOCc(JA+MIN(M,N)-1) if M >= N; LOCr(IA+MIN(M,N)-1) otherwise.
00515 *     Array E is in local array of dimension
00516 *     LOCr(IA+MIN(M,N)-1) if M >= N; LOCc(JA+MIN(M,N)-2) otherwise.
00517 *
00518       IF (M.GE.N) THEN
00519 *        Distribute D
00520           CALL PSLARED1D(N+IOFFD,IA,JA,DESCA,WORK(INDD),WORK(INDD2),
00521      +                   WORK(INDWORK),LLWORK)
00522 *        Distribute E
00523           CALL PSLARED2D(M+IOFFE,IA,JA,DESCA,WORK(INDE),WORK(INDE2),
00524      +                   WORK(INDWORK),LLWORK)
00525       ELSE
00526 *        Distribute D
00527           CALL PSLARED2D(M+IOFFD,IA,JA,DESCA,WORK(INDD),WORK(INDD2),
00528      +                   WORK(INDWORK),LLWORK)
00529 *        Distribute E
00530           CALL PSLARED1D(N+IOFFE,IA,JA,DESCA,WORK(INDE),WORK(INDE2),
00531      +                   WORK(INDWORK),LLWORK)
00532       END IF
00533 *
00534 *     Prepare for calling PSBDSQR.
00535 *
00536       IF (M.GE.N) THEN
00537           UPLO = 'U'
00538       ELSE
00539           UPLO = 'L'
00540       END IF
00541 *
00542       INDU = INDWORK
00543       INDV = INDU + SIZE*NRU*WANTU
00544       INDWORK = INDV + SIZE*NCVT*WANTVT
00545 *
00546       LDU = MAX(1,NRU)
00547       LDVT = MAX(1,SIZE)
00548 *
00549       CALL DESCINIT(DESCTU,M,SIZE,1,1,0,0,CONTEXTC,LDU,INFO)
00550       CALL DESCINIT(DESCTVT,SIZE,N,1,1,0,0,CONTEXTR,LDVT,INFO)
00551 *
00552       IF (WANTU.EQ.1) THEN
00553           CALL PSLASET('Full',M,SIZE,ZERO,ONE,WORK(INDU),1,1,DESCTU)
00554       ELSE
00555           NRU = 0
00556       END IF
00557 *
00558       IF (WANTVT.EQ.1) THEN
00559           CALL PSLASET('Full',SIZE,N,ZERO,ONE,WORK(INDV),1,1,DESCTVT)
00560       ELSE
00561           NCVT = 0
00562       END IF
00563 *
00564       CALL SBDSQR(UPLO,SIZE,NCVT,NRU,0,WORK(INDD2+IOFFD),
00565      +            WORK(INDE2+IOFFE),WORK(INDV),SIZE,WORK(INDU),LDU,C,1,
00566      +            WORK(INDWORK),INFO)
00567 *
00568 *     Redistribute elements of U and VT in the block-cyclic fashion.
00569 *
00570       IF (WANTU.EQ.1) CALL PSGEMR2D(M,SIZE,WORK(INDU),1,1,DESCTU,U,IU,
00571      +                              JU,DESCU,DESCU(CTXT_))
00572 *
00573       IF (WANTVT.EQ.1) CALL PSGEMR2D(SIZE,N,WORK(INDV),1,1,DESCTVT,VT,
00574      +                               IVT,JVT,DESCVT,DESCVT(CTXT_))
00575 *
00576 *     Set to ZERO "non-square" elements of the larger matrices U, VT.
00577 *
00578       IF (M.GT.N .AND. WANTU.EQ.1) THEN
00579           CALL PSLASET('Full',M-SIZE,SIZE,ZERO,ZERO,U,IA+SIZE,JU,DESCU)
00580       ELSE IF (N.GT.M .AND. WANTVT.EQ.1) THEN
00581           CALL PSLASET('Full',SIZE,N-SIZE,ZERO,ZERO,VT,IVT,JVT+SIZE,
00582      +                 DESCVT)
00583       END IF
00584 *
00585 *     Multiply Householder rotations from bidiagonalized matrix.
00586 *
00587       IF (WANTU.EQ.1) CALL PSORMBR('Q','L','N',M,SIZE,N,A,IA,JA,DESCA,
00588      +                             WORK(INDTAUQ),U,IU,JU,DESCU,
00589      +                             WORK(INDWORK),LLWORK,INFO)
00590 *
00591       IF (WANTVT.EQ.1) CALL PSORMBR('P','R','T',SIZE,N,M,A,IA,JA,DESCA,
00592      +                              WORK(INDTAUP),VT,IVT,JVT,DESCVT,
00593      +                              WORK(INDWORK),LLWORK,INFO)
00594 *
00595 *     Copy singular values into output array S.
00596 *
00597       DO 10 I = 1,SIZE
00598           S(I) = WORK(INDD2+IOFFD+I-1)
00599    10 CONTINUE
00600 *
00601 *     If matrix was scaled, then rescale singular values appropriately.
00602 *
00603       IF (ISCALE.EQ.1) THEN
00604           CALL SSCAL(SIZE,ONE/SIGMA,S,1)
00605       END IF
00606 *
00607 *     Compare every ith eigenvalue, or all if there are only a few,
00608 *     across the process grid to check for heterogeneity.
00609 *
00610       IF (SIZE.LE.ITHVAL) THEN
00611           J = SIZE
00612           K = 1
00613       ELSE
00614           J = SIZE/ITHVAL
00615           K = ITHVAL
00616       END IF
00617 *
00618       DO 20 I = 1,J
00619           WORK(I+INDE) = S((I-1)*K+1)
00620           WORK(I+INDD2) = S((I-1)*K+1)
00621    20 CONTINUE
00622 *
00623       CALL SGAMN2D(DESCA(CTXT_),'a',' ',J,1,WORK(1+INDE),J,1,1,-1,-1,0)
00624       CALL SGAMX2D(DESCA(CTXT_),'a',' ',J,1,WORK(1+INDD2),J,1,1,-1,-1,0)
00625 *
00626       DO 30 I = 1,J
00627           IF ((WORK(I+INDE)-WORK(I+INDD2)).NE.ZERO) THEN
00628               INFO = SIZE + 1
00629           END IF
00630    30 CONTINUE
00631 *
00632    40 CONTINUE
00633 *
00634       CALL BLACS_GRIDEXIT(CONTEXTC)
00635       CALL BLACS_GRIDEXIT(CONTEXTR)
00636 *
00637 *     End of PSGESVD
00638 *
00639       RETURN
00640       END