#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/time.h>
#include "mpi.h"
#include <complex.h>
#include "blas.h"
#include "blacs.h"
#include "lapack.h"
#include "scalapack.h"
static int max( int a, int b ){
if (a>b) return(a); else return(b);
}
static int min( int a, int b ){
if (a<b) return(a); else return(b);
}
extern double verif_orthogonality(int m, int n, double *U, int iu, int ju, int *descU);
extern double verif_representativity(int m, int n, double *A, int ia, int ja, int *descA,
double *U, int iu, int ju, int *descU,
double *VT, int ivt, int jvt, int *descVT,
double *S);
extern int driver_pdgesvd(char jobU, char jobVT, int m, int n, double *A, int ia, int ja, int *descA,
double *S_NN, double *U_NN, int iu, int ju, int *descU, double *VT_NN, int ivt, int jvt, int *descVT,
double *MPIelapsedNN);
int main(int argc, char **argv) {
int iam, nprocs;
int myrank_mpi, nprocs_mpi;
int ictxt, nprow, npcol, myrow, mycol;
int nb, m, n;
int mpA, nqA, mpU, nqU, mpVT, nqVT;
int i, j, k, itemp, min_mn;
int descA[9], descU[9], descVT[9];
double *A=NULL;
int info, infoVV;
double *U_VV=NULL;
double *VT_VV=NULL;
double *S_VV=NULL;
double orthU_VV, residF, orthVT_VV;
double eps;
/**/
int izero=0,ione=1;
/**/
double MPIelapsedVV;
char jobU, jobVT;
int iseed[4], idist;
/**/
MPI_Init( &argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi);
/**/
m = 100; n = 100; nprow = 2; npcol = 2; nb = 64; jobU='A'; jobVT='A';
if (myrank_mpi==0){
printf("\n");
printf("--------------------------------------------------------------------------------------------------------------------\n");
printf(" Testing pdgsevd -- double precision SVD ScaLAPACK routine \n");
printf("jobU jobVT m n nb p q || info resid orthU orthVT |SNN-SVV| time(s) cond(A) \n");
printf("--------------------------------------------------------------------------------------------------------------------\n");
}
/**/
if (nb>n)
nb = n;
if (nprow*npcol>nprocs_mpi){
if (myrank_mpi==0)
printf(" **** ERROR : we do not have enough processes available to make a p-by-q process grid ***\n");
printf(" **** Bye-bye ***\n");
MPI_Finalize(); exit(1);
}
/**/
Cblacs_pinfo( &iam, &nprocs ) ;
Cblacs_get( -1, 0, &ictxt );
Cblacs_gridinit( &ictxt, "Row", nprow, npcol );
Cblacs_gridinfo( ictxt, &nprow, &npcol, &myrow, &mycol );
/**/
min_mn = min(m,n);
/**/
if ((myrow>-1)&(mycol>-1)&(myrow<nprow)&(mycol<npcol)){
/**/
mpA = numroc_( &m , &nb, &myrow, &izero, &nprow );
nqA = numroc_( &n , &nb, &mycol, &izero, &npcol );
mpU = numroc_( &m , &nb, &myrow, &izero, &nprow );
nqU = numroc_( &min_mn, &nb, &mycol, &izero, &npcol );
mpVT = numroc_( &min_mn, &nb, &myrow, &izero, &nprow );
nqVT = numroc_( &n , &nb, &mycol, &izero, &npcol );
/**/
A = (double *)calloc(mpA*nqA,sizeof(double)) ;
if (A==NULL){ printf("error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); }
/**/
idist = 2;
iseed[0] = mpA%4096;
iseed[1] = iam%4096;
iseed[2] = nqA%4096;
iseed[3] = 23;
/**/
k = 0;
for (i = 0; i < mpA; i++) {
for (j = 0; j < nqA; j++) {
dlarnv_( &idist, iseed, &ione, &(A[k]) );
k++;
}
}
/*
*
* Initialize the array descriptor for the distributed matrices A, U and VT
*
*/
itemp = max( 1, mpA );
descinit_( descA, &m, &n, &nb, &nb, &izero, &izero, &ictxt, &itemp, &info );
itemp = max( 1, mpA );
descinit_( descU, &m, &min_mn, &nb, &nb, &izero, &izero, &ictxt, &itemp, &info );
itemp = max( 1, mpVT );
descinit_( descVT, &min_mn, &n, &nb, &nb, &izero, &izero, &ictxt, &itemp, &info );
/**/
eps = pdlamch_( &ictxt, "Epsilon" );
/**/
U_VV = (double *)calloc(mpU*nqU,sizeof(double)) ;
if (U_VV==NULL){ printf("error of memory allocation U_VV on proc %dx%d\n",myrow,mycol); exit(0); }
VT_VV = (double *)calloc(mpVT*nqVT,sizeof(double)) ;
if (VT_VV==NULL){ printf("error of memory allocation VT_VV on proc %dx%d\n",myrow,mycol); exit(0); }
S_VV = (double *)calloc(min_mn,sizeof(double)) ;
if (S_VV==NULL){ printf("error of memory allocation S_VV on proc %dx%d\n",myrow,mycol); exit(0); }
infoVV = driver_pdgesvd( 'V', 'V', m, n, A, 1, 1, descA,
S_VV, U_VV, 1, 1, descU, VT_VV, 1, 1, descVT,
&MPIelapsedVV);
orthU_VV = verif_orthogonality(m,min_mn,U_VV , 1, 1, descU);
orthVT_VV = verif_orthogonality(min_mn,n,VT_VV, 1, 1, descVT);
residF = verif_representativity( m, n, A, 1, 1, descA,
U_VV, 1, 1, descU,
VT_VV, 1, 1, descVT,
S_VV);
if ( iam==0 ){
printf(" V V %6d %6d %3d %3d %3d || %3d %7.1e %7.1e %7.1e %8.2f %7.1e\n",
m,n,nb,nprow,npcol,infoVV,residF,orthU_VV,orthVT_VV,MPIelapsedVV,S_VV[0]/S_VV[min_mn-1]);
printf("--------------------------------------------------------------------------------------------------------------------\n");
}
/**/
free(U_VV); free(S_VV); free(VT_VV);
free(A);
Cblacs_gridexit( 0 );
}
/**/
MPI_Finalize();
exit(0);
}
/**/
double verif_orthogonality(int m, int n, double *U, int iu, int ju, int *descU){
double *R=NULL;
int nprow, npcol, myrow, mycol;
int mpR, nqR, nb, itemp, descR[9], ictxt, info, min_mn, max_mn;
int ctxt_ = 1, nb_ = 5;
int izero = 0, ione = 1;
double *wwork=NULL;
double tmone= -1.0e+00, tpone= +1.0e+00, tzero= +0.0e+00;
double orthU;
min_mn = min(m,n);
max_mn = max(m,n);
ictxt = descU[ctxt_];
nb = descU[nb_];
Cblacs_gridinfo( ictxt, &nprow, &npcol, &myrow, &mycol );
mpR = numroc_( &min_mn, &nb, &myrow, &izero, &nprow );
nqR = numroc_( &min_mn, &nb, &mycol, &izero, &npcol );
R = (double *)calloc(mpR*nqR,sizeof(double)) ;
if (R==NULL){ printf("error of memory allocation R on proc %dx%d\n",myrow,mycol); exit(0); }
itemp = max( 1, mpR );
descinit_( descR, &min_mn, &min_mn, &nb, &nb, &izero, &izero, &ictxt, &itemp, &info );
pdlaset_( "F", &min_mn, &min_mn, &tzero, &tpone, R, &ione, &ione, descR );
if (m>n)
pdgemm_( "T", "N", &min_mn, &min_mn, &m, &tpone, U, &iu, &ju, descU, U,
&iu, &ju, descU, &tmone, R, &ione, &ione, descR );
else
pdgemm_( "N", "T", &min_mn, &min_mn, &n, &tpone, U, &iu, &ju, descU, U,
&iu, &ju, descU, &tmone, R, &ione, &ione, descR );
orthU = pdlange_( "F", &min_mn, &min_mn, R, &ione, &ione, descR, wwork );
orthU = orthU / ((double) max_mn);
free(R);
return orthU;
}
/**/
double verif_representativity(int m, int n, double *A, int ia, int ja, int *descA,
double *U, int iu, int ju, int *descU,
double *VT, int ivt, int jvt, int *descVT,
double *S){
double *Acpy=NULL, *Ucpy=NULL;
int nprow, npcol, myrow, mycol;
int min_mn, max_mn, mpA, pcol, localcol, i, nqA;
int ictxt, nbA, rsrcA, csrcA, nbU, rsrcU, csrcU, mpU, nqU;
int ctxt_ = 1, nb_ = 5, rsrc_ = 6, csrc_ = 7;
int izero = 0, ione = 1;
double *wwork=NULL;
double tmone= -1.0e+00, tpone= +1.0e+00;
double residF, AnormF;
min_mn = min(m,n);
max_mn = max(m,n);
ictxt = descA[ctxt_];
Cblacs_gridinfo( ictxt, &nprow, &npcol, &myrow, &mycol );
nbA = descA[nb_]; rsrcA = descA[rsrc_] ; csrcA = descA[csrc_] ;
mpA = numroc_( &m , &nbA, &myrow, &rsrcA, &nprow );
nqA = numroc_( &n , &nbA, &mycol, &csrcA, &npcol );
Acpy = (double *)calloc(mpA*nqA,sizeof(double)) ;
if (Acpy==NULL){ printf("error of memory allocation Acpy on proc %dx%d\n",myrow,mycol); exit(0); }
pdlacpy_( "All", &m, &n, A, &ia, &ja, descA, Acpy, &ia, &ja, descA );
nbU = descU[nb_]; rsrcU = descU[rsrc_] ; csrcU = descU[csrc_] ;
mpU = numroc_( &m , &nbU, &myrow, &rsrcU, &nprow );
nqU = numroc_( &min_mn, &nbU, &mycol, &csrcU, &npcol );
Ucpy = (double *)calloc(mpU*nqU,sizeof(double)) ;
if (Ucpy==NULL){ printf("error of memory allocation Ucpy on proc %dx%d\n",myrow,mycol); exit(0); }
pdlacpy_( "All", &m, &min_mn, U, &iu, &ju, descU, Ucpy, &iu, &ju, descU );
AnormF = pdlange_( "F", &m, &n, A, &ia, &ja, descA, wwork);
for (i=1;i<min_mn+1;i++){
pcol = indxg2p_( &i, &(descU[5]), &izero, &izero, &npcol );
localcol = indxg2l_( &i, &(descU[5]), &izero, &izero, &npcol );
if( mycol==pcol )
dscal_( &mpA, &(S[i-1]), &(Ucpy[ ( localcol-1 )*descU[8] ]), &ione );
}
pdgemm_( "N", "N", &m, &n, &min_mn, &tpone, Ucpy, &iu, &ju, descU, VT, &ivt, &jvt, descVT,
&tmone, Acpy, &ia, &ja, descA );
residF = pdlange_( "F", &m, &n, Acpy, &ione, &ione, descA, wwork);
residF = residF/AnormF/((double) max_mn);
free(Ucpy);
free(Acpy);
return residF;
}
/**/
int driver_pdgesvd( char jobU, char jobVT, int m, int n, double *A, int ia, int ja, int *descA,
double *S_NN, double *U_NN, int iu, int ju, int *descU, double *VT_NN, int ivt, int jvt, int *descVT,
double *MPIelapsedNN){
double *Acpy=NULL, *work=NULL;
int lwork;
/**/
int ione=1;
/**/
int nprow, npcol, myrow, mycol;
int mpA, nqA;
int ictxt, nbA, rsrcA, csrcA;
int ctxt_ = 1, nb_ = 5, rsrc_ = 6, csrc_ = 7;
int infoNN;
double MPIt1, MPIt2;
/**/
ictxt = descA[ctxt_];
Cblacs_gridinfo( ictxt, &nprow, &npcol, &myrow, &mycol );
nbA = descA[nb_]; rsrcA = descA[rsrc_] ; csrcA = descA[csrc_] ;
mpA = numroc_( &m , &nbA, &myrow, &rsrcA, &nprow );
nqA = numroc_( &n , &nbA, &mycol, &csrcA, &npcol );
Acpy = (double *)calloc(mpA*nqA,sizeof(double)) ;
if (Acpy==NULL){ printf("error of memory allocation Acpy on proc %dx%d\n",myrow,mycol); exit(0); }
pdlacpy_( "All", &m, &n, A, &ione, &ione, descA, Acpy, &ione, &ione, descA );
work = (double *)calloc(1,sizeof(double)) ;
if (work==NULL){ printf("error of memory allocation for work on proc %dx%d (1st time)\n",myrow,mycol); exit(0); }
lwork=-1;
pdgesvd_( &jobU, &jobVT, &m, &n, Acpy, &ione, &ione, descA,
S_NN, U_NN, &ione, &ione, descU, VT_NN, &ione, &ione, descVT,
work, &lwork, &infoNN);
lwork = (int) (work[0]);
free(work);
work = (double *)calloc(lwork,sizeof(double)) ;
if (work==NULL){ printf("error of memory allocation work on proc %dx%d\n",myrow,mycol); exit(0); }
/**/
MPIt1 = MPI_Wtime();
/**/
pdgesvd_( &jobU, &jobVT, &m, &n, Acpy, &ione, &ione, descA,
S_NN, U_NN, &ione, &ione, descU, VT_NN, &ione, &ione, descVT,
work, &lwork, &infoNN);
/**/
MPIt2 = MPI_Wtime();
(*MPIelapsedNN)=MPIt2-MPIt1;
/**/
free(work);
free(Acpy);
return infoNN;
}