/* --------------------------------------------------------------------- * * -- Mark R. Fahey * June 28, 2000 * * --------------------------------------------------------------------- */ /* * Include files */ #include "pblas.h" void pcrot_( n, X, ix, jx, desc_X, incx, Y, iy, jy, desc_Y, incy, c, s ) /* * Mark Fahey * June 22, 2000 */ /* * .. Scalar Arguments .. */ int * incx, * incy, * ix, * iy, * jx, * jy, * n; float * c; complex * s; /* * .. * .. Array Arguments .. */ int desc_X[], desc_Y[]; complex X[], Y[]; { /* * Purpose * ======= * * PCROT applies a plane rotation, where the cos (C) is real and the * sin (S) is complex, and the vectors CX and CY are complex, i.e., * * [ sub( X ) ] := [ C S ] [ sub( X ) ] * [ sub( Y ) ] := [ -conjg(S) C ] [ sub( Y ) ] * * where sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, * * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y, * * and where C*C + S*CONJG(S) = 1.0. * * Notes * ===== * * Each global data object is described by an associated description * vector. This vector stores the information required to establish * the mapping between an object element and its corresponding process * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. * Such a global array has an associated description vector DESCA. * In the following comments, the character _ should be read as * "of the global array". * * NOTATION STORED IN EXPLANATION * --------------- -------------- -------------------------------------- * DT_A (global) descA[ DT_ ] The descriptor type. In this case, * DT_A = 1. * CTXT_A (global) descA[ CTXT_ ] The BLACS context handle, indicating * the BLACS process grid A is distribu- * ted over. The context itself is glo- * bal, but the handle (the integer * value) may vary. * M_A (global) descA[ M_ ] The number of rows in the global * array A. * N_A (global) descA[ N_ ] The number of columns in the global * array A. * MB_A (global) descA[ MB_ ] The blocking factor used to distribu- * te the rows of the array. * NB_A (global) descA[ NB_ ] The blocking factor used to distribu- * te the columns of the array. * RSRC_A (global) descA[ RSRC_ ] The process row over which the first * row of the array A is distributed. * CSRC_A (global) descA[ CSRC_ ] The process column over which the * first column of the array A is * distributed. * LLD_A (local) descA[ LLD_ ] The leading dimension of the local * array. LLD_A >= MAX(1,LOCr(M_A)). * * Let K be the number of rows or columns of a distributed matrix, * and assume that its process grid has dimension p x q. * LOCr( K ) denotes the number of elements of K that a process * would receive if K were distributed over the p processes of its * process column. * Similarly, LOCc( K ) denotes the number of elements of K that a * process would receive if K were distributed over the q processes of * its process row. * The values of LOCr() and LOCc() may be determined via a call to the * ScaLAPACK tool function, NUMROC: * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * * Because vectors may be seen as particular matrices, a distributed * vector is considered to be a distributed matrix. * * If INCX = M_X and INCY = M_Y, NB_X must be equal to NB_Y, and the * process column having the first entries of sub( Y ) must also contain * the first entries of sub( X ). Moreover, the quantity * MOD( JX-1, NB_X ) must be equal to MOD( JY-1, NB_Y ). * * If INCX = M_X, INCY = 1 and INCY <> M_Y, NB_X must be equal to MB_Y. * Moreover, the quantity MOD( JX-1, NB_X ) must be equal to * MOD( IY-1, MB_Y ). * * If INCX = 1, INCX <> M_X and INCY = M_Y, MB_X must be equal to NB_Y. * Moreover, the quantity MOD( IX-1, MB_X ) must be equal to * MOD( JY-1, NB_Y ). * * If INCX = 1, INCX <> M_X, INCY = 1 and INCY <> M_Y, MB_X must be * equal to MB_Y, and the process row having the first entries of * sub( Y ) must also contain the first entries of sub( X ). Moreover, * the quantity MOD( IX-1, MB_X ) must be equal to MOD( IY-1, MB_Y ). * * Arguments * ========= * * N (input) INTEGER * The number of elements in the vectors CX and CY. * * X (local input) COMPLEX array containing the local * pieces of a distributed matrix of dimension of at least * ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) ) * This array contains the entries of the distributed vector * sub( X ). * On output, CX is overwritten with C*X + S*Y. * * IX (global input) pointer to INTEGER * The global row index of the submatrix of the distributed * matrix X to operate on. * * JX (global input) pointer to INTEGER * The global column index of the submatrix of the distributed * matrix X to operate on. * * DESCX (global and local input) INTEGER array of dimension 8. * The array descriptor of the distributed matrix X. * * INCX (global input) pointer to INTEGER * The global increment for the elements of X. Only two values * of INCX are supported in this version, namely 1 and M_X. * * Y (local input) COMPLEX array containing the local * pieces of a distributed matrix of dimension of at least * ( (JY-1)*M_Y + IY + ( N - 1 )*abs( INCY ) ) * This array contains the entries of the distributed vector * sub( Y ). * On output, CY is overwritten with -CONJG(S)*X + C*Y. * * IY (global input) pointer to INTEGER * The global row index of the submatrix of the distributed * matrix Y to operate on. * * JY (global input) pointer to INTEGER * The global column index of the submatrix of the distributed * matrix Y to operate on. * * DESCY (global and local input) INTEGER array of dimension 8. * The array descriptor of the distributed matrix Y. * * INCY (global input) pointer to INTEGER * The global increment for the elements of Y. Only two values * of INCY are supported in this version, namely 1 and M_Y. * * C (input) pointer to FLOAT * S (input) pointer COMPLEX * C and S define a rotation * [ C S ] * [ -conjg(S) C ] * where C*C + S*CONJG(S) = 1.0. * * ===================================================================== * * .. Local Scalars .. */ int ictxt, iix, iiy, info, ixcol, ixrow, iycol, iyrow, jjx, jjy, lcm, lcmp, mycol, myrow, nn, np, np0, nprow, npcol, nq, nz, ione=1, tmp1, wksz; complex xwork[1], ywork[1], zero; /* .. * .. PBLAS Buffer .. */ complex * buff; /* .. * .. External Functions .. */ void blacs_gridinfo_(); void cgerv2d_(); void cgesd2d_(); void pbchkvect(); void PB_Cabort(); char * getpbbuf(); F_INTG_FCT pbctrnv_(); F_INTG_FCT crot_(); F_INTG_FCT ilcm_(); /* .. * .. Executable Statements .. * * Get grid parameters */ ictxt = desc_X[CTXT_]; blacs_gridinfo_( &ictxt, &nprow, &npcol, &myrow, &mycol ); /* * Test the input parameters */ info = 0; if( nprow == -1 ) info = -(500+CTXT_+1); else { pbchkvect( *n, 1, *ix, *jx, desc_X, *incx, 5, &iix, &jjx, &ixrow, &ixcol, nprow, npcol, myrow, mycol, &info ); pbchkvect( *n, 1, *iy, *jy, desc_Y, *incy, 10, &iiy, &jjy, &iyrow, &iycol, nprow, npcol, myrow, mycol, &info ); if( info == 0 ) { if( *n != 1 ) { if( *incx == desc_X[M_] ) { /* X is distributed along a process row */ if( *incy == desc_Y[M_] ) { /* Y is distributed over a process row */ if( ( ixcol != iycol ) || ( ( (*jx-1) % desc_X[NB_] ) != ( (*jy-1) % desc_Y[NB_] ) ) ) info = -9; else if( desc_Y[NB_] != desc_X[NB_] ) info = -(1000+NB_+1); } else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) ) { /* Y is distributed over a process column */ if( ( (*jx-1) % desc_X[NB_] ) != ( (*iy-1) % desc_Y[MB_] ) ) info = -8; else if( desc_Y[MB_] != desc_X[NB_] ) info = -(1000+MB_+1); } else { info = -11; } } else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) ) { /* X is distributed along a process column */ if( *incy == desc_Y[M_] ) { /* Y is distributed over a process row */ if( ( (*ix-1) % desc_X[MB_] ) != ( (*jy-1) % desc_Y[NB_] ) ) info = -9; else if( desc_Y[NB_] != desc_X[MB_] ) info = -(1000+NB_+1); } else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) ) { /* Y is distributed over a process column */ if( ( ixrow != iyrow ) || ( ( (*ix-1) % desc_X[MB_] ) != ( (*iy-1) % desc_Y[MB_] ) ) ) info = -8; else if( desc_Y[MB_] != desc_X[MB_] ) info = -(1000+MB_+1); } else { info = -11; } } else { info = -6; } } if( ictxt != desc_Y[CTXT_] ) info = -(1000+CTXT_+1); } } if( info ) { PB_Cabort( ictxt, "PCROT", info ); return; } /* if( info ) { pberror_( &ictxt, "PCROT", &info ); return; } */ /* * Quick return if possible. */ zero.re = ZERO; zero.im = ZERO; if( *n == 0 ) return; /* * rotation */ if( *n == 1 ) { if( ( myrow == ixrow ) && ( mycol == ixcol ) ) { buff = &X[iix-1+(jjx-1)*desc_X[LLD_]]; if( ( myrow != iyrow ) || ( mycol != iycol ) ) { cgesd2d_( &ictxt, n, n, buff, n, &iyrow, &iycol ); cgerv2d_( &ictxt, n, n, ywork, n, &iyrow, &iycol ); } else *ywork = Y[iiy-1+(jjy-1)*desc_Y[LLD_]]; crot_( n, buff, n, ywork, n, c, s ); X[iix-1+(jjx-1)*desc_X[LLD_]] = *buff; if( ( myrow == iyrow ) && ( mycol == iycol ) ) Y[iiy-1+(jjy-1)*desc_Y[LLD_]] = *ywork; } else if( ( myrow == iyrow ) && ( mycol == iycol ) ) { cgesd2d_( &ictxt, n, n, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], n, &ixrow, &ixcol ); cgerv2d_( &ictxt, n, n, xwork, n, &ixrow, &ixcol ); crot_( n, xwork, n, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], n, c, s ); } return; } if( ( *incx == desc_X[M_] ) && ( *incy == desc_Y[M_] ) ) { /* X and Y are both distributed over a process row */ nz = (*jx-1) % desc_Y[NB_]; nn = *n + nz; nq = numroc_( &nn, &desc_X[NB_], &mycol, &ixcol, &npcol ); if( mycol == ixcol ) nq -= nz; if( ixrow == iyrow ) { if( myrow == ixrow ) { crot_( &nq, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], c, s ); } } else { if( myrow == ixrow ) { cgesd2d_( &ictxt, &ione, &nq, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], &iyrow, &mycol ); buff = (complex *)getpbbuf( "PCROT", nq*sizeof(complex) ); cgerv2d_( &ictxt, &nq, &ione, buff, &nq, &iyrow, &mycol ); crot_( &nq, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], buff, &ione, c, s ); } else if( myrow == iyrow ) { cgesd2d_( &ictxt, &ione, &nq, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], &ixrow, &mycol ); buff = (complex *)getpbbuf( "PCROT", nq*sizeof(complex) ); cgerv2d_( &ictxt, &nq, &ione, buff, &nq, &ixrow, &mycol ); crot_( &nq, buff, &ione, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], c, s ); } } } else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) && ( *incy == 1 ) && ( *incy != desc_Y[M_] ) ) { /* X and Y are both distributed over a process column */ nz = (*ix-1) % desc_X[MB_]; nn = *n + nz; np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow ); if( myrow == ixrow ) np -= nz; if( ixcol == iycol ) { if( mycol == ixcol ) { crot_( &np, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy, c, s ); } } else { if( mycol == ixcol ) { cgesd2d_( &ictxt, &np, &ione, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], &myrow, &iycol ); buff = (complex *)getpbbuf( "PCROT", np*sizeof(complex) ); cgerv2d_( &ictxt, &np, &ione, buff, &np, &myrow, &iycol ); crot_( &np, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx, buff, &ione, c, s ); } else if( mycol == iycol ) { cgesd2d_( &ictxt, &np, &ione, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], &myrow, &ixcol ); buff = (complex *)getpbbuf( "PCROT", np*sizeof(complex) ); cgerv2d_( &ictxt, &np, &ione, buff, &np, &myrow, &ixcol ); crot_( &np, buff, &ione, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy, c, s ); } } } else /* X and Y are not distributed along the same direction */ { lcm = ilcm_( &nprow, &npcol ); if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) ) { /* X is distributed over a process column */ lcmp = lcm / nprow; nz = (*jy-1) % desc_Y[NB_]; nn = *n + nz; tmp1 = nn / desc_Y[MB_]; np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow ); np0 = MYROC0( tmp1, nn, desc_X[MB_], nprow ); tmp1 = np0 / desc_X[MB_]; wksz = MYROC0( tmp1, np0, desc_X[MB_], lcmp ); wksz = np + wksz; buff = (complex *)getpbbuf( "PCROT", wksz*sizeof(complex) ); if( mycol == iycol ) jjy -= nz; if( myrow == ixrow ) np -= nz; pbctrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n, &desc_Y[NB_], &nz, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], &zero, buff, &ione, &iyrow, &iycol, &ixrow, &ixcol, buff+np ); if( mycol == ixcol ) { crot_( &np, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx, buff, &ione, c, s ); } pbctrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n, &desc_Y[NB_], &nz, buff, &ione, &zero, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], &ixrow, &ixcol, &iyrow, &iycol, buff+np ); } else /* Y is distributed over a process column */ { lcmp = lcm / nprow; nz = (*jx-1) % desc_X[NB_]; nn = *n + nz; tmp1 = nn / desc_X[MB_]; np = numroc_( &nn, desc_Y+MB_, &myrow, &iyrow, &nprow ); np0 = MYROC0( tmp1, nn, desc_Y[MB_], nprow ); tmp1 = np0 / desc_Y[MB_]; wksz = MYROC0( tmp1, np0, desc_Y[MB_], lcmp ); wksz = np + wksz; buff = (complex *)getpbbuf( "PCROT", wksz*sizeof(complex) ); if( myrow == iyrow ) np -= nz; pbctrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n, &desc_X[NB_], &nz, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], &zero, buff, &ione, &ixrow, &ixcol, &iyrow, &iycol, buff+np ); if( mycol == iycol ) { crot_( &np, buff, &ione, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy, c, s ); } pbctrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n, &desc_X[NB_], &nz, buff, &ione, &zero, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_], &iyrow, &iycol, &ixrow, &ixcol, buff+np ); } } }