[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: ATLAS code freeze



Hi Clint!  Almost done with the L2.  Found a few possible issues with
the ger tester.

1)  Don't you need the following patch to r1test.c?

--- ../r1test.c~	Thu Nov 16 20:30:53 2000
+++ ../r1test.c	Sat Dec  2 22:10:52 2000
@@ -106,7 +106,7 @@
    Mjoin(PATL,gegen)(M, 1, X, M, N*aincY+127*50+77);
    Mjoin(PATL,gegen)(M, N, A0, M, N*M+513*7+90);
    Mjoin(PATL,gegen)(M, N, A, lda, N*M+513*7+90);
-   if (incY < 0) Y += (N-1) * aincY;
+   if (incY < 0) Y += (N-1) * aincY * ATL_sizeof / sizeof(*Y);
 
 #ifdef TCPLX
    if (CONJ)
@@ -117,7 +117,7 @@
 #endif
    dumb_ger(CONJ, M, N, one, X, 1, Y, incY, A0, M);
 
-   if (incY < 0) Y -= (N-1) * aincY;
+   if (incY < 0) Y -= (N-1) * aincY * ATL_sizeof / sizeof(*Y);
    free(Y);
    free(X);
    ierr = CheckAns(M, N, A0, M, A, lda);

2)  Without this patch, the tester runs over the very large numbers you
    used to fill the gaps, presumably to spot errors more readily.  It
    revealed, however (I think), an issue with the 'epsilon' used to
    determine whether the test routine and the reference agree.  Here
    is a small program which gives a small discrepancy between the
    correct SSE result and the normal FPU result, but which is (again,
    I think) within normal floating-point tolerance:

=============================================================================
t.c
=============================================================================
#include <string.h>

/*
 * Stand-alone reproduction: evaluates the same four single-precision
 * multiply-add expressions twice -- once with packed SSE instructions in
 * inline assembly (result left in m0) and once with ordinary C float
 * arithmetic (result left in mt) -- and prints both rows so they can be
 * compared by eye.
 *
 * The asm uses 32-bit x86 registers (%esi) and GCC extended-asm syntax.
 *
 * NOTE(review): printf is called below, but <string.h> is the only header
 * included in this listing; <stdio.h> appears to be missing.
 * NOTE(review): main has no return statement.
 */
int 
main() {

  int i;  /* not referenced anywhere in this function */
  /* Large-magnitude multiplicands; m3 alternates sign lane by lane. */
  float m2[4]={-2.56e9,-2.56e9,-2.56e9,-2.56e9};
  float m3[4]={-2.56e9,2.56e9,-2.56e9,2.56e9};
  /* Small-magnitude multiplier shared by both products. */
  float m6[4]={ 0.285754889,  0.403641045,  0.302219868, -0.272048146};
  /* Accumulator: overwritten in place by the SSE path. */
  float m0[4]={ -0.108218066,  0.403083593,  -0.279999077,  0.183685213};
  /* Scalar-path accumulator; receives a copy of m0's initial values. */
  float mt[4];

  /* Snapshot m0 before the asm below stores its result over it. */
  memcpy(mt,m0,sizeof(mt));

  /*
   * SSE path.
   * NOTE(review): each "movl %N,%%esi" is presumably expected to put the
   * ADDRESS of the corresponding array in %esi; whether an array passed
   * with an "m" constraint yields its address or its first element is
   * compiler-dependent -- confirm against the compiler in use.
   */
  __asm__ __volatile__ (
			/* Load the four operand vectors (unaligned loads). */
			"movl %0,%%esi\n\t"
			"movups (%%esi),%%xmm2\n\t"
			"movl %1,%%esi\n\t"
			"movups (%%esi),%%xmm3\n\t"
			"movl %2,%%esi\n\t"
			"movups (%%esi),%%xmm6\n\t"
			"movl %3,%%esi\n\t"
			"movups (%%esi),%%xmm0\n\t"
			
			/* xmm7 = m6 * m2, lane by lane. */
			"movaps %%xmm6,%%xmm1\n\t"
			"mulps %%xmm2,%%xmm1\n\t"
			"movaps %%xmm1,%%xmm7\n\t"

			/* xmm1 = m6 * m3; shufps with immediate 177 (0xB1) swaps
			   lanes 0<->1 and 2<->3 before the sum into xmm7. */
			"movaps %%xmm6,%%xmm1\n\t"
			"mulps %%xmm3,%%xmm1\n\t"
			"shufps $177,%%xmm1,%%xmm1\n\t"
			"addps %%xmm1,%%xmm7\n\t"
			
			/* Add the combined products to the accumulator and store
			   through %esi, which was last loaded from operand %3
			   (m0). */
			"addps %%xmm7,%%xmm0\n\t"
			"movups %%xmm0,(%%esi)\n\t"

			/* All four arrays are inputs; there are no output operands.
			   NOTE(review): only "si" is declared clobbered, although
			   xmm0-xmm3, xmm6, xmm7 and memory are also written --
			   confirm whether further clobbers are needed. */
			::"m" (m2),"m" (m3),"m"(m6),"m" (m0):"si");

  /* First output row: SSE result. */
  printf("%f %f %f %f\n",m0[0],m0[1],m0[2],m0[3]);

  /* Scalar path: the same expressions written out per lane; the m3*m6
     term uses the neighbouring index of each pair, mirroring the shuffle
     above. */
  mt[0]+=m2[0]*m6[0]+m3[1]*m6[1];
  mt[1]+=m2[1]*m6[1]+m3[0]*m6[0];
  mt[2]+=m2[2]*m6[2]+m3[3]*m6[3];
  mt[3]+=m2[3]*m6[3]+m3[2]*m6[2];

  
  /* Second output row: scalar result. */
  printf("%f %f %f %f\n",mt[0],mt[1],mt[2],mt[3]);

}
=============================================================================
intech101:/mnt/i19/f/debian/mm/atlas/tmp/atlas-3.1.4D/tune/blas/ger/Linux_fpic$ cc -g t.c -o t 
intech101:/mnt/i19/f/debian/mm/atlas/tmp/atlas-3.1.4D/tune/blas/ger/Linux_fpic$ t
301788544.000000 -1764853632.000000 -1470126080.000000 -77239616.000000
301788544.000000 -1764853632.000000 -1470126080.000000 -77239608.000000
=============================================================================


Take care, 
-- 
Camm Maguire			     			camm@enhanced.com
==========================================================================
"The earth is but one country, and mankind its citizens."  --  Baha'u'llah