[PD-cvs] pd/src d_array.c,1.1.1.3.2.6,1.1.1.3.2.7 d_global.c,1.1.1.2.8.8,1.1.1.2.8.9 m_pd.h,1.1.1.4.2.21,1.1.1.4.2.22 m_simd.c,1.1.2.1,1.1.2.2 m_simd_def.h,1.1.2.7,1.1.2.8 m_simd_sse_gcc.c,1.1.2.8,1.1.2.9 m_simd_sse_gcc.h,1.1.2.14,1.1.2.15 m_simd_sse_vc.c,1.1.2.8,1.1.2.9 m_simd_sse_vc.h,1.1.2.8,1.1.2.9 m_simd_ve_gcc.c,1.1.2.7,1.1.2.8 m_simd_ve_gcc.h,1.1.2.8,1.1.2.9

Thu Oct 14 13:01:37 CEST 2004

Update of /cvsroot/pure-data/pd/src
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv15939

Modified Files:
      Tag: devel_0_37
	d_array.c d_global.c m_pd.h m_simd.c m_simd_def.h 
	m_simd_sse_gcc.c m_simd_sse_gcc.h m_simd_sse_vc.c 
	m_simd_sse_vc.h m_simd_ve_gcc.c m_simd_ve_gcc.h 
Log Message:
exporting basic simd functions

Index: m_simd.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd.c,v
retrieving revision 1.1.2.1
retrieving revision 1.1.2.2
diff -C2 -d -r1.1.2.1 -r1.1.2.2
*** m_simd.c	6 Oct 2004 18:31:18 -0000	1.1.2.1
--- m_simd.c	14 Oct 2004 11:01:34 -0000	1.1.2.2
***************
*** 50,51 ****
--- 50,107 ----
  	}
  }
+ 
+ int simd_check1(t_int n, t_float* ptr1) /* function form of the SIMD_CHECK1 macro, declared EXTERN in m_pd.h */
+ {
+ 	return SIMD_CHECK1(n,ptr1); /* macro is defined outside this diff; result is returned as int */
+ }
+ 
+ int simd_check2(t_int n, t_float* ptr1, t_float* ptr2) /* function form of the SIMD_CHECK2 macro, declared EXTERN in m_pd.h */
+ {
+ 	return SIMD_CHECK2(n,ptr1,ptr2); /* macro is defined outside this diff; result is returned as int */
+ }
+ 
+ int simd_check3(t_int n, t_float* ptr1, t_float* ptr2, t_float* ptr3) /* function form of the SIMD_CHECK3 macro, declared EXTERN in m_pd.h */
+ {
+ 	return SIMD_CHECK3(n,ptr1,ptr2,ptr3); /* macro is defined outside this diff; result is returned as int */
+ }
+ 
+ 
+ 
+ #ifdef DONTUSESIMD
+ t_int simd_runtime_check(void) /* DONTUSESIMD build: always returns 0 */
+ {
+ 	return 0;
+ }
+ 
+ /* tb: wrapper for simd functions */
+ void zerovec_simd(t_float *dst,int n) /* DONTUSESIMD fallback: forwards unchanged to zerovec_8 */
+ {
+ 	zerovec_8(dst,n);
+ }
+ 
+ void setvec_simd(t_float *dst,t_float v,int n) /* DONTUSESIMD fallback: forwards unchanged to setvec_8 */
+ {
+ 	setvec_8(dst,v,n);
+ }
+ 
+ void copyvec_simd(t_float *dst,const t_float *src,int n) /* DONTUSESIMD fallback: forwards unchanged to copyvec_8 */
+ {
+ 	copyvec_8(dst,src,n);
+ }
+ 
+ void addvec_simd(t_float *dst,const t_float *src,int n) /* DONTUSESIMD fallback: forwards unchanged to addvec_8 */
+ {
+ 	addvec_8(dst,src,n);
+ }
+ 
+ void testcopyvec_simd(t_float *dst,const t_float *src,int n) /* DONTUSESIMD fallback: forwards unchanged to testcopyvec_8 */
+ {
+ 	testcopyvec_8(dst,src,n);
+ }
+ 
+ void testaddvec_simd(t_float *dst,const t_float *src,int n) /* DONTUSESIMD fallback: forwards unchanged to testaddvec_8 */
+ {
+ 	testaddvec_8(dst,src,n);
+ }
+ 
+ #endif /* DONTUSESIMD */

Index: m_simd_sse_gcc.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_gcc.c,v
retrieving revision 1.1.2.8
retrieving revision 1.1.2.9
diff -C2 -d -r1.1.2.8 -r1.1.2.9
*** m_simd_sse_gcc.c	9 Oct 2004 16:34:22 -0000	1.1.2.8
--- m_simd_sse_gcc.c	14 Oct 2004 11:01:34 -0000	1.1.2.9
***************
*** 10,145 ****

! /* TB: adapted from the xxx_sse_vc routines */

  /* dst is assumed to be aligned */
! void zerovec_sse_gcc(t_float *dst,int n)
  {
      asm(
! 	".set T_FLOAT,4                            \n" /* sizeof(t_float) */
! 	"xorps     %%xmm0, %%xmm0                  \n" /* zero value */
! 	"shr       $4, %0                          \n"
! 
! 	/* should we do more loop unrolling? */
! 	/* *dst = 0 */
! 	"1:                                        \n"
! 	"movaps    %%xmm0, (%1)                    \n"
! 	"movaps    %%xmm0, 4*T_FLOAT(%1)           \n"
! 	"movaps    %%xmm0, 8*T_FLOAT(%1)           \n"
! 	"movaps    %%xmm0, 12*T_FLOAT(%1)          \n"
! 
! 	"addl      $16*T_FLOAT,%1                  \n"
! 	"loop      1b                              \n"
! 	:
! 	:"c"(n),"r"(dst)
! 	:"%xmm0");
  }

  /* dst is assumed to be aligned */
! void setvec_sse_gcc(t_float *dst,t_float v,int n)
  {
      asm(
! 	".set T_FLOAT,4                            \n" /* sizeof(t_float) */
! 	"shufps    $0,%2,%2                        \n" /* load value */
! 	"shr       $4,%0                           \n"
! 
! 	/* should we do more loop unrolling? */
! 	/* *dst = v */
! 	"1:                                        \n"
! 	"movaps    %2, (%1)                        \n"
! 	"movaps    %2, 4*T_FLOAT(%1)               \n"
! 	"movaps    %2, 8*T_FLOAT(%1)               \n"
! 	"movaps    %2, 12*T_FLOAT(%1)              \n"
! 
! 	"addl      $16*T_FLOAT,%1                  \n"
! 	"loop      1b                              \n"
! 	:
! 	:"c"(n),"r"(dst),"x"((t_float)v)
! 	);
  }

  /* dst and src are assumed to be aligned */
! void copyvec_sse_gcc(t_float *dst,const t_float *src,int n)
  {
      asm(
! 	".set T_FLOAT,4                            \n" /* sizeof(t_float) */
! 	"shr       $4, %0                          \n"
! 
! 	/* loop: *dst = *src */
! 	"1:                                        \n"
! 	"movaps    (%1), %%xmm0                    \n"
! 	"movaps    4*T_FLOAT(%1), %%xmm1           \n"
! 	"movaps    8*T_FLOAT(%1), %%xmm2           \n"
! 	"movaps    12*T_FLOAT(%1), %%xmm3          \n"
! 	"movaps    %%xmm0, (%2)                    \n"
! 	"movaps    %%xmm1, 4*T_FLOAT(%2)           \n"
! 	"movaps    %%xmm2, 8*T_FLOAT(%2)           \n"
! 	"movaps    %%xmm3, 12*T_FLOAT(%2)          \n"
! 
! 	"addl      $16*T_FLOAT,%1                  \n"
! 	"addl      $16*T_FLOAT,%2                  \n"
! 	"loop      1b                              \n"
! 	:
! 	:"c"(n),"r"(src),"r"(dst)
! 	:"%xmm0","%xmm1","%xmm2","%xmm3");
  }

  /* dst and src are assumed to be aligned */
! void addvec_sse_gcc(t_float *dst,const t_float *src,int n)
  {
      asm(
! 	".set T_FLOAT,4                            \n" /* sizeof(t_float) */
! 	"shr       $4, %0                          \n"
! 
! 	/* loop: *dst += *src */
! 	"1:                                        \n"
! 	"movaps    (%2,%3),%%xmm0                  \n"
! 	"movaps    (%1,%3),%%xmm1                  \n"
! 	"addps     %%xmm0,%%xmm1                   \n"
! 	"movaps    %%xmm0,(%2,%3)                  \n"

! 	"movaps    4*T_FLOAT(%2,%3),%%xmm0         \n"
! 	"movaps    4*T_FLOAT(%1,%3),%%xmm1         \n"
! 	"addps     %%xmm0,%%xmm1                   \n"
! 	"movaps    %%xmm0,4*T_FLOAT(%2,%3)         \n"

- 	"movaps    8*T_FLOAT(%2,%3),%%xmm0         \n"
- 	"movaps    8*T_FLOAT(%1,%3),%%xmm1         \n"
- 	"addps     %%xmm0,%%xmm1                   \n"
- 	"movaps    %%xmm0,8*T_FLOAT(%2,%3)         \n"

! 	"movaps    12*T_FLOAT(%2,%3),%%xmm0        \n"
! 	"movaps    12*T_FLOAT(%1,%3),%%xmm1        \n"
! 	"addps     %%xmm0,%%xmm1                   \n"
! 	"movaps    %%xmm0,12*T_FLOAT(%2,%3)        \n"

! 	"addl      $16*T_FLOAT,%3                  \n"
! 	"loop      1b                              \n"
! 	:
! 	: "c"(n),"r"(src),"r"(dst),"r"(0)
! 	: "%xmm0","%xmm1","%xmm2","%xmm3","%xmm4","%xmm5","%xmm6","%xmm7");
  }

! t_int *zero_perf_sse_gcc(t_int *w)
  {
!     zerovec_sse_gcc((t_float *)w[1],w[2]);
      return w+3;
  }

! t_int *copy_perf_sse_gcc(t_int *w)
  {
!     copyvec_sse_gcc((t_float *)w[2],(const t_float *)w[1],w[3]);
      return w+4;
  }

! t_int *sig_tilde_perf_sse_gcc(t_int *w)
  {
!     setvec_sse_gcc((t_float *)w[2],*(const t_float *)w[1],w[3]);
      return w+4;
  }

! t_int *plus_perf_sse_gcc (t_int * w)
  {
      asm(
--- 10,192 ----

! /* TB: adapted from thomas' vc routines */

  /* dst is assumed to be aligned */
! void zerovec_simd(t_float *dst,int n)
  {
      asm(
! 		".set T_FLOAT,4                            \n" /* sizeof(t_float) */
! 		"xorps     %%xmm0, %%xmm0                  \n" /* zero value */
! 		"shr       $4, %0                          \n" /* n >>= 4: number of 16-float chunks, kept in ecx */
! 		/* NOTE(review): "loop" decrements ecx before testing it, so n < 16 (ecx == 0 here) would wrap around; presumably callers pass a non-zero multiple of 16 -- confirm */
! 		/* should we do more loop unrolling? */
! 		/* *dst = 0 */
! 		"1:                                        \n"
! 		"movaps    %%xmm0, (%1)                    \n"
! 		"movaps    %%xmm0, 4*T_FLOAT(%1)           \n"
! 		"movaps    %%xmm0, 8*T_FLOAT(%1)           \n"
! 		"movaps    %%xmm0, 12*T_FLOAT(%1)          \n"
! 		/* advance dst by one chunk (16 floats); NOTE(review): %0 and %1 are modified although declared as inputs, and no "memory" clobber is listed */
! 		"addl      $16*T_FLOAT,%1                  \n"
! 		"loop      1b                              \n"
! 		:
! 		:"c"(n),"r"(dst)
! 		:"%xmm0");
  }

  /* dst is assumed to be aligned */
! void setvec_simd(t_float *dst,t_float v,int n)
  {
      asm(
! 		".set T_FLOAT,4                            \n" /* sizeof(t_float) */
! 		"shufps    $0,%2,%2                        \n" /* load value */
! 		"shr       $4,%0                           \n" /* n >>= 4: number of 16-float chunks, kept in ecx */
! 		/* after the shufps, all four lanes of %2 hold v; NOTE(review): n < 16 leaves ecx == 0 and "loop" would wrap around -- confirm callers pass a non-zero multiple of 16 */
! 		/* should we do more loop unrolling? */
! 		/* *dst = v */
! 		"1:                                        \n"
! 		"movaps    %2, (%1)                        \n"
! 		"movaps    %2, 4*T_FLOAT(%1)               \n"
! 		"movaps    %2, 8*T_FLOAT(%1)               \n"
! 		"movaps    %2, 12*T_FLOAT(%1)              \n"
! 		/* advance dst by one chunk (16 floats); NOTE(review): %0, %1 and %2 are modified although declared as inputs, and no clobbers are listed */
! 		"addl      $16*T_FLOAT,%1                  \n"
! 		"loop      1b                              \n"
! 		:
! 		:"c"(n),"r"(dst),"x"((t_float)v)
! 		);
  }

  /* dst and src are assumed to be aligned */
! void copyvec_simd(t_float *dst,const t_float *src,int n)
  {
      asm(
! 		".set T_FLOAT,4                            \n" /* sizeof(t_float) */
! 		"shr       $4, %0                          \n" /* n >>= 4: number of 16-float chunks, kept in ecx */
! 		/* %1 = src, %2 = dst; NOTE(review): n < 16 leaves ecx == 0 and "loop" would wrap around -- confirm callers pass a non-zero multiple of 16 */
! 		/* loop: *dst = *src */
! 		"1:                                        \n"
! 		"movaps    (%1), %%xmm0                    \n"
! 		"movaps    4*T_FLOAT(%1), %%xmm1           \n"
! 		"movaps    8*T_FLOAT(%1), %%xmm2           \n"
! 		"movaps    12*T_FLOAT(%1), %%xmm3          \n"
! 		"movaps    %%xmm0, (%2)                    \n"
! 		"movaps    %%xmm1, 4*T_FLOAT(%2)           \n"
! 		"movaps    %%xmm2, 8*T_FLOAT(%2)           \n"
! 		"movaps    %%xmm3, 12*T_FLOAT(%2)          \n"
! 		/* advance both pointers by one chunk (16 floats); NOTE(review): %0, %1 and %2 are modified although declared as inputs, and no "memory" clobber is listed */
! 		"addl      $16*T_FLOAT,%1                  \n"
! 		"addl      $16*T_FLOAT,%2                  \n"
! 		"loop      1b                              \n"
! 		:
! 		:"c"(n),"r"(src),"r"(dst)
! 		:"%xmm0","%xmm1","%xmm2","%xmm3");
  }

  /* dst and src are assumed to be aligned */
! void addvec_simd(t_float *dst,const t_float *src,int n)
  {
      asm(
! 		".set T_FLOAT,4                            \n" /* sizeof(t_float) */
! 		"shr       $4, %0                          \n" /* n >>= 4: number of 16-float chunks, kept in ecx */
! 		/* %1 = src, %2 = dst, %3 = byte offset applied to both (starts at 0) */
! 		/* loop: *dst += *src */
! 		"1:                                        \n"
! 		"movaps    (%2,%3),%%xmm0                  \n"
! 		"movaps    (%1,%3),%%xmm1                  \n"
! 		"addps     %%xmm0,%%xmm1                   \n"
! 		"movaps    %%xmm1,(%2,%3)                  \n"
! 		/* AT&T operand order: addps leaves the sum in xmm1, so xmm1 (not xmm0, the old dst value) is stored back */
! 		"movaps    4*T_FLOAT(%2,%3),%%xmm0         \n"
! 		"movaps    4*T_FLOAT(%1,%3),%%xmm1         \n"
! 		"addps     %%xmm0,%%xmm1                   \n"
! 		"movaps    %%xmm1,4*T_FLOAT(%2,%3)         \n"
! 		/* NOTE(review): n < 16 leaves ecx == 0 and "loop" would wrap around -- confirm callers pass a non-zero multiple of 16 */
! 		"movaps    8*T_FLOAT(%2,%3),%%xmm0         \n"
! 		"movaps    8*T_FLOAT(%1,%3),%%xmm1         \n"
! 		"addps     %%xmm0,%%xmm1                   \n"
! 		"movaps    %%xmm1,8*T_FLOAT(%2,%3)         \n"
! 		/* NOTE(review): %0 and %3 are modified although declared as inputs; converting them to in/out operands needs a local variable */
! 		"movaps    12*T_FLOAT(%2,%3),%%xmm0        \n"
! 		"movaps    12*T_FLOAT(%1,%3),%%xmm1        \n"
! 		"addps     %%xmm0,%%xmm1                   \n"
! 		"movaps    %%xmm1,12*T_FLOAT(%2,%3)        \n"
! 		/* advance the shared byte offset by one chunk (16 floats) */
! 		"addl      $16*T_FLOAT,%3                  \n"
! 		"loop      1b                              \n"
! 		:
! 		: "c"(n),"r"(src),"r"(dst),"r"(0)
! 		: "%xmm0","%xmm1","%xmm2","%xmm3","%xmm4","%xmm5","%xmm6","%xmm7","memory","cc");
! }

! /* dst is assumed to be aligned */
! void testvec_simd(t_float *dst,t_float v,int n)
! {
!     asm(
! 		".set T_FLOAT,4                            \n" /* sizeof(t_float) */
! 		"shufps    $0,%2,%2                        \n" /* load value */
! 		"shr       $4,%0                           \n" /* n >>= 4: number of 16-float chunks, kept in ecx */
! 		/* NOTE(review): this asm body is identical to setvec_simd in this file; no declaration of testvec_simd is visible in the headers of this commit -- confirm it is needed */
! 		/* should we do more loop unrolling? */
! 		/* *dst = v */
! 		"1:                                        \n"
! 		"movaps    %2, (%1)                        \n"
! 		"movaps    %2, 4*T_FLOAT(%1)               \n"
! 		"movaps    %2, 8*T_FLOAT(%1)               \n"
! 		"movaps    %2, 12*T_FLOAT(%1)              \n"
! 		/* advance dst by one chunk (16 floats); NOTE(review): %0, %1 and %2 are modified although declared as inputs, and no clobbers are listed */
! 		"addl      $16*T_FLOAT,%1                  \n"
! 		"loop      1b                              \n"
! 		:
! 		:"c"(n),"r"(dst),"x"((t_float)v)
! 		);
! }

! /*
!  * if we switch on DAZ, we shouldn't have problems with denormals 
!  * any more ... tb
!  */
! void testcopyvec_simd(t_float *dst,const t_float *src,int n)
! {
! #ifdef DAZ
! 	copyvec_simd(dst,src,n); /* DAZ build: plain SIMD copy, no per-sample test */
! #else
! 	testcopyvec_8(dst,src,n); /* SIMD not implemented: fall back to the non-SIMD testcopyvec_8 */
! #endif
! }

! void testaddvec_simd(t_float *dst,const t_float *src,int n)
! {
! #ifdef DAZ
! 	addvec_simd(dst,src,n); /* DAZ build: plain SIMD add, no per-sample test */
! #else
! 	testaddvec_8(dst,src,n); /* SIMD not implemented: fall back to the non-SIMD testaddvec_8 */
! #endif
  }

! 
! t_int *zero_perf_simd(t_int *w) /* w[1] = dst pointer, w[2] = n; returns w+3 */
  {
!     zerovec_simd((t_float *)w[1],w[2]);
      return w+3;
  }

! t_int *copy_perf_simd(t_int *w) /* w[1] = src pointer, w[2] = dst pointer, w[3] = n; returns w+4 */
  {
!     copyvec_simd((t_float *)w[2],(const t_float *)w[1],w[3]);
      return w+4;
  }

! t_int *sig_tilde_perf_simd(t_int *w) /* w[1] = pointer to the fill value, w[2] = dst pointer, w[3] = n; returns w+4 */
  {
!     setvec_simd((t_float *)w[2],*(const t_float *)w[1],w[3]);
      return w+4;
  }

! t_int *plus_perf_simd (t_int * w)
  {
      asm(
***************
*** 181,185 ****

! t_int *scalarplus_perf_sse_gcc(t_int *w)
  {
      asm(
--- 228,232 ----

! t_int *scalarplus_perf_simd(t_int *w)
  {
      asm(
***************
*** 218,222 ****
  }

! t_int *minus_perf_sse_gcc(t_int *w)
  {
      asm(
--- 265,269 ----
  }

! t_int *minus_perf_simd(t_int *w)
  {
      asm(
***************
*** 257,261 ****
  }

! t_int* scalarminus_perf_sse_gcc(t_int *w)
  {
      asm(
--- 304,308 ----
  }

! t_int* scalarminus_perf_simd(t_int *w)
  {
      asm(
***************
*** 295,299 ****

! t_int *times_perf_sse_gcc(t_int *w)
  {
      asm(
--- 342,346 ----

! t_int *times_perf_simd(t_int *w)
  {
      asm(
***************
*** 334,338 ****
  }

! t_int* scalartimes_perf_sse_gcc(t_int *w)
  {
      asm(
--- 381,385 ----
  }

! t_int* scalartimes_perf_simd(t_int *w)
  {
      asm(
***************
*** 371,375 ****
  }

! t_int *sqr_perf_sse_gcc(t_int *w)
  {
      asm(
--- 418,422 ----
  }

! t_int *sqr_perf_simd(t_int *w)
  {
      asm(
***************
*** 407,411 ****

! t_int* over_perf_sse_gcc(t_int * w)
  {
      asm(
--- 454,458 ----

! t_int* over_perf_simd(t_int * w)
  {
      asm(
***************
*** 446,450 ****
  }

! t_int* scalarover_perf_sse_gcc(t_int *w)
  {
      asm(
--- 493,497 ----
  }

! t_int* scalarover_perf_simd(t_int *w)
  {
      asm(
***************
*** 484,488 ****

! t_int* min_perf_sse_gcc(t_int * w)
  {
      asm(
--- 531,535 ----

! t_int* min_perf_simd(t_int * w)
  {
      asm(
***************
*** 524,528 ****

! t_int* scalarmin_perf_sse_gcc(t_int *w)
  {
      asm(
--- 571,575 ----

! t_int* scalarmin_perf_simd(t_int *w)
  {
      asm(
***************
*** 562,566 ****

! t_int* max_perf_sse_gcc(t_int * w)
  {
      asm(
--- 609,613 ----

! t_int* max_perf_simd(t_int * w)
  {
      asm(
***************
*** 602,611 ****

! t_int* scalarmax_perf_sse_gcc(t_int *w)
  {
      asm(
  	".set T_FLOAT,4                            \n"

!         "shufps    $0, %1, %1                      \n"
  	"shrl      $4, %3                          \n" /* divide by 16 */

--- 649,658 ----

! t_int* scalarmax_perf_simd(t_int *w)
  {
      asm(
  	".set T_FLOAT,4                            \n"

!     "shufps    $0, %1, %1                      \n"
  	"shrl      $4, %3                          \n" /* divide by 16 */

Index: m_simd_ve_gcc.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_ve_gcc.c,v
retrieving revision 1.1.2.7
retrieving revision 1.1.2.8
diff -C2 -d -r1.1.2.7 -r1.1.2.8
*** m_simd_ve_gcc.c	7 Oct 2004 17:56:38 -0000	1.1.2.7
--- m_simd_ve_gcc.c	14 Oct 2004 11:01:34 -0000	1.1.2.8
***************
*** 39,43 ****
  #define LoadValue(where) LoadUnaligned((const float *)(where))

! void zerovec_ve_gcc(t_float *dst,int n)
  {
  	const vector float zero = (vector float)(0);
--- 39,43 ----
  #define LoadValue(where) LoadUnaligned((const float *)(where))

! void zerovec_simd(t_float *dst,int n)
  {
  	const vector float zero = (vector float)(0);
***************
*** 50,54 ****
  }

! void setvec_ve_gcc(t_float *dst,t_float v,int n)
  {
  	const vector float arg = LoadValue(&v);
--- 50,54 ----
  }

! void setvec_simd(t_float *dst,t_float v,int n)
  {
  	const vector float arg = LoadValue(&v);
***************
*** 61,65 ****
  }

! void copyvec_ve_gcc(t_float *dst,const t_float *src,int n)
  {
  	for(n >>= 4; n--; src += 16,dst += 16) {
--- 61,65 ----
  }

! void copyvec_simd(t_float *dst,const t_float *src,int n)
  {
  	for(n >>= 4; n--; src += 16,dst += 16) {
***************
*** 75,79 ****
  }

! void addvec_ve_gcc(t_float *dst,const t_float *src,int n)
  {
  #ifdef USEVECLIB
--- 75,79 ----
  }

! void addvec_simd(t_float *dst,const t_float *src,int n)
  {
  #ifdef USEVECLIB
***************
*** 99,121 ****
  }

! t_int *zero_perf_ve_gcc(t_int *w)
  {
!     zerovec_ve_gcc((t_float *)w[1],w[2]);
      return w+3;
  }

! t_int *copy_perf_ve_gcc(t_int *w)
  {
!     copyvec_ve_gcc((t_float *)w[2],(const t_float *)w[1],w[3]);
  	return w+4;
  }

! t_int *sig_tilde_perf_ve_gcc(t_int *w)
  {
!     setvec_ve_gcc((t_float *)w[2],*(const t_float *)w[1],w[3]);
  	return w+4;
  }

! t_int *plus_perf_ve_gcc(t_int *w)
  {
  #ifdef USEVECLIB
--- 99,132 ----
  }

! /* no bad float testing for PPC: the test* variants below are plain copy/add */
! void testcopyvec_simd(t_float *dst,const t_float *src,int n)
  {
! 	copyvec_simd(dst,src,n);
! }
! 
! void testaddvec_simd(t_float *dst,const t_float *src,int n) /* PPC: no per-sample test, forwards to addvec_simd */
! {
! 	addvec_simd(dst,src,n);
! }
! 
! t_int *zero_perf_simd(t_int *w) /* w[1] = dst pointer, w[2] = n; returns w+3 */
! {
!     zerovec_simd((t_float *)w[1],w[2]);
      return w+3;
  }

! t_int *copy_perf_simd(t_int *w) /* w[1] = src pointer, w[2] = dst pointer, w[3] = n; returns w+4 */
  {
!     copyvec_simd((t_float *)w[2],(const t_float *)w[1],w[3]);
  	return w+4;
  }

! t_int *sig_tilde_perf_simd(t_int *w) /* w[1] = pointer to the fill value, w[2] = dst pointer, w[3] = n; returns w+4 */
  {
!     setvec_simd((t_float *)w[2],*(const t_float *)w[1],w[3]);
  	return w+4;
  }

! t_int *plus_perf_simd(t_int *w)
  {
  #ifdef USEVECLIB
***************
*** 147,151 ****
  }

! t_int *scalarplus_perf_ve_gcc(t_int *w)
  {
      const t_float *src = (const t_float *)w[1];
--- 158,162 ----
  }

! t_int *scalarplus_perf_simd(t_int *w)
  {
      const t_float *src = (const t_float *)w[1];
***************
*** 173,177 ****
  }

! t_int *minus_perf_ve_gcc(t_int *w)
  {
  #if 0 //def USEVECLIB
--- 184,188 ----
  }

! t_int *minus_perf_simd(t_int *w)
  {
  #if 0 //def USEVECLIB
***************
*** 204,208 ****
  }

! t_int *scalarminus_perf_ve_gcc(t_int *w)
  {
      const t_float *src = (const t_float *)w[1];
--- 215,219 ----
  }

! t_int *scalarminus_perf_simd(t_int *w)
  {
      const t_float *src = (const t_float *)w[1];
***************
*** 230,234 ****
  }

! t_int *times_perf_ve_gcc(t_int *w)
  {
  #ifdef USEVECLIB
--- 241,245 ----
  }

! t_int *times_perf_simd(t_int *w)
  {
  #ifdef USEVECLIB
***************
*** 261,265 ****
  }

! t_int *scalartimes_perf_ve_gcc(t_int *w)
  {
  #ifdef USEVECLIB
--- 272,276 ----
  }

! t_int *scalartimes_perf_simd(t_int *w)
  {
  #ifdef USEVECLIB
***************
*** 292,296 ****
  }

! t_int *sqr_perf_ve_gcc(t_int *w)
  {
  #ifdef USEVECLIB
--- 303,307 ----
  }

! t_int *sqr_perf_simd(t_int *w)
  {
  #ifdef USEVECLIB
***************
*** 322,326 ****
  }

! t_int *over_perf_ve_gcc(t_int *w)
  {
      const t_float *src1 = (const t_float *)w[1];
--- 333,337 ----
  }

! t_int *over_perf_simd(t_int *w)
  {
      const t_float *src1 = (const t_float *)w[1];
***************
*** 384,388 ****
  }

! t_int *scalarover_perf_ve_gcc(t_int *w)
  {
      t_float *dst = (t_float *)w[3];
--- 395,399 ----
  }

! t_int *scalarover_perf_simd(t_int *w)
  {
      t_float *dst = (t_float *)w[3];
***************
*** 432,436 ****
  }

! t_int *min_perf_ve_gcc(t_int *w)
  {
      const t_float *src1 = (const t_float *)w[1];
--- 443,447 ----
  }

! t_int *min_perf_simd(t_int *w)
  {
      const t_float *src1 = (const t_float *)w[1];
***************
*** 458,462 ****
  }

! t_int *scalarmin_perf_ve_gcc(t_int *w)
  {
      const t_float *src = (const t_float *)w[1];
--- 469,473 ----
  }

! t_int *scalarmin_perf_simd(t_int *w)
  {
      const t_float *src = (const t_float *)w[1];
***************
*** 484,488 ****
  }

! t_int *max_perf_ve_gcc(t_int *w)
  {
      const t_float *src1 = (const t_float *)w[1];
--- 495,499 ----
  }

! t_int *max_perf_simd(t_int *w)
  {
      const t_float *src1 = (const t_float *)w[1];
***************
*** 510,514 ****
  }

! t_int *scalarmax_perf_ve_gcc(t_int *w)
  {
      const t_float *src = (const t_float *)w[1];
--- 521,525 ----
  }

! t_int *scalarmax_perf_simd(t_int *w)
  {
      const t_float *src = (const t_float *)w[1];
***************
*** 536,540 ****
  }

! t_int *clip_perf_ve_gcc(t_int *w)
  {
      const t_float *src = (const t_float *)w[1];
--- 547,551 ----
  }

! t_int *clip_perf_simd(t_int *w)
  {
      const t_float *src = (const t_float *)w[1];
***************
*** 588,592 ****
  }

! t_int *sigwrap_perf_ve_gcc(t_int *w)
  {
      const t_float *src = (const t_float *)w[1];
--- 599,603 ----
  }

! t_int *sigwrap_perf_simd(t_int *w)
  {
      const t_float *src = (const t_float *)w[1];
***************
*** 613,617 ****
  }

! t_int *sigsqrt_perf_ve_gcc(t_int *w)
  {
      const t_float *src = (const t_float *)w[1];
--- 624,628 ----
  }

! t_int *sigsqrt_perf_simd(t_int *w)
  {
      const t_float *src = (const t_float *)w[1];
***************
*** 669,673 ****

  /* Attention: there's a difference to sigsqrt_perform which delivers non-zero for a zero input... i don't think the latter is intended... */
! t_int *sigrsqrt_perf_ve_gcc(t_int *w)
  {
      const t_float *src = (const t_float *)w[1];
--- 680,684 ----

  /* Attention: there's a difference to sigsqrt_perform which delivers non-zero for a zero input... i don't think the latter is intended... */
! t_int *sigrsqrt_perf_simd(t_int *w)
  {
      const t_float *src = (const t_float *)w[1];
***************
*** 728,730 ****
--- 739,747 ----
  }

+ t_int simd_runtime_check(void) /* this build unconditionally returns 1 */
+ {
+ 	return 1;
+ }
+ 
+ 
  #endif

Index: m_pd.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/m_pd.h,v
retrieving revision 1.1.1.4.2.21
retrieving revision 1.1.1.4.2.22
diff -C2 -d -r1.1.1.4.2.21 -r1.1.1.4.2.22
*** m_pd.h	10 Oct 2004 18:16:31 -0000	1.1.1.4.2.21
--- m_pd.h	14 Oct 2004 11:01:33 -0000	1.1.1.4.2.22
***************
*** 577,580 ****
--- 577,608 ----
  /* } IOhannes */

+ 
+ /* tb: exporting basic simd coded dsp functions { */
+ 
+ /* vectorized, not simd functions*/
+ /* EXTERN void zerovec_8(t_float *dst,int n); */
+ /* EXTERN void setvec_8(t_float *dst,t_float v,int n); */
+ /* EXTERN void copyvec_8(t_float *dst,const t_float *src,int n); */
+ /* EXTERN void addvec_8(t_float *dst,const t_float *src,int n); */
+ /* EXTERN void testcopyvec_8(t_float *dst,const t_float *src,int n); */
+ /* EXTERN void testaddvec_8(t_float *dst,const t_float *src,int n); */
+ 
+ /* vectorized, simd functions *
+  * dst and src are assumed to be aligned */
+ /* EXTERN t_int simd_runtime_check(void); */
+ /* EXTERN void zerovec_simd(t_float *dst,int n); */
+ /* EXTERN void setvec_simd(t_float *dst,t_float v,int n); */
+ /* EXTERN void copyvec_simd(t_float *dst,const t_float *src,int n); */
+ /* EXTERN void addvec_simd(t_float *dst,const t_float *src,int n); */
+ /* EXTERN void testcopyvec_simd(t_float *dst,const t_float *src,int n); */
+ /* EXTERN void testaddvec_simd(t_float *dst,const t_float *src,int n); */
+ 
+ EXTERN int simd_check1(t_int n, t_float* ptr1);
+ EXTERN int simd_check2(t_int n, t_float* ptr1, t_float* ptr2);
+ EXTERN int simd_check3(t_int n, t_float* ptr1, t_float* ptr2, t_float* ptr3);
+ 
+ /* } tb */
+ 
+ 
  /* ----------------------- utility functions for signals -------------- */
  EXTERN float mtof(float);

Index: m_simd_sse_vc.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_vc.c,v
retrieving revision 1.1.2.8
retrieving revision 1.1.2.9
diff -C2 -d -r1.1.2.8 -r1.1.2.9
*** m_simd_sse_vc.c	7 Oct 2004 17:56:38 -0000	1.1.2.8
--- m_simd_sse_vc.c	14 Oct 2004 11:01:34 -0000	1.1.2.9
***************
*** 10,14 ****

  /* dst is assumed to be aligned */
! void zerovec_sse_vc(t_float *dst,int n)
  {
  	__asm {
--- 10,14 ----

  /* dst is assumed to be aligned */
! void zerovec_simd(t_float *dst,int n)
  {
  	__asm {
***************
*** 33,37 ****

  /* dst is assumed to be aligned */
! void setvec_sse_vc(t_float *dst,t_float v,int n)
  {
  	__asm {
--- 33,37 ----

  /* dst is assumed to be aligned */
! void setvec_simd(t_float *dst,t_float v,int n)
  {
  	__asm {
***************
*** 59,63 ****

  /* dst and src are assumed to be aligned */
! void copyvec_sse_vc(t_float *dst,const t_float *src,int n)
  {
  	__asm {
--- 59,63 ----

  /* dst and src are assumed to be aligned */
! void copyvec_simd(t_float *dst,const t_float *src,int n)
  {
  	__asm {
***************
*** 92,96 ****
  }

! void addvec_sse_vc(t_float *dst,const t_float *src,int n)
  {
  	__asm {
--- 92,96 ----
  }

! void addvec_simd(t_float *dst,const t_float *src,int n)
  {
  	__asm {
***************
*** 139,180 ****
  }

! void testcopyvec(t_float *dst,const t_float *src,int n)
  {
!     while(n--) {
!         *dst = (PD_BIGORSMALL(*src) ? 0 : *src);
! 	    dst++;
! 	    src++;
! 	}
  }

! void testaddvec(t_float *dst,const t_float *src,int n)
  {
!     while(n--) {
!         *dst += (PD_BIGORSMALL(*src) ? 0 : *src);
! 	    dst++;
! 	    src++;
! 	}
  }

! t_int *zero_perf_sse_vc(t_int *w)
  {
!     zerovec_sse_vc((t_float *)w[1],w[2]);
      return w+3;
  }

! t_int *copy_perf_sse_vc(t_int *w)
  {
!     copyvec_sse_vc((t_float *)w[2],(const t_float *)w[1],w[3]);
      return w+4;
  }

! t_int *sig_tilde_perf_sse_vc(t_int *w)
  {
!     setvec_sse_vc((t_float *)w[2],*(const t_float *)w[1],w[3]);
      return w+4;
  }

! t_int *plus_perf_sse_vc(t_int *w)
  {
  	__asm {
--- 139,172 ----
  }

! void testcopyvec_simd(t_float *dst,const t_float *src,int n) /* no SIMD version here: forwards to the non-SIMD testcopyvec_8 */
  {
! 	testcopyvec_8(dst,src,n);
  }

! void testaddvec_simd(t_float *dst,const t_float *src,int n) /* no SIMD version here: forwards to the non-SIMD testaddvec_8 */
  {
! 	testaddvec_8(dst,src,n);
  }

! t_int *zero_perf_simd(t_int *w) /* w[1] = dst pointer, w[2] = n; returns w+3 */
  {
!     zerovec_simd((t_float *)w[1],w[2]);
      return w+3;
  }

! t_int *copy_perf_simd(t_int *w) /* w[1] = src pointer, w[2] = dst pointer, w[3] = n; returns w+4 */
  {
!     copyvec_simd((t_float *)w[2],(const t_float *)w[1],w[3]);
      return w+4;
  }

! t_int *sig_tilde_perf_simd(t_int *w) /* w[1] = pointer to the fill value, w[2] = dst pointer, w[3] = n; returns w+4 */
  {
!     setvec_simd((t_float *)w[2],*(const t_float *)w[1],w[3]);
      return w+4;
  }

! t_int *plus_perf_simd(t_int *w)
  {
  	__asm {
***************
*** 228,232 ****
  }

! t_int *scalarplus_perf_sse_vc(t_int *w)
  {
  	__asm {
--- 220,224 ----
  }

! t_int *scalarplus_perf_simd(t_int *w)
  {
  	__asm {
***************
*** 276,280 ****
  }

! t_int *minus_perf_sse_vc(t_int *w)
  {
  	__asm {
--- 268,272 ----
  }

! t_int *minus_perf_simd(t_int *w)
  {
  	__asm {
***************
*** 331,335 ****
  }

! t_int *scalarminus_perf_sse_vc(t_int *w)
  {
  	__asm {
--- 323,327 ----
  }

! t_int *scalarminus_perf_simd(t_int *w)
  {
  	__asm {
***************
*** 378,382 ****
  }

! t_int *times_perf_sse_vc(t_int *w)
  {
  	__asm {
--- 370,374 ----
  }

! t_int *times_perf_simd(t_int *w)
  {
  	__asm {
***************
*** 434,438 ****
  }

! t_int *scalartimes_perf_sse_vc(t_int *w)
  {
  	__asm {
--- 426,430 ----
  }

! t_int *scalartimes_perf_simd(t_int *w)
  {
  	__asm {
***************
*** 482,486 ****

  /* no checking for 0 yet!! */
! t_int *over_perf_sse_vc(t_int *w)
  {
  	__asm {
--- 474,478 ----

  /* no checking for 0 yet!! */
! t_int *over_perf_simd(t_int *w)
  {
  	__asm {
***************
*** 538,542 ****
  }

! t_int *scalarover_perf_sse_vc(t_int *w)
  {
      static const float one = 1.f;
--- 530,534 ----
  }

! t_int *scalarover_perf_simd(t_int *w)
  {
      static const float one = 1.f;
***************
*** 599,603 ****
  }

! t_int *max_perf_sse_vc(t_int *w)
  {
  	__asm {
--- 591,595 ----
  }

! t_int *max_perf_simd(t_int *w)
  {
  	__asm {
***************
*** 654,658 ****
  }

! t_int *scalarmax_perf_sse_vc(t_int *w)
  {
  	__asm {
--- 646,650 ----
  }

! t_int *scalarmax_perf_simd(t_int *w)
  {
  	__asm {
***************
*** 701,705 ****
  }

! t_int *min_perf_sse_vc(t_int *w)
  {
  	__asm {
--- 693,697 ----
  }

! t_int *min_perf_simd(t_int *w)
  {
  	__asm {
***************
*** 754,758 ****
  }

! t_int *scalarmin_perf_sse_vc(t_int *w)
  {
  	__asm {
--- 746,750 ----
  }

! t_int *scalarmin_perf_simd(t_int *w)
  {
  	__asm {

Index: m_simd_sse_gcc.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_gcc.h,v
retrieving revision 1.1.2.14
retrieving revision 1.1.2.15
diff -C2 -d -r1.1.2.14 -r1.1.2.15
*** m_simd_sse_gcc.h	10 Oct 2004 21:59:53 -0000	1.1.2.14
--- m_simd_sse_gcc.h	14 Oct 2004 11:01:34 -0000	1.1.2.15
***************
*** 9,90 ****
  #include "m_pd.h"

! /* SIMD functions for SSE with GCC */
! void zerovec_sse_gcc(t_float *dst,int n);
! void setvec_sse_gcc(t_float *dst,t_float v,int n);
! void copyvec_sse_gcc(t_float *dst,const t_float *src,int n);
! void addvec_sse_gcc(t_float *dst,const t_float *src,int n);
! //void testcopyvec_sse_gcc(t_float *dst,const t_float *src,int n);
! //void testaddvec_sse_gcc(t_float *dst,const t_float *src,int n);
! 
! t_int *zero_perf_sse_gcc(t_int *w);
! t_int *copy_perf_sse_gcc(t_int *w);
! t_int *sig_tilde_perf_sse_gcc(t_int *w);
! t_int *plus_perf_sse_gcc(t_int *w);
! t_int *scalarplus_perf_sse_gcc(t_int *w);
! t_int *minus_perf_sse_gcc(t_int *w);
! t_int *scalarminus_perf_sse_gcc(t_int *w);
! t_int *times_perf_sse_gcc(t_int *w);
! t_int *scalartimes_perf_sse_gcc(t_int *w);
! t_int *sqr_perf_sse_gcc(t_int *w);
! t_int *over_perf_sse_gcc(t_int *w);
! t_int *scalarover_perf_sse_gcc(t_int *w);
! t_int *max_perf_sse_gcc(t_int *w);
! t_int *scalarmax_perf_sse_gcc(t_int *w);
! t_int *min_perf_sse_gcc(t_int *w);
! t_int *scalarmin_perf_sse_gcc(t_int *w);
! t_int *clip_perf_sse_gcc(t_int *w);
! t_int *sigwrap_perf_sse_gcc(t_int *w);
! t_int *sigsqrt_perf_sse_gcc(t_int *w);
! t_int *sigrsqrt_perf_sse_gcc(t_int *w);
! 
! 
! #define zerovec                 zerovec_sse_gcc
! #define setvec                  setvec_sse_gcc
! #define copyvec                 copyvec_sse_gcc
! #define addvec                  addvec_sse_gcc
! 
! /* if we switch on DAZ, we shouldn't have problems with denormals 
!  * any more ... tb
!  */
! #ifdef DAZ 
! #define testcopyvec             testcopyvec_8 /* SIMD not implemented */
! #define testaddvec              testaddvec_8 /* SIMD not implemented */
! #else /* DAZ */
! #define testcopyvec             copyvec_sse_gcc
! #define testaddvec              addvec_sse_gcc
! #endif /* DAZ */

  /* functions in d_ugen.c */
! #define zero_perf_simd          zero_perf_sse_gcc

  /* functions in d_dac.c */
! #define copy_perf_simd          copy_perf_sse_gcc

  /* functions in d_ctl.c */
! #define sig_tilde_perf_simd     sig_tilde_perf_sse_gcc

  /* functions in d_arithmetic.c */
! #define plus_perf_simd          plus_perf_sse_gcc
! #define scalarplus_perf_simd    scalarplus_perf_sse_gcc
! #define minus_perf_simd         minus_perf_sse_gcc
! #define scalarminus_perf_simd   scalarminus_perf_sse_gcc
! #define times_perf_simd         times_perf_sse_gcc
! #define scalartimes_perf_simd   scalartimes_perf_sse_gcc
! #define sqr_perf_simd           sqr_perf_sse_gcc //8 /* SIMD not implemented */
! #define over_perf_simd          over_perf_sse_gcc
! #define scalarover_perf_simd    scalarover_perf_sse_gcc
! #define min_perf_simd           min_perf_sse_gcc
! #define scalarmin_perf_simd     scalarmin_perf_sse_gcc
! #define max_perf_simd           max_perf_sse_gcc
! #define scalarmax_perf_simd     scalarmax_perf_sse_gcc

  /* functions in d_math.c */
! #define clip_perf_simd          clip_perform  /* SIMD not implemented */
! #define sigwrap_perf_simd       sigwrap_perform  /* SIMD not implemented */
! #define sigsqrt_perf_simd       sigsqrt_perform  /* SIMD not implemented */
! #define sigrsqrt_perf_simd      sigrsqrt_perform /* SIMD not implemented */
! 
! /* TB: runtime check */
! extern t_int simd_runtime_check(void);

  #endif /* __M_SIMD_SSE_GCC_H */
--- 9,47 ----
  #include "m_pd.h"

! /* SIMD functions for SSE with gcc */

  /* functions in d_ugen.c */
! t_int *zero_perf_simd(t_int *w);

  /* functions in d_dac.c */
! t_int *copy_perf_simd(t_int *w);

  /* functions in d_ctl.c */
! t_int *sig_tilde_perf_simd(t_int *w);

  /* functions in d_arithmetic.c */
! t_int *plus_perf_simd(t_int *w);
! t_int *scalarplus_perf_simd(t_int *w);
! t_int *minus_perf_simd(t_int *w);
! t_int *scalarminus_perf_simd(t_int *w);
! t_int *times_perf_simd(t_int *w);
! t_int *scalartimes_perf_simd(t_int *w);
! t_int *sqr_perf_simd(t_int *w);
! t_int *over_perf_simd(t_int *w);
! t_int *scalarover_perf_simd(t_int *w);
! t_int *max_perf_simd(t_int *w);
! t_int *scalarmax_perf_simd(t_int *w);
! t_int *min_perf_simd(t_int *w);
! t_int *scalarmin_perf_simd(t_int *w);
! t_int *clip_perf_simd(t_int *w);
! t_int *sigwrap_perf_simd(t_int *w);
! t_int *sigsqrt_perf_simd(t_int *w);
! t_int *sigrsqrt_perf_simd(t_int *w);

  /* functions in d_math.c */
! #define clip_perf_simd             clip_perform     /* SIMD not implemented */
! #define sigwrap_perf_simd          sigwrap_perform  /* SIMD not implemented */
! #define sigsqrt_perf_simd          sigsqrt_perform  /* SIMD not implemented */
! #define sigrsqrt_perf_simd         sigrsqrt_perform /* SIMD not implemented */

  #endif /* __M_SIMD_SSE_GCC_H */

Index: m_simd_sse_vc.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_vc.h,v
retrieving revision 1.1.2.8
retrieving revision 1.1.2.9
diff -C2 -d -r1.1.2.8 -r1.1.2.9
*** m_simd_sse_vc.h	7 Oct 2004 17:56:38 -0000	1.1.2.8
--- m_simd_sse_vc.h	14 Oct 2004 11:01:34 -0000	1.1.2.9
***************
*** 10,72 ****

  /* SIMD functions for SSE with VC++ */
- void zerovec_sse_vc(t_float *dst,int n);
- void setvec_sse_vc(t_float *dst,t_float v,int n);
- void copyvec_sse_vc(t_float *dst,const t_float *src,int n);
- void addvec_sse_vc(t_float *dst,const t_float *src,int n);
- void testcopyvec_sse_vc(t_float *dst,const t_float *src,int n);
- void testaddvec_sse_vc(t_float *dst,const t_float *src,int n);
- 
- t_int *zero_perf_sse_vc(t_int *w);
- t_int *copy_perf_sse_vc(t_int *w);
- t_int *sig_tilde_perf_sse_vc(t_int *w);
- t_int *plus_perf_sse_vc(t_int *w);
- t_int *scalarplus_perf_sse_vc(t_int *w);
- t_int *minus_perf_sse_vc(t_int *w);
- t_int *scalarminus_perf_sse_vc(t_int *w);
- t_int *times_perf_sse_vc(t_int *w);
- t_int *scalartimes_perf_sse_vc(t_int *w);
- t_int *sqr_perf_sse_vc(t_int *w);
- t_int *over_perf_sse_vc(t_int *w);
- t_int *scalarover_perf_sse_vc(t_int *w);
- t_int *max_perf_sse_vc(t_int *w);
- t_int *scalarmax_perf_sse_vc(t_int *w);
- t_int *min_perf_sse_vc(t_int *w);
- t_int *scalarmin_perf_sse_vc(t_int *w);
- t_int *clip_perf_sse_vc(t_int *w);
- t_int *sigwrap_perf_sse_vc(t_int *w);
- t_int *sigsqrt_perf_sse_vc(t_int *w);
- t_int *sigrsqrt_perf_sse_vc(t_int *w);
- 
- 
- #define zerovec                 zerovec_sse_vc
- #define setvec                  setvec_sse_vc
- #define copyvec                 copyvec_sse_vc
- #define addvec                  addvec_sse_vc
- #define testcopyvec             testcopyvec_sse_vc
- #define testaddvec              testaddvec_sse_vc

  /* functions in d_ugen.c */
! #define zero_perf_simd          zero_perf_sse_vc

  /* functions in d_dac.c */
! #define copy_perf_simd          copy_perf_sse_vc

  /* functions in d_ctl.c */
! #define sig_tilde_perf_simd     sig_tilde_perf_sse_vc

  /* functions in d_arithmetic.c */
- #define plus_perf_simd          plus_perf_sse_vc
- #define scalarplus_perf_simd    scalarplus_perf_sse_vc
- #define minus_perf_simd         minus_perf_sse_vc
- #define scalarminus_perf_simd   scalarminus_perf_sse_vc
- #define times_perf_simd         times_perf_sse_vc
- #define scalartimes_perf_simd   scalartimes_perf_sse_vc
  #define sqr_perf_simd           sqr_perf8 /* SIMD not implemented */
  #define over_perf_simd          over_perf8 /* SIMD not implemented */
- #define scalarover_perf_simd    scalarover_perf_sse_vc
- #define min_perf_simd           min_perf_sse_vc
- #define scalarmin_perf_simd     scalarmin_perf_sse_vc
- #define max_perf_simd           max_perf_sse_vc
- #define scalarmax_perf_simd     scalarmax_perf_sse_vc

  /* functions in d_math.c */
--- 10,46 ----

  /* SIMD functions for SSE with VC++ */

  /* functions in d_ugen.c */
! t_int *zero_perf_simd(t_int *w);

  /* functions in d_dac.c */
! t_int *copy_perf_simd(t_int *w);

  /* functions in d_ctl.c */
! t_int *sig_tilde_perf_simd(t_int *w);
! 
! /* functions in d_arithmetic.c */
! t_int *plus_perf_simd(t_int *w);
! t_int *scalarplus_perf_simd(t_int *w);
! t_int *minus_perf_simd(t_int *w);
! t_int *scalarminus_perf_simd(t_int *w);
! t_int *times_perf_simd(t_int *w);
! t_int *scalartimes_perf_simd(t_int *w);
! t_int *sqr_perf_simd(t_int *w);
! t_int *over_perf_simd(t_int *w);
! t_int *scalarover_perf_simd(t_int *w);
! t_int *max_perf_simd(t_int *w);
! t_int *scalarmax_perf_simd(t_int *w);
! t_int *min_perf_simd(t_int *w);
! t_int *scalarmin_perf_simd(t_int *w);
! t_int *clip_perf_simd(t_int *w);
! t_int *sigwrap_perf_simd(t_int *w);
! t_int *sigsqrt_perf_simd(t_int *w);
! t_int *sigrsqrt_perf_simd(t_int *w);
! 

  /* functions in d_arithmetic.c */
  #define sqr_perf_simd           sqr_perf8 /* SIMD not implemented */
  #define over_perf_simd          over_perf8 /* SIMD not implemented */

  /* functions in d_math.c */
***************
*** 76,82 ****
  #define sigrsqrt_perf_simd      sigrsqrt_perform /* SIMD not implemented */

- /* TB: runtime check */
- t_int simd_runtime_check();
- 
- 
  #endif /* __M_SIMD_SSE_VC_H */
--- 50,52 ----

Index: m_simd_def.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_def.h,v
retrieving revision 1.1.2.7
retrieving revision 1.1.2.8
diff -C2 -d -r1.1.2.7 -r1.1.2.8
*** m_simd_def.h	7 Oct 2004 17:56:38 -0000	1.1.2.7
--- m_simd_def.h	14 Oct 2004 11:01:34 -0000	1.1.2.8
***************
*** 12,23 ****
  /* These are the functions that can be coded for SIMD */

- /* functions in m_simd_def.c */
- #define zerovec                 zerovec_8
- #define setvec                  setvec_8
- #define copyvec                 copyvec_8
- #define addvec                  addvec_8
- #define testcopyvec             testcopyvec_8
- #define testaddvec              testaddvec_8
- 
  /* functions in d_ugen.c */
  #define zero_perf_simd          zero_perf8
--- 12,15 ----
***************
*** 50,56 ****
  #define sigrsqrt_perf_simd      sigrsqrt_perform /* SIMD not implemented */

- /* TB: runtime check */
- #define simd_runtime_check()    0
- 
- 
  #endif /* __M_SIMD_DEF_H */
--- 42,44 ----

Index: m_simd_ve_gcc.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_ve_gcc.h,v
retrieving revision 1.1.2.8
retrieving revision 1.1.2.9
diff -C2 -d -r1.1.2.8 -r1.1.2.9
*** m_simd_ve_gcc.h	7 Oct 2004 17:56:38 -0000	1.1.2.8
--- m_simd_ve_gcc.h	14 Oct 2004 11:01:34 -0000	1.1.2.9
***************
*** 10,80 ****

  /* SIMD functions for VE with GCC */
- void zerovec_ve_gcc(t_float *dst,int n);
- void setvec_ve_gcc(t_float *dst,t_float v,int n);
- void copyvec_ve_gcc(t_float *dst,const t_float *src,int n);
- void addvec_ve_gcc(t_float *dst,const t_float *src,int n);
- 
- t_int *zero_perf_ve_gcc(t_int *w);
- t_int *copy_perf_ve_gcc(t_int *w);
- t_int *sig_tilde_perf_ve_gcc(t_int *w);
- t_int *plus_perf_ve_gcc(t_int *w);
- t_int *scalarplus_perf_ve_gcc(t_int *w);
- t_int *minus_perf_ve_gcc(t_int *w);
- t_int *scalarminus_perf_ve_gcc(t_int *w);
- t_int *times_perf_ve_gcc(t_int *w);
- t_int *scalartimes_perf_ve_gcc(t_int *w);
- t_int *sqr_perf_ve_gcc(t_int *w);
- t_int *over_perf_ve_gcc(t_int *w);
- t_int *scalarover_perf_ve_gcc(t_int *w);
- t_int *max_perf_ve_gcc(t_int *w);
- t_int *scalarmax_perf_ve_gcc(t_int *w);
- t_int *min_perf_ve_gcc(t_int *w);
- t_int *scalarmin_perf_ve_gcc(t_int *w);
- t_int *clip_perf_ve_gcc(t_int *w);
- t_int *sigwrap_perf_ve_gcc(t_int *w);
- t_int *sigsqrt_perf_ve_gcc(t_int *w);
- t_int *sigrsqrt_perf_ve_gcc(t_int *w);
- 
- 
- #define zerovec                 zerovec_ve_gcc
- #define setvec                  setvec_ve_gcc
- #define copyvec                 copyvec_ve_gcc
- #define addvec                  addvec_ve_gcc
- /* no bad float testing for PPC! */
- #define testcopyvec             copyvec_ve_gcc
- #define testaddvec              addvec_ve_gcc

  /* functions in d_ugen.c */
! #define zero_perf_simd          zero_perf_ve_gcc

  /* functions in d_dac.c */
! #define copy_perf_simd          copy_perf_ve_gcc

  /* functions in d_ctl.c */
! #define sig_tilde_perf_simd     sig_tilde_perf_ve_gcc

  /* functions in d_arithmetic.c */
! #define plus_perf_simd          plus_perf_ve_gcc
! #define scalarplus_perf_simd    scalarplus_perf_ve_gcc
! #define minus_perf_simd         minus_perf_ve_gcc
! #define scalarminus_perf_simd   scalarminus_perf_ve_gcc
! #define times_perf_simd         times_perf_ve_gcc
! #define scalartimes_perf_simd   scalartimes_perf_ve_gcc
! #define sqr_perf_simd           sqr_perf_ve_gcc
! #define over_perf_simd          over_perf_ve_gcc
! #define scalarover_perf_simd    scalarover_perf_ve_gcc
! #define min_perf_simd           min_perf_ve_gcc
! #define scalarmin_perf_simd     scalarmin_perf_ve_gcc
! #define max_perf_simd           max_perf_ve_gcc
! #define scalarmax_perf_simd     scalarmax_perf_ve_gcc

  /* functions in d_math.c */
! #define clip_perf_simd          clip_perf_ve_gcc
! #define sigwrap_perf_simd       sigwrap_perf_ve_gcc
! #define sigsqrt_perf_simd       sigsqrt_perf_ve_gcc
! #define sigrsqrt_perf_simd      sigrsqrt_perf_ve_gcc
! 
! /* TB: runtime check */
! #define simd_runtime_check()    1

  #endif /* __M_SIMD_VE_GCC_H */
--- 10,43 ----

  /* SIMD functions for VE with GCC */

  /* functions in d_ugen.c */
! t_int *zero_perf_simd(t_int *w);

  /* functions in d_dac.c */
! t_int *copy_perf_simd(t_int *w);

  /* functions in d_ctl.c */
! t_int *sig_tilde_perf_simd(t_int *w);

  /* functions in d_arithmetic.c */
! t_int *plus_perf_simd(t_int *w);
! t_int *scalarplus_perf_simd(t_int *w);
! t_int *minus_perf_simd(t_int *w);
! t_int *scalarminus_perf_simd(t_int *w);
! t_int *times_perf_simd(t_int *w);
! t_int *scalartimes_perf_simd(t_int *w);
! t_int *sqr_perf_simd(t_int *w);
! t_int *over_perf_simd(t_int *w);
! t_int *scalarover_perf_simd(t_int *w);
! t_int *max_perf_simd(t_int *w);
! t_int *scalarmax_perf_simd(t_int *w);
! t_int *min_perf_simd(t_int *w);
! t_int *scalarmin_perf_simd(t_int *w);

  /* functions in d_math.c */
! t_int *clip_perf_simd(t_int *w);
! t_int *sigwrap_perf_simd(t_int *w);
! t_int *sigsqrt_perf_simd(t_int *w);
! t_int *sigrsqrt_perf_simd(t_int *w);

  #endif /* __M_SIMD_VE_GCC_H */

Index: d_global.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/d_global.c,v
retrieving revision 1.1.1.2.8.8
retrieving revision 1.1.1.2.8.9
diff -C2 -d -r1.1.1.2.8.8 -r1.1.1.2.8.9
*** d_global.c	7 Oct 2004 17:56:37 -0000	1.1.1.2.8.8
--- d_global.c	14 Oct 2004 11:01:33 -0000	1.1.1.2.8.9
***************
*** 42,48 ****
      while (n--)
      {
! 	    *out = (PD_BIGORSMALL(*in) ? 0 : *in);
! 	    out++;
! 	    in++;
      }
      return (w+4);
--- 42,48 ----
      while (n--)
      {
! 	*out = (PD_BIGORSMALL(*in) ? 0 : *in);
! 	out++;
! 	in++;
      }
      return (w+4);
***************
*** 52,56 ****
  static t_int *sigsend_perfsimd(t_int *w)
  {
!     testcopyvec((t_float *)w[2],(t_float *)w[1],w[3]);
      return (w+4);
  }
--- 52,56 ----
  static t_int *sigsend_perfsimd(t_int *w)
  {
!     testcopyvec_simd((t_float *)w[2],(t_float *)w[1],w[3]);
      return (w+4);
  }
***************
*** 142,148 ****
      t_float *in = x->x_wherefrom;
      if(in) 
!         copyvec((t_float *)w[2],in,w[3]);
      else 
!         zerovec((t_float *)w[2],w[3]);
      return (w+4);
  }
--- 142,148 ----
      t_float *in = x->x_wherefrom;
      if(in) 
!         copyvec_simd((t_float *)w[2],in,w[3]);
      else 
!         zerovec_simd((t_float *)w[2],w[3]);
      return (w+4);
  }
***************
*** 245,250 ****
  static t_int *sigcatch_perfsimd(t_int *w)
  {
!     copyvec((t_float *)w[2],(t_float *)w[1],w[3]);
!     zerovec((t_float *)w[1],w[3]);
      return (w+4);
  }
--- 245,250 ----
  static t_int *sigcatch_perfsimd(t_int *w)
  {
!     copyvec_simd((t_float *)w[2],(t_float *)w[1],w[3]);
!     zerovec_simd((t_float *)w[1],w[3]);
      return (w+4);
  }
***************
*** 324,328 ****
      t_sigthrow *x = (t_sigthrow *)(w[1]);
      t_float *out = x->x_whereto;
!     if(out) testaddvec(out,(t_float *)w[2],w[3]);
      return (w+4);
  }
--- 324,328 ----
      t_sigthrow *x = (t_sigthrow *)(w[1]);
      t_float *out = x->x_whereto;
!     if(out) testaddvec_simd(out,(t_float *)w[2],w[3]);
      return (w+4);
  }

Index: d_array.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/d_array.c,v
retrieving revision 1.1.1.3.2.6
retrieving revision 1.1.1.3.2.7
diff -C2 -d -r1.1.1.3.2.6 -r1.1.1.3.2.7
*** d_array.c	7 Oct 2004 23:11:56 -0000	1.1.1.3.2.6
--- d_array.c	14 Oct 2004 11:01:33 -0000	1.1.1.3.2.7
***************
*** 295,299 ****
      int n = (int)(w[4]);    
      int maxindex;
!     float *buf = x->x_vec, *fp;
      int i;

--- 295,299 ----
      int n = (int)(w[4]);    
      int maxindex;
!     float *buf = x->x_vec/* , *fp unused ... tb */;
      int i;

***************
*** 419,423 ****
  	int index = findex;
  	float frac,  a,  b,  c,  d, cminusb;
! 	static int count;
  	if (index < 1)
  	    index = 1, frac = 0;
--- 419,423 ----
  	int index = findex;
  	float frac,  a,  b,  c,  d, cminusb;
! 	/* static int count; tb: unused */
  	if (index < 1)
  	    index = 1, frac = 0;
***************
*** 594,600 ****
      int mask = fnpoints - 1;
      float conv = fnpoints * x->x_conv;
!     int maxindex;
      float *tab = x->x_vec, *addr;
!     int i;
      double dphase = fnpoints * x->x_phase + UNITBIT32;

--- 594,600 ----
      int mask = fnpoints - 1;
      float conv = fnpoints * x->x_conv;
!     /* int maxindex; unused ... tb */
      float *tab = x->x_vec, *addr;
!     /* int i; unused ... tb */
      double dphase = fnpoints * x->x_phase + UNITBIT32;

***************
*** 742,746 ****
          }
      else if(SIMD_CHECK2(n,in,dest))
!         testcopyvec(dest,in,n);
      else
          testcopyvec_8(dest,in,n);
--- 742,746 ----
          }
      else if(SIMD_CHECK2(n,in,dest))
!         testcopyvec_simd(dest,in,n);
      else
          testcopyvec_8(dest,in,n);
***************
*** 758,762 ****
  static void tabsend_dsp(t_tabsend *x, t_signal **sp)
  {
!     int i, vecsize;
      t_garray *a;

--- 758,762 ----
  static void tabsend_dsp(t_tabsend *x, t_signal **sp)
  {
!     int /* i, unused ... tb */vecsize;
      t_garray *a;

***************
*** 839,845 ****
      t_float *from = x->x_vec;
      if(from) 
!         copyvec((t_float *)(w[2]),from,w[3]);
      else 
!         zerovec((t_float *)(w[2]),w[3]);
      return (w+4);
  }
--- 839,845 ----
      t_float *from = x->x_vec;
      if(from) 
!         copyvec_simd((t_float *)(w[2]),from,w[3]);
      else 
!         zerovec_simd((t_float *)(w[2]),w[3]);
      return (w+4);
  }
***************
*** 1032,1039 ****
  static void tabwrite_float(t_tabwrite *x, t_float f)
  {
!     int i, vecsize;
      t_garray *a;
      t_float *vec;
! 
      if (!(a = (t_garray *)pd_findbyclass(x->x_arrayname, garray_class)))
      	pd_error(x, "%s: no such array", x->x_arrayname->s_name);
--- 1032,1039 ----
  static void tabwrite_float(t_tabwrite *x, t_float f)
  {
!     int /* i, unused ... tb*/vecsize;
      t_garray *a;
      t_float *vec;
!     
      if (!(a = (t_garray *)pd_findbyclass(x->x_arrayname, garray_class)))
      	pd_error(x, "%s: no such array", x->x_arrayname->s_name);