Wed Oct 6 21:24:17 CEST 2004

Update of /cvsroot/pure-data/pd/src
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv18870

Modified Files:
      Tag: devel_0_37
	d_array.c d_ctl.c d_global.c d_ugen.c g_array.c g_io.c 
	m_memory.c m_pd.h m_simd.h m_simd_def.h m_simd_sse_gcc.h 
	m_simd_sse_vc.c m_simd_sse_vc.h m_simd_ve_gcc.c 
	m_simd_ve_gcc.h 
Log Message:
checked back in the old versions since my editor screwed up the formatting....

Index: d_ctl.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/d_ctl.c,v
retrieving revision 1.1.1.3.2.10
retrieving revision 1.1.1.3.2.11
diff -C2 -d -r1.1.1.3.2.10 -r1.1.1.3.2.11
*** d_ctl.c	6 Oct 2004 18:20:03 -0000	1.1.1.3.2.10
--- d_ctl.c	6 Oct 2004 19:24:14 -0000	1.1.1.3.2.11
***************
*** 34,44 ****
  static t_int *sig_tilde_perf8(t_int *w)
  {
!     t_float in = *(t_float *)(w[1]);
      t_float *out = (t_float *)(w[2]);
      int n = (int)(w[3]);

!     for(; n; n -= 8, out += 8)
      {
!     	out[0] = out[1] = out[2] = out[3] = out[4] = out[5] = out[6] = out[7] = in;
      }
      return (w+4);
--- 34,51 ----
  static t_int *sig_tilde_perf8(t_int *w)
  {
!     t_float f = *(t_float *)(w[1]);
      t_float *out = (t_float *)(w[2]);
      int n = (int)(w[3]);

!     for (; n; n -= 8, out += 8)
      {
!     	out[0] = f;
!     	out[1] = f;
!     	out[2] = f;
!     	out[3] = f;
!     	out[4] = f;
!     	out[5] = f;
!     	out[6] = f;
!     	out[7] = f;
      }
      return (w+4);
***************
*** 129,136 ****
      {
      	float g = x->x_value = x->x_target;
!         if(n&7)
!     	    while (n--) *out++ = g;
!         else
!             setvec_8(out,g,n);
      }
      return (w+4);
--- 136,180 ----
      {
      	float g = x->x_value = x->x_target;
!     	while (n--) 
! 	    *out++ = g;
!     }
!     return (w+4);
! }
! 
! /* TB: vectorized version */
! static t_int *line_tilde_perf8(t_int *w)
! {
!     t_line *x = (t_line *)(w[1]);
!     t_float *out = (t_float *)(w[2]);
!     int n = (int)(w[3]);
!     float f = x->x_value;
! 
!     if (PD_BIGORSMALL(f))
! 	    x->x_value = f = 0;
!     if (x->x_retarget)
!     {
!     	int nticks = x->x_inletwas * x->x_dspticktomsec;
!     	if (!nticks) nticks = 1;
!     	x->x_ticksleft = nticks;
!     	x->x_biginc = (x->x_target - x->x_value)/(float)nticks;
!     	x->x_inc = x->x_1overn * x->x_biginc;
!     	x->x_retarget = 0;
!     }
!     if (x->x_ticksleft)
!     {
!     	float f = x->x_value;
!     	while (n--) *out++ = f, f += x->x_inc;
!     	x->x_value += x->x_biginc;
!     	x->x_ticksleft--;
!     }
!     else
!     {
! 	float f = x->x_value = x->x_target;
! 	for (; n; n -= 8, out += 8)
! 	{
! 	    out[0] = f; out[1] = f; out[2] = f; out[3] = f; 
! 	    out[4] = f; out[5] = f; out[6] = f; out[7] = f;
! 	}
! 
      }
      return (w+4);
***************
*** 161,165 ****
--- 205,212 ----
  static void line_tilde_dsp(t_line *x, t_signal **sp)
  {
+     if(sp[0]->s_n&7)
  	dsp_add(line_tilde_perform, 3, x, sp[0]->s_vec, sp[0]->s_n);
+     else
+ 	dsp_add(line_tilde_perf8, 3, x, sp[0]->s_vec, sp[0]->s_n);
      x->x_1overn = 1./sp[0]->s_n;
      x->x_dspticktomsec = sp[0]->s_sr / (1000 * sp[0]->s_n);

Index: m_simd_ve_gcc.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_ve_gcc.c,v
retrieving revision 1.1.2.5
retrieving revision 1.1.2.6
diff -C2 -d -r1.1.2.5 -r1.1.2.6
*** m_simd_ve_gcc.c	6 Oct 2004 18:52:29 -0000	1.1.2.5
--- m_simd_ve_gcc.c	6 Oct 2004 19:24:14 -0000	1.1.2.6
***************
*** 39,46 ****
  #define LoadValue(where) LoadUnaligned((const float *)(where))

! void zerovec_ve_gcc(t_float *dst,int n)
  {
  	const vector float zero = (vector float)(0);
! 	for(n >>= 4; n--; dst += 16) {
  		vec_st(zero, 0,dst);
  		vec_st(zero,16,dst);
--- 39,49 ----
  #define LoadValue(where) LoadUnaligned((const float *)(where))

! t_int *zero_perf_ve_gcc(t_int *w)
  {
  	const vector float zero = (vector float)(0);
!     t_float *dst = (t_float *)w[1];
!     int n = w[2]>>4;
! 
! 	for(; n--; dst += 16) {
  		vec_st(zero, 0,dst);
  		vec_st(zero,16,dst);
***************
*** 48,67 ****
  		vec_st(zero,48,dst);
  	}
  }

! void setvec_ve_gcc(t_float *dst,t_float v,int n)
  {
! 	const vector float arg = LoadValue(&v);
! 	for(n >>= 4; n--; dst += 16) {
! 		vec_st(arg, 0,dst);
! 		vec_st(arg,16,dst);
! 		vec_st(arg,32,dst);
! 		vec_st(arg,48,dst);
! 	}
! }

! void copyvec_ve_gcc(t_float *dst,const t_float *src,int n)
! {
! 	for(n >>= 4; n--; src += 16,dst += 16) {
  		vector float a1 = vec_ld( 0,src);
  		vector float a2 = vec_ld(16,src);
--- 51,64 ----
  		vec_st(zero,48,dst);
  	}
+     return w+3;
  }

! t_int *copy_perf_ve_gcc(t_int *w)
  {
!     const t_float *src = (const t_float *)w[1];
!     t_float *dst = (t_float *)w[2];
!     int n = w[3]>>4;

! 	for(; n--; src += 16,dst += 16) {
  		vector float a1 = vec_ld( 0,src);
  		vector float a2 = vec_ld(16,src);
***************
*** 73,111 ****
  		vec_st(a4,48,dst);
  	}
- }
- 
- void addvec_ve_gcc(t_float *dst,const t_float *src,int n)
- {
- #ifdef USEVECLIB
- 	vadd(dst,1,src,1,dst,1,n);
- #else
- 	for(n >>= 4; n--; src += 16,dst += 16) {
- 		vector float a1 = vec_ld( 0,dst),b1 = vec_ld( 0,src);
- 		vector float a2 = vec_ld(16,dst),b2 = vec_ld(16,src);
- 		vector float a3 = vec_ld(32,dst),b3 = vec_ld(32,src);
- 		vector float a4 = vec_ld(48,dst),b4 = vec_ld(48,src);
- 		
- 		a1 = vec_add(a1,b1);
- 		a2 = vec_add(a2,b2);
- 		a3 = vec_add(a3,b3);
- 		a4 = vec_add(a4,b4);
- 
- 		vec_st(a1, 0,dst);
- 		vec_st(a2,16,dst);
- 		vec_st(a3,32,dst);
- 		vec_st(a4,48,dst);
- 	}
- #endif
- }
- 
- t_int *zero_perf_ve_gcc(t_int *w)
- {
-     zerovec_ve_gcc((t_float *)w[1],w[2]);
-     return w+3;
- }
- 
- t_int *copy_perf_ve_gcc(t_int *w)
- {
-     copyvec_ve_gcc((t_float *)w[2],(const t_float *)w[1],w[3]);
  	return w+4;
  }
--- 70,73 ----
***************
*** 113,118 ****
  t_int *sig_tilde_perf_ve_gcc(t_int *w)
  {
!     setvec_ve_gcc((t_float *)w[2],*(const t_float *)w[1],w[3]);
! 	return w+4;
  }

--- 75,89 ----
  t_int *sig_tilde_perf_ve_gcc(t_int *w)
  {
! 	const vector float arg = LoadValue(w[1]);
!     t_float *dst = (t_float *)w[2];
!     int n = w[3]>>4;
! 
! 	for(; n--; dst += 16) {
! 		vec_st(arg, 0,dst);
! 		vec_st(arg,16,dst);
! 		vec_st(arg,32,dst);
! 		vec_st(arg,48,dst);
! 	}
!     return w+4;
  }

***************
*** 175,180 ****
  t_int *minus_perf_ve_gcc(t_int *w)
  {
! #if 0 //def USEVECLIB
!     /* vsub is buggy for some OSX versions! */
  	vsub((const t_float *)w[1],1,(const t_float *)w[2],1,(t_float *)w[3],1,w[4]);
  #else
--- 146,150 ----
  t_int *minus_perf_ve_gcc(t_int *w)
  {
! #ifdef USEVECLIB
  	vsub((const t_float *)w[1],1,(const t_float *)w[2],1,(t_float *)w[3],1,w[4]);
  #else

Index: m_simd.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd.h,v
retrieving revision 1.1.2.6
retrieving revision 1.1.2.7
diff -C2 -d -r1.1.2.6 -r1.1.2.7
*** m_simd.h	6 Oct 2004 18:20:04 -0000	1.1.2.6
--- m_simd.h	6 Oct 2004 19:24:14 -0000	1.1.2.7
***************
*** 7,17 ****
  #define __M_SIMD_H

- /* general vector functions */
- void zerovec_8(t_float *dst,int n);
- void setvec_8(t_float *dst,t_float v,int n);
- void copyvec_8(t_float *dst,const t_float *src,int n);
- void addvec_8(t_float *dst,const t_float *src,int n);
- void testcopyvec_8(t_float *dst,const t_float *src,int n);
- void testaddvec_8(t_float *dst,const t_float *src,int n);

  #ifdef DONTUSESIMD
--- 7,10 ----

Index: m_pd.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/m_pd.h,v
retrieving revision 1.1.1.4.2.18
retrieving revision 1.1.1.4.2.19
diff -C2 -d -r1.1.1.4.2.18 -r1.1.1.4.2.19
*** m_pd.h	6 Oct 2004 18:20:04 -0000	1.1.1.4.2.18
--- m_pd.h	6 Oct 2004 19:24:14 -0000	1.1.1.4.2.19
***************
*** 251,259 ****
  EXTERN void *resizebytes(void *x, size_t oldsize, size_t newsize);

- /* T.Grill - functions for aligned memory (according to CPU SIMD architecture) */
- EXTERN void *getalignedbytes(size_t nbytes);
- EXTERN void freealignedbytes(void *x,size_t nbytes);
- EXTERN void *resizealignedbytes(void *x,size_t oldsize, size_t newsize);
- 
  /* -------------------- atoms ----------------------------- */

--- 251,254 ----

Index: g_array.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/g_array.c,v
retrieving revision 1.1.1.3.2.11
retrieving revision 1.1.1.3.2.12
diff -C2 -d -r1.1.1.3.2.11 -r1.1.1.3.2.12
*** g_array.c	6 Oct 2004 18:20:03 -0000	1.1.1.3.2.11
--- g_array.c	6 Oct 2004 19:24:14 -0000	1.1.1.3.2.12
***************
*** 51,56 ****
      x->a_n = 1;
      x->a_elemsize = sizeof(t_word) * template->t_n;
!     /* T.Grill - get aligned memory - good for tabsend~ and tabreceive~ */
!     x->a_vec = (char *)getalignedbytes(x->a_elemsize); 
      	/* note here we blithely copy a gpointer instead of "setting" a
  	new one; this gpointer isn't accounted for and needn't be since
--- 51,55 ----
      x->a_n = 1;
      x->a_elemsize = sizeof(t_word) * template->t_n;
!     x->a_vec = (char *)getbytes(x->a_elemsize);
      	/* note here we blithely copy a gpointer instead of "setting" a
  	new one; this gpointer isn't accounted for and needn't be since
***************
*** 73,77 ****
      elemsize = sizeof(t_word) * template->t_n;

!     x->a_vec = (char *)resizealignedbytes(x->a_vec, oldn * elemsize,n * elemsize);
      x->a_n = n;
      if (n > oldn)
--- 72,77 ----
      elemsize = sizeof(t_word) * template->t_n;

!     x->a_vec = (char *)resizebytes(x->a_vec, oldn * elemsize,
!     	n * elemsize);
      x->a_n = n;
      if (n > oldn)
***************
*** 101,106 ****
  	word_free(wp, scalartemplate);
      }
!     /* T.Grill - changed to aligned allocation */
!     freealignedbytes(x->a_vec, x->a_elemsize * x->a_n);
      freebytes(x, sizeof *x);
  }
--- 101,105 ----
  	word_free(wp, scalartemplate);
      }
!     freebytes(x->a_vec, x->a_elemsize * x->a_n);
      freebytes(x, sizeof *x);
  }
***************
*** 214,219 ****
      x->x_n = n;
      x->x_elemsize = nwords * sizeof(t_word);
!     /* T.Grill - changed to aligned allocation */
!     x->x_vec = getalignedbytes(x->x_n * x->x_elemsize);
      memset(x->x_vec, 0, x->x_n * x->x_elemsize);
      	/* LATER should check that malloc */
--- 213,217 ----
      x->x_n = n;
      x->x_elemsize = nwords * sizeof(t_word);
!     x->x_vec = getbytes(x->x_n * x->x_elemsize);
      memset(x->x_vec, 0, x->x_n * x->x_elemsize);
      	/* LATER should check that malloc */
***************
*** 332,337 ****
      while (x2 = pd_findbyclass(gensym("#A"), garray_class))
      	pd_unbind(x2, gensym("#A"));
!     /* T.Grill - changed to aligned allocation */
!     freealignedbytes(x->x_vec, x->x_n * x->x_elemsize);
  }

--- 330,334 ----
      while (x2 = pd_findbyclass(gensym("#A"), garray_class))
      	pd_unbind(x2, gensym("#A"));
!     freebytes(x->x_vec, x->x_n * x->x_elemsize);
  }

***************
*** 1572,1576 ****
      if (n < 1) n = 1;
      elemsize = template_findbyname(x->x_templatesym)->t_n * sizeof(t_word);
!     nvec = resizealignedbytes(x->x_vec, was * elemsize, n * elemsize);
      if (!nvec)
      {
--- 1569,1573 ----
      if (n < 1) n = 1;
      elemsize = template_findbyname(x->x_templatesym)->t_n * sizeof(t_word);
!     nvec = t_resizebytes(x->x_vec, was * elemsize, n * elemsize);
      if (!nvec)
      {

Index: m_simd_sse_vc.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_vc.c,v
retrieving revision 1.1.2.6
retrieving revision 1.1.2.7
diff -C2 -d -r1.1.2.6 -r1.1.2.7
*** m_simd_sse_vc.c	6 Oct 2004 18:20:04 -0000	1.1.2.6
--- m_simd_sse_vc.c	6 Oct 2004 19:24:14 -0000	1.1.2.7
***************
*** 9,46 ****
  #if defined(NT) && defined(_MSC_VER) && !(defined DONTUSESIMD)

! /* dst is assumed to be aligned */
! void zerovec_sse_vc(t_float *dst,int n)
! {
! 	__asm {
! 		mov		edx,dword ptr [dst] /* out */
! 		xorps   xmm0,xmm0 /* zero value */
! 
! 		mov		ecx,[n] /* n */
! 		shr		ecx,4
! 
! 		/* should we do more loop unrolling? */
! loopa:
! 		movaps	xmmword ptr[edx],xmm0
! 		movaps	xmmword ptr[edx+4*TYPE t_float],xmm0
! 		movaps	xmmword ptr[edx+8*TYPE t_float],xmm0
! 		movaps	xmmword ptr[edx+12*TYPE t_float],xmm0
! 
! 		add		edx,16*TYPE t_float
! 		/* very short loop - let's assume that branch prediction does its job nicely */
! 		loop	loopa
! 	}
! }
! 
! /* dst is assumed to be aligned */
! void setvec_sse_vc(t_float *dst,t_float v,int n)
  {
  	__asm {
! 		mov		edx,dword ptr [dst] /* out */

!         /* load value ... this is not very clean.. */
! 		movss	xmm0,xmmword ptr [v]
! 		shufps	xmm0,xmm0,0

! 		mov		ecx,[n] /* n */
  		shr		ecx,4

--- 9,22 ----
  #if defined(NT) && defined(_MSC_VER) && !(defined DONTUSESIMD)

! t_int *zero_perf_sse_vc(t_int *w)
  {
  	__asm {
! 		mov		esi,dword ptr [w]

! 		mov		edx,dword ptr [esi + 1*TYPE t_int] /* out */
! 		/* load zero */
! 		xorps	xmm0,xmm0

! 		mov		ecx,[esi + 2*TYPE t_int] /* n */
  		shr		ecx,4

***************
*** 56,70 ****
  		loop	loopa
  	}
  }

! /* dst and src are assumed to be aligned */
! void copyvec_sse_vc(t_float *dst,const t_float *src,int n)
  {
  	__asm {
! 		mov		ebx,dword ptr [src] /* in1 */
  /*		prefetcht0 [ebx] */
! 		mov		edx,dword ptr [dst] /* out */

! 		mov		ecx,dword ptr [n] /* n */
  		shr		ecx,4

--- 32,49 ----
  		loop	loopa
  	}
+     return (w+3);
  }

! 
! t_int *copy_perf_sse_vc(t_int *w)
  {
  	__asm {
! 		mov		esi,dword ptr [w]
! 
! 		mov		ebx,dword ptr [esi + 1*TYPE t_int] /* in1 */
  /*		prefetcht0 [ebx] */
! 		mov		edx,dword ptr [esi + 2*TYPE t_int] /* out */

! 		mov		ecx,dword ptr [esi + 3*TYPE t_int] /* n */
  		shr		ecx,4

***************
*** 90,176 ****
  		loop	loopa 
  	}
  }

! void addvec_sse_vc(t_float *dst,const t_float *src,int n)
  {
  	__asm {
! 		mov		eax,dword ptr [src] /* in1 */
! /*      prefetcht0 [eax] prefetch first cache line */	
! 		mov		edx,dword ptr [dst] /* out */
! 		mov		ecx,dword ptr [n] /* n */
! 		shr		ecx,4 /* divide by 16 */

!         xor     esi,esi /* reset index */
! /*
! 		prefetcht0 [eax+8*TYPE t_float]
! 		prefetcht0 [ebx+8*TYPE t_float]
! */
! loopa:
! /*
!         prefetcht0 [eax+16*TYPE t_float]
! 		prefetcht0 [ebx+16*TYPE t_float]
! */
!         movaps	xmm0,xmmword ptr[edx+esi]
! 		movaps	xmm1,xmmword ptr[eax+esi]
! 		addps	xmm0,xmm1
! 		movaps	xmmword ptr[edx+esi],xmm0

! 		movaps	xmm2,xmmword ptr[edx+esi+4*TYPE t_float]
! 		movaps	xmm3,xmmword ptr[eax+esi+4*TYPE t_float]
! 		addps	xmm2,xmm3
! 		movaps	xmmword ptr[edx+esi+4*TYPE t_float],xmm2
! /*
!         prefetcht0 [eax+24*TYPE t_float]
! 		prefetcht0 [ebx+24*TYPE t_float]
! */
! 		movaps	xmm4,xmmword ptr[edx+esi+8*TYPE t_float]
! 		movaps	xmm5,xmmword ptr[eax+esi+8*TYPE t_float]
! 		addps	xmm4,xmm5
! 		movaps	xmmword ptr[edx+esi+8*TYPE t_float],xmm4

! 		movaps	xmm6,xmmword ptr[edx+esi+12*TYPE t_float]
! 		movaps	xmm7,xmmword ptr[eax+esi+12*TYPE t_float]
! 		addps	xmm6,xmm7
! 		movaps	xmmword ptr[edx+esi+12*TYPE t_float],xmm6

!         add     esi,16*TYPE t_float
  		loop	loopa 
  	}
! }
! 
! void testcopyvec(t_float *dst,const t_float *src,int n)
! {
!     while(n--) {
!         *dst = (PD_BIGORSMALL(*src) ? 0 : *src);
! 	    dst++;
! 	    src++;
! 	}
! }
! 
! void testaddvec(t_float *dst,const t_float *src,int n)
! {
!     while(n--) {
!         *dst += (PD_BIGORSMALL(*src) ? 0 : *src);
! 	    dst++;
! 	    src++;
! 	}
! }
! 
! t_int *zero_perf_sse_vc(t_int *w)
! {
!     zerovec_sse_vc((t_float *)w[1],w[2]);
!     return w+3;
! }
! 
! t_int *copy_perf_sse_vc(t_int *w)
! {
!     copyvec_sse_vc((t_float *)w[2],(const t_float *)w[1],w[3]);
!     return w+4;
! }
! 
! t_int *sig_tilde_perf_sse_vc(t_int *w)
! {
!     setvec_sse_vc((t_float *)w[2],*(const t_float *)w[1],w[3]);
!     return w+4;
  }

--- 69,100 ----
  		loop	loopa 
  	}
+     return (w+4);
  }

! 
! t_int *sig_tilde_perf_sse_vc(t_int *w)
  {
  	__asm {
! 		mov		esi,dword ptr [w]

! 		mov		edx,dword ptr [esi + 2*TYPE t_int] /* out */
! 		/* load value ... this is not very clean.. */
! 		mov		eax,dword ptr [esi + 1*TYPE t_int] /* f */
! 		movss	xmm0,xmmword ptr [eax]
! 		shufps	xmm0,xmm0,0

! 		mov		ecx,dword ptr [esi + 3*TYPE t_int] /* n */
! 		shr		ecx,4

! loopa:
! 		movaps	xmmword ptr[edx],xmm0
! 		movaps	xmmword ptr[edx+4*TYPE t_float],xmm0
! 		movaps	xmmword ptr[edx+8*TYPE t_float],xmm0
! 		movaps	xmmword ptr[edx+12*TYPE t_float],xmm0

! 		add		edx,16*TYPE t_float
  		loop	loopa 
  	}
!     return (w+4);
  }

Index: g_io.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/g_io.c,v
retrieving revision 1.1.1.1.16.4
retrieving revision 1.1.1.1.16.5
diff -C2 -d -r1.1.1.1.16.4 -r1.1.1.1.16.5
*** g_io.c	6 Oct 2004 18:20:04 -0000	1.1.1.1.16.4
--- g_io.c	6 Oct 2004 19:24:14 -0000	1.1.1.1.16.5
***************
*** 24,28 ****

  #include "m_pd.h"
- #include "m_simd.h"
  #include "g_canvas.h"
  #include <string.h>
--- 24,27 ----
***************
*** 148,162 ****
  }

- /* T.Grill: SIMD version */
- t_int *vinlet_perfsimd(t_int *w)
- {
-     t_vinlet *x = (t_vinlet *)(w[1]);
-     t_float *in = x->x_read;
-     copyvec((t_float *)w[2],in,w[3]);
-     if (in == x->x_endbuf) in = x->x_buf;
-     x->x_read = in;
-     return (w+4);
- }
- 
  static void vinlet_dsp(t_vinlet *x, t_signal **sp)
  {
--- 147,150 ----
***************
*** 172,184 ****
      else
      {
!         const int n = outsig->s_n;
! 	    if(n&7)
! 	        dsp_add(vinlet_perform, 3, x, outsig->s_vec,n);
! 	    else if(SIMD_CHECK1(n,outsig->s_vec))
!             /* if the outsig->s_vec is aligned the x->x_read will also be... */
! 	        dsp_add(vinlet_perfsimd, 3, x, outsig->s_vec,n);
! 	    else
! 	        dsp_add(vinlet_perf8, 3, x, outsig->s_vec,n);
! 	    x->x_read = x->x_buf;
      }
  }
--- 160,168 ----
      else
      {
! 	if (sp[0]->s_n & 7)
! 	    dsp_add(vinlet_perform, 3, x, outsig->s_vec, outsig->s_n);
! 	else
! 	    dsp_add(vinlet_perf8, 3, x, outsig->s_vec, outsig->s_n);
! 	x->x_read = x->x_buf;
      }
  }
***************
*** 250,254 ****
  	{
      	    t_float *buf = x->x_buf;
!     	    buf = (t_float *)resizealignedbytes(buf,oldbufsize * sizeof(*buf),bufsize * sizeof(*buf));
      	    memset((char *)buf, 0, bufsize * sizeof(*buf));
      	    x->x_bufsize = bufsize;
--- 234,239 ----
  	{
      	    t_float *buf = x->x_buf;
!     	    t_freebytes(buf, oldbufsize * sizeof(*buf));
!     	    buf = (t_float *)t_getbytes(bufsize * sizeof(*buf));
      	    memset((char *)buf, 0, bufsize * sizeof(*buf));
      	    x->x_bufsize = bufsize;
***************
*** 294,298 ****
      x->x_canvas = canvas_getcurrent();
      x->x_inlet = canvas_addinlet(x->x_canvas, &x->x_obj.ob_pd, &s_signal,s);
!     x->x_endbuf = x->x_buf = (t_float *)getalignedbytes(0);
      x->x_bufsize = 0;
      x->x_directsignal = 0;
--- 279,283 ----
      x->x_canvas = canvas_getcurrent();
      x->x_inlet = canvas_addinlet(x->x_canvas, &x->x_obj.ob_pd, &s_signal,s);
!     x->x_endbuf = x->x_buf = (t_float *)getbytes(0);
      x->x_bufsize = 0;
      x->x_directsignal = 0;
***************
*** 425,436 ****
      if (tot < 5) post("-buf %x endbuf %x", x->x_buf, x->x_endbuf);
  #endif
-     t_float *end = x->x_endbuf;
      while (n--)
      {
      	*out++ += *in++;
!     	if (out == end) out = x->x_buf;
      }
      outwas += x->x_hop;
!     if (outwas >= end) outwas = x->x_buf;
      x->x_write = outwas;
      return (w+4);
--- 410,420 ----
      if (tot < 5) post("-buf %x endbuf %x", x->x_buf, x->x_endbuf);
  #endif
      while (n--)
      {
      	*out++ += *in++;
!     	if (out == x->x_endbuf) out = x->x_buf;
      }
      outwas += x->x_hop;
!     if (outwas >= x->x_endbuf) outwas = x->x_buf;
      x->x_write = outwas;
      return (w+4);
***************
*** 555,559 ****
  	{
      	    t_float *buf = x->x_buf;
!     	    buf = (t_float *)resizealignedbytes(buf,oldbufsize * sizeof(*buf),bufsize * sizeof(*buf));
      	    memset((char *)buf, 0, bufsize * sizeof(*buf));
      	    x->x_bufsize = bufsize;
--- 539,544 ----
  	{
      	    t_float *buf = x->x_buf;
!     	    t_freebytes(buf, oldbufsize * sizeof(*buf));
!     	    buf = (t_float *)t_getbytes(bufsize * sizeof(*buf));
      	    memset((char *)buf, 0, bufsize * sizeof(*buf));
      	    x->x_bufsize = bufsize;
***************
*** 606,610 ****
      	&x->x_obj.ob_pd, &s_signal);
      inlet_new(&x->x_obj, &x->x_obj.ob_pd, &s_signal, &s_signal);
!     x->x_endbuf = x->x_buf = (t_float *)getalignedbytes(0);
      x->x_bufsize = 0;

--- 591,595 ----
      	&x->x_obj.ob_pd, &s_signal);
      inlet_new(&x->x_obj, &x->x_obj.ob_pd, &s_signal, &s_signal);
!     x->x_endbuf = x->x_buf = (t_float *)getbytes(0);
      x->x_bufsize = 0;

Index: m_memory.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/m_memory.c,v
retrieving revision 1.1.1.2.2.1
retrieving revision 1.1.1.2.2.2
diff -C2 -d -r1.1.1.2.2.1 -r1.1.1.2.2.2
*** m_memory.c	6 Oct 2004 18:20:04 -0000	1.1.1.2.2.1
--- m_memory.c	6 Oct 2004 19:24:14 -0000	1.1.1.2.2.2
***************
*** 8,21 ****
  #include "m_imp.h"

- /* T.Grill - include SIMD functionality */
- #include "m_simd.h"
- /* T.Grill - bit alignment for signal vectors (must be a multiple of 8!) */
- /* if undefined no alignment occurs */
- #ifdef SIMD_BYTEALIGN
-     #define VECTORALIGNMENT (SIMD_BYTEALIGN*8)
- #else
-     #define VECTORALIGNMENT 128
- #endif
- 
  /* #define LOUD */
  #ifdef LOUD
--- 8,11 ----
***************
*** 90,124 ****
  }

- /* T.Grill - get aligned memory */
- void *getalignedbytes(size_t nbytes)
- {
- 	/* to align the region we also need some extra memory to save the original pointer location
- 		it is saved immediately before the aligned vector memory
- 	*/
-    	void *vec = getbytes(nbytes+ (VECTORALIGNMENT/8-1)+sizeof(void *));
- 	int alignment = ((unsigned long)vec+sizeof(void *))&(VECTORALIGNMENT/8-1);  /* get alignment of first possible signal vector byte */
- 	void *ret = (unsigned char *)vec+sizeof(void *)+(alignment == 0?0:VECTORALIGNMENT/8-alignment); /* calculate aligned pointer */
- 	*(void **)((unsigned char *)ret-sizeof(void *)) = vec; /* save original memory location */
-     return ret;
- }
- 
- /* T.Grill - free aligned vector memory */
- void freealignedbytes(void *ptr,size_t nbytes)
- {
- 	void *ori = *(void **)((unsigned char *)ptr-sizeof(void *)); /* get original memory location */
- 	freebytes(ori,nbytes+(VECTORALIGNMENT/8-1)+sizeof(void *));
- }
- 
- /* T.Grill - resize aligned vector memory */
- void *resizealignedbytes(void *ptr,size_t oldsize, size_t newsize)
- {
- 	void *ori = *(void **)((unsigned char *)ptr-sizeof(void *)); /* get original memory location */
-     void *vec = resizebytes(ori,oldsize+(VECTORALIGNMENT/8-1)+sizeof(void *),newsize+ (VECTORALIGNMENT/8-1)+sizeof(void *));
- 	int alignment = ((unsigned long)vec+sizeof(void *))&(VECTORALIGNMENT/8-1);  /* get alignment of first possible signal vector byte */
- 	void *ret = (unsigned char *)vec+sizeof(void *)+(alignment == 0?0:VECTORALIGNMENT/8-alignment); /* calculate aligned pointer */
- 	*(void **)((unsigned char *)ret-sizeof(void *)) = vec; /* save original memory location */
-     return ret;
- }
- 
  #ifdef DEBUGMEM
  #include <stdio.h>
--- 80,83 ----

Index: m_simd_sse_gcc.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_gcc.h,v
retrieving revision 1.1.2.7
retrieving revision 1.1.2.8
diff -C2 -d -r1.1.2.7 -r1.1.2.8
*** m_simd_sse_gcc.h	6 Oct 2004 18:20:04 -0000	1.1.2.7
--- m_simd_sse_gcc.h	6 Oct 2004 19:24:14 -0000	1.1.2.8
***************
*** 10,20 ****

  /* SIMD functions for SSE with GCC */
- //void zerovec_sse_gcc(t_float *dst,int n);
- //void setvec_sse_gcc(t_float *dst,t_float v,int n);
- //void copyvec_sse_gcc(t_float *dst,const t_float *src,int n);
- //void addvec_sse_gcc(t_float *dst,const t_float *src,int n);
- //void testcopyvec_sse_gcc(t_float *dst,const t_float *src,int n);
- //void testaddvec_sse_gcc(t_float *dst,const t_float *src,int n);
- 
  t_int *zero_perf_sse_gcc(t_int *w);
  t_int *copy_perf_sse_gcc(t_int *w);
--- 10,13 ----
***************
*** 39,49 ****

- #define zerovec                 zerovec_8 /* SIMD not implemented */
- #define setvec                  setvec_8 /* SIMD not implemented */
- #define copyvec                 copyvec_8 /* SIMD not implemented */
- #define addvec                  addvec_8 /* SIMD not implemented */
- #define testcopyvec             testcopyvec_8 /* SIMD not implemented */
- #define testaddvec              testaddvec_8 /* SIMD not implemented */
- 
  /* functions in d_ugen.c */
  #define zero_perf_simd          zero_perf_sse_gcc
--- 32,35 ----

Index: m_simd_sse_vc.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_vc.h,v
retrieving revision 1.1.2.6
retrieving revision 1.1.2.7
diff -C2 -d -r1.1.2.6 -r1.1.2.7
*** m_simd_sse_vc.h	6 Oct 2004 18:20:04 -0000	1.1.2.6
--- m_simd_sse_vc.h	6 Oct 2004 19:24:14 -0000	1.1.2.7
***************
*** 10,20 ****

  /* SIMD functions for SSE with VC++ */
- void zerovec_sse_vc(t_float *dst,int n);
- void setvec_sse_vc(t_float *dst,t_float v,int n);
- void copyvec_sse_vc(t_float *dst,const t_float *src,int n);
- void addvec_sse_vc(t_float *dst,const t_float *src,int n);
- void testcopyvec_sse_vc(t_float *dst,const t_float *src,int n);
- void testaddvec_sse_vc(t_float *dst,const t_float *src,int n);
- 
  t_int *zero_perf_sse_vc(t_int *w);
  t_int *copy_perf_sse_vc(t_int *w);
--- 10,13 ----
***************
*** 39,49 ****

- #define zerovec                 zerovec_sse_vc
- #define setvec                  setvec_sse_vc
- #define copyvec                 copyvec_sse_vc
- #define addvec                  addvec_sse_vc
- #define testcopyvec             testcopyvec_sse_vc
- #define testaddvec              testaddvec_sse_vc
- 
  /* functions in d_ugen.c */
  #define zero_perf_simd          zero_perf_sse_vc
--- 32,35 ----

Index: m_simd_def.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_def.h,v
retrieving revision 1.1.2.5
retrieving revision 1.1.2.6
diff -C2 -d -r1.1.2.5 -r1.1.2.6
*** m_simd_def.h	6 Oct 2004 18:20:04 -0000	1.1.2.5
--- m_simd_def.h	6 Oct 2004 19:24:14 -0000	1.1.2.6
***************
*** 12,23 ****
  /* These are the functions that can be coded for SIMD */

- /* functions in m_simd_def.c */
- #define zerovec                 zerovec_8
- #define setvec                  setvec_8
- #define copyvec                 copyvec_8
- #define addvec                  addvec_8
- #define testcopyvec             testcopyvec_8
- #define testaddvec              testaddvec_8
- 
  /* functions in d_ugen.c */
  #define zero_perf_simd          zero_perf8
--- 12,15 ----

Index: d_ugen.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/d_ugen.c,v
retrieving revision 1.1.1.2.2.7
retrieving revision 1.1.1.2.2.8
diff -C2 -d -r1.1.1.2.2.7 -r1.1.1.2.2.8
*** d_ugen.c	6 Oct 2004 18:20:03 -0000	1.1.1.2.2.7
--- d_ugen.c	6 Oct 2004 19:24:14 -0000	1.1.1.2.2.8
***************
*** 66,71 ****
  t_int *zero_perf8(t_int *w)
  {
!     zerovec_8((t_float *)w[1],w[2]);
!     return w+3;
  }

--- 66,84 ----
  t_int *zero_perf8(t_int *w)
  {
!     t_float *out = (t_float *)(w[1]);
!     int n = (int)(w[2]);
! 
!     for (; n; n -= 8, out += 8)
!     {
!     	out[0] = 0;
!     	out[1] = 0;
!     	out[2] = 0;
!     	out[3] = 0;
!     	out[4] = 0;
!     	out[5] = 0;
!     	out[6] = 0;
!     	out[7] = 0;
!     }
!     return (w+3);
  }

***************
*** 341,345 ****
  			t_freebytes(sig->s_vec, sig->s_n * sizeof (*sig->s_vec));
  #else
!             freealignedbytes(sig->s_vec, sig->s_n * sizeof (*sig->s_vec));
  #endif
  		}
--- 354,360 ----
  			t_freebytes(sig->s_vec, sig->s_n * sizeof (*sig->s_vec));
  #else
! 			/* T.Grill - free aligned vector memory */
! 			t_sample *ori = *(t_sample **)((unsigned char *)sig->s_vec-sizeof(t_sample *)); /* get original memory location */
! 			t_freebytes(ori, sig->s_n * sizeof (*ori)+(VECTORALIGNMENT/8-1)+sizeof(t_sample *));
  #endif
  		}
***************
*** 426,444 ****
      	    /* LATER figure out what to do for out-of-space here! */
      	ret = (t_signal *)t_getbytes(sizeof *ret);
! 	    if (n)
! 	    {
  #ifndef VECTORALIGNMENT
!        	    ret->s_vec = (t_sample *)getbytes(n * sizeof (*ret->s_vec));
  #else
! 		    /* T.Grill - make signal vectors aligned! */
!             ret->s_vec = (t_sample *)getalignedbytes(n * sizeof (*ret->s_vec));
  #endif
! 	        ret->s_isborrowed = 0;
!     	    }
! 	    else
! 	    {
! 	        ret->s_vec = 0;
! 	        ret->s_isborrowed = 1;
! 	    }
      	ret->s_nextused = signal_usedlist;
      	signal_usedlist = ret;
--- 441,466 ----
      	    /* LATER figure out what to do for out-of-space here! */
      	ret = (t_signal *)t_getbytes(sizeof *ret);
! 	if (n)
! 	{
  #ifndef VECTORALIGNMENT
!    	    ret->s_vec = (t_sample *)getbytes(n * sizeof (*ret->s_vec));
  #else
! 		/* T.Grill - make signal vectors aligned! */
! 
! 		/* to align the signal vector we also need some extra memory to save the original pointer location
! 			it is saved immediately before the aligned vector memory
! 		*/
!    	    t_sample *vec = (t_sample *)getbytes(n * sizeof (*vec)+ (VECTORALIGNMENT/8-1)+sizeof(t_sample *));
! 		int alignment = ((unsigned long)vec+sizeof(t_sample *))&(VECTORALIGNMENT/8-1);  /* get alignment of first possible signal vector byte */
! 		ret->s_vec = (t_sample *)((unsigned char *)vec+sizeof(t_sample *)+(alignment == 0?0:VECTORALIGNMENT/8-alignment)); /* calculate aligned pointer */
! 		*(t_sample **)((unsigned char *)ret->s_vec-sizeof(t_sample *)) = vec; /* save original memory location */
  #endif
! 	    ret->s_isborrowed = 0;
!     	}
! 	else
! 	{
! 	    ret->s_vec = 0;
! 	    ret->s_isborrowed = 1;
! 	}
      	ret->s_nextused = signal_usedlist;
      	signal_usedlist = ret;

Index: m_simd_ve_gcc.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_ve_gcc.h,v
retrieving revision 1.1.2.6
retrieving revision 1.1.2.7
diff -C2 -d -r1.1.2.6 -r1.1.2.7
*** m_simd_ve_gcc.h	6 Oct 2004 18:52:29 -0000	1.1.2.6
--- m_simd_ve_gcc.h	6 Oct 2004 19:24:14 -0000	1.1.2.7
***************
*** 10,18 ****

  /* SIMD functions for VE with GCC */
- void zerovec_ve_gcc(t_float *dst,int n);
- void setvec_ve_gcc(t_float *dst,t_float v,int n);
- void copyvec_ve_gcc(t_float *dst,const t_float *src,int n);
- void addvec_ve_gcc(t_float *dst,const t_float *src,int n);
- 
  t_int *zero_perf_ve_gcc(t_int *w);
  t_int *copy_perf_ve_gcc(t_int *w);
--- 10,13 ----
***************
*** 37,48 ****

- #define zerovec                 zerovec_ve_gcc
- #define setvec                  setvec_ve_gcc
- #define copyvec                 copyvec_ve_gcc
- #define addvec                  addvec_ve_gcc
- /* no bad float testing for PPC! */
- #define testcopyvec             copyvec_ve_gcc
- #define testaddvec              addvec_ve_gcc
- 
  /* functions in d_ugen.c */
  #define zero_perf_simd          zero_perf_ve_gcc
--- 32,35 ----

Index: d_global.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/d_global.c,v
retrieving revision 1.1.1.2.8.6
retrieving revision 1.1.1.2.8.7
diff -C2 -d -r1.1.1.2.8.6 -r1.1.1.2.8.7
*** d_global.c	6 Oct 2004 18:20:03 -0000	1.1.1.2.8.6
--- d_global.c	6 Oct 2004 19:24:14 -0000	1.1.1.2.8.7
***************
*** 6,10 ****

  #include "m_pd.h"
- #include "m_simd.h"
  #include <string.h>

--- 6,9 ----
***************
*** 29,33 ****
      x->x_sym = s;
      x->x_n = DEFSENDVS;
!     x->x_vec = (float *)getalignedbytes(DEFSENDVS * sizeof(float));
      memset((char *)(x->x_vec), 0, DEFSENDVS * sizeof(float));
      x->x_f = 0;
--- 28,32 ----
      x->x_sym = s;
      x->x_n = DEFSENDVS;
!     x->x_vec = (float *)getbytes(DEFSENDVS * sizeof(float));
      memset((char *)(x->x_vec), 0, DEFSENDVS * sizeof(float));
      x->x_f = 0;
***************
*** 42,70 ****
      while (n--)
      {
! 	    *out = (PD_BIGORSMALL(*in) ? 0 : *in);
! 	    out++;
! 	    in++;
      }
      return (w+4);
  }

- /* T.Grill - SIMD version */
- static t_int *sigsend_perfsimd(t_int *w)
- {
-     testcopyvec((t_float *)w[2],(t_float *)w[1],w[3]);
-     return (w+4);
- }
- 
  static void sigsend_dsp(t_sigsend *x, t_signal **sp)
  {
!     const int n = x->x_n;
!     if(n == sp[0]->s_n) {
!         if(SIMD_CHECK1(n,sp[0]->s_vec)) /* x->x_vec is aligned in any case */
!     	    dsp_add(sigsend_perfsimd, 3, sp[0]->s_vec, x->x_vec, n);
!         else
!     	    dsp_add(sigsend_perform, 3, sp[0]->s_vec, x->x_vec, n);
!     }
!     else 
!         error("sigsend %s: unexpected vector size", x->x_sym->s_name);
  }

--- 41,56 ----
      while (n--)
      {
! 	*out = (PD_BIGORSMALL(*in) ? 0 : *in);
! 	out++;
! 	in++;
      }
      return (w+4);
  }

  static void sigsend_dsp(t_sigsend *x, t_signal **sp)
  {
!     if (x->x_n == sp[0]->s_n)
!     	dsp_add(sigsend_perform, 3, sp[0]->s_vec, x->x_vec, sp[0]->s_n);
!     else error("sigsend %s: unexpected vector size", x->x_sym->s_name);
  }

***************
*** 72,76 ****
  {
      pd_unbind(&x->x_obj.ob_pd, x->x_sym);
!     freealignedbytes(x->x_vec,x->x_n* sizeof(float));
  }

--- 58,62 ----
  {
      pd_unbind(&x->x_obj.ob_pd, x->x_sym);
!     freebytes(x->x_vec, x->x_n * sizeof(float));
  }

***************
*** 128,148 ****
  {
      t_sigreceive *x = (t_sigreceive *)(w[1]);
      t_float *in = x->x_wherefrom;
!     if(in) 
!         copyvec_8((t_float *)w[2],in,w[3]);
!     else 
!         zerovec_8((t_float *)w[2],w[3]);
!     return (w+4);
! }
! 
! /* T.Grill - SIMD version */
! static t_int *sigreceive_perfsimd(t_int *w)
! {
!     t_sigreceive *x = (t_sigreceive *)(w[1]);
!     t_float *in = x->x_wherefrom;
!     if(in) 
!         copyvec((t_float *)w[2],in,w[3]);
!     else 
!         zerovec((t_float *)w[2],w[3]);
      return (w+4);
  }
--- 114,136 ----
  {
      t_sigreceive *x = (t_sigreceive *)(w[1]);
+     t_float *out = (t_float *)(w[2]);
+     int n = (int)(w[3]);
      t_float *in = x->x_wherefrom;
!     if (in)
!     {
! 	for (; n; n -= 8, in += 8, out += 8)
! 	{
! 	    out[0] = in[0]; out[1] = in[1]; out[2] = in[2]; out[3] = in[3]; 
! 	    out[4] = in[4]; out[5] = in[5]; out[6] = in[6]; out[7] = in[7]; 
! 	}
!     }
!     else
!     {
! 	for (; n; n -= 8, in += 8, out += 8)
! 	{
! 	    out[0] = 0; out[1] = 0; out[2] = 0; out[3] = 0; 
! 	    out[4] = 0; out[5] = 0; out[6] = 0; out[7] = 0; 
! 	}
!     }
      return (w+4);
  }
***************
*** 156,164 ****
      	if (sender->x_n == x->x_n)
      	    x->x_wherefrom = sender->x_vec;
! 	    else
! 	    {
! 	        pd_error(x, "receive~ %s: vector size mismatch", x->x_sym->s_name);
! 	        x->x_wherefrom = 0;
! 	    }
      }
      else
--- 144,152 ----
      	if (sender->x_n == x->x_n)
      	    x->x_wherefrom = sender->x_vec;
! 	else
! 	{
! 	    pd_error(x, "receive~ %s: vector size mismatch", x->x_sym->s_name);
! 	    x->x_wherefrom = 0;
! 	}
      }
      else
***************
*** 171,176 ****
  static void sigreceive_dsp(t_sigreceive *x, t_signal **sp)
  {
!     const int n = x->x_n;
!     if (sp[0]->s_n != n)
      {
      	pd_error(x, "receive~ %s: vector size mismatch", x->x_sym->s_name);
--- 159,163 ----
  static void sigreceive_dsp(t_sigreceive *x, t_signal **sp)
  {
!     if (sp[0]->s_n != x->x_n)
      {
      	pd_error(x, "receive~ %s: vector size mismatch", x->x_sym->s_name);
***************
*** 179,189 ****
      {
      	sigreceive_set(x, x->x_sym);
! 	    if(n&7)
! 	        dsp_add(sigreceive_perform, 3, x, sp[0]->s_vec, n);
!         else if(SIMD_CHECK1(n,sp[0]->s_vec))
!             /* x->x_wherefrom is aligned because we aligned the sender memory buffer */
! 	        dsp_add(sigreceive_perfsimd, 3, x, sp[0]->s_vec, n);
! 	    else
! 	        dsp_add(sigreceive_perf8, 3, x, sp[0]->s_vec, n);
      }
  }
--- 166,175 ----
      {
      	sigreceive_set(x, x->x_sym);
! 	if(sp[0]->s_n&7)
! 	    dsp_add(sigreceive_perform, 3,
! 		    x, sp[0]->s_vec, sp[0]->s_n);
! 	else
! 	    dsp_add(sigreceive_perf8, 3,
! 		    x, sp[0]->s_vec, sp[0]->s_n);
      }
  }
***************
*** 219,223 ****
      x->x_sym = s;
      x->x_n = DEFSENDVS;
!     x->x_vec = (float *)getalignedbytes(DEFSENDVS * sizeof(float));
      memset((char *)(x->x_vec), 0, DEFSENDVS * sizeof(float));
      outlet_new(&x->x_obj, &s_signal);
--- 205,209 ----
      x->x_sym = s;
      x->x_n = DEFSENDVS;
!     x->x_vec = (float *)getbytes(DEFSENDVS * sizeof(float));
      memset((char *)(x->x_vec), 0, DEFSENDVS * sizeof(float));
      outlet_new(&x->x_obj, &s_signal);
***************
*** 237,250 ****
  static t_int *sigcatch_perf8(t_int *w)
  {
!     copyvec_8((t_float *)w[2],(t_float *)w[1],w[3]);
!     zerovec_8((t_float *)w[1],w[3]);
!     return (w+4);
! }
! 
! /* T.Grill: SIMD catch function */
! static t_int *sigcatch_perfsimd(t_int *w)
! {
!     copyvec((t_float *)w[2],(t_float *)w[1],w[3]);
!     zerovec((t_float *)w[1],w[3]);
      return (w+4);
  }
--- 223,237 ----
  static t_int *sigcatch_perf8(t_int *w)
  {
!     t_float *in = (t_float *)(w[1]);
!     t_float *out = (t_float *)(w[2]);
!     int n = (int)(w[3]);
!     for (; n; n -= 8, in += 8, out += 8)
!     {
! 	out[0] = in[0]; out[1] = in[1]; out[2] = in[2]; out[3] = in[3]; 
! 	out[4] = in[4]; out[5] = in[5]; out[6] = in[6]; out[7] = in[7]; 
!     
! 	in[0] = 0; in[1] = 0; in[2] = 0; in[3] = 0; 
! 	in[4] = 0; in[5] = 0; in[6] = 0; in[7] = 0; 
!     }
      return (w+4);
  }
***************
*** 252,264 ****
  static void sigcatch_dsp(t_sigcatch *x, t_signal **sp)
  {
!     const int n = sp[0]->s_n;
!     if (x->x_n == n)
      {
!     	if(n&7)
!     	    dsp_add(sigcatch_perform, 3, x->x_vec, sp[0]->s_vec, n);
!     	else if(SIMD_CHECK2(n,x->x_vec,sp[0]->s_vec))
!     	    dsp_add(sigcatch_perfsimd, 3, x->x_vec, sp[0]->s_vec, n);
! 	    else
! 	        dsp_add(sigcatch_perf8, 3, x->x_vec, sp[0]->s_vec, n);
      }
      else error("sigcatch %s: unexpected vector size", x->x_sym->s_name);
--- 239,248 ----
  static void sigcatch_dsp(t_sigcatch *x, t_signal **sp)
  {
!     if (x->x_n == sp[0]->s_n)
      {
!     	if(sp[0]->s_n&7)
! 	dsp_add(sigcatch_perform, 3, x->x_vec, sp[0]->s_vec, sp[0]->s_n);
! 	else
! 	dsp_add(sigcatch_perf8, 3, x->x_vec, sp[0]->s_vec, sp[0]->s_n);
      }
      else error("sigcatch %s: unexpected vector size", x->x_sym->s_name);
***************
*** 268,272 ****
  {
      pd_unbind(&x->x_obj.ob_pd, x->x_sym);
!     freealignedbytes(x->x_vec,x->x_n* sizeof(float));
  }

--- 252,256 ----
  {
      pd_unbind(&x->x_obj.ob_pd, x->x_sym);
!     freebytes(x->x_vec, x->x_n * sizeof(float));
  }

***************
*** 310,331 ****
      {
      	while (n--)
! 	    {
!             *out += (PD_BIGORSMALL(*in) ? 0 : *in);
! 	        out++;
! 	        in++;
! 	    }
      }
      return (w+4);
  }

- /* T.Grill - SIMD version */
- static t_int *sigthrow_perfsimd(t_int *w)
- {
-     t_sigthrow *x = (t_sigthrow *)(w[1]);
-     t_float *out = x->x_whereto;
-     if(out) testaddvec(out,(t_float *)w[2],w[3]);
-     return (w+4);
- }
- 
  static void sigthrow_set(t_sigthrow *x, t_symbol *s)
  {
--- 294,306 ----
      {
      	while (n--)
! 	{
!     	    *out += (PD_BIGORSMALL(*in) ? 0 : *in);
! 	    out++;
! 	    in++;
! 	}
      }
      return (w+4);
  }

  static void sigthrow_set(t_sigthrow *x, t_symbol *s)
  {
***************
*** 351,356 ****
  static void sigthrow_dsp(t_sigthrow *x, t_signal **sp)
  {
!     const int n = x->x_n;
!     if (sp[0]->s_n != n)
      {
      	pd_error(x, "throw~ %s: vector size mismatch", x->x_sym->s_name);
--- 326,330 ----
  static void sigthrow_dsp(t_sigthrow *x, t_signal **sp)
  {
!     if (sp[0]->s_n != x->x_n)
      {
      	pd_error(x, "throw~ %s: vector size mismatch", x->x_sym->s_name);
***************
*** 359,367 ****
      {
      	sigthrow_set(x, x->x_sym);
!         if(SIMD_CHECK1(n,sp[0]->s_vec))
!             /* the memory of the catcher is aligned in any case */
!     	    dsp_add(sigthrow_perfsimd, 3, x, sp[0]->s_vec, n);
!         else
!     	    dsp_add(sigthrow_perform, 3, x, sp[0]->s_vec, n);
      }
  }
--- 333,338 ----
      {
      	sigthrow_set(x, x->x_sym);
!     	dsp_add(sigthrow_perform, 3,
!     	    x, sp[0]->s_vec, sp[0]->s_n);
      }
  }

Index: d_array.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/d_array.c,v
retrieving revision 1.1.1.3.2.3
retrieving revision 1.1.1.3.2.4
diff -C2 -d -r1.1.1.3.2.3 -r1.1.1.3.2.4
*** d_array.c	6 Oct 2004 18:20:03 -0000	1.1.1.3.2.3
--- d_array.c	6 Oct 2004 19:24:14 -0000	1.1.1.3.2.4
***************
*** 8,12 ****

  #include "m_pd.h"
! #include "m_simd.h"

  /* ------------------------- tabwrite~ -------------------------- */
--- 8,12 ----

  #include "m_pd.h"
! 

  /* ------------------------- tabwrite~ -------------------------- */
***************
*** 50,60 ****
      	if (nxfer > n) nxfer = n;
      	phase += nxfer;
!     	while (nxfer--) {
! 	        float f = *in++;
!     	    if (PD_BIGORSMALL(f)) f = 0;
! 	        *fp++ = f;
      	}
! 
!     	if (phase >= endphase)
      	{
      	    clock_delay(x->x_clock, 0);
--- 50,61 ----
      	if (nxfer > n) nxfer = n;
      	phase += nxfer;
!     	while (nxfer--)
! 	{
! 	    float f = *in++;
!     	    if (PD_BIGORSMALL(f))
! 	    	f = 0;
! 	    *fp++ = f;
      	}
! 	if (phase >= endphase)
      	{
      	    clock_delay(x->x_clock, 0);
***************
*** 185,190 ****
      	clock_delay(x->x_clock, 0);
      	x->x_phase = 0x7fffffff;
! 	    while (n3--)
! 	        *out++ = 0;
      }
      else x->x_phase = phase;
--- 186,191 ----
      	clock_delay(x->x_clock, 0);
      	x->x_phase = 0x7fffffff;
! 	while (n3--)
! 	    *out++ = 0;
      }
      else x->x_phase = phase;
***************
*** 731,745 ****
      if (!x->x_vec) goto bad;

!     if(n&7)
!         while(n--) {	
!     	    float f = *in++;
!     	    if (PD_BIGORSMALL(f)) f = 0;
! 	        *dest++ = f;
!         }
!     else if(SIMD_CHECK2(n,in,dest))
!         testcopyvec(dest,in,n);
!     else
!         testcopyvec_8(dest,in,n);
! 
      if (!i--)
      {
--- 732,742 ----
      if (!x->x_vec) goto bad;

!     while (n--)
!     {	
!     	float f = *in++;
!     	if (PD_BIGORSMALL(f))
! 	    f = 0;
! 	 *dest++ = f;
!     }
      if (!i--)
      {
***************
*** 819,844 ****
  }

- static t_int *tabreceive_perf8(t_int *w)
- {
-     t_tabreceive *x = (t_tabreceive *)(w[1]);
-     t_float *from = x->x_vec;
-     if (from) 
-         copyvec_8((t_float *)(w[2]),from,w[3]);
-     else 
-         zerovec_8((t_float *)(w[2]),w[3]);
-     return (w+4);
- }
- 
- static t_int *tabreceive_perfsimd(t_int *w)
- {
-     t_tabreceive *x = (t_tabreceive *)(w[1]);
-     t_float *from = x->x_vec;
-     if(from) 
-         copyvec((t_float *)(w[2]),from,w[3]);
-     else 
-         zerovec((t_float *)(w[2]),w[3]);
-     return (w+4);
- }
- 
  static void tabreceive_dsp(t_tabreceive *x, t_signal **sp)
  {
--- 816,819 ----
***************
*** 858,868 ****
      	if (n < vecsize) vecsize = n;
      	garray_usedindsp(a);
!         if(vecsize&7)
!     	    dsp_add(tabreceive_perform, 3, x, sp[0]->s_vec, vecsize);
!         else if(SIMD_CHECK1(vecsize,sp[0]->s_vec))
!             /* the array is aligned in any case */
!     	    dsp_add(tabreceive_perfsimd, 3, x, sp[0]->s_vec, vecsize);
!         else
!     	    dsp_add(tabreceive_perf8, 3, x, sp[0]->s_vec, vecsize);
      }
  }
--- 833,837 ----
      	if (n < vecsize) vecsize = n;
      	garray_usedindsp(a);
!     	dsp_add(tabreceive_perform, 3, x, sp[0]->s_vec, vecsize);
      }
  }