[PD-cvs] pd/src cvs_changes.txt,1.1.2.7,1.1.2.8 d_array.c,1.1.1.3.2.4,1.1.1.3.2.5 d_ctl.c,1.1.1.3.2.11,1.1.1.3.2.12 d_global.c,1.1.1.2.8.7,1.1.1.2.8.8 d_ugen.c,1.1.1.2.2.8,1.1.1.2.2.9 g_array.c,1.1.1.3.2.12,1.1.1.3.2.13 g_io.c,1.1.1.1.16.5,1.1.1.1.16.6 m_memory.c,1.1.1.2.2.2,1.1.1.2.2.3 m_pd.h,1.1.1.4.2.19,1.1.1.4.2.20 m_simd.h,1.1.2.7,1.1.2.8 m_simd_def.h,1.1.2.6,1.1.2.7 m_simd_sse_gcc.h,1.1.2.8,1.1.2.9 m_simd_sse_vc.c,1.1.2.7,1.1.2.8 m_simd_sse_vc.h,1.1.2.7,1.1.2.8 m_simd_ve_gcc.c,1.1.2.6,1.1.2.7 m_simd_ve_gcc.h,1.1.2.7,1.1.2.8 makefile.nt,1.1.1.3.2.2,1.1.1.3.2.3 s_inter.c,1.1.1.3.2.15,1.1.1.3.2.16

Thomas Grill xovo at users.sourceforge.net
Thu Oct 7 19:56:41 CEST 2004


Update of /cvsroot/pure-data/pd/src
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv16139

Modified Files:
      Tag: devel_0_37
	cvs_changes.txt d_array.c d_ctl.c d_global.c d_ugen.c 
	g_array.c g_io.c m_memory.c m_pd.h m_simd.h m_simd_def.h 
	m_simd_sse_gcc.h m_simd_sse_vc.c m_simd_sse_vc.h 
	m_simd_ve_gcc.c m_simd_ve_gcc.h makefile.nt s_inter.c 
Log Message:
more functions now use SIMD instructions
added functions for aligned memory allocation (also declared publicly in m_pd.h)
added new file m_simd.c !!!
updated makefile.nt


Index: cvs_changes.txt
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/cvs_changes.txt,v
retrieving revision 1.1.2.7
retrieving revision 1.1.2.8
diff -C2 -d -r1.1.2.7 -r1.1.2.8
*** cvs_changes.txt	6 Oct 2004 18:20:03 -0000	1.1.2.7
--- cvs_changes.txt	7 Oct 2004 17:56:37 -0000	1.1.2.8
***************
*** 20,24 ****
  - TB: threaded soundfiler (--enable-threadedsf)
  
! - TB: port of thomas grill's vectorized simd funcions for vc to gcc
  
  - TB: PD_BADFLOAT kills almost denormal numbers
--- 20,24 ----
  - TB: threaded soundfiler (--enable-threadedsf)
  
! - TB: port of thomas grill's vectorized simd functions for vc to gcc
  
  - TB: PD_BADFLOAT kills almost denormal numbers
***************
*** 26,34 ****
  - TB: improvements for line~, catch~ and receive~
  
- - TG: more objects now using SIMD instructions
-       added aligned memory functions
-       added new file m_simd.c
- 
  - TG: squaring
  
  - TG: prevent division by zero
--- 26,35 ----
  - TB: improvements for line~, catch~ and receive~
  
  - TG: squaring
  
  - TG: prevent division by zero
+ 
+ - TG: more objects now using SIMD instructions
+       added aligned memory functions
+       added new file m_simd.c
+       updated poor neglected makefile.nt

Index: makefile.nt
===================================================================
RCS file: /cvsroot/pure-data/pd/src/makefile.nt,v
retrieving revision 1.1.1.3.2.2
retrieving revision 1.1.1.3.2.3
diff -C2 -d -r1.1.1.3.2.2 -r1.1.1.3.2.3
*** makefile.nt	27 Dec 2003 11:32:43 -0000	1.1.1.3.2.2
--- makefile.nt	7 Oct 2004 17:56:38 -0000	1.1.1.3.2.3
***************
*** 1,20 ****
  # Makefile for PD on MSW
  
  all: pd gui ..\bin\pd.tk ..\bin\pdsend.exe ..\bin\pdreceive.exe
  
  VC = "C:\Program Files\Microsoft Visual Studio\VC98"
  #VC="\Program Files\DevStudio\Vc"
! INCLUDE = -I.\ -I..\Tcl\include -I$(VC)\include
  
! LDIR = $(VC)\lib
  
! LIB = /NODEFAULTLIB:libc /NODEFAULTLIB:oldnames  /NODEFAULTLIB:kernel \
!     /NODEFAULTLIB:uuid \
!     $(LDIR)\libc.lib $(LDIR)\oldnames.lib $(LDIR)\kernel32.lib \
!     $(LDIR)\wsock32.lib $(LDIR)\winmm.lib ..\bin\pthreadVC.lib
  
! GLIB =  $(LIB) ..\bin\tcl83.lib ..\bin\tk83.lib
  CFLAGS = /nologo /W3 /DMSW /DNT /DPD /DPD_INTERNAL /DWIN32 /DWINDOWS /Ox \
! 	-DPA_LITTLE_ENDIAN -DUSEAPI_MMIO -DUSEAPI_PORTAUDIO 
  LFLAGS = /nologo
  
--- 1,95 ----
  # Makefile for PD on MSW
+ # 
+ # This should ideally be started from the compiler console so that it will have
+ # the compiler, include and lib paths defined by environment settings
+ #
+ # nmake -f makefile.nt
  
  all: pd gui ..\bin\pd.tk ..\bin\pdsend.exe ..\bin\pdreceive.exe
  
+ #######################################################
+ # define some directories where to find external stuff
+ # these are ifdef'd so you can define them on the nmake command line
+ 
+ !ifndef VC
  VC = "C:\Program Files\Microsoft Visual Studio\VC98"
  #VC="\Program Files\DevStudio\Vc"
! !endif
  
! # PTHREADS says where the Pthreads project can be found
  
! !ifndef PTHREADS
! PTHREADS = ..\pthreads
! !endif
  
! # I doubt that at the current state an external Portaudio version can be used...
! # better let PADIR be the default...
! 
! !ifndef PADIR
! PADIR = ..\portaudio
! PAPDDIR = $(PADIR)\pablio
! !else
! PAPDDIR = .\pa
! !endif
! 
! # Portmidi should work
! 
! !ifndef PMDIR
! PMDIR = ..\portmidi
! !endif
! 
! # You can set ASIODIR to the ASIO SDK directory
! 
! !ifndef ASIODIR
! ASIODIR = ..\asio
! ASIOINC = -I$(PADIR)\pa_asio
! !else
! ASIOINC = -I$(ASIODIR)\common -I$(ASIODIR)\host -I$(ASIODIR)\host\pc
! !endif
! 
! # Define the TCL version with TCLVER 
! # and the directory of your TCL installation with TCLDIR
! 
! !ifndef TCLVER
! TCLVER = 83
! !else
! WISHDEF = -DWISHAPP="\"wish$(TCLVER).exe\""
! !endif
! 
! !ifndef TCLDIR
! !ifndef TCLINC
! TCLINC = ..\Tcl\include
! !endif
! 
! !ifndef TCLLIB
! TCLLIB = ..\bin
! !endif
! !else
! TCLINC = $(TCLDIR)\include
! TCLLIB = $(TCLDIR)\lib
! !endif
! 
! ########################################################
! 
! INC = -I.\ -I$(TCLINC) -I$(PTHREADS)
! !ifdef VC
! INC = $(INC) -I$(VC)\include 
! !endif
! 
! # the library directory might automatically be found
! !ifdef LIB
! LDIR = 
! !else
! # the trailing slash is important!!
! LDIR = $(VC)\lib\  
! !endif
! 
! LIBS = /NODEFAULTLIB:libc /NODEFAULTLIB:oldnames  /NODEFAULTLIB:kernel \
!     /NODEFAULTLIB:uuid \
!     $(LDIR)libc.lib $(LDIR)oldnames.lib $(LDIR)kernel32.lib \
!     $(LDIR)wsock32.lib $(LDIR)winmm.lib $(PTHREADS)\pthreadVC.lib
! GLIB =  $(LIBS) $(TCLLIB)\tcl$(TCLVER).lib $(TCLLIB)\tk$(TCLVER).lib
  CFLAGS = /nologo /W3 /DMSW /DNT /DPD /DPD_INTERNAL /DWIN32 /DWINDOWS /Ox \
! 	-DPA_LITTLE_ENDIAN -DUSEAPI_MMIO -DUSEAPI_PORTAUDIO $(WISHDEF)
  LFLAGS = /nologo
  
***************
*** 34,54 ****
      x_arithmetic.c x_connective.c x_interface.c x_midi.c x_misc.c \
      x_time.c x_acoustics.c x_net.c x_qlist.c x_gui.c d_soundfile.c \
!     m_simd_sse_vc.c m_simd_sse_gcc.c m_simd_ve_gcc.c \
      $(SYSSRC)
  
! PADIR = ..\portaudio
! INCPA = -I$(PADIR) -I$(PADIR)\pa_common -I$(PADIR)\pablio -I..\lib\asio
! SRCPA = $(PADIR)/pa_common/pa_lib.c $(PADIR)/pa_common/pa_trace.c \
! 	$(PADIR)/pablio/pablio_pd.c $(PADIR)/pablio/ringbuffer_pd.c
! SRCASIO = $(PADIR)/pa_asio/pa_asio.cpp 
  
! ASIOLIB = $(LDIR)\user32.lib $(LDIR)\gdi32.lib $(LDIR)\winspool.lib $(LDIR)\comdlg32.lib \
! $(LDIR)\advapi32.lib $(LDIR)\shell32.lib $(LDIR)\ole32.lib $(LDIR)\oleaut32.lib $(LDIR)\uuid.lib \
! $(LDIR)\odbc32.lib $(LDIR)\odbccp32.lib ..\lib\asio\asiolib.lib
  
  
  PAOBJ = pa_lib.obj pa_trace.obj pablio_pd.obj ringbuffer_pd.obj pa_asio.obj
  
- PMDIR = ..\portmidi
  INCPM = -I$(PMDIR)\pm_common -I$(PMDIR)\pm_win -I$(PMDIR)\porttime
  SRCPM = \
--- 109,130 ----
      x_arithmetic.c x_connective.c x_interface.c x_midi.c x_misc.c \
      x_time.c x_acoustics.c x_net.c x_qlist.c x_gui.c d_soundfile.c \
!     m_simd.c m_simd_sse_vc.c m_simd_sse_gcc.c m_simd_ve_gcc.c \
      $(SYSSRC)
  
! INCPA = -I$(PADIR) -I$(PADIR)\pa_common -I$(PADIR)\pablio -I$(PAPDDIR)
  
! ASIOLIBS = $(LDIR)user32.lib $(LDIR)gdi32.lib $(LDIR)winspool.lib $(LDIR)comdlg32.lib \
! $(LDIR)advapi32.lib $(LDIR)shell32.lib $(LDIR)ole32.lib $(LDIR)oleaut32.lib $(LDIR)uuid.lib \
! $(LDIR)odbc32.lib $(LDIR)odbccp32.lib $(ASIODIR)\asiolib.lib
  
  
  PAOBJ = pa_lib.obj pa_trace.obj pablio_pd.obj ringbuffer_pd.obj pa_asio.obj
+ #PAOBJ = \
+ #    pa_allocation.obj pa_converters.obj pa_cpuload.obj \
+ #    pa_stream.obj pa_dither.obj pa_front.obj pa_process.obj \
+ #    pa_skeleton.obj pa_trace.obj \
+ #    pablio_pd.obj ringbuffer_pd.obj \
+ #    pa_asio.obj
  
  INCPM = -I$(PMDIR)\pm_common -I$(PMDIR)\pm_win -I$(PMDIR)\porttime
  SRCPM = \
***************
*** 69,73 ****
  .PHONY: pd gui
  
! ALLCF = $(CFLAGS)  $(INCLUDE) $(INCASIO) $(INCPA) $(INCPM) /D_WINDOWS /DPA_NO_DS
  
  .c.obj:
--- 145,149 ----
  .PHONY: pd gui
  
! ALLCF = $(CFLAGS)  $(INC) $(INCASIO) $(INCPA) $(INCPM) /D_WINDOWS /DPA_NO_DS
  
  .c.obj:
***************
*** 80,88 ****
  ..\bin\pd.exe: s_entry.obj ..\bin\pd.lib
  	link $(LFLAGS) /out:..\bin\pd.exe /INCREMENTAL:NO s_entry.obj \
! 	    ..\bin\pd.lib $(LIB) $(ASIOLIB)
  
  ..\bin\pd.dll ..\bin\pd.lib: $(OBJC) $(OBJASIO)
  	link $(LFLAGS) /dll /export:sys_main /out:..\bin\pd.dll $(OBJC) \
! 	    $(OBJASIO) $(LIB) $(ASIOLIB)
  
  ..\bin\pdtcl.dll: t_tkcmd.obj
--- 156,164 ----
  ..\bin\pd.exe: s_entry.obj ..\bin\pd.lib
  	link $(LFLAGS) /out:..\bin\pd.exe /INCREMENTAL:NO s_entry.obj \
! 	    ..\bin\pd.lib $(LIBS) $(ASIOLIBS)
  
  ..\bin\pd.dll ..\bin\pd.lib: $(OBJC) $(OBJASIO)
  	link $(LFLAGS) /dll /export:sys_main /out:..\bin\pd.dll $(OBJC) \
! 	    $(OBJASIO) $(LIBS) $(ASIOLIBS)
  
  ..\bin\pdtcl.dll: t_tkcmd.obj
***************
*** 94,128 ****
  ..\bin\pdsend.exe: u_pdsend.obj
  	link $(LFLAGS) /out:..\bin\pdsend.exe /INCREMENTAL:NO u_pdsend.obj \
! 	    $(LIB)
  
  ..\bin\pdreceive.exe: u_pdreceive.obj
  	link $(LFLAGS) /out:..\bin\pdreceive.exe /INCREMENTAL:NO u_pdreceive.obj \
! 	    $(LIB)
  
  # explicit rules to compile portaudio and portmidi sources:
  pa_lib.obj: $(PADIR)\pa_common\pa_lib.c
! 	cl /c $(ALLCF) $(PADIR)\pa_common\pa_lib.c
  pa_trace.obj: $(PADIR)\pa_common\pa_trace.c
! 	cl /c $(ALLCF) $(PADIR)\pa_common\pa_trace.c
! pablio_pd.obj: $(PADIR)\pablio\pablio_pd.c
! 	cl /c $(ALLCF) $(PADIR)\pablio\pablio_pd.c
! ringbuffer_pd.obj: $(PADIR)\pablio\ringbuffer_pd.c
! 	cl /c $(ALLCF) $(PADIR)\pablio\ringbuffer_pd.c
  
  pa_asio.obj: $(PADIR)\pa_asio\pa_asio.cpp
! 	cl /c $(ALLCF) $(PADIR)\pa_asio\pa_asio.cpp
  
  portmidi.obj: $(PMDIR)\pm_common\portmidi.c
! 	cl /c $(ALLCF) $(PMDIR)\pm_common\portmidi.c
  pmutil.obj: $(PMDIR)\pm_common\pmutil.c
! 	cl /c $(ALLCF) $(PMDIR)\pm_common\pmutil.c
  pmwin.obj: $(PMDIR)\pm_win\pmwin.c
! 	cl /c $(ALLCF) $(PMDIR)\pm_win\pmwin.c
  pmwinmm.obj: $(PMDIR)\pm_win\pmwinmm.c
! 	cl /c $(ALLCF) $(PMDIR)\pm_win\pmwinmm.c
  porttime.obj: $(PMDIR)\porttime\porttime.c
! 	cl /c $(ALLCF) $(PMDIR)\porttime\porttime.c
  ptwinmm.obj: $(PMDIR)\porttime\ptwinmm.c
! 	cl /c $(ALLCF) $(PMDIR)\porttime\ptwinmm.c
  
  # the following should also clean up "bin" but it doesn't because "bin" holds
--- 170,222 ----
  ..\bin\pdsend.exe: u_pdsend.obj
  	link $(LFLAGS) /out:..\bin\pdsend.exe /INCREMENTAL:NO u_pdsend.obj \
! 	    $(LIBS)
  
  ..\bin\pdreceive.exe: u_pdreceive.obj
  	link $(LFLAGS) /out:..\bin\pdreceive.exe /INCREMENTAL:NO u_pdreceive.obj \
! 	    $(LIBS)
  
  # explicit rules to compile portaudio and portmidi sources:
  pa_lib.obj: $(PADIR)\pa_common\pa_lib.c
! 	cl /c $(ALLCF) $**
  pa_trace.obj: $(PADIR)\pa_common\pa_trace.c
! 	cl /c $(ALLCF) $**
! pa_stream.obj: $(PADIR)\pa_common\pa_stream.c
! 	cl /c $(ALLCF) $**
! pa_front.obj: $(PADIR)\pa_common\pa_front.c
! 	cl /c $(ALLCF) $**
! pa_allocation.obj: $(PADIR)\pa_common\pa_allocation.c
! 	cl /c $(ALLCF) $**
! pa_converters.obj: $(PADIR)\pa_common\pa_converters.c
! 	cl /c $(ALLCF) $**
! pa_cpuload.obj: $(PADIR)\pa_common\pa_cpuload.c
! 	cl /c $(ALLCF) $**
! pa_dither.obj: $(PADIR)\pa_common\pa_dither.c
! 	cl /c $(ALLCF) $**
! pa_process.obj: $(PADIR)\pa_common\pa_process.c
! 	cl /c $(ALLCF) $**
! pa_skeleton.obj: $(PADIR)\pa_common\pa_skeleton.c
! 	cl /c $(ALLCF) $**
! pa_winutil.obj: $(PADIR)\pa_win\pa_winutil.c
! 	cl /c $(ALLCF) $**
! pablio_pd.obj: $(PAPDDIR)\pablio_pd.c
! 	cl /c $(ALLCF) $**
! ringbuffer_pd.obj: $(PAPDDIR)\ringbuffer_pd.c
! 	cl /c $(ALLCF) $**
  
  pa_asio.obj: $(PADIR)\pa_asio\pa_asio.cpp
! 	cl /c $(ALLCF) $(ASIOINC) $**
  
  portmidi.obj: $(PMDIR)\pm_common\portmidi.c
! 	cl /c $(ALLCF) $**
  pmutil.obj: $(PMDIR)\pm_common\pmutil.c
! 	cl /c $(ALLCF) $**
  pmwin.obj: $(PMDIR)\pm_win\pmwin.c
! 	cl /c $(ALLCF) $**
  pmwinmm.obj: $(PMDIR)\pm_win\pmwinmm.c
! 	cl /c $(ALLCF) $**
  porttime.obj: $(PMDIR)\porttime\porttime.c
! 	cl /c $(ALLCF) $**
  ptwinmm.obj: $(PMDIR)\porttime\ptwinmm.c
! 	cl /c $(ALLCF) $**
  
  # the following should also clean up "bin" but it doesn't because "bin" holds

Index: s_inter.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/s_inter.c,v
retrieving revision 1.1.1.3.2.15
retrieving revision 1.1.1.3.2.16
diff -C2 -d -r1.1.1.3.2.15 -r1.1.1.3.2.16
*** s_inter.c	7 Sep 2004 19:24:17 -0000	1.1.1.3.2.15
--- s_inter.c	7 Oct 2004 17:56:38 -0000	1.1.1.3.2.16
***************
*** 54,58 ****
  
  #ifndef WISHAPP
! #define WISHAPP "wish85.exe"
  #endif
  
--- 54,58 ----
  
  #ifndef WISHAPP
! #define WISHAPP "wish83.exe"
  #endif
  

Index: d_ctl.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/d_ctl.c,v
retrieving revision 1.1.1.3.2.11
retrieving revision 1.1.1.3.2.12
diff -C2 -d -r1.1.1.3.2.11 -r1.1.1.3.2.12
*** d_ctl.c	6 Oct 2004 19:24:14 -0000	1.1.1.3.2.11
--- d_ctl.c	7 Oct 2004 17:56:37 -0000	1.1.1.3.2.12
***************
*** 41,51 ****
      {
      	out[0] = f;
!     	out[1] = f;
!     	out[2] = f;
!     	out[3] = f;
!     	out[4] = f;
!     	out[5] = f;
!     	out[6] = f;
!     	out[7] = f;
      }
      return (w+4);
--- 41,51 ----
      {
      	out[0] = f;
!         out[1] = f;
!         out[2] = f;
!         out[3] = f;
!         out[4] = f;
!         out[5] = f;
!         out[6] = f;
!         out[7] = f;
      }
      return (w+4);
***************
*** 136,180 ****
      {
      	float g = x->x_value = x->x_target;
!     	while (n--) 
! 	    *out++ = g;
!     }
!     return (w+4);
! }
! 
! /* TB: vectorized version */
! static t_int *line_tilde_perf8(t_int *w)
! {
!     t_line *x = (t_line *)(w[1]);
!     t_float *out = (t_float *)(w[2]);
!     int n = (int)(w[3]);
!     float f = x->x_value;
! 
!     if (PD_BIGORSMALL(f))
! 	    x->x_value = f = 0;
!     if (x->x_retarget)
!     {
!     	int nticks = x->x_inletwas * x->x_dspticktomsec;
!     	if (!nticks) nticks = 1;
!     	x->x_ticksleft = nticks;
!     	x->x_biginc = (x->x_target - x->x_value)/(float)nticks;
!     	x->x_inc = x->x_1overn * x->x_biginc;
!     	x->x_retarget = 0;
!     }
!     if (x->x_ticksleft)
!     {
!     	float f = x->x_value;
!     	while (n--) *out++ = f, f += x->x_inc;
!     	x->x_value += x->x_biginc;
!     	x->x_ticksleft--;
!     }
!     else
!     {
! 	float f = x->x_value = x->x_target;
! 	for (; n; n -= 8, out += 8)
! 	{
! 	    out[0] = f; out[1] = f; out[2] = f; out[3] = f; 
! 	    out[4] = f; out[5] = f; out[6] = f; out[7] = f;
! 	}
! 
      }
      return (w+4);
--- 136,143 ----
      {
      	float g = x->x_value = x->x_target;
!         if(n&7)
!     	    while (n--) *out++ = g;
!         else
!             setvec_8(out,g,n);
      }
      return (w+4);
***************
*** 205,212 ****
  static void line_tilde_dsp(t_line *x, t_signal **sp)
  {
-     if(sp[0]->s_n&7)
  	dsp_add(line_tilde_perform, 3, x, sp[0]->s_vec, sp[0]->s_n);
-     else
- 	dsp_add(line_tilde_perf8, 3, x, sp[0]->s_vec, sp[0]->s_n);
      x->x_1overn = 1./sp[0]->s_n;
      x->x_dspticktomsec = sp[0]->s_sr / (1000 * sp[0]->s_n);
--- 168,172 ----

Index: m_simd_ve_gcc.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_ve_gcc.c,v
retrieving revision 1.1.2.6
retrieving revision 1.1.2.7
diff -C2 -d -r1.1.2.6 -r1.1.2.7
*** m_simd_ve_gcc.c	6 Oct 2004 19:24:14 -0000	1.1.2.6
--- m_simd_ve_gcc.c	7 Oct 2004 17:56:38 -0000	1.1.2.7
***************
*** 39,49 ****
  #define LoadValue(where) LoadUnaligned((const float *)(where))
  
! t_int *zero_perf_ve_gcc(t_int *w)
  {
  	const vector float zero = (vector float)(0);
!     t_float *dst = (t_float *)w[1];
!     int n = w[2]>>4;
! 
! 	for(; n--; dst += 16) {
  		vec_st(zero, 0,dst);
  		vec_st(zero,16,dst);
--- 39,46 ----
  #define LoadValue(where) LoadUnaligned((const float *)(where))
  
! void zerovec_ve_gcc(t_float *dst,int n)
  {
  	const vector float zero = (vector float)(0);
! 	for(n >>= 4; n--; dst += 16) {
  		vec_st(zero, 0,dst);
  		vec_st(zero,16,dst);
***************
*** 51,64 ****
  		vec_st(zero,48,dst);
  	}
-     return w+3;
  }
  
! t_int *copy_perf_ve_gcc(t_int *w)
  {
!     const t_float *src = (const t_float *)w[1];
!     t_float *dst = (t_float *)w[2];
!     int n = w[3]>>4;
  
! 	for(; n--; src += 16,dst += 16) {
  		vector float a1 = vec_ld( 0,src);
  		vector float a2 = vec_ld(16,src);
--- 48,67 ----
  		vec_st(zero,48,dst);
  	}
  }
  
! void setvec_ve_gcc(t_float *dst,t_float v,int n)
  {
! 	const vector float arg = LoadValue(&v);
! 	for(n >>= 4; n--; dst += 16) {
! 		vec_st(arg, 0,dst);
! 		vec_st(arg,16,dst);
! 		vec_st(arg,32,dst);
! 		vec_st(arg,48,dst);
! 	}
! }
  
! void copyvec_ve_gcc(t_float *dst,const t_float *src,int n)
! {
! 	for(n >>= 4; n--; src += 16,dst += 16) {
  		vector float a1 = vec_ld( 0,src);
  		vector float a2 = vec_ld(16,src);
***************
*** 70,89 ****
  		vec_st(a4,48,dst);
  	}
- 	return w+4;
  }
  
! t_int *sig_tilde_perf_ve_gcc(t_int *w)
  {
! 	const vector float arg = LoadValue(w[1]);
!     t_float *dst = (t_float *)w[2];
!     int n = w[3]>>4;
  
! 	for(; n--; dst += 16) {
! 		vec_st(arg, 0,dst);
! 		vec_st(arg,16,dst);
! 		vec_st(arg,32,dst);
! 		vec_st(arg,48,dst);
  	}
!     return w+4;
  }
  
--- 73,118 ----
  		vec_st(a4,48,dst);
  	}
  }
  
! void addvec_ve_gcc(t_float *dst,const t_float *src,int n)
  {
! #ifdef USEVECLIB
! 	vadd(dst,1,src,1,dst,1,n);
! #else
! 	for(n >>= 4; n--; src += 16,dst += 16) {
! 		vector float a1 = vec_ld( 0,dst),b1 = vec_ld( 0,src);
! 		vector float a2 = vec_ld(16,dst),b2 = vec_ld(16,src);
! 		vector float a3 = vec_ld(32,dst),b3 = vec_ld(32,src);
! 		vector float a4 = vec_ld(48,dst),b4 = vec_ld(48,src);
! 		
! 		a1 = vec_add(a1,b1);
! 		a2 = vec_add(a2,b2);
! 		a3 = vec_add(a3,b3);
! 		a4 = vec_add(a4,b4);
  
! 		vec_st(a1, 0,dst);
! 		vec_st(a2,16,dst);
! 		vec_st(a3,32,dst);
! 		vec_st(a4,48,dst);
  	}
! #endif
! }
! 
! t_int *zero_perf_ve_gcc(t_int *w)
! {
!     zerovec_ve_gcc((t_float *)w[1],w[2]);
!     return w+3;
! }
! 
! t_int *copy_perf_ve_gcc(t_int *w)
! {
!     copyvec_ve_gcc((t_float *)w[2],(const t_float *)w[1],w[3]);
! 	return w+4;
! }
! 
! t_int *sig_tilde_perf_ve_gcc(t_int *w)
! {
!     setvec_ve_gcc((t_float *)w[2],*(const t_float *)w[1],w[3]);
! 	return w+4;
  }
  
***************
*** 146,150 ****
  t_int *minus_perf_ve_gcc(t_int *w)
  {
! #ifdef USEVECLIB
  	vsub((const t_float *)w[1],1,(const t_float *)w[2],1,(t_float *)w[3],1,w[4]);
  #else
--- 175,180 ----
  t_int *minus_perf_ve_gcc(t_int *w)
  {
! #if 0 //def USEVECLIB
!     /* vsub is buggy for some OSX versions! */
  	vsub((const t_float *)w[1],1,(const t_float *)w[2],1,(t_float *)w[3],1,w[4]);
  #else

Index: m_simd.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd.h,v
retrieving revision 1.1.2.7
retrieving revision 1.1.2.8
diff -C2 -d -r1.1.2.7 -r1.1.2.8
*** m_simd.h	6 Oct 2004 19:24:14 -0000	1.1.2.7
--- m_simd.h	7 Oct 2004 17:56:38 -0000	1.1.2.8
***************
*** 7,10 ****
--- 7,17 ----
  #define __M_SIMD_H
  
+ /* general vector functions */
+ void zerovec_8(t_float *dst,int n);
+ void setvec_8(t_float *dst,t_float v,int n);
+ void copyvec_8(t_float *dst,const t_float *src,int n);
+ void addvec_8(t_float *dst,const t_float *src,int n);
+ void testcopyvec_8(t_float *dst,const t_float *src,int n);
+ void testaddvec_8(t_float *dst,const t_float *src,int n);
  
  #ifdef DONTUSESIMD

Index: m_pd.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/m_pd.h,v
retrieving revision 1.1.1.4.2.19
retrieving revision 1.1.1.4.2.20
diff -C2 -d -r1.1.1.4.2.19 -r1.1.1.4.2.20
*** m_pd.h	6 Oct 2004 19:24:14 -0000	1.1.1.4.2.19
--- m_pd.h	7 Oct 2004 17:56:38 -0000	1.1.1.4.2.20
***************
*** 251,254 ****
--- 251,259 ----
  EXTERN void *resizebytes(void *x, size_t oldsize, size_t newsize);
  
+ /* T.Grill - functions for aligned memory (according to CPU SIMD architecture) */
+ EXTERN void *getalignedbytes(size_t nbytes);
+ EXTERN void freealignedbytes(void *x,size_t nbytes);
+ EXTERN void *resizealignedbytes(void *x,size_t oldsize, size_t newsize);
+ 
  /* -------------------- atoms ----------------------------- */
  

Index: g_array.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/g_array.c,v
retrieving revision 1.1.1.3.2.12
retrieving revision 1.1.1.3.2.13
diff -C2 -d -r1.1.1.3.2.12 -r1.1.1.3.2.13
*** g_array.c	6 Oct 2004 19:24:14 -0000	1.1.1.3.2.12
--- g_array.c	7 Oct 2004 17:56:37 -0000	1.1.1.3.2.13
***************
*** 51,55 ****
      x->a_n = 1;
      x->a_elemsize = sizeof(t_word) * template->t_n;
!     x->a_vec = (char *)getbytes(x->a_elemsize);
      	/* note here we blithely copy a gpointer instead of "setting" a
  	new one; this gpointer isn't accounted for and needn't be since
--- 51,56 ----
      x->a_n = 1;
      x->a_elemsize = sizeof(t_word) * template->t_n;
!     /* T.Grill - get aligned memory - good for tabsend~ and tabreceive~ */
!     x->a_vec = (char *)getalignedbytes(x->a_elemsize); 
      	/* note here we blithely copy a gpointer instead of "setting" a
  	new one; this gpointer isn't accounted for and needn't be since
***************
*** 72,77 ****
      elemsize = sizeof(t_word) * template->t_n;
      
!     x->a_vec = (char *)resizebytes(x->a_vec, oldn * elemsize,
!     	n * elemsize);
      x->a_n = n;
      if (n > oldn)
--- 73,77 ----
      elemsize = sizeof(t_word) * template->t_n;
      
!     x->a_vec = (char *)resizealignedbytes(x->a_vec, oldn * elemsize,n * elemsize);
      x->a_n = n;
      if (n > oldn)
***************
*** 101,105 ****
  	word_free(wp, scalartemplate);
      }
!     freebytes(x->a_vec, x->a_elemsize * x->a_n);
      freebytes(x, sizeof *x);
  }
--- 101,106 ----
  	word_free(wp, scalartemplate);
      }
!     /* T.Grill - changed to aligned allocation */
!     freealignedbytes(x->a_vec, x->a_elemsize * x->a_n);
      freebytes(x, sizeof *x);
  }
***************
*** 213,217 ****
      x->x_n = n;
      x->x_elemsize = nwords * sizeof(t_word);
!     x->x_vec = getbytes(x->x_n * x->x_elemsize);
      memset(x->x_vec, 0, x->x_n * x->x_elemsize);
      	/* LATER should check that malloc */
--- 214,219 ----
      x->x_n = n;
      x->x_elemsize = nwords * sizeof(t_word);
!     /* T.Grill - changed to aligned allocation */
!     x->x_vec = getalignedbytes(x->x_n * x->x_elemsize);
      memset(x->x_vec, 0, x->x_n * x->x_elemsize);
      	/* LATER should check that malloc */
***************
*** 330,334 ****
      while (x2 = pd_findbyclass(gensym("#A"), garray_class))
      	pd_unbind(x2, gensym("#A"));
!     freebytes(x->x_vec, x->x_n * x->x_elemsize);
  }
  
--- 332,337 ----
      while (x2 = pd_findbyclass(gensym("#A"), garray_class))
      	pd_unbind(x2, gensym("#A"));
!     /* T.Grill - changed to aligned allocation */
!     freealignedbytes(x->x_vec, x->x_n * x->x_elemsize);
  }
  
***************
*** 1569,1573 ****
      if (n < 1) n = 1;
      elemsize = template_findbyname(x->x_templatesym)->t_n * sizeof(t_word);
!     nvec = t_resizebytes(x->x_vec, was * elemsize, n * elemsize);
      if (!nvec)
      {
--- 1572,1576 ----
      if (n < 1) n = 1;
      elemsize = template_findbyname(x->x_templatesym)->t_n * sizeof(t_word);
!     nvec = resizealignedbytes(x->x_vec, was * elemsize, n * elemsize);
      if (!nvec)
      {

Index: m_simd_sse_vc.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_vc.c,v
retrieving revision 1.1.2.7
retrieving revision 1.1.2.8
diff -C2 -d -r1.1.2.7 -r1.1.2.8
*** m_simd_sse_vc.c	6 Oct 2004 19:24:14 -0000	1.1.2.7
--- m_simd_sse_vc.c	7 Oct 2004 17:56:38 -0000	1.1.2.8
***************
*** 9,22 ****
  #if defined(NT) && defined(_MSC_VER) && !(defined DONTUSESIMD)
  
! t_int *zero_perf_sse_vc(t_int *w)
  {
  	__asm {
! 		mov		esi,dword ptr [w]
! 
! 		mov		edx,dword ptr [esi + 1*TYPE t_int] /* out */
! 		/* load zero */
! 		xorps	xmm0,xmm0
  
! 		mov		ecx,[esi + 2*TYPE t_int] /* n */
  		shr		ecx,4
  
--- 9,20 ----
  #if defined(NT) && defined(_MSC_VER) && !(defined DONTUSESIMD)
  
! /* dst is assumed to be aligned */
! void zerovec_sse_vc(t_float *dst,int n)
  {
  	__asm {
! 		mov		edx,dword ptr [dst] /* out */
! 		xorps   xmm0,xmm0 /* zero value */
  
! 		mov		ecx,[n] /* n */
  		shr		ecx,4
  
***************
*** 32,49 ****
  		loop	loopa
  	}
-     return (w+3);
  }
  
! 
! t_int *copy_perf_sse_vc(t_int *w)
  {
  	__asm {
! 		mov		esi,dword ptr [w]
  
! 		mov		ebx,dword ptr [esi + 1*TYPE t_int] /* in1 */
  /*		prefetcht0 [ebx] */
! 		mov		edx,dword ptr [esi + 2*TYPE t_int] /* out */
  
! 		mov		ecx,dword ptr [esi + 3*TYPE t_int] /* n */
  		shr		ecx,4
  
--- 30,70 ----
  		loop	loopa
  	}
  }
  
! /* dst is assumed to be aligned */
! void setvec_sse_vc(t_float *dst,t_float v,int n)
  {
  	__asm {
! 		mov		edx,dword ptr [dst] /* out */
  
!         /* load value ... this is not very clean.. */
! 		movss	xmm0,xmmword ptr [v]
! 		shufps	xmm0,xmm0,0
! 
! 		mov		ecx,[n] /* n */
! 		shr		ecx,4
! 
! 		/* should we do more loop unrolling? */
! loopa:
! 		movaps	xmmword ptr[edx],xmm0
! 		movaps	xmmword ptr[edx+4*TYPE t_float],xmm0
! 		movaps	xmmword ptr[edx+8*TYPE t_float],xmm0
! 		movaps	xmmword ptr[edx+12*TYPE t_float],xmm0
! 
! 		add		edx,16*TYPE t_float
! 		/* very short loop - let's assume that branch prediction does its job nicely */
! 		loop	loopa
! 	}
! }
! 
! /* dst and src are assumed to be aligned */
! void copyvec_sse_vc(t_float *dst,const t_float *src,int n)
! {
! 	__asm {
! 		mov		ebx,dword ptr [src] /* in1 */
  /*		prefetcht0 [ebx] */
! 		mov		edx,dword ptr [dst] /* out */
  
! 		mov		ecx,dword ptr [n] /* n */
  		shr		ecx,4
  
***************
*** 69,100 ****
  		loop	loopa 
  	}
-     return (w+4);
  }
  
! 
! t_int *sig_tilde_perf_sse_vc(t_int *w)
  {
  	__asm {
! 		mov		esi,dword ptr [w]
  
! 		mov		edx,dword ptr [esi + 2*TYPE t_int] /* out */
! 		/* load value ... this is not very clean.. */
! 		mov		eax,dword ptr [esi + 1*TYPE t_int] /* f */
! 		movss	xmm0,xmmword ptr [eax]
! 		shufps	xmm0,xmm0,0
  
! 		mov		ecx,dword ptr [esi + 3*TYPE t_int] /* n */
! 		shr		ecx,4
  
! loopa:
! 		movaps	xmmword ptr[edx],xmm0
! 		movaps	xmmword ptr[edx+4*TYPE t_float],xmm0
! 		movaps	xmmword ptr[edx+8*TYPE t_float],xmm0
! 		movaps	xmmword ptr[edx+12*TYPE t_float],xmm0
  
! 		add		edx,16*TYPE t_float
  		loop	loopa 
  	}
!     return (w+4);
  }
  
--- 90,176 ----
  		loop	loopa 
  	}
  }
  
! void addvec_sse_vc(t_float *dst,const t_float *src,int n)
  {
  	__asm {
! 		mov		eax,dword ptr [src] /* in1 */
! /*      prefetcht0 [eax] prefetch first cache line */	
! 		mov		edx,dword ptr [dst] /* out */
! 		mov		ecx,dword ptr [n] /* n */
! 		shr		ecx,4 /* divide by 16 */
  
!         xor     esi,esi /* reset index */
! /*
! 		prefetcht0 [eax+8*TYPE t_float]
! 		prefetcht0 [ebx+8*TYPE t_float]
! */
! loopa:
! /*
!         prefetcht0 [eax+16*TYPE t_float]
! 		prefetcht0 [ebx+16*TYPE t_float]
! */
!         movaps	xmm0,xmmword ptr[edx+esi]
! 		movaps	xmm1,xmmword ptr[eax+esi]
! 		addps	xmm0,xmm1
! 		movaps	xmmword ptr[edx+esi],xmm0
  
! 		movaps	xmm2,xmmword ptr[edx+esi+4*TYPE t_float]
! 		movaps	xmm3,xmmword ptr[eax+esi+4*TYPE t_float]
! 		addps	xmm2,xmm3
! 		movaps	xmmword ptr[edx+esi+4*TYPE t_float],xmm2
! /*
!         prefetcht0 [eax+24*TYPE t_float]
! 		prefetcht0 [ebx+24*TYPE t_float]
! */
! 		movaps	xmm4,xmmword ptr[edx+esi+8*TYPE t_float]
! 		movaps	xmm5,xmmword ptr[eax+esi+8*TYPE t_float]
! 		addps	xmm4,xmm5
! 		movaps	xmmword ptr[edx+esi+8*TYPE t_float],xmm4
  
! 		movaps	xmm6,xmmword ptr[edx+esi+12*TYPE t_float]
! 		movaps	xmm7,xmmword ptr[eax+esi+12*TYPE t_float]
! 		addps	xmm6,xmm7
! 		movaps	xmmword ptr[edx+esi+12*TYPE t_float],xmm6
  
!         add     esi,16*TYPE t_float
  		loop	loopa 
  	}
! }
! 
! void testcopyvec(t_float *dst,const t_float *src,int n)
! {
!     while(n--) {
!         *dst = (PD_BIGORSMALL(*src) ? 0 : *src);
! 	    dst++;
! 	    src++;
! 	}
! }
! 
! void testaddvec(t_float *dst,const t_float *src,int n)
! {
!     while(n--) {
!         *dst += (PD_BIGORSMALL(*src) ? 0 : *src);
! 	    dst++;
! 	    src++;
! 	}
! }
! 
! t_int *zero_perf_sse_vc(t_int *w)
! {
!     zerovec_sse_vc((t_float *)w[1],w[2]);
!     return w+3;
! }
! 
! t_int *copy_perf_sse_vc(t_int *w)
! {
!     copyvec_sse_vc((t_float *)w[2],(const t_float *)w[1],w[3]);
!     return w+4;
! }
! 
! t_int *sig_tilde_perf_sse_vc(t_int *w)
! {
!     setvec_sse_vc((t_float *)w[2],*(const t_float *)w[1],w[3]);
!     return w+4;
  }
  

Index: g_io.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/g_io.c,v
retrieving revision 1.1.1.1.16.5
retrieving revision 1.1.1.1.16.6
diff -C2 -d -r1.1.1.1.16.5 -r1.1.1.1.16.6
*** g_io.c	6 Oct 2004 19:24:14 -0000	1.1.1.1.16.5
--- g_io.c	7 Oct 2004 17:56:38 -0000	1.1.1.1.16.6
***************
*** 24,27 ****
--- 24,28 ----
  
  #include "m_pd.h"
+ #include "m_simd.h"
  #include "g_canvas.h"
  #include <string.h>
***************
*** 147,150 ****
--- 148,162 ----
  }
  
+ /* T.Grill: SIMD version */
+ t_int *vinlet_perfsimd(t_int *w)
+ {
+     t_vinlet *x = (t_vinlet *)(w[1]);
+     t_float *in = x->x_read;
+     copyvec((t_float *)w[2],in,w[3]);
+     if (in == x->x_endbuf) in = x->x_buf;
+     x->x_read = in;
+     return (w+4);
+ }
+ 
  static void vinlet_dsp(t_vinlet *x, t_signal **sp)
  {
***************
*** 160,168 ****
      else
      {
! 	if (sp[0]->s_n & 7)
! 	    dsp_add(vinlet_perform, 3, x, outsig->s_vec, outsig->s_n);
! 	else
! 	    dsp_add(vinlet_perf8, 3, x, outsig->s_vec, outsig->s_n);
! 	x->x_read = x->x_buf;
      }
  }
--- 172,184 ----
      else
      {
!         const int n = outsig->s_n;
! 	    if(n&7)
! 	        dsp_add(vinlet_perform, 3, x, outsig->s_vec,n);
! 	    else if(SIMD_CHECK1(n,outsig->s_vec))
!             /* if the outsig->s_vec is aligned the x->x_read will also be... */
! 	        dsp_add(vinlet_perfsimd, 3, x, outsig->s_vec,n);
! 	    else
! 	        dsp_add(vinlet_perf8, 3, x, outsig->s_vec,n);
! 	    x->x_read = x->x_buf;
      }
  }
***************
*** 234,239 ****
  	{
      	    t_float *buf = x->x_buf;
!     	    t_freebytes(buf, oldbufsize * sizeof(*buf));
!     	    buf = (t_float *)t_getbytes(bufsize * sizeof(*buf));
      	    memset((char *)buf, 0, bufsize * sizeof(*buf));
      	    x->x_bufsize = bufsize;
--- 250,254 ----
  	{
      	    t_float *buf = x->x_buf;
!     	    buf = (t_float *)resizealignedbytes(buf,oldbufsize * sizeof(*buf),bufsize * sizeof(*buf));
      	    memset((char *)buf, 0, bufsize * sizeof(*buf));
      	    x->x_bufsize = bufsize;
***************
*** 279,283 ****
      x->x_canvas = canvas_getcurrent();
      x->x_inlet = canvas_addinlet(x->x_canvas, &x->x_obj.ob_pd, &s_signal,s);
!     x->x_endbuf = x->x_buf = (t_float *)getbytes(0);
      x->x_bufsize = 0;
      x->x_directsignal = 0;
--- 294,298 ----
      x->x_canvas = canvas_getcurrent();
      x->x_inlet = canvas_addinlet(x->x_canvas, &x->x_obj.ob_pd, &s_signal,s);
!     x->x_endbuf = x->x_buf = (t_float *)getalignedbytes(0);
      x->x_bufsize = 0;
      x->x_directsignal = 0;
***************
*** 410,420 ****
      if (tot < 5) post("-buf %x endbuf %x", x->x_buf, x->x_endbuf);
  #endif
      while (n--)
      {
      	*out++ += *in++;
!     	if (out == x->x_endbuf) out = x->x_buf;
      }
      outwas += x->x_hop;
!     if (outwas >= x->x_endbuf) outwas = x->x_buf;
      x->x_write = outwas;
      return (w+4);
--- 425,436 ----
      if (tot < 5) post("-buf %x endbuf %x", x->x_buf, x->x_endbuf);
  #endif
+     t_float *end = x->x_endbuf;
      while (n--)
      {
      	*out++ += *in++;
!     	if (out == end) out = x->x_buf;
      }
      outwas += x->x_hop;
!     if (outwas >= end) outwas = x->x_buf;
      x->x_write = outwas;
      return (w+4);
***************
*** 539,544 ****
  	{
      	    t_float *buf = x->x_buf;
!     	    t_freebytes(buf, oldbufsize * sizeof(*buf));
!     	    buf = (t_float *)t_getbytes(bufsize * sizeof(*buf));
      	    memset((char *)buf, 0, bufsize * sizeof(*buf));
      	    x->x_bufsize = bufsize;
--- 555,559 ----
  	{
      	    t_float *buf = x->x_buf;
!     	    buf = (t_float *)resizealignedbytes(buf,oldbufsize * sizeof(*buf),bufsize * sizeof(*buf));
      	    memset((char *)buf, 0, bufsize * sizeof(*buf));
      	    x->x_bufsize = bufsize;
***************
*** 591,595 ****
      	&x->x_obj.ob_pd, &s_signal);
      inlet_new(&x->x_obj, &x->x_obj.ob_pd, &s_signal, &s_signal);
!     x->x_endbuf = x->x_buf = (t_float *)getbytes(0);
      x->x_bufsize = 0;
  
--- 606,610 ----
      	&x->x_obj.ob_pd, &s_signal);
      inlet_new(&x->x_obj, &x->x_obj.ob_pd, &s_signal, &s_signal);
!     x->x_endbuf = x->x_buf = (t_float *)getalignedbytes(0);
      x->x_bufsize = 0;
  

Index: m_memory.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/m_memory.c,v
retrieving revision 1.1.1.2.2.2
retrieving revision 1.1.1.2.2.3
diff -C2 -d -r1.1.1.2.2.2 -r1.1.1.2.2.3
*** m_memory.c	6 Oct 2004 19:24:14 -0000	1.1.1.2.2.2
--- m_memory.c	7 Oct 2004 17:56:38 -0000	1.1.1.2.2.3
***************
*** 8,11 ****
--- 8,21 ----
  #include "m_imp.h"
  
+ /* T.Grill - include SIMD functionality */
+ #include "m_simd.h"
+ /* T.Grill - bit alignment for signal vectors (must be a multiple of 8!) */
+ /* if undefined no alignment occurs */
+ #ifdef SIMD_BYTEALIGN
+     #define VECTORALIGNMENT (SIMD_BYTEALIGN*8)
+ #else
+     #define VECTORALIGNMENT 128
+ #endif
+ 
  /* #define LOUD */
  #ifdef LOUD
***************
*** 80,83 ****
--- 90,124 ----
  }
  
+ /* T.Grill - get aligned memory */
+ void *getalignedbytes(size_t nbytes)
+ {
+ 	/* to align the region we also need some extra memory to save the original pointer location
+ 		it is saved immediately before the aligned vector memory
+ 	*/
+    	void *vec = getbytes(nbytes+ (VECTORALIGNMENT/8-1)+sizeof(void *));
+ 	int alignment = ((unsigned long)vec+sizeof(void *))&(VECTORALIGNMENT/8-1);  /* get alignment of first possible signal vector byte */
+ 	void *ret = (unsigned char *)vec+sizeof(void *)+(alignment == 0?0:VECTORALIGNMENT/8-alignment); /* calculate aligned pointer */
+ 	*(void **)((unsigned char *)ret-sizeof(void *)) = vec; /* save original memory location */
+     return ret;
+ }
+ 
+ /* T.Grill - free aligned vector memory */
+ void freealignedbytes(void *ptr,size_t nbytes)
+ {
+ 	void *ori = *(void **)((unsigned char *)ptr-sizeof(void *)); /* get original memory location */
+ 	freebytes(ori,nbytes+(VECTORALIGNMENT/8-1)+sizeof(void *));
+ }
+ 
+ /* T.Grill - resize aligned vector memory */
+ void *resizealignedbytes(void *ptr,size_t oldsize, size_t newsize)
+ {
+ 	void *ori = *(void **)((unsigned char *)ptr-sizeof(void *)); /* get original memory location */
+     void *vec = resizebytes(ori,oldsize+(VECTORALIGNMENT/8-1)+sizeof(void *),newsize+ (VECTORALIGNMENT/8-1)+sizeof(void *));
+ 	int alignment = ((unsigned long)vec+sizeof(void *))&(VECTORALIGNMENT/8-1);  /* get alignment of first possible signal vector byte */
+ 	void *ret = (unsigned char *)vec+sizeof(void *)+(alignment == 0?0:VECTORALIGNMENT/8-alignment); /* calculate aligned pointer */
+ 	*(void **)((unsigned char *)ret-sizeof(void *)) = vec; /* save original memory location */
+     return ret;
+ }
+ 
  #ifdef DEBUGMEM
  #include <stdio.h>

Index: m_simd_sse_gcc.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_gcc.h,v
retrieving revision 1.1.2.8
retrieving revision 1.1.2.9
diff -C2 -d -r1.1.2.8 -r1.1.2.9
*** m_simd_sse_gcc.h	6 Oct 2004 19:24:14 -0000	1.1.2.8
--- m_simd_sse_gcc.h	7 Oct 2004 17:56:38 -0000	1.1.2.9
***************
*** 10,13 ****
--- 10,20 ----
  
  /* SIMD functions for SSE with GCC */
+ //void zerovec_sse_gcc(t_float *dst,int n);
+ //void setvec_sse_gcc(t_float *dst,t_float v,int n);
+ //void copyvec_sse_gcc(t_float *dst,const t_float *src,int n);
+ //void addvec_sse_gcc(t_float *dst,const t_float *src,int n);
+ //void testcopyvec_sse_gcc(t_float *dst,const t_float *src,int n);
+ //void testaddvec_sse_gcc(t_float *dst,const t_float *src,int n);
+ 
  t_int *zero_perf_sse_gcc(t_int *w);
  t_int *copy_perf_sse_gcc(t_int *w);
***************
*** 32,35 ****
--- 39,49 ----
  
  
+ #define zerovec                 zerovec_8 /* SIMD not implemented */
+ #define setvec                  setvec_8 /* SIMD not implemented */
+ #define copyvec                 copyvec_8 /* SIMD not implemented */
+ #define addvec                  addvec_8 /* SIMD not implemented */
+ #define testcopyvec             testcopyvec_8 /* SIMD not implemented */
+ #define testaddvec              testaddvec_8 /* SIMD not implemented */
+ 
  /* functions in d_ugen.c */
  #define zero_perf_simd          zero_perf_sse_gcc

Index: m_simd_sse_vc.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_vc.h,v
retrieving revision 1.1.2.7
retrieving revision 1.1.2.8
diff -C2 -d -r1.1.2.7 -r1.1.2.8
*** m_simd_sse_vc.h	6 Oct 2004 19:24:14 -0000	1.1.2.7
--- m_simd_sse_vc.h	7 Oct 2004 17:56:38 -0000	1.1.2.8
***************
*** 10,13 ****
--- 10,20 ----
  
  /* SIMD functions for SSE with VC++ */
+ void zerovec_sse_vc(t_float *dst,int n);
+ void setvec_sse_vc(t_float *dst,t_float v,int n);
+ void copyvec_sse_vc(t_float *dst,const t_float *src,int n);
+ void addvec_sse_vc(t_float *dst,const t_float *src,int n);
+ void testcopyvec_sse_vc(t_float *dst,const t_float *src,int n);
+ void testaddvec_sse_vc(t_float *dst,const t_float *src,int n);
+ 
  t_int *zero_perf_sse_vc(t_int *w);
  t_int *copy_perf_sse_vc(t_int *w);
***************
*** 32,35 ****
--- 39,49 ----
  
  
+ #define zerovec                 zerovec_sse_vc
+ #define setvec                  setvec_sse_vc
+ #define copyvec                 copyvec_sse_vc
+ #define addvec                  addvec_sse_vc
+ #define testcopyvec             testcopyvec_sse_vc
+ #define testaddvec              testaddvec_sse_vc
+ 
  /* functions in d_ugen.c */
  #define zero_perf_simd          zero_perf_sse_vc

Index: m_simd_def.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_def.h,v
retrieving revision 1.1.2.6
retrieving revision 1.1.2.7
diff -C2 -d -r1.1.2.6 -r1.1.2.7
*** m_simd_def.h	6 Oct 2004 19:24:14 -0000	1.1.2.6
--- m_simd_def.h	7 Oct 2004 17:56:38 -0000	1.1.2.7
***************
*** 12,15 ****
--- 12,23 ----
  /* These are the functions that can be coded for SIMD */
  
+ /* functions in m_simd_def.c */
+ #define zerovec                 zerovec_8
+ #define setvec                  setvec_8
+ #define copyvec                 copyvec_8
+ #define addvec                  addvec_8
+ #define testcopyvec             testcopyvec_8
+ #define testaddvec              testaddvec_8
+ 
  /* functions in d_ugen.c */
  #define zero_perf_simd          zero_perf8

Index: d_ugen.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/d_ugen.c,v
retrieving revision 1.1.1.2.2.8
retrieving revision 1.1.1.2.2.9
diff -C2 -d -r1.1.1.2.2.8 -r1.1.1.2.2.9
*** d_ugen.c	6 Oct 2004 19:24:14 -0000	1.1.1.2.2.8
--- d_ugen.c	7 Oct 2004 17:56:37 -0000	1.1.1.2.2.9
***************
*** 31,43 ****
  #include "m_simd.h"
  
- /* T.Grill - bit alignment for signal vectors (must be a multiple of 8!) */
- /* if undefined no alignment occurs */
- #ifdef SIMD_BYTEALIGN
-     #define VECTORALIGNMENT (SIMD_BYTEALIGN*8)
- #else
-     #define VECTORALIGNMENT 128
- #endif
- 
- 
  extern t_class *vinlet_class, *voutlet_class, *canvas_class;
  t_sample *obj_findsignalscalar(t_object *x, int m);
--- 31,34 ----
***************
*** 68,82 ****
      t_float *out = (t_float *)(w[1]);
      int n = (int)(w[2]);
! 
      for (; n; n -= 8, out += 8)
      {
!     	out[0] = 0;
!     	out[1] = 0;
!     	out[2] = 0;
!     	out[3] = 0;
!     	out[4] = 0;
!     	out[5] = 0;
!     	out[6] = 0;
!     	out[7] = 0;
      }
      return (w+3);
--- 59,73 ----
      t_float *out = (t_float *)(w[1]);
      int n = (int)(w[2]);
!  
      for (; n; n -= 8, out += 8)
      {
!         out[0] = 0;
!         out[1] = 0;
!         out[2] = 0;
!         out[3] = 0;
!         out[4] = 0;
!         out[5] = 0;
!         out[6] = 0;
!         out[7] = 0;
      }
      return (w+3);
***************
*** 354,360 ****
  			t_freebytes(sig->s_vec, sig->s_n * sizeof (*sig->s_vec));
  #else
! 			/* T.Grill - free aligned vector memory */
! 			t_sample *ori = *(t_sample **)((unsigned char *)sig->s_vec-sizeof(t_sample *)); /* get original memory location */
! 			t_freebytes(ori, sig->s_n * sizeof (*ori)+(VECTORALIGNMENT/8-1)+sizeof(t_sample *));
  #endif
  		}
--- 345,349 ----
  			t_freebytes(sig->s_vec, sig->s_n * sizeof (*sig->s_vec));
  #else
!             freealignedbytes(sig->s_vec, sig->s_n * sizeof (*sig->s_vec));
  #endif
  		}
***************
*** 441,466 ****
      	    /* LATER figure out what to do for out-of-space here! */
      	ret = (t_signal *)t_getbytes(sizeof *ret);
! 	if (n)
! 	{
  #ifndef VECTORALIGNMENT
!    	    ret->s_vec = (t_sample *)getbytes(n * sizeof (*ret->s_vec));
  #else
! 		/* T.Grill - make signal vectors aligned! */
! 
! 		/* to align the signal vector we also need some extra memory to save the original pointer location
! 			it is saved immediately before the aligned vector memory
! 		*/
!    	    t_sample *vec = (t_sample *)getbytes(n * sizeof (*vec)+ (VECTORALIGNMENT/8-1)+sizeof(t_sample *));
! 		int alignment = ((unsigned long)vec+sizeof(t_sample *))&(VECTORALIGNMENT/8-1);  /* get alignment of first possible signal vector byte */
! 		ret->s_vec = (t_sample *)((unsigned char *)vec+sizeof(t_sample *)+(alignment == 0?0:VECTORALIGNMENT/8-alignment)); /* calculate aligned pointer */
! 		*(t_sample **)((unsigned char *)ret->s_vec-sizeof(t_sample *)) = vec; /* save original memory location */
  #endif
! 	    ret->s_isborrowed = 0;
!     	}
! 	else
! 	{
! 	    ret->s_vec = 0;
! 	    ret->s_isborrowed = 1;
! 	}
      	ret->s_nextused = signal_usedlist;
      	signal_usedlist = ret;
--- 430,448 ----
      	    /* LATER figure out what to do for out-of-space here! */
      	ret = (t_signal *)t_getbytes(sizeof *ret);
! 	    if (n)
! 	    {
  #ifndef VECTORALIGNMENT
!        	    ret->s_vec = (t_sample *)getbytes(n * sizeof (*ret->s_vec));
  #else
! 		    /* T.Grill - make signal vectors aligned! */
!             ret->s_vec = (t_sample *)getalignedbytes(n * sizeof (*ret->s_vec));
  #endif
! 	        ret->s_isborrowed = 0;
!     	    }
! 	    else
! 	    {
! 	        ret->s_vec = 0;
! 	        ret->s_isborrowed = 1;
! 	    }
      	ret->s_nextused = signal_usedlist;
      	signal_usedlist = ret;

Index: m_simd_ve_gcc.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_ve_gcc.h,v
retrieving revision 1.1.2.7
retrieving revision 1.1.2.8
diff -C2 -d -r1.1.2.7 -r1.1.2.8
*** m_simd_ve_gcc.h	6 Oct 2004 19:24:14 -0000	1.1.2.7
--- m_simd_ve_gcc.h	7 Oct 2004 17:56:38 -0000	1.1.2.8
***************
*** 10,13 ****
--- 10,18 ----
  
  /* SIMD functions for VE with GCC */
+ void zerovec_ve_gcc(t_float *dst,int n);
+ void setvec_ve_gcc(t_float *dst,t_float v,int n);
+ void copyvec_ve_gcc(t_float *dst,const t_float *src,int n);
+ void addvec_ve_gcc(t_float *dst,const t_float *src,int n);
+ 
  t_int *zero_perf_ve_gcc(t_int *w);
  t_int *copy_perf_ve_gcc(t_int *w);
***************
*** 32,35 ****
--- 37,48 ----
  
  
+ #define zerovec                 zerovec_ve_gcc
+ #define setvec                  setvec_ve_gcc
+ #define copyvec                 copyvec_ve_gcc
+ #define addvec                  addvec_ve_gcc
+ /* no bad float testing for PPC! */
+ #define testcopyvec             copyvec_ve_gcc
+ #define testaddvec              addvec_ve_gcc
+ 
  /* functions in d_ugen.c */
  #define zero_perf_simd          zero_perf_ve_gcc

Index: d_global.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/d_global.c,v
retrieving revision 1.1.1.2.8.7
retrieving revision 1.1.1.2.8.8
diff -C2 -d -r1.1.1.2.8.7 -r1.1.1.2.8.8
*** d_global.c	6 Oct 2004 19:24:14 -0000	1.1.1.2.8.7
--- d_global.c	7 Oct 2004 17:56:37 -0000	1.1.1.2.8.8
***************
*** 6,9 ****
--- 6,10 ----
  
  #include "m_pd.h"
+ #include "m_simd.h"
  #include <string.h>
  
***************
*** 28,32 ****
      x->x_sym = s;
      x->x_n = DEFSENDVS;
!     x->x_vec = (float *)getbytes(DEFSENDVS * sizeof(float));
      memset((char *)(x->x_vec), 0, DEFSENDVS * sizeof(float));
      x->x_f = 0;
--- 29,33 ----
      x->x_sym = s;
      x->x_n = DEFSENDVS;
!     x->x_vec = (float *)getalignedbytes(DEFSENDVS * sizeof(float));
      memset((char *)(x->x_vec), 0, DEFSENDVS * sizeof(float));
      x->x_f = 0;
***************
*** 41,56 ****
      while (n--)
      {
! 	*out = (PD_BIGORSMALL(*in) ? 0 : *in);
! 	out++;
! 	in++;
      }
      return (w+4);
  }
  
  static void sigsend_dsp(t_sigsend *x, t_signal **sp)
  {
!     if (x->x_n == sp[0]->s_n)
!     	dsp_add(sigsend_perform, 3, sp[0]->s_vec, x->x_vec, sp[0]->s_n);
!     else error("sigsend %s: unexpected vector size", x->x_sym->s_name);
  }
  
--- 42,70 ----
      while (n--)
      {
! 	    *out = (PD_BIGORSMALL(*in) ? 0 : *in);
! 	    out++;
! 	    in++;
      }
      return (w+4);
  }
  
+ /* T.Grill - SIMD version */
+ static t_int *sigsend_perfsimd(t_int *w)
+ {
+     testcopyvec((t_float *)w[2],(t_float *)w[1],w[3]);
+     return (w+4);
+ }
+ 
  static void sigsend_dsp(t_sigsend *x, t_signal **sp)
  {
!     const int n = x->x_n;
!     if(n == sp[0]->s_n) {
!         if(SIMD_CHECK1(n,sp[0]->s_vec)) /* x->x_vec is aligned in any case */
!     	    dsp_add(sigsend_perfsimd, 3, sp[0]->s_vec, x->x_vec, n);
!         else
!     	    dsp_add(sigsend_perform, 3, sp[0]->s_vec, x->x_vec, n);
!     }
!     else 
!         error("sigsend %s: unexpected vector size", x->x_sym->s_name);
  }
  
***************
*** 58,62 ****
  {
      pd_unbind(&x->x_obj.ob_pd, x->x_sym);
!     freebytes(x->x_vec, x->x_n * sizeof(float));
  }
  
--- 72,76 ----
  {
      pd_unbind(&x->x_obj.ob_pd, x->x_sym);
!     freealignedbytes(x->x_vec,x->x_n* sizeof(float));
  }
  
***************
*** 114,136 ****
  {
      t_sigreceive *x = (t_sigreceive *)(w[1]);
-     t_float *out = (t_float *)(w[2]);
-     int n = (int)(w[3]);
      t_float *in = x->x_wherefrom;
!     if (in)
!     {
! 	for (; n; n -= 8, in += 8, out += 8)
! 	{
! 	    out[0] = in[0]; out[1] = in[1]; out[2] = in[2]; out[3] = in[3]; 
! 	    out[4] = in[4]; out[5] = in[5]; out[6] = in[6]; out[7] = in[7]; 
! 	}
!     }
!     else
!     {
! 	for (; n; n -= 8, in += 8, out += 8)
! 	{
! 	    out[0] = 0; out[1] = 0; out[2] = 0; out[3] = 0; 
! 	    out[4] = 0; out[5] = 0; out[6] = 0; out[7] = 0; 
! 	}
!     }
      return (w+4);
  }
--- 128,148 ----
  {
      t_sigreceive *x = (t_sigreceive *)(w[1]);
      t_float *in = x->x_wherefrom;
!     if(in) 
!         copyvec_8((t_float *)w[2],in,w[3]);
!     else 
!         zerovec_8((t_float *)w[2],w[3]);
!     return (w+4);
! }
! 
! /* T.Grill - SIMD version */
! static t_int *sigreceive_perfsimd(t_int *w)
! {
!     t_sigreceive *x = (t_sigreceive *)(w[1]);
!     t_float *in = x->x_wherefrom;
!     if(in) 
!         copyvec((t_float *)w[2],in,w[3]);
!     else 
!         zerovec((t_float *)w[2],w[3]);
      return (w+4);
  }
***************
*** 144,152 ****
      	if (sender->x_n == x->x_n)
      	    x->x_wherefrom = sender->x_vec;
! 	else
! 	{
! 	    pd_error(x, "receive~ %s: vector size mismatch", x->x_sym->s_name);
! 	    x->x_wherefrom = 0;
! 	}
      }
      else
--- 156,164 ----
      	if (sender->x_n == x->x_n)
      	    x->x_wherefrom = sender->x_vec;
! 	    else
! 	    {
! 	        pd_error(x, "receive~ %s: vector size mismatch", x->x_sym->s_name);
! 	        x->x_wherefrom = 0;
! 	    }
      }
      else
***************
*** 159,163 ****
  static void sigreceive_dsp(t_sigreceive *x, t_signal **sp)
  {
!     if (sp[0]->s_n != x->x_n)
      {
      	pd_error(x, "receive~ %s: vector size mismatch", x->x_sym->s_name);
--- 171,176 ----
  static void sigreceive_dsp(t_sigreceive *x, t_signal **sp)
  {
!     const int n = x->x_n;
!     if (sp[0]->s_n != n)
      {
      	pd_error(x, "receive~ %s: vector size mismatch", x->x_sym->s_name);
***************
*** 166,175 ****
      {
      	sigreceive_set(x, x->x_sym);
! 	if(sp[0]->s_n&7)
! 	    dsp_add(sigreceive_perform, 3,
! 		    x, sp[0]->s_vec, sp[0]->s_n);
! 	else
! 	    dsp_add(sigreceive_perf8, 3,
! 		    x, sp[0]->s_vec, sp[0]->s_n);
      }
  }
--- 179,189 ----
      {
      	sigreceive_set(x, x->x_sym);
! 	    if(n&7)
! 	        dsp_add(sigreceive_perform, 3, x, sp[0]->s_vec, n);
!         else if(SIMD_CHECK1(n,sp[0]->s_vec))
!             /* x->x_wherefrom is aligned because we aligned the sender memory buffer */
! 	        dsp_add(sigreceive_perfsimd, 3, x, sp[0]->s_vec, n);
! 	    else
! 	        dsp_add(sigreceive_perf8, 3, x, sp[0]->s_vec, n);
      }
  }
***************
*** 205,209 ****
      x->x_sym = s;
      x->x_n = DEFSENDVS;
!     x->x_vec = (float *)getbytes(DEFSENDVS * sizeof(float));
      memset((char *)(x->x_vec), 0, DEFSENDVS * sizeof(float));
      outlet_new(&x->x_obj, &s_signal);
--- 219,223 ----
      x->x_sym = s;
      x->x_n = DEFSENDVS;
!     x->x_vec = (float *)getalignedbytes(DEFSENDVS * sizeof(float));
      memset((char *)(x->x_vec), 0, DEFSENDVS * sizeof(float));
      outlet_new(&x->x_obj, &s_signal);
***************
*** 223,237 ****
  static t_int *sigcatch_perf8(t_int *w)
  {
!     t_float *in = (t_float *)(w[1]);
!     t_float *out = (t_float *)(w[2]);
!     int n = (int)(w[3]);
!     for (; n; n -= 8, in += 8, out += 8)
!     {
! 	out[0] = in[0]; out[1] = in[1]; out[2] = in[2]; out[3] = in[3]; 
! 	out[4] = in[4]; out[5] = in[5]; out[6] = in[6]; out[7] = in[7]; 
!     
! 	in[0] = 0; in[1] = 0; in[2] = 0; in[3] = 0; 
! 	in[4] = 0; in[5] = 0; in[6] = 0; in[7] = 0; 
!     }
      return (w+4);
  }
--- 237,250 ----
  static t_int *sigcatch_perf8(t_int *w)
  {
!     copyvec_8((t_float *)w[2],(t_float *)w[1],w[3]);
!     zerovec_8((t_float *)w[1],w[3]);
!     return (w+4);
! }
! 
! /* T.Grill: SIMD catch function */
! static t_int *sigcatch_perfsimd(t_int *w)
! {
!     copyvec((t_float *)w[2],(t_float *)w[1],w[3]);
!     zerovec((t_float *)w[1],w[3]);
      return (w+4);
  }
***************
*** 239,248 ****
  static void sigcatch_dsp(t_sigcatch *x, t_signal **sp)
  {
!     if (x->x_n == sp[0]->s_n)
      {
!     	if(sp[0]->s_n&7)
! 	dsp_add(sigcatch_perform, 3, x->x_vec, sp[0]->s_vec, sp[0]->s_n);
! 	else
! 	dsp_add(sigcatch_perf8, 3, x->x_vec, sp[0]->s_vec, sp[0]->s_n);
      }
      else error("sigcatch %s: unexpected vector size", x->x_sym->s_name);
--- 252,264 ----
  static void sigcatch_dsp(t_sigcatch *x, t_signal **sp)
  {
!     const int n = sp[0]->s_n;
!     if (x->x_n == n)
      {
!     	if(n&7)
!     	    dsp_add(sigcatch_perform, 3, x->x_vec, sp[0]->s_vec, n);
!     	else if(SIMD_CHECK2(n,x->x_vec,sp[0]->s_vec))
!     	    dsp_add(sigcatch_perfsimd, 3, x->x_vec, sp[0]->s_vec, n);
! 	    else
! 	        dsp_add(sigcatch_perf8, 3, x->x_vec, sp[0]->s_vec, n);
      }
      else error("sigcatch %s: unexpected vector size", x->x_sym->s_name);
***************
*** 252,256 ****
  {
      pd_unbind(&x->x_obj.ob_pd, x->x_sym);
!     freebytes(x->x_vec, x->x_n * sizeof(float));
  }
  
--- 268,272 ----
  {
      pd_unbind(&x->x_obj.ob_pd, x->x_sym);
!     freealignedbytes(x->x_vec,x->x_n* sizeof(float));
  }
  
***************
*** 294,306 ****
      {
      	while (n--)
! 	{
!     	    *out += (PD_BIGORSMALL(*in) ? 0 : *in);
! 	    out++;
! 	    in++;
! 	}
      }
      return (w+4);
  }
  
  static void sigthrow_set(t_sigthrow *x, t_symbol *s)
  {
--- 310,331 ----
      {
      	while (n--)
! 	    {
!             *out += (PD_BIGORSMALL(*in) ? 0 : *in);
! 	        out++;
! 	        in++;
! 	    }
      }
      return (w+4);
  }
  
+ /* T.Grill - SIMD version */
+ static t_int *sigthrow_perfsimd(t_int *w)
+ {
+     t_sigthrow *x = (t_sigthrow *)(w[1]);
+     t_float *out = x->x_whereto;
+     if(out) testaddvec(out,(t_float *)w[2],w[3]);
+     return (w+4);
+ }
+ 
  static void sigthrow_set(t_sigthrow *x, t_symbol *s)
  {
***************
*** 326,330 ****
  static void sigthrow_dsp(t_sigthrow *x, t_signal **sp)
  {
!     if (sp[0]->s_n != x->x_n)
      {
      	pd_error(x, "throw~ %s: vector size mismatch", x->x_sym->s_name);
--- 351,356 ----
  static void sigthrow_dsp(t_sigthrow *x, t_signal **sp)
  {
!     const int n = x->x_n;
!     if (sp[0]->s_n != n)
      {
      	pd_error(x, "throw~ %s: vector size mismatch", x->x_sym->s_name);
***************
*** 333,338 ****
      {
      	sigthrow_set(x, x->x_sym);
!     	dsp_add(sigthrow_perform, 3,
!     	    x, sp[0]->s_vec, sp[0]->s_n);
      }
  }
--- 359,367 ----
      {
      	sigthrow_set(x, x->x_sym);
!         if(SIMD_CHECK1(n,sp[0]->s_vec))
!             /* the memory of the catcher is aligned in any case */
!     	    dsp_add(sigthrow_perfsimd, 3, x, sp[0]->s_vec, n);
!         else
!     	    dsp_add(sigthrow_perform, 3, x, sp[0]->s_vec, n);
      }
  }

Index: d_array.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/d_array.c,v
retrieving revision 1.1.1.3.2.4
retrieving revision 1.1.1.3.2.5
diff -C2 -d -r1.1.1.3.2.4 -r1.1.1.3.2.5
*** d_array.c	6 Oct 2004 19:24:14 -0000	1.1.1.3.2.4
--- d_array.c	7 Oct 2004 17:56:37 -0000	1.1.1.3.2.5
***************
*** 8,12 ****
  
  #include "m_pd.h"
! 
  
  /* ------------------------- tabwrite~ -------------------------- */
--- 8,12 ----
  
  #include "m_pd.h"
! #include "m_simd.h"
  
  /* ------------------------- tabwrite~ -------------------------- */
***************
*** 50,61 ****
      	if (nxfer > n) nxfer = n;
      	phase += nxfer;
!     	while (nxfer--)
! 	{
! 	    float f = *in++;
!     	    if (PD_BIGORSMALL(f))
! 	    	f = 0;
! 	    *fp++ = f;
      	}
! 	if (phase >= endphase)
      	{
      	    clock_delay(x->x_clock, 0);
--- 50,62 ----
      	if (nxfer > n) nxfer = n;
      	phase += nxfer;
!     	while (nxfer--) 
!         {
!             float f = *in++;
!             if (PD_BIGORSMALL(f)) 
!                 f = 0;
!             *fp++ = f;
      	}
! 
!     	if (phase >= endphase)
      	{
      	    clock_delay(x->x_clock, 0);
***************
*** 186,191 ****
      	clock_delay(x->x_clock, 0);
      	x->x_phase = 0x7fffffff;
! 	while (n3--)
! 	    *out++ = 0;
      }
      else x->x_phase = phase;
--- 187,192 ----
      	clock_delay(x->x_clock, 0);
      	x->x_phase = 0x7fffffff;
! 	    while (n3--)
! 	        *out++ = 0;
      }
      else x->x_phase = phase;
***************
*** 732,742 ****
      if (!x->x_vec) goto bad;
  
!     while (n--)
!     {	
!     	float f = *in++;
!     	if (PD_BIGORSMALL(f))
! 	    f = 0;
! 	 *dest++ = f;
!     }
      if (!i--)
      {
--- 733,749 ----
      if (!x->x_vec) goto bad;
  
!     if(n&7)
!         while(n--) 
!         {	
!     	    float f = *in++;
!     	    if (PD_BIGORSMALL(f)) 
!                 f = 0;
! 	        *dest++ = f;
!         }
!     else if(SIMD_CHECK2(n,in,dest))
!         testcopyvec(dest,in,n);
!     else
!         testcopyvec_8(dest,in,n);
! 
      if (!i--)
      {
***************
*** 816,819 ****
--- 823,848 ----
  }
  
+ static t_int *tabreceive_perf8(t_int *w)
+ {
+     t_tabreceive *x = (t_tabreceive *)(w[1]);
+     t_float *from = x->x_vec;
+     if (from) 
+         copyvec_8((t_float *)(w[2]),from,w[3]);
+     else 
+         zerovec_8((t_float *)(w[2]),w[3]);
+     return (w+4);
+ }
+ 
+ static t_int *tabreceive_perfsimd(t_int *w)
+ {
+     t_tabreceive *x = (t_tabreceive *)(w[1]);
+     t_float *from = x->x_vec;
+     if(from) 
+         copyvec((t_float *)(w[2]),from,w[3]);
+     else 
+         zerovec((t_float *)(w[2]),w[3]);
+     return (w+4);
+ }
+ 
  static void tabreceive_dsp(t_tabreceive *x, t_signal **sp)
  {
***************
*** 833,837 ****
      	if (n < vecsize) vecsize = n;
      	garray_usedindsp(a);
!     	dsp_add(tabreceive_perform, 3, x, sp[0]->s_vec, vecsize);
      }
  }
--- 862,872 ----
      	if (n < vecsize) vecsize = n;
      	garray_usedindsp(a);
!         if(vecsize&7)
!     	    dsp_add(tabreceive_perform, 3, x, sp[0]->s_vec, vecsize);
!         else if(SIMD_CHECK1(vecsize,sp[0]->s_vec))
!             /* the array is aligned in any case */
!     	    dsp_add(tabreceive_perfsimd, 3, x, sp[0]->s_vec, vecsize);
!         else
!     	    dsp_add(tabreceive_perf8, 3, x, sp[0]->s_vec, vecsize);
      }
  }





More information about the Pd-cvs mailing list