[PD-cvs] pd/src d_arithmetic.c,1.2,1.2.4.1 d_array.c,1.3,1.3.4.1 d_ctl.c,1.3,1.3.4.1 d_dac.c,1.3,1.3.4.1

Fri Nov 5 14:39:57 CET 2004

Update of /cvsroot/pure-data/pd/src
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv21900

Modified Files:
      Tag: devel_0_38
	d_arithmetic.c d_array.c d_ctl.c d_dac.c 
Log Message:
simd stuff

Index: d_arithmetic.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/d_arithmetic.c,v
retrieving revision 1.2
retrieving revision 1.2.4.1
diff -C2 -d -r1.2 -r1.2.4.1
*** d_arithmetic.c	6 Sep 2004 20:20:33 -0000	1.2
--- d_arithmetic.c	5 Nov 2004 13:39:55 -0000	1.2.4.1
***************
*** 11,14 ****
--- 11,16 ----
  #include "m_pd.h"
  
+ #include "m_simd.h"
+ 
  /* ----------------------------- plus ----------------------------- */
  static t_class *plus_class, *scalarplus_class;
***************
*** 110,113 ****
--- 112,117 ----
      if (n&7)
          dsp_add(plus_perform, 4, in1, in2, out, n);
+     else if(SIMD_CHECK3(n,in1,in2,out))
+     	dsp_add(plus_perf_simd, 4, in1, in2, out, n);
      else        
          dsp_add(plus_perf8, 4, in1, in2, out, n);
***************
*** 121,130 ****
  static void scalarplus_dsp(t_scalarplus *x, t_signal **sp)
  {
!     if (sp[0]->s_n&7)
!         dsp_add(scalarplus_perform, 4, sp[0]->s_vec, &x->x_g,
!             sp[1]->s_vec, sp[0]->s_n);
      else        
!         dsp_add(scalarplus_perf8, 4, sp[0]->s_vec, &x->x_g,
!             sp[1]->s_vec, sp[0]->s_n);
  }
  
--- 125,135 ----
  static void scalarplus_dsp(t_scalarplus *x, t_signal **sp)
  {
! 	const int n = sp[0]->s_n;
!     if(n&7)
!     	dsp_add(scalarplus_perform, 4, sp[0]->s_vec, &x->x_g, sp[1]->s_vec, n);
! 	else if(SIMD_CHECK2(n,sp[0]->s_vec,sp[1]->s_vec))
!     	dsp_add(scalarplus_perf_simd, 4, sp[0]->s_vec, &x->x_g, sp[1]->s_vec, n);
      else        
!     	dsp_add(scalarplus_perf8, 4, sp[0]->s_vec, &x->x_g, sp[1]->s_vec, n);
  }
  
***************
*** 241,260 ****
  static void minus_dsp(t_minus *x, t_signal **sp)
  {
!     if (sp[0]->s_n&7)
!         dsp_add(minus_perform, 4,
!             sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, sp[0]->s_n);
      else        
!         dsp_add(minus_perf8, 4,
!             sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, sp[0]->s_n);
  }
  
  static void scalarminus_dsp(t_scalarminus *x, t_signal **sp)
  {
!     if (sp[0]->s_n&7)
!         dsp_add(scalarminus_perform, 4, sp[0]->s_vec, &x->x_g,
!             sp[1]->s_vec, sp[0]->s_n);
      else        
!         dsp_add(scalarminus_perf8, 4, sp[0]->s_vec, &x->x_g,
!             sp[1]->s_vec, sp[0]->s_n);
  }
  
--- 246,267 ----
  static void minus_dsp(t_minus *x, t_signal **sp)
  {
! 	const int n = sp[0]->s_n;
!     if(n&7)
!     	dsp_add(minus_perform, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, n);
! 	else if(SIMD_CHECK3(n,sp[0]->s_vec,sp[1]->s_vec,sp[2]->s_vec))
!     	dsp_add(minus_perf_simd, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, n);
      else        
!     	dsp_add(minus_perf8, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, n);
  }
  
  static void scalarminus_dsp(t_scalarminus *x, t_signal **sp)
  {
! 	const int n = sp[0]->s_n;
!     if(n&7)
!     	dsp_add(scalarminus_perform, 4, sp[0]->s_vec, &x->x_g,sp[1]->s_vec, n);
! 	else if(SIMD_CHECK2(n,sp[0]->s_vec,sp[1]->s_vec))
!     	dsp_add(scalarminus_perf_simd, 4, sp[0]->s_vec, &x->x_g, sp[1]->s_vec, n);
      else        
!     	dsp_add(scalarminus_perf8, 4, sp[0]->s_vec, &x->x_g, sp[1]->s_vec, n);
  }
  
***************
*** 343,346 ****
--- 350,371 ----
  }
  
+ /* T.Grill - squaring: optimized * for equal input signals */
+ static t_int *sqr_perf8(t_int *w)
+ {
+     t_float *in = (t_float *)(w[1]);
+     t_float *out = (t_float *)(w[2]);
+     int n = (int)(w[3]);
+ 
+     for (; n; n -= 8, in += 8, out += 8)
+     {
+     	float f0 = in[0], f1 = in[1], f2 = in[2], f3 = in[3];
+     	float f4 = in[4], f5 = in[5], f6 = in[6], f7 = in[7];
+ 
+     	out[0] = f0 * f0; out[1] = f1 * f1; out[2] = f2 * f2; out[3] = f3 * f3;
+     	out[4] = f4 * f4; out[5] = f5 * f5; out[6] = f6 * f6; out[7] = f7 * f7;
+     }
+     return (w+4);
+ }
+ 
  t_int *scalartimes_perform(t_int *w)
  {
***************
*** 370,391 ****
  }
  
  static void times_dsp(t_times *x, t_signal **sp)
  {
!     if (sp[0]->s_n&7)
!         dsp_add(times_perform, 4,
!             sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, sp[0]->s_n);
      else        
!         dsp_add(times_perf8, 4,
!             sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, sp[0]->s_n);
  }
  
  static void scalartimes_dsp(t_scalartimes *x, t_signal **sp)
  {
!     if (sp[0]->s_n&7)
!         dsp_add(scalartimes_perform, 4, sp[0]->s_vec, &x->x_g,
!             sp[1]->s_vec, sp[0]->s_n);
      else        
!         dsp_add(scalartimes_perf8, 4, sp[0]->s_vec, &x->x_g,
!             sp[1]->s_vec, sp[0]->s_n);
  }
  
--- 395,430 ----
  }
  
+ /* T.Grill - added optimization for equal input signals */
  static void times_dsp(t_times *x, t_signal **sp)
  {
!     const int n = sp[0]->s_n;
!     if (n&7)
!     	dsp_add(times_perform, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, n);
!     else 
! 	if(sp[0]->s_vec == sp[1]->s_vec) 
! 	{
! 	    if(SIMD_CHECK2(n,sp[0]->s_vec,sp[2]->s_vec))
! 			dsp_add(sqr_perf_simd, 3, sp[0]->s_vec, sp[2]->s_vec, n);
! 	    else	
! 			dsp_add(sqr_perf8, 3, sp[0]->s_vec, sp[2]->s_vec, n);
! 	}
! 	else 
! 	{
! 	    if(SIMD_CHECK3(n,sp[0]->s_vec,sp[1]->s_vec,sp[2]->s_vec))
! 			dsp_add(times_perf_simd, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, n);
      else        
! 			dsp_add(times_perf8, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, n);
! 	}
  }
  
  static void scalartimes_dsp(t_scalartimes *x, t_signal **sp)
  {
! 	const int n = sp[0]->s_n;
!     if (n&7)
!     	dsp_add(scalartimes_perform, 4, sp[0]->s_vec, &x->x_g,sp[1]->s_vec, n);
! 	else if(SIMD_CHECK2(n,sp[0]->s_vec,sp[1]->s_vec))
!     	dsp_add(scalartimes_perf_simd, 4, sp[0]->s_vec, &x->x_g, sp[1]->s_vec, n);
      else        
!     	dsp_add(scalartimes_perf8, 4, sp[0]->s_vec, &x->x_g, sp[1]->s_vec, n);
  }
  
***************
*** 515,534 ****
  static void over_dsp(t_over *x, t_signal **sp)
  {
!     if (sp[0]->s_n&7)
!         dsp_add(over_perform, 4,
!             sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, sp[0]->s_n);
      else        
!         dsp_add(over_perf8, 4,
!             sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, sp[0]->s_n);
  }
  
  static void scalarover_dsp(t_scalarover *x, t_signal **sp)
  {
!     if (sp[0]->s_n&7)
!         dsp_add(scalarover_perform, 4, sp[0]->s_vec, &x->x_g,
!             sp[1]->s_vec, sp[0]->s_n);
      else        
!         dsp_add(scalarover_perf8, 4, sp[0]->s_vec, &x->x_g,
!             sp[1]->s_vec, sp[0]->s_n);
  }
  
--- 554,575 ----
  static void over_dsp(t_over *x, t_signal **sp)
  {
! 	const int n = sp[0]->s_n;
!     if (n&7)
!     	dsp_add(over_perform, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, n);
! 	else if(SIMD_CHECK3(n,sp[0]->s_vec,sp[1]->s_vec,sp[2]->s_vec))
!     	dsp_add(over_perf_simd, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, n);
      else        
!     	dsp_add(over_perf8, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, n);
  }
  
  static void scalarover_dsp(t_scalarover *x, t_signal **sp)
  {
! 	const int n = sp[0]->s_n;
!     if (n&7)
!     	dsp_add(scalarover_perform, 4, sp[0]->s_vec, &x->x_g,sp[1]->s_vec, n);
! 	else if(SIMD_CHECK2(n,sp[0]->s_vec,sp[1]->s_vec))
!     	dsp_add(scalarover_perf_simd, 4, sp[0]->s_vec, &x->x_g, sp[1]->s_vec, n);
      else        
!     	dsp_add(scalarover_perf8, 4, sp[0]->s_vec, &x->x_g,sp[1]->s_vec, n);
  }
  
***************
*** 657,676 ****
  static void max_dsp(t_max *x, t_signal **sp)
  {
!     if (sp[0]->s_n&7)
!         dsp_add(max_perform, 4,
!             sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, sp[0]->s_n);
      else        
!         dsp_add(max_perf8, 4,
!             sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, sp[0]->s_n);
  }
  
  static void scalarmax_dsp(t_scalarmax *x, t_signal **sp)
  {
!     if (sp[0]->s_n&7)
!         dsp_add(scalarmax_perform, 4, sp[0]->s_vec, &x->x_g,
!             sp[1]->s_vec, sp[0]->s_n);
      else        
!         dsp_add(scalarmax_perf8, 4, sp[0]->s_vec, &x->x_g,
!             sp[1]->s_vec, sp[0]->s_n);
  }
  
--- 698,719 ----
  static void max_dsp(t_max *x, t_signal **sp)
  {
!     const int n = sp[0]->s_n;
!     if(n&7)
!     	dsp_add(max_perform, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, n);
! 	else if(SIMD_CHECK3(n,sp[0]->s_vec,sp[1]->s_vec,sp[2]->s_vec))
!     	dsp_add(max_perf_simd, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, n);
      else        
!     	dsp_add(max_perf8, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, n);
  }
  
  static void scalarmax_dsp(t_scalarmax *x, t_signal **sp)
  {
! 	const int n = sp[0]->s_n;
!     if (n&7)
!     	dsp_add(scalarmax_perform, 4, sp[0]->s_vec, &x->x_g, sp[1]->s_vec, n);
! 	else if(SIMD_CHECK2(n,sp[0]->s_vec,sp[1]->s_vec))
!     	dsp_add(scalarmax_perf_simd, 4, sp[0]->s_vec, &x->x_g, sp[1]->s_vec, n);
      else        
!     	dsp_add(scalarmax_perf8, 4, sp[0]->s_vec, &x->x_g, sp[1]->s_vec, n);
  }
  
***************
*** 799,818 ****
  static void min_dsp(t_min *x, t_signal **sp)
  {
!     if (sp[0]->s_n&7)
!         dsp_add(min_perform, 4,
!             sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, sp[0]->s_n);
      else        
!         dsp_add(min_perf8, 4,
!             sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, sp[0]->s_n);
  }
  
  static void scalarmin_dsp(t_scalarmin *x, t_signal **sp)
  {
!     if (sp[0]->s_n&7)
!         dsp_add(scalarmin_perform, 4, sp[0]->s_vec, &x->x_g,
!             sp[1]->s_vec, sp[0]->s_n);
      else        
!         dsp_add(scalarmin_perf8, 4, sp[0]->s_vec, &x->x_g,
!             sp[1]->s_vec, sp[0]->s_n);
  }
  
--- 842,863 ----
  static void min_dsp(t_min *x, t_signal **sp)
  {
!     const int n = sp[0]->s_n;
!     if(n&7)
!     	dsp_add(min_perform, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, n);
! 	else if(SIMD_CHECK3(n,sp[0]->s_vec,sp[1]->s_vec,sp[2]->s_vec))
!     	dsp_add(min_perf_simd, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, n);
      else        
!     	dsp_add(min_perf8, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, n);
  }
  
  static void scalarmin_dsp(t_scalarmin *x, t_signal **sp)
  {
! 	const int n = sp[0]->s_n;
!     if (n&7)
!     	dsp_add(scalarmin_perform, 4, sp[0]->s_vec, &x->x_g, sp[1]->s_vec, n);
! 	else if(SIMD_CHECK2(n,sp[0]->s_vec,sp[1]->s_vec))
!     	dsp_add(scalarmin_perf_simd, 4, sp[0]->s_vec, &x->x_g, sp[1]->s_vec, n);
      else        
!     	dsp_add(scalarmin_perf8, 4, sp[0]->s_vec, &x->x_g, sp[1]->s_vec, n);
  }
  

Index: d_dac.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/d_dac.c,v
retrieving revision 1.3
retrieving revision 1.3.4.1
diff -C2 -d -r1.3 -r1.3.4.1
*** d_dac.c	6 Sep 2004 20:20:33 -0000	1.3
--- d_dac.c	5 Nov 2004 13:39:55 -0000	1.3.4.1
***************
*** 9,12 ****
--- 9,15 ----
  #include "s_stuff.h"
  
+ /* T.Grill - include SIMD functionality */
+ #include "m_simd.h"
+ 
  /* ----------------------------- dac~ --------------------------- */
  static t_class *dac_class;
***************
*** 51,55 ****
          if ((*sp2)->s_n != DEFDACBLKSIZE)
              error("dac~: bad vector size");
!         else if (ch >= 0 && ch < sys_get_outchannels())
              dsp_add(plus_perform, 4, sys_soundout + DEFDACBLKSIZE*ch,
                  (*sp2)->s_vec, sys_soundout + DEFDACBLKSIZE*ch, DEFDACBLKSIZE);
--- 54,64 ----
          if ((*sp2)->s_n != DEFDACBLKSIZE)
              error("dac~: bad vector size");
!     	else 
! 	    if (ch >= 0 && ch < sys_get_outchannels())
! 		if(SIMD_CHECK3(DEFDACBLKSIZE,sys_soundout + DEFDACBLKSIZE*ch,
! 			       (*sp2)->s_vec,sys_soundout + DEFDACBLKSIZE*ch))
! 		    dsp_add(plus_perf_simd, 4, sys_soundout + DEFDACBLKSIZE*ch,
! 			    (*sp2)->s_vec, sys_soundout + DEFDACBLKSIZE*ch, DEFDACBLKSIZE);
! 		else
              dsp_add(plus_perform, 4, sys_soundout + DEFDACBLKSIZE*ch,
                  (*sp2)->s_vec, sys_soundout + DEFDACBLKSIZE*ch, DEFDACBLKSIZE);
***************
*** 144,149 ****
      if (n&7)
          dsp_add(copy_perform, 3, in, out, n);
!     else        
!         dsp_add(copy_perf8, 3, in, out, n);
  }
  
--- 153,161 ----
      if (n&7)
          dsp_add(copy_perform, 3, in, out, n);
! 	else
! 		if(SIMD_CHECK2(n,in,out))
! 			dsp_add(copy_perf_simd, 3, in, out, n);
! 		else        
! 			dsp_add(copy_perf8, 3, in, out, n);
  }
  

Index: d_ctl.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/d_ctl.c,v
retrieving revision 1.3
retrieving revision 1.3.4.1
diff -C2 -d -r1.3 -r1.3.4.1
*** d_ctl.c	6 Sep 2004 20:20:33 -0000	1.3
--- d_ctl.c	5 Nov 2004 13:39:55 -0000	1.3.4.1
***************
*** 10,13 ****
--- 10,16 ----
  #include "math.h"
  
+ /* T.Grill - include SIMD functionality */
+ #include "m_simd.h"
+ 
  /* -------------------------- sig~ ------------------------------ */
  static t_class *sig_tilde_class;
***************
*** 54,57 ****
--- 57,63 ----
          dsp_add(sig_tilde_perform, 3, in, out, n);
      else        
+ 	if(SIMD_CHECK1(n,out))
+ 	    dsp_add(sig_tilde_perf_simd, 3, in, out, n);
+ 	else	
          dsp_add(sig_tilde_perf8, 3, in, out, n);
  }
***************
*** 64,68 ****
  static void sig_tilde_dsp(t_sig *x, t_signal **sp)
  {
!     dsp_add(sig_tilde_perform, 3, &x->x_f, sp[0]->s_vec, sp[0]->s_n);
  }
  
--- 70,76 ----
  static void sig_tilde_dsp(t_sig *x, t_signal **sp)
  {
! /*   dsp_add(sig_tilde_perform, 3, &x->x_f, sp[0]->s_vec, sp[0]->s_n); */
! 	/* T.Grill - use chance of unrolling */
! 	dsp_add_scalarcopy(&x->x_f, sp[0]->s_vec, sp[0]->s_n);
  }
  

Index: d_array.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/d_array.c,v
retrieving revision 1.3
retrieving revision 1.3.4.1
diff -C2 -d -r1.3 -r1.3.4.1
*** d_array.c	6 Sep 2004 20:20:33 -0000	1.3
--- d_array.c	5 Nov 2004 13:39:55 -0000	1.3.4.1
***************
*** 8,12 ****
  
  #include "m_pd.h"
! 
  
  /* ------------------------- tabwrite~ -------------------------- */
--- 8,12 ----
  
  #include "m_pd.h"
! #include "m_simd.h"
  
  /* ------------------------- tabwrite~ -------------------------- */
***************
*** 504,508 ****
  #define int32 long  /* a data type that has 32 bits */
  #else
! #ifdef MSW
      /* little-endian; most significant byte is at highest address */
  #define HIOFFSET 1
--- 504,508 ----
  #define int32 long  /* a data type that has 32 bits */
  #else
! #if defined(MSW) || defined(__CYGWIN__)
      /* little-endian; most significant byte is at highest address */
  #define HIOFFSET 1
***************
*** 732,735 ****
--- 732,736 ----
      if (!x->x_vec) goto bad;
  
+     if(n&7)
      while (n--)
      {   
***************
*** 739,742 ****
--- 740,748 ----
           *dest++ = f;
      }
+     else if(SIMD_CHECK2(n,in,dest))
+         testcopyvec_simd(dest,in,n);
+     else
+         testcopyvec_8(dest,in,n);
+ 
      if (!i--)
      {
***************
*** 807,810 ****
--- 813,838 ----
  }
  
+ static t_int *tabreceive_perf8(t_int *w)
+ {
+     t_tabreceive *x = (t_tabreceive *)(w[1]);
+     t_float *from = x->x_vec;
+     if (from) 
+         copyvec_8((t_float *)(w[2]),from,w[3]);
+     else 
+         zerovec_8((t_float *)(w[2]),w[3]);
+     return (w+4);
+ }
+ 
+ static t_int *tabreceive_perfsimd(t_int *w)
+ {
+     t_tabreceive *x = (t_tabreceive *)(w[1]);
+     t_float *from = x->x_vec;
+     if(from) 
+         copyvec_simd((t_float *)(w[2]),from,w[3]);
+     else 
+         zerovec_simd((t_float *)(w[2]),w[3]);
+     return (w+4);
+ }
+ 
  static void tabreceive_dsp(t_tabreceive *x, t_signal **sp)
  {
***************
*** 824,828 ****
--- 852,862 ----
          if (n < vecsize) vecsize = n;
          garray_usedindsp(a);
+         if(vecsize&7)
          dsp_add(tabreceive_perform, 3, x, sp[0]->s_vec, vecsize);
+         else if(SIMD_CHECK1(vecsize,sp[0]->s_vec))
+             /* the array is aligned in any case */
+     	    dsp_add(tabreceive_perfsimd, 3, x, sp[0]->s_vec, vecsize);
+         else
+     	    dsp_add(tabreceive_perf8, 3, x, sp[0]->s_vec, vecsize);
      }
  }