[PD-cvs] pd/src d_ctl.c,1.3.4.3,1.3.4.4 m_simd.c,1.1.4.1,1.1.4.2 m_simd_sse_gcc.c,1.1.4.7,1.1.4.8 m_simd_sse_gcc.h,1.1.4.3,1.1.4.4 m_simd_sse_vc.h,1.1.4.3,1.1.4.4 m_simd_ve_gcc.h,1.1.4.1,1.1.4.2 m_pd.h,1.4.4.5,1.4.4.6

Tim Blechmann timblech at users.sourceforge.net
Sat Jan 8 20:43:54 CET 2005


Update of /cvsroot/pure-data/pd/src
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27025

Modified Files:
      Tag: devel_0_38
	d_ctl.c m_simd.c m_simd_sse_gcc.c m_simd_sse_gcc.h 
	m_simd_sse_vc.h m_simd_ve_gcc.h m_pd.h 
Log Message:
simd code and loop unrolling for env~ (gcc / sse only at the moment)

Index: m_pd.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/m_pd.h,v
retrieving revision 1.4.4.5
retrieving revision 1.4.4.6
diff -C2 -d -r1.4.4.5 -r1.4.4.6
*** m_pd.h	2 Dec 2004 09:25:15 -0000	1.4.4.5
--- m_pd.h	8 Jan 2005 19:43:52 -0000	1.4.4.6
***************
*** 481,485 ****
  
  /* tb: to be called at idle time */
! EXTERN void set_callback(t_int (*callback) (t_int* argv), t_int* argv, t_int argc);
  
  
--- 481,485 ----
  
  /* tb: to be called at idle time */
! EXTERN void sys_callback(t_int (*callback) (t_int* argv), t_int* argv, t_int argc);
  
  
***************
*** 658,661 ****
--- 658,662 ----
  EXTERN void testcopyvec_8(t_float *dst,const t_float *src,int n);
  EXTERN void testaddvec_8(t_float *dst,const t_float *src,int n);
+ EXTERN float sumvec_8(t_float* in, t_int n);
  
  /* vectorized, simd functions *
***************
*** 667,670 ****
--- 668,672 ----
  EXTERN void testcopyvec_simd(t_float *dst,const t_float *src,int n);
  EXTERN void testaddvec_simd(t_float *dst,const t_float *src,int n);
+ EXTERN float sumvec_simd(t_float* in, t_int n);
  
  EXTERN int simd_runtime_check(void);

Index: m_simd.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd.c,v
retrieving revision 1.1.4.1
retrieving revision 1.1.4.2
diff -C2 -d -r1.1.4.1 -r1.1.4.2
*** m_simd.c	5 Nov 2004 13:33:19 -0000	1.1.4.1
--- m_simd.c	8 Jan 2005 19:43:52 -0000	1.1.4.2
***************
*** 66,71 ****
  }
  
- 
- 
  #ifdef DONTUSESIMD
  int simd_runtime_check()
--- 66,69 ----
***************
*** 105,107 ****
--- 103,112 ----
  }
  
+ float sumvec_simd(t_float* in, t_int n)
+ {
+ 	return sumvec_8(in,n); /* no vector code in this variant: forward to the 8x-unrolled scalar sum */
+ }
+ 
+ 
  #endif /* DONTUSESIMD */
+ 

Index: m_simd_sse_gcc.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_gcc.h,v
retrieving revision 1.1.4.3
retrieving revision 1.1.4.4
diff -C2 -d -r1.1.4.3 -r1.1.4.4
*** m_simd_sse_gcc.h	29 Nov 2004 18:11:38 -0000	1.1.4.3
--- m_simd_sse_gcc.h	8 Jan 2005 19:43:52 -0000	1.1.4.4
***************
*** 41,44 ****
--- 41,47 ----
  t_int *sigrsqrt_perf_simd(t_int *w);
  
+ float sumvec_simd(t_float* in, t_int n);
+ 
+ //#define sum_vecsimd                sumvec_8
  #define sigwrap_perf_simd          sigwrap_perform  /* SIMD not implemented */
  

Index: m_simd_sse_vc.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_vc.h,v
retrieving revision 1.1.4.3
retrieving revision 1.1.4.4
diff -C2 -d -r1.1.4.3 -r1.1.4.4
*** m_simd_sse_vc.h	29 Nov 2004 18:11:38 -0000	1.1.4.3
--- m_simd_sse_vc.h	8 Jan 2005 19:43:52 -0000	1.1.4.4
***************
*** 41,44 ****
--- 41,45 ----
  t_int *sigrsqrt_perf_simd(t_int *w);
  
+ #define sum_vecsimd             sumvec_8 /* maps to the scalar sumvec_8; NOTE(review): spelled sum_vecsimd, but the function this patch declares and calls is sumvec_simd - confirm the intended name */
  #define sigwrap_perf_simd       sigwrap_perform  /* SIMD not implemented */
  

Index: m_simd_ve_gcc.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_ve_gcc.h,v
retrieving revision 1.1.4.1
retrieving revision 1.1.4.2
diff -C2 -d -r1.1.4.1 -r1.1.4.2
*** m_simd_ve_gcc.h	5 Nov 2004 13:33:20 -0000	1.1.4.1
--- m_simd_ve_gcc.h	8 Jan 2005 19:43:52 -0000	1.1.4.2
***************
*** 41,43 ****
--- 41,45 ----
  t_int *sigrsqrt_perf_simd(t_int *w);
  
+ #define sum_vecsimd                sumvec_8 /* SIMD not implemented; NOTE(review): spelled sum_vecsimd, but the function this patch declares and calls is sumvec_simd - confirm the intended name */
+ 
  #endif /* __M_SIMD_VE_GCC_H */

Index: m_simd_sse_gcc.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_gcc.c,v
retrieving revision 1.1.4.7
retrieving revision 1.1.4.8
diff -C2 -d -r1.1.4.7 -r1.1.4.8
*** m_simd_sse_gcc.c	29 Nov 2004 18:11:34 -0000	1.1.4.7
--- m_simd_sse_gcc.c	8 Jan 2005 19:43:52 -0000	1.1.4.8
***************
*** 813,816 ****
--- 813,861 ----
  }
  
+ float sumvec_simd(t_float* in, t_int n) /* tb: SSE sum of the floats at in; each loop pass consumes 16 floats, so n/16 passes are made */
+ {
+ 	float ret = 0; /* bound to %0 as an output-only SSE register ("=x"); the asm clears it itself */
+     asm(
+ 		".set T_FLOAT,4                            \n" /* assembler constant used to scale the float offsets below */
+ 		
+ 		"shrl      $4, %2                          \n" /* divide by 16: pass count = n/16, any remainder is not summed */
+ 		"xorps     %%xmm4, %%xmm4                  \n" /* zero values */
+ 		"xorps     %%xmm5, %%xmm5                  \n"
+ 		"xorps     %%xmm6, %%xmm6                  \n"
+ 		"xorps     %0, %0                          \n" /* clear the result register */
+ 
+ 		
+ 		"1:                                        \n" /* loop head; %2 (ecx) holds the remaining pass count */
+ 		"movaps    (%1), %%xmm0                    \n" /* four aligned loads of 4 floats each; movaps needs 16-byte aligned addresses */
+ 		"movaps    4*T_FLOAT(%1), %%xmm1           \n"
+ 		"movaps    8*T_FLOAT(%1), %%xmm2           \n"
+ 		"movaps    12*T_FLOAT(%1), %%xmm3          \n"
+ 
+ 		"addps     %%xmm0,%%xmm4                   \n" /* accumulate all four vectors into xmm4 (four running lane sums) */
+ 		"addps     %%xmm1,%%xmm4                   \n"
+ 		"addps     %%xmm2,%%xmm4                   \n"
+ 		"addps     %%xmm3,%%xmm4                   \n"
+ 
+ 		"addl      $16*T_FLOAT,%1                  \n" /* advance the input pointer by 16 floats */
+ 		"loop      1b                              \n" /* decrement ecx, repeat while non-zero; NOTE(review): a pass count of 0 (n < 16) would not exit here on the first pass - confirm callers */
+ 
+ 		"movhlps   %%xmm4, %%xmm5                  \n" /* horizontal add starts: xmm5 lanes 0..1 = xmm4 lanes 2..3 */
+ 		"movups   %%xmm4, %%xmm6                   \n" /* xmm6 = copy of the lane sums */
+ 		"movups   %%xmm5, %0                       \n" /* %0 = copy of xmm5 */
+ 		"shufps    $81, %%xmm6, %%xmm6             \n" /* lane 0 of xmm6 = its lane 1, i.e. xmm4 lane 1 */
+ 		"shufps    $81, %0, %0                     \n" /* lane 0 of %0 = its lane 1, i.e. xmm4 lane 3 */
+ 
+ 		"addss     %%xmm4, %%xmm5                  \n" /* xmm5[0] = xmm4[2] + xmm4[0] */
+ 		"addss     %%xmm5, %%xmm6                  \n" /* xmm6[0] = xmm4[1] + xmm5[0] */
+ 		"addss     %%xmm6, %0                      \n" /* %0[0] = xmm4[3] + xmm6[0]: total of the four lane sums */
+ 
+ 
+ 		:"=x"(ret) /* %0 */
+ 		:"r"(in),"c"(n) /* %1 = in, %2 = n in the count register used by "loop"; NOTE(review): both are modified above although declared as inputs only */
+ 		:"%xmm0","%xmm1","%xmm2","%xmm3", "%xmm4","%xmm5","%xmm6");
+ 	return ret;
+ }
+ 		
+ 
  
  #endif

Index: d_ctl.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/d_ctl.c,v
retrieving revision 1.3.4.3
retrieving revision 1.3.4.4
diff -C2 -d -r1.3.4.3 -r1.3.4.4
*** d_ctl.c	8 Jan 2005 09:41:02 -0000	1.3.4.3
--- d_ctl.c	8 Jan 2005 19:43:51 -0000	1.3.4.4
***************
*** 653,656 ****
--- 653,658 ----
      float x_sumbuf[MAXOVERLAP];     /* summing buffer */
      float x_f;
+ 	float *x_tmpbuf;                /* tb: temporary buffer for simd */
+ 	int x_blocksize;                /* tb: only for freealignedbytes */
  } t_sigenv;
  
***************
*** 670,674 ****
      if (period < npoints / MAXOVERLAP + 1)
          period = npoints / MAXOVERLAP + 1;
!     if (!(buf = getbytes(sizeof(float) * (npoints + MAXVSTAKEN))))
      {
          error("env: couldn't allocate buffer");
--- 672,676 ----
      if (period < npoints / MAXOVERLAP + 1)
          period = npoints / MAXOVERLAP + 1;
!     if (!(buf = getalignedbytes(sizeof(float) * (npoints + MAXVSTAKEN))))
      {
          error("env: couldn't allocate buffer");
***************
*** 687,690 ****
--- 689,694 ----
      x->x_outlet = outlet_new(&x->x_obj, gensym("float"));
      x->x_f = 0;
+ 	x->x_blocksize = 64;
+ 	x->x_tmpbuf = getalignedbytes(x->x_blocksize * sizeof(float));
      return (x);
  }
***************
*** 728,731 ****
--- 732,851 ----
  }
  
+ 
+ /* tb: loop unrolling and simd */
+ float sumvec_8(t_float* in, t_int n) /* scalar sum of the floats at in, with the loop body unrolled 8 times */
+ {
+ 	int i;
+ 	float result = 0;
+ 
+ 	n>>=3; /* n becomes the number of 8-float groups; any n % 8 remainder is not summed */
+ 	for (i = 0; i != n; ++i)
+ 	{
+ 		result += *in++; /* eight consecutive samples per pass */
+ 		result += *in++;
+ 		result += *in++;
+ 		result += *in++;
+ 		result += *in++;
+ 		result += *in++;
+ 		result += *in++;
+ 		result += *in++;
+ 	}
+ 	return result;
+ }
+ 
+ 
+ static t_int *env_tilde_perf8(t_int *w) /* env~ perform routine that env_tilde_dsp installs when the block size is a multiple of 8 and SIMD_CHECK1 fails */
+ {
+     t_sigenv *x = (t_sigenv *)(w[1]); /* w[1..3] are the object, the input vector and the block size passed to dsp_add */
+     t_float *in = (t_float *)(w[2]);
+     int n = (int)(w[3]);
+     int count;
+     float *sump; /* walks x_sumbuf, one slot per pass of the loop below */
+     for (count = x->x_phase, sump = x->x_sumbuf;
+         count < x->x_npoints; count += x->x_realperiod, sump++)
+     {
+         float *hp = x->x_buf + count; /* position in x_buf that belongs to this slot */
+         float *fp = in; /* not used in this routine */
+         float sum = *sump; /* not used in this routine */
+ 		float *tmp = x->x_tmpbuf; /* scratch block; NOTE(review): allocated for 64 floats in env_tilde_new and no reallocation is visible in env_tilde_dsp - confirm n <= 64 */
+         int i; /* not used in this routine */
+ 		t_int sqrargs[4]; /* element 0 is never set; presumably mirrors the w[] layout a perform routine receives - confirm */
+ 		t_int timesargs[5]; /* element 0 is never set, as above */
+ 		
+ 		sqrargs[1] = (t_int)in; /* presumably source, destination, length for sqr_perf8 - verify against its definition */
+ 		sqrargs[2] = (t_int)tmp;
+ 		sqrargs[3] = (t_int) n;
+ 		timesargs[1] = (t_int)tmp; /* presumably two inputs (tmp, hp), output (tmp), length for times_perf8 - verify */
+ 		timesargs[2] = (t_int)hp;
+ 		timesargs[3] = (t_int)tmp;
+ 		timesargs[4] = (t_int)n;
+ 
+ 		sqr_perf8(sqrargs);
+ 		times_perf8(timesargs);
+ 		
+ 		*sump += sumvec_8(tmp,n); /* add this block's total to the slot */
+     }
+     sump[0] = 0; /* clear the slot just past the last one updated */
+     x->x_phase -= n;
+     if (x->x_phase < 0)
+     {
+         x->x_result = x->x_sumbuf[0]; /* publish the oldest sum, then shift the remaining sums down one slot */
+         for (count = x->x_realperiod, sump = x->x_sumbuf;
+             count < x->x_npoints; count += x->x_realperiod, sump++)
+                 sump[0] = sump[1];
+         sump[0] = 0;
+         x->x_phase = x->x_realperiod - n;
+         clock_delay(x->x_clock, 0L); /* schedule the object's clock with zero delay */
+     }
+     return (w+4); /* step past w[0..3] */
+ }
+ 
+ static t_int *env_tilde_perf_simd(t_int *w) /* env~ perform routine that env_tilde_dsp installs when the block size is a multiple of 8 and SIMD_CHECK1 succeeds */
+ {
+     t_sigenv *x = (t_sigenv *)(w[1]); /* w[1..3] are the object, the input vector and the block size passed to dsp_add */
+     t_float *in = (t_float *)(w[2]);
+     int n = (int)(w[3]);
+     int count;
+     float *sump; /* walks x_sumbuf, one slot per pass of the loop below */
+     for (count = x->x_phase, sump = x->x_sumbuf;
+         count < x->x_npoints; count += x->x_realperiod, sump++)
+     {
+         float *hp = x->x_buf + count; /* position in x_buf that belongs to this slot */
+         float *fp = in; /* not used in this routine */
+         float sum = *sump; /* not used in this routine */
+ 		float *tmp = x->x_tmpbuf; /* scratch block from getalignedbytes; NOTE(review): allocated for 64 floats in env_tilde_new and no reallocation is visible in env_tilde_dsp - confirm n <= 64 */
+         int i; /* not used in this routine */
+ 		t_int sqrargs[4]; /* element 0 is never set; presumably mirrors the w[] layout a perform routine receives - confirm */
+ 		t_int timesargs[5]; /* element 0 is never set, as above */
+ 		
+ 		sqrargs[1] = (t_int)in; /* presumably source, destination, length for sqr_perf_simd - verify against its definition */
+ 		sqrargs[2] = (t_int)tmp;
+ 		sqrargs[3] = (t_int) n;
+ 		timesargs[1] = (t_int)tmp; /* presumably two inputs (tmp, hp), output (tmp), length for times_perf_simd - verify */
+ 		timesargs[2] = (t_int)hp;
+ 		timesargs[3] = (t_int)tmp;
+ 		timesargs[4] = (t_int)n;
+ 
+ 		sqr_perf_simd(sqrargs);
+ 		times_perf_simd(timesargs);
+ 		
+ 		*sump += sumvec_simd(tmp,n); /* add this block's total to the slot */
+     }
+     sump[0] = 0; /* clear the slot just past the last one updated */
+     x->x_phase -= n;
+     if (x->x_phase < 0)
+     {
+         x->x_result = x->x_sumbuf[0]; /* publish the oldest sum, then shift the remaining sums down one slot */
+         for (count = x->x_realperiod, sump = x->x_sumbuf;
+             count < x->x_npoints; count += x->x_realperiod, sump++)
+                 sump[0] = sump[1];
+         sump[0] = 0;
+         x->x_phase = x->x_realperiod - n;
+         clock_delay(x->x_clock, 0L); /* schedule the object's clock with zero delay */
+     }
+     return (w+4); /* step past w[0..3] */
+ }
+ 
+ 
  static void env_tilde_dsp(t_sigenv *x, t_signal **sp)
  {
***************
*** 733,738 ****
          x->x_period + sp[0]->s_n - (x->x_period % sp[0]->s_n);
      else x->x_realperiod = x->x_period;
!     dsp_add(env_tilde_perform, 3, x, sp[0]->s_vec, sp[0]->s_n);
!     if (sp[0]->s_n > MAXVSTAKEN) bug("env_tilde_dsp");
  }
  
--- 853,868 ----
          x->x_period + sp[0]->s_n - (x->x_period % sp[0]->s_n);
      else x->x_realperiod = x->x_period;
! 
! 	if (sp[0]->s_n & 7)
! 		dsp_add(env_tilde_perform, 3, x, sp[0]->s_vec, sp[0]->s_n);
! 	else
! 		if (SIMD_CHECK1(sp[0]->s_n, sp[0]->s_vec))
! 			dsp_add(env_tilde_perf_simd, 3, x, sp[0]->s_vec, sp[0]->s_n);
! 		else
! 			dsp_add(env_tilde_perf8, 3, x, sp[0]->s_vec, sp[0]->s_n);
!     
! 	if (sp[0]->s_n > MAXVSTAKEN) bug("env_tilde_dsp");
! 	
! 	x->x_blocksize = sp[0]->s_n;
  }
  
***************
*** 745,749 ****
  {
      clock_free(x->x_clock);
!     freebytes(x->x_buf, (x->x_npoints + MAXVSTAKEN) * sizeof(float));
  }
  
--- 875,879 ----
  {
      clock_free(x->x_clock);
!     freealignedbytes(x->x_buf, (x->x_npoints + MAXVSTAKEN) * sizeof(float));
  }
  





More information about the Pd-cvs mailing list