[PD-cvs] pd/src d_ctl.c,1.3.4.3,1.3.4.4 m_simd.c,1.1.4.1,1.1.4.2 m_simd_sse_gcc.c,1.1.4.7,1.1.4.8 m_simd_sse_gcc.h,1.1.4.3,1.1.4.4 m_simd_sse_vc.h,1.1.4.3,1.1.4.4 m_simd_ve_gcc.h,1.1.4.1,1.1.4.2 m_pd.h,1.4.4.5,1.4.4.6
Tim Blechmann
timblech at users.sourceforge.net
Sat Jan 8 20:43:54 CET 2005
- Previous message: [PD-cvs] pd/src d_ctl.c,1.3.4.2,1.3.4.3
- Next message: [PD-cvs] pd/src m_simd_sse_gcc.c,1.1.4.8,1.1.4.9 m_simd_sse_vc.h,1.1.4.4,1.1.4.5 m_simd_sse_gcc.h,1.1.4.4,1.1.4.5 m_simd_ve_gcc.h,1.1.4.2,1.1.4.3 m_simd.c,1.1.4.2,1.1.4.3 d_ctl.c,1.3.4.4,1.3.4.5
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Update of /cvsroot/pure-data/pd/src
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27025
Modified Files:
Tag: devel_0_38
d_ctl.c m_simd.c m_simd_sse_gcc.c m_simd_sse_gcc.h
m_simd_sse_vc.h m_simd_ve_gcc.h m_pd.h
Log Message:
simd code and loop unrolling for env~ (gcc / sse only at the moment)
Index: m_pd.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/m_pd.h,v
retrieving revision 1.4.4.5
retrieving revision 1.4.4.6
diff -C2 -d -r1.4.4.5 -r1.4.4.6
*** m_pd.h 2 Dec 2004 09:25:15 -0000 1.4.4.5
--- m_pd.h 8 Jan 2005 19:43:52 -0000 1.4.4.6
***************
*** 481,485 ****
/* tb: to be called at idle time */
! EXTERN void set_callback(t_int (*callback) (t_int* argv), t_int* argv, t_int argc);
--- 481,485 ----
/* tb: to be called at idle time */
! EXTERN void sys_callback(t_int (*callback) (t_int* argv), t_int* argv, t_int argc); /* renamed from set_callback in this revision; signature unchanged */
***************
*** 658,661 ****
--- 658,662 ----
EXTERN void testcopyvec_8(t_float *dst,const t_float *src,int n);
EXTERN void testaddvec_8(t_float *dst,const t_float *src,int n);
+ EXTERN float sumvec_8(t_float* in, t_int n); /* scalar sum of in[], unrolled by 8; defined in d_ctl.c */
/* vectorized, simd functions *
***************
*** 667,670 ****
--- 668,672 ----
EXTERN void testcopyvec_simd(t_float *dst,const t_float *src,int n);
EXTERN void testaddvec_simd(t_float *dst,const t_float *src,int n);
+ EXTERN float sumvec_simd(t_float* in, t_int n); /* SIMD counterpart of sumvec_8 (same arguments and return) */
EXTERN int simd_runtime_check(void);
Index: m_simd.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd.c,v
retrieving revision 1.1.4.1
retrieving revision 1.1.4.2
diff -C2 -d -r1.1.4.1 -r1.1.4.2
*** m_simd.c 5 Nov 2004 13:33:19 -0000 1.1.4.1
--- m_simd.c 8 Jan 2005 19:43:52 -0000 1.1.4.2
***************
*** 66,71 ****
}
-
-
#ifdef DONTUSESIMD
int simd_runtime_check()
--- 66,69 ----
***************
*** 105,107 ****
--- 103,112 ----
}
+ float sumvec_simd(t_float* in, t_int n) /* fallback placed just before "#endif DONTUSESIMD": no SIMD code here */
+ {
+ return sumvec_8(in,n); /* forward to the 8x-unrolled C implementation */
+ }
+
+
#endif /* DONTUSESIMD */
+
Index: m_simd_sse_gcc.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_gcc.h,v
retrieving revision 1.1.4.3
retrieving revision 1.1.4.4
diff -C2 -d -r1.1.4.3 -r1.1.4.4
*** m_simd_sse_gcc.h 29 Nov 2004 18:11:38 -0000 1.1.4.3
--- m_simd_sse_gcc.h 8 Jan 2005 19:43:52 -0000 1.1.4.4
***************
*** 41,44 ****
--- 41,47 ----
t_int *sigrsqrt_perf_simd(t_int *w);
+ float sumvec_simd(t_float* in, t_int n); /* SSE inline-asm implementation lives in m_simd_sse_gcc.c */
+
+ //#define sum_vecsimd sumvec_8
#define sigwrap_perf_simd sigwrap_perform /* SIMD not implemented */
Index: m_simd_sse_vc.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_vc.h,v
retrieving revision 1.1.4.3
retrieving revision 1.1.4.4
diff -C2 -d -r1.1.4.3 -r1.1.4.4
*** m_simd_sse_vc.h 29 Nov 2004 18:11:38 -0000 1.1.4.3
--- m_simd_sse_vc.h 8 Jan 2005 19:43:52 -0000 1.1.4.4
***************
*** 41,44 ****
--- 41,45 ----
t_int *sigrsqrt_perf_simd(t_int *w);
+ #define sum_vecsimd sumvec_8 /* SIMD not implemented. NOTE(review): m_pd.h declares and d_ctl.c calls sumvec_simd, not sum_vecsimd - looks like a typo in the macro name; confirm */
#define sigwrap_perf_simd sigwrap_perform /* SIMD not implemented */
Index: m_simd_ve_gcc.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_ve_gcc.h,v
retrieving revision 1.1.4.1
retrieving revision 1.1.4.2
diff -C2 -d -r1.1.4.1 -r1.1.4.2
*** m_simd_ve_gcc.h 5 Nov 2004 13:33:20 -0000 1.1.4.1
--- m_simd_ve_gcc.h 8 Jan 2005 19:43:52 -0000 1.1.4.2
***************
*** 41,43 ****
--- 41,45 ----
t_int *sigrsqrt_perf_simd(t_int *w);
+ #define sum_vecsimd sumvec_8 /* SIMD not implemented. NOTE(review): m_pd.h declares and d_ctl.c calls sumvec_simd, not sum_vecsimd - looks like a typo in the macro name; confirm */
+
#endif /* __M_SIMD_VE_GCC_H */
Index: m_simd_sse_gcc.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_gcc.c,v
retrieving revision 1.1.4.7
retrieving revision 1.1.4.8
diff -C2 -d -r1.1.4.7 -r1.1.4.8
*** m_simd_sse_gcc.c 29 Nov 2004 18:11:34 -0000 1.1.4.7
--- m_simd_sse_gcc.c 8 Jan 2005 19:43:52 -0000 1.1.4.8
***************
*** 813,816 ****
--- 813,861 ----
}
+ float sumvec_simd(t_float* in, t_int n)
+ { /* SSE (gcc inline asm) sum of the floats at in[].
+    *
+    * Operands: %0 = ret (SSE register), %1 = in, %2 = n in ecx
+    * (constraint "c"; the x86 `loop` instruction counts in ecx).
+    * n is shifted right by 4, so 16 floats are consumed per loop
+    * iteration and any n % 16 tail is not summed.  Four partial sums
+    * are kept in the lanes of xmm4 and folded into one scalar at the
+    * end.  movaps requires `in` to be 16-byte aligned.
+    *
+    * NOTE(review): the asm modifies %1 (addl) and %2 (shrl / loop),
+    * but both are declared as input-only operands; GCC's extended-asm
+    * rules expect modified operands to be outputs ("+r", "+c") -
+    * confirm.
+    * NOTE(review): for n < 16 the count is 0 when the loop is entered;
+    * `loop` decrements before testing, so this case presumably has to
+    * be excluded by the caller - confirm.
+    */
+ float ret = 0;
+ asm(
+ ".set T_FLOAT,4 \n" /* assembler constant: bytes per float */
+
+ "shrl $4, %2 \n" /* divide by 16: number of 16-float groups */
+ "xorps %%xmm4, %%xmm4 \n" /* zero values */
+ "xorps %%xmm5, %%xmm5 \n"
+ "xorps %%xmm6, %%xmm6 \n"
+ "xorps %0, %0 \n"
+
+
+ "1: \n"
+ "movaps (%1), %%xmm0 \n" /* load 16 floats as four aligned vectors */
+ "movaps 4*T_FLOAT(%1), %%xmm1 \n"
+ "movaps 8*T_FLOAT(%1), %%xmm2 \n"
+ "movaps 12*T_FLOAT(%1), %%xmm3 \n"
+
+ "addps %%xmm0,%%xmm4 \n" /* accumulate all four vectors lane-wise into xmm4 */
+ "addps %%xmm1,%%xmm4 \n"
+ "addps %%xmm2,%%xmm4 \n"
+ "addps %%xmm3,%%xmm4 \n"
+
+ "addl $16*T_FLOAT,%1 \n" /* advance the input pointer by 16 floats */
+ "loop 1b \n" /* decrement ecx, repeat while it is non-zero */
+
+ "movhlps %%xmm4, %%xmm5 \n" /* xmm5[0..1] = xmm4[2..3] */
+ "movups %%xmm4, %%xmm6 \n" /* xmm6 = copy of xmm4 */
+ "movups %%xmm5, %0 \n" /* ret = copy of xmm5 */
+ "shufps $81, %%xmm6, %%xmm6 \n" /* xmm6[0] = lane 1 of the sums */
+ "shufps $81, %0, %0 \n" /* ret[0] = lane 3 of the sums */
+
+ "addss %%xmm4, %%xmm5 \n" /* xmm5[0] = lane 2 + lane 0 */
+ "addss %%xmm5, %%xmm6 \n" /* xmm6[0] = lane 1 + lane 0 + lane 2 */
+ "addss %%xmm6, %0 \n" /* ret[0] = sum of all four lanes */
+
+
+ :"=x"(ret)
+ :"r"(in),"c"(n)
+ :"%xmm0","%xmm1","%xmm2","%xmm3", "%xmm4","%xmm5","%xmm6");
+ return ret;
+ }
+
+
#endif
Index: d_ctl.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/d_ctl.c,v
retrieving revision 1.3.4.3
retrieving revision 1.3.4.4
diff -C2 -d -r1.3.4.3 -r1.3.4.4
*** d_ctl.c 8 Jan 2005 09:41:02 -0000 1.3.4.3
--- d_ctl.c 8 Jan 2005 19:43:51 -0000 1.3.4.4
***************
*** 653,656 ****
--- 653,658 ----
float x_sumbuf[MAXOVERLAP]; /* summing buffer */
float x_f;
+ float *x_tmpbuf; /* tb: temporary buffer for simd */
+ int x_blocksize; /* tb: only for freealignedbytes */
} t_sigenv;
***************
*** 670,674 ****
if (period < npoints / MAXOVERLAP + 1)
period = npoints / MAXOVERLAP + 1;
! if (!(buf = getbytes(sizeof(float) * (npoints + MAXVSTAKEN))))
{
error("env: couldn't allocate buffer");
--- 672,676 ----
if (period < npoints / MAXOVERLAP + 1)
period = npoints / MAXOVERLAP + 1;
! if (!(buf = getalignedbytes(sizeof(float) * (npoints + MAXVSTAKEN))))
{
error("env: couldn't allocate buffer");
***************
*** 687,690 ****
--- 689,694 ----
x->x_outlet = outlet_new(&x->x_obj, gensym("float"));
x->x_f = 0;
+ x->x_blocksize = 64;
+ x->x_tmpbuf = getalignedbytes(x->x_blocksize * sizeof(float));
return (x);
}
***************
*** 728,731 ****
--- 732,851 ----
}
+
+ /* tb: loop unrolling and simd
+  *
+  * sumvec_8: return the sum of the floats at in[], adding eight
+  * elements per loop iteration.  n is shifted right by 3 first, so
+  * exactly (n >> 3) * 8 elements are read and any n % 8 remainder is
+  * ignored.  In this revision env_tilde_dsp only selects the perf8
+  * path when the block size has its low three bits clear.
+  */
+ float sumvec_8(t_float* in, t_int n)
+ {
+ int i;
+ float result = 0;
+
+ n>>=3; /* n now counts groups of eight samples */
+ for (i = 0; i != n; ++i)
+ {
+ result += *in++; /* eight sequential adds into one accumulator */
+ result += *in++;
+ result += *in++;
+ result += *in++;
+ result += *in++;
+ result += *in++;
+ result += *in++;
+ result += *in++;
+ }
+ return result;
+ }
+
+ /* env_tilde_perf8: perform routine for env~ using the 8x-unrolled
+  * helpers.  w[1] = object, w[2] = input vector, w[3] = block size n;
+  * returns w+4.
+  *
+  * For every entry of x_sumbuf that is currently active (count runs
+  * from x_phase in steps of x_realperiod while below x_npoints) it
+  * calls sqr_perf8 and times_perf8 on hand-built argument arrays and
+  * adds sumvec_8 of the resulting temporary buffer to that entry.
+  * Presumably this squares the input and weights it with the window
+  * stored at x_buf + count - inferred from the helper names; confirm
+  * against their definitions.
+  *
+  * NOTE(review): x_tmpbuf is allocated for 64 floats where the object
+  * is created and env_tilde_dsp only updates x_blocksize; no resize is
+  * visible in this diff - confirm n cannot exceed the allocation.
+  */
+ static t_int *env_tilde_perf8(t_int *w)
+ {
+ t_sigenv *x = (t_sigenv *)(w[1]);
+ t_float *in = (t_float *)(w[2]);
+ int n = (int)(w[3]);
+ int count;
+ float *sump;
+ for (count = x->x_phase, sump = x->x_sumbuf;
+ count < x->x_npoints; count += x->x_realperiod, sump++)
+ {
+ float *hp = x->x_buf + count; /* position inside x_buf for this overlap */
+ float *fp = in; /* unused in this routine */
+ float sum = *sump; /* unused in this routine */
+ float *tmp = x->x_tmpbuf; /* scratch vector shared by all iterations */
+ int i; /* unused in this routine */
+ t_int sqrargs[4]; /* slot 0 is left unset; slots 1..3 = in, out, n */
+ t_int timesargs[5]; /* slot 0 is left unset; slots 1..4 = in1, in2, out, n */
+
+ sqrargs[1] = (t_int)in;
+ sqrargs[2] = (t_int)tmp;
+ sqrargs[3] = (t_int) n;
+ timesargs[1] = (t_int)tmp;
+ timesargs[2] = (t_int)hp;
+ timesargs[3] = (t_int)tmp; /* output overwrites tmp in place */
+ timesargs[4] = (t_int)n;
+
+ sqr_perf8(sqrargs);
+ times_perf8(timesargs);
+
+ *sump += sumvec_8(tmp,n); /* accumulate this block's contribution */
+ }
+ sump[0] = 0; /* clear the first entry past the active ones */
+ x->x_phase -= n;
+ if (x->x_phase < 0)
+ {
+ x->x_result = x->x_sumbuf[0]; /* oldest sum is complete: publish it */
+ for (count = x->x_realperiod, sump = x->x_sumbuf;
+ count < x->x_npoints; count += x->x_realperiod, sump++)
+ sump[0] = sump[1]; /* shift the remaining sums down by one */
+ sump[0] = 0;
+ x->x_phase = x->x_realperiod - n;
+ clock_delay(x->x_clock, 0L); /* schedule the object's clock with zero delay */
+ }
+ return (w+4);
+ }
+ /* env_tilde_perf_simd: perform routine for env~ using the *_simd
+  * helpers.  w[1] = object, w[2] = input vector, w[3] = block size n;
+  * returns w+4.
+  *
+  * For every entry of x_sumbuf that is currently active (count runs
+  * from x_phase in steps of x_realperiod while below x_npoints) it
+  * calls sqr_perf_simd and times_perf_simd on hand-built argument
+  * arrays and adds sumvec_simd of the resulting temporary buffer to
+  * that entry.  Presumably this squares the input and weights it with
+  * the window stored at x_buf + count - inferred from the helper
+  * names; confirm against their definitions.
+  *
+  * NOTE(review): x_tmpbuf is allocated for 64 floats where the object
+  * is created and env_tilde_dsp only updates x_blocksize; no resize is
+  * visible in this diff - confirm n cannot exceed the allocation.
+  * NOTE(review): the SSE sumvec_simd in this commit consumes 16 floats
+  * per iteration, while env_tilde_dsp tests (s_n & 7) before
+  * SIMD_CHECK1 - confirm SIMD_CHECK1 rules out block sizes that are
+  * not a multiple of 16.
+  */
+ static t_int *env_tilde_perf_simd(t_int *w)
+ {
+ t_sigenv *x = (t_sigenv *)(w[1]);
+ t_float *in = (t_float *)(w[2]);
+ int n = (int)(w[3]);
+ int count;
+ float *sump;
+ for (count = x->x_phase, sump = x->x_sumbuf;
+ count < x->x_npoints; count += x->x_realperiod, sump++)
+ {
+ float *hp = x->x_buf + count; /* position inside x_buf for this overlap */
+ float *fp = in; /* unused in this routine */
+ float sum = *sump; /* unused in this routine */
+ float *tmp = x->x_tmpbuf; /* scratch vector shared by all iterations */
+ int i; /* unused in this routine */
+ t_int sqrargs[4]; /* slot 0 is left unset; slots 1..3 = in, out, n */
+ t_int timesargs[5]; /* slot 0 is left unset; slots 1..4 = in1, in2, out, n */
+
+ sqrargs[1] = (t_int)in;
+ sqrargs[2] = (t_int)tmp;
+ sqrargs[3] = (t_int) n;
+ timesargs[1] = (t_int)tmp;
+ timesargs[2] = (t_int)hp;
+ timesargs[3] = (t_int)tmp; /* output overwrites tmp in place */
+ timesargs[4] = (t_int)n;
+
+ sqr_perf_simd(sqrargs);
+ times_perf_simd(timesargs);
+
+ *sump += sumvec_simd(tmp,n); /* accumulate this block's contribution */
+ }
+ sump[0] = 0; /* clear the first entry past the active ones */
+ x->x_phase -= n;
+ if (x->x_phase < 0)
+ {
+ x->x_result = x->x_sumbuf[0]; /* oldest sum is complete: publish it */
+ for (count = x->x_realperiod, sump = x->x_sumbuf;
+ count < x->x_npoints; count += x->x_realperiod, sump++)
+ sump[0] = sump[1]; /* shift the remaining sums down by one */
+ sump[0] = 0;
+ x->x_phase = x->x_realperiod - n;
+ clock_delay(x->x_clock, 0L); /* schedule the object's clock with zero delay */
+ }
+ return (w+4);
+ }
+
+
static void env_tilde_dsp(t_sigenv *x, t_signal **sp)
{
***************
*** 733,738 ****
x->x_period + sp[0]->s_n - (x->x_period % sp[0]->s_n);
else x->x_realperiod = x->x_period;
! dsp_add(env_tilde_perform, 3, x, sp[0]->s_vec, sp[0]->s_n);
! if (sp[0]->s_n > MAXVSTAKEN) bug("env_tilde_dsp");
}
--- 853,868 ----
x->x_period + sp[0]->s_n - (x->x_period % sp[0]->s_n);
else x->x_realperiod = x->x_period;
!
! if (sp[0]->s_n & 7)
! dsp_add(env_tilde_perform, 3, x, sp[0]->s_vec, sp[0]->s_n);
! else
! if (SIMD_CHECK1(sp[0]->s_n, sp[0]->s_vec))
! dsp_add(env_tilde_perf_simd, 3, x, sp[0]->s_vec, sp[0]->s_n);
! else
! dsp_add(env_tilde_perf8, 3, x, sp[0]->s_vec, sp[0]->s_n);
!
! if (sp[0]->s_n > MAXVSTAKEN) bug("env_tilde_dsp");
!
! x->x_blocksize = sp[0]->s_n;
}
***************
*** 745,749 ****
{
clock_free(x->x_clock);
! freebytes(x->x_buf, (x->x_npoints + MAXVSTAKEN) * sizeof(float));
}
--- 875,879 ----
{
clock_free(x->x_clock);
! freealignedbytes(x->x_buf, (x->x_npoints + MAXVSTAKEN) * sizeof(float));
}
- Previous message: [PD-cvs] pd/src d_ctl.c,1.3.4.2,1.3.4.3
- Next message: [PD-cvs] pd/src m_simd_sse_gcc.c,1.1.4.8,1.1.4.9 m_simd_sse_vc.h,1.1.4.4,1.1.4.5 m_simd_sse_gcc.h,1.1.4.4,1.1.4.5 m_simd_ve_gcc.h,1.1.4.2,1.1.4.3 m_simd.c,1.1.4.2,1.1.4.3 d_ctl.c,1.3.4.4,1.3.4.5
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the Pd-cvs
mailing list