[PD-cvs] pd/src d_arithmetic.c,1.2,1.2.4.1 d_array.c,1.3,1.3.4.1 d_ctl.c,1.3,1.3.4.1 d_dac.c,1.3,1.3.4.1
Tim Blechmann
timblech at users.sourceforge.net
Fri Nov 5 14:39:57 CET 2004
- Previous message: [PD-cvs] pd/src s_midi_alsa.c,NONE,1.1.2.1 s_audio_asio.cpp,NONE,1.1.4.1 m_simd.c,NONE,1.1.4.1 m_simd.h,NONE,1.1.4.1 m_simd_def.h,NONE,1.1.4.1 m_simd_sse_gcc.c,NONE,1.1.4.1 m_simd_sse_gcc.h,NONE,1.1.4.1 m_simd_sse_vc.c,NONE,1.1.4.1 m_simd_sse_vc.h,NONE,1.1.4.1 m_simd_ve_gcc.c,NONE,1.1.4.1 m_simd_ve_gcc.h,NONE,1.1.4.1
- Next message: [PD-cvs] pd/src d_fft.c,1.2,1.2.4.1 d_global.c,1.2,1.2.4.1 d_math.c,1.2,1.2.4.1 d_osc.c,1.2,1.2.4.1 d_soundfile.c,1.4,1.4.4.1 d_ugen.c,1.3,1.3.4.1 g_array.c,1.3,1.3.4.1
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Update of /cvsroot/pure-data/pd/src
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv21900
Modified Files:
Tag: devel_0_38
d_arithmetic.c d_array.c d_ctl.c d_dac.c
Log Message:
simd stuff
Index: d_arithmetic.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/d_arithmetic.c,v
retrieving revision 1.2
retrieving revision 1.2.4.1
diff -C2 -d -r1.2 -r1.2.4.1
*** d_arithmetic.c 6 Sep 2004 20:20:33 -0000 1.2
--- d_arithmetic.c 5 Nov 2004 13:39:55 -0000 1.2.4.1
***************
*** 11,14 ****
--- 11,16 ----
#include "m_pd.h"
+ #include "m_simd.h"
+
/* ----------------------------- plus ----------------------------- */
static t_class *plus_class, *scalarplus_class;
***************
*** 110,113 ****
--- 112,117 ----
if (n&7)
dsp_add(plus_perform, 4, in1, in2, out, n);
+ else if(SIMD_CHECK3(n,in1,in2,out))
+ dsp_add(plus_perf_simd, 4, in1, in2, out, n);
else
dsp_add(plus_perf8, 4, in1, in2, out, n);
***************
*** 121,130 ****
static void scalarplus_dsp(t_scalarplus *x, t_signal **sp)
{
! if (sp[0]->s_n&7)
! dsp_add(scalarplus_perform, 4, sp[0]->s_vec, &x->x_g,
! sp[1]->s_vec, sp[0]->s_n);
else
! dsp_add(scalarplus_perf8, 4, sp[0]->s_vec, &x->x_g,
! sp[1]->s_vec, sp[0]->s_n);
}
--- 125,135 ----
static void scalarplus_dsp(t_scalarplus *x, t_signal **sp)
{
! const int n = sp[0]->s_n;
! if(n&7)
! dsp_add(scalarplus_perform, 4, sp[0]->s_vec, &x->x_g, sp[1]->s_vec, n);
! else if(SIMD_CHECK2(n,sp[0]->s_vec,sp[1]->s_vec))
! dsp_add(scalarplus_perf_simd, 4, sp[0]->s_vec, &x->x_g, sp[1]->s_vec, n);
else
! dsp_add(scalarplus_perf8, 4, sp[0]->s_vec, &x->x_g, sp[1]->s_vec, n);
}
***************
*** 241,260 ****
static void minus_dsp(t_minus *x, t_signal **sp)
{
! if (sp[0]->s_n&7)
! dsp_add(minus_perform, 4,
! sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, sp[0]->s_n);
else
! dsp_add(minus_perf8, 4,
! sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, sp[0]->s_n);
}
static void scalarminus_dsp(t_scalarminus *x, t_signal **sp)
{
! if (sp[0]->s_n&7)
! dsp_add(scalarminus_perform, 4, sp[0]->s_vec, &x->x_g,
! sp[1]->s_vec, sp[0]->s_n);
else
! dsp_add(scalarminus_perf8, 4, sp[0]->s_vec, &x->x_g,
! sp[1]->s_vec, sp[0]->s_n);
}
--- 246,267 ----
static void minus_dsp(t_minus *x, t_signal **sp)
{
! const int n = sp[0]->s_n;
! if(n&7)
! dsp_add(minus_perform, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, n);
! else if(SIMD_CHECK3(n,sp[0]->s_vec,sp[1]->s_vec,sp[2]->s_vec))
! dsp_add(minus_perf_simd, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, n);
else
! dsp_add(minus_perf8, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, n);
}
static void scalarminus_dsp(t_scalarminus *x, t_signal **sp)
{
! const int n = sp[0]->s_n;
! if(n&7)
! dsp_add(scalarminus_perform, 4, sp[0]->s_vec, &x->x_g,sp[1]->s_vec, n);
! else if(SIMD_CHECK2(n,sp[0]->s_vec,sp[1]->s_vec))
! dsp_add(scalarminus_perf_simd, 4, sp[0]->s_vec, &x->x_g, sp[1]->s_vec, n);
else
! dsp_add(scalarminus_perf8, 4, sp[0]->s_vec, &x->x_g, sp[1]->s_vec, n);
}
***************
*** 343,346 ****
--- 350,371 ----
}
+ /* T.Grill - squaring: optimized * for equal input signals */
+ static t_int *sqr_perf8(t_int *w)
+ {
+ t_float *in = (t_float *)(w[1]);
+ t_float *out = (t_float *)(w[2]);
+ int n = (int)(w[3]);
+
+ for (; n; n -= 8, in += 8, out += 8)
+ {
+ float f0 = in[0], f1 = in[1], f2 = in[2], f3 = in[3];
+ float f4 = in[4], f5 = in[5], f6 = in[6], f7 = in[7];
+
+ out[0] = f0 * f0; out[1] = f1 * f1; out[2] = f2 * f2; out[3] = f3 * f3;
+ out[4] = f4 * f4; out[5] = f5 * f5; out[6] = f6 * f6; out[7] = f7 * f7;
+ }
+ return (w+4);
+ }
+
t_int *scalartimes_perform(t_int *w)
{
***************
*** 370,391 ****
}
static void times_dsp(t_times *x, t_signal **sp)
{
! if (sp[0]->s_n&7)
! dsp_add(times_perform, 4,
! sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, sp[0]->s_n);
else
! dsp_add(times_perf8, 4,
! sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, sp[0]->s_n);
}
static void scalartimes_dsp(t_scalartimes *x, t_signal **sp)
{
! if (sp[0]->s_n&7)
! dsp_add(scalartimes_perform, 4, sp[0]->s_vec, &x->x_g,
! sp[1]->s_vec, sp[0]->s_n);
else
! dsp_add(scalartimes_perf8, 4, sp[0]->s_vec, &x->x_g,
! sp[1]->s_vec, sp[0]->s_n);
}
--- 395,430 ----
}
+ /* T.Grill - added optimization for equal input signals */
static void times_dsp(t_times *x, t_signal **sp)
{
! const int n = sp[0]->s_n;
! if (n&7)
! dsp_add(times_perform, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, n);
! else
! if(sp[0]->s_vec == sp[1]->s_vec)
! {
! if(SIMD_CHECK2(n,sp[0]->s_vec,sp[2]->s_vec))
! dsp_add(sqr_perf_simd, 3, sp[0]->s_vec, sp[2]->s_vec, n);
! else
! dsp_add(sqr_perf8, 3, sp[0]->s_vec, sp[2]->s_vec, n);
! }
! else
! {
! if(SIMD_CHECK3(n,sp[0]->s_vec,sp[1]->s_vec,sp[2]->s_vec))
! dsp_add(times_perf_simd, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, n);
else
! dsp_add(times_perf8, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, n);
! }
}
static void scalartimes_dsp(t_scalartimes *x, t_signal **sp)
{
! const int n = sp[0]->s_n;
! if (n&7)
! dsp_add(scalartimes_perform, 4, sp[0]->s_vec, &x->x_g,sp[1]->s_vec, n);
! else if(SIMD_CHECK2(n,sp[0]->s_vec,sp[1]->s_vec))
! dsp_add(scalartimes_perf_simd, 4, sp[0]->s_vec, &x->x_g, sp[1]->s_vec, n);
else
! dsp_add(scalartimes_perf8, 4, sp[0]->s_vec, &x->x_g, sp[1]->s_vec, n);
}
***************
*** 515,534 ****
static void over_dsp(t_over *x, t_signal **sp)
{
! if (sp[0]->s_n&7)
! dsp_add(over_perform, 4,
! sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, sp[0]->s_n);
else
! dsp_add(over_perf8, 4,
! sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, sp[0]->s_n);
}
static void scalarover_dsp(t_scalarover *x, t_signal **sp)
{
! if (sp[0]->s_n&7)
! dsp_add(scalarover_perform, 4, sp[0]->s_vec, &x->x_g,
! sp[1]->s_vec, sp[0]->s_n);
else
! dsp_add(scalarover_perf8, 4, sp[0]->s_vec, &x->x_g,
! sp[1]->s_vec, sp[0]->s_n);
}
--- 554,575 ----
static void over_dsp(t_over *x, t_signal **sp)
{
! const int n = sp[0]->s_n;
! if (n&7)
! dsp_add(over_perform, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, n);
! else if(SIMD_CHECK3(n,sp[0]->s_vec,sp[1]->s_vec,sp[2]->s_vec))
! dsp_add(over_perf_simd, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, n);
else
! dsp_add(over_perf8, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, n);
}
static void scalarover_dsp(t_scalarover *x, t_signal **sp)
{
! const int n = sp[0]->s_n;
! if (n&7)
! dsp_add(scalarover_perform, 4, sp[0]->s_vec, &x->x_g,sp[1]->s_vec, n);
! else if(SIMD_CHECK2(n,sp[0]->s_vec,sp[1]->s_vec))
! dsp_add(scalarover_perf_simd, 4, sp[0]->s_vec, &x->x_g, sp[1]->s_vec, n);
else
! dsp_add(scalarover_perf8, 4, sp[0]->s_vec, &x->x_g,sp[1]->s_vec, n);
}
***************
*** 657,676 ****
static void max_dsp(t_max *x, t_signal **sp)
{
! if (sp[0]->s_n&7)
! dsp_add(max_perform, 4,
! sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, sp[0]->s_n);
else
! dsp_add(max_perf8, 4,
! sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, sp[0]->s_n);
}
static void scalarmax_dsp(t_scalarmax *x, t_signal **sp)
{
! if (sp[0]->s_n&7)
! dsp_add(scalarmax_perform, 4, sp[0]->s_vec, &x->x_g,
! sp[1]->s_vec, sp[0]->s_n);
else
! dsp_add(scalarmax_perf8, 4, sp[0]->s_vec, &x->x_g,
! sp[1]->s_vec, sp[0]->s_n);
}
--- 698,719 ----
static void max_dsp(t_max *x, t_signal **sp)
{
! const int n = sp[0]->s_n;
! if(n&7)
! dsp_add(max_perform, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, n);
! else if(SIMD_CHECK3(n,sp[0]->s_vec,sp[1]->s_vec,sp[2]->s_vec))
! dsp_add(max_perf_simd, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, n);
else
! dsp_add(max_perf8, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, n);
}
static void scalarmax_dsp(t_scalarmax *x, t_signal **sp)
{
! const int n = sp[0]->s_n;
! if (n&7)
! dsp_add(scalarmax_perform, 4, sp[0]->s_vec, &x->x_g, sp[1]->s_vec, n);
! else if(SIMD_CHECK2(n,sp[0]->s_vec,sp[1]->s_vec))
! dsp_add(scalarmax_perf_simd, 4, sp[0]->s_vec, &x->x_g, sp[1]->s_vec, n);
else
! dsp_add(scalarmax_perf8, 4, sp[0]->s_vec, &x->x_g, sp[1]->s_vec, n);
}
***************
*** 799,818 ****
static void min_dsp(t_min *x, t_signal **sp)
{
! if (sp[0]->s_n&7)
! dsp_add(min_perform, 4,
! sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, sp[0]->s_n);
else
! dsp_add(min_perf8, 4,
! sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, sp[0]->s_n);
}
static void scalarmin_dsp(t_scalarmin *x, t_signal **sp)
{
! if (sp[0]->s_n&7)
! dsp_add(scalarmin_perform, 4, sp[0]->s_vec, &x->x_g,
! sp[1]->s_vec, sp[0]->s_n);
else
! dsp_add(scalarmin_perf8, 4, sp[0]->s_vec, &x->x_g,
! sp[1]->s_vec, sp[0]->s_n);
}
--- 842,863 ----
static void min_dsp(t_min *x, t_signal **sp)
{
! const int n = sp[0]->s_n;
! if(n&7)
! dsp_add(min_perform, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, n);
! else if(SIMD_CHECK3(n,sp[0]->s_vec,sp[1]->s_vec,sp[2]->s_vec))
! dsp_add(min_perf_simd, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, n);
else
! dsp_add(min_perf8, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, n);
}
static void scalarmin_dsp(t_scalarmin *x, t_signal **sp)
{
! const int n = sp[0]->s_n;
! if (n&7)
! dsp_add(scalarmin_perform, 4, sp[0]->s_vec, &x->x_g, sp[1]->s_vec, n);
! else if(SIMD_CHECK2(n,sp[0]->s_vec,sp[1]->s_vec))
! dsp_add(scalarmin_perf_simd, 4, sp[0]->s_vec, &x->x_g, sp[1]->s_vec, n);
else
! dsp_add(scalarmin_perf8, 4, sp[0]->s_vec, &x->x_g, sp[1]->s_vec, n);
}
Index: d_dac.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/d_dac.c,v
retrieving revision 1.3
retrieving revision 1.3.4.1
diff -C2 -d -r1.3 -r1.3.4.1
*** d_dac.c 6 Sep 2004 20:20:33 -0000 1.3
--- d_dac.c 5 Nov 2004 13:39:55 -0000 1.3.4.1
***************
*** 9,12 ****
--- 9,15 ----
#include "s_stuff.h"
+ /* T.Grill - include SIMD functionality */
+ #include "m_simd.h"
+
/* ----------------------------- dac~ --------------------------- */
static t_class *dac_class;
***************
*** 51,55 ****
if ((*sp2)->s_n != DEFDACBLKSIZE)
error("dac~: bad vector size");
! else if (ch >= 0 && ch < sys_get_outchannels())
dsp_add(plus_perform, 4, sys_soundout + DEFDACBLKSIZE*ch,
(*sp2)->s_vec, sys_soundout + DEFDACBLKSIZE*ch, DEFDACBLKSIZE);
--- 54,64 ----
if ((*sp2)->s_n != DEFDACBLKSIZE)
error("dac~: bad vector size");
! else
! if (ch >= 0 && ch < sys_get_outchannels())
! if(SIMD_CHECK3(DEFDACBLKSIZE,sys_soundout + DEFDACBLKSIZE*ch,
! (*sp2)->s_vec,sys_soundout + DEFDACBLKSIZE*ch))
! dsp_add(plus_perf_simd, 4, sys_soundout + DEFDACBLKSIZE*ch,
! (*sp2)->s_vec, sys_soundout + DEFDACBLKSIZE*ch, DEFDACBLKSIZE);
! else
dsp_add(plus_perform, 4, sys_soundout + DEFDACBLKSIZE*ch,
(*sp2)->s_vec, sys_soundout + DEFDACBLKSIZE*ch, DEFDACBLKSIZE);
***************
*** 144,149 ****
if (n&7)
dsp_add(copy_perform, 3, in, out, n);
! else
! dsp_add(copy_perf8, 3, in, out, n);
}
--- 153,161 ----
if (n&7)
dsp_add(copy_perform, 3, in, out, n);
! else
! if(SIMD_CHECK2(n,in,out))
! dsp_add(copy_perf_simd, 3, in, out, n);
! else
! dsp_add(copy_perf8, 3, in, out, n);
}
Index: d_ctl.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/d_ctl.c,v
retrieving revision 1.3
retrieving revision 1.3.4.1
diff -C2 -d -r1.3 -r1.3.4.1
*** d_ctl.c 6 Sep 2004 20:20:33 -0000 1.3
--- d_ctl.c 5 Nov 2004 13:39:55 -0000 1.3.4.1
***************
*** 10,13 ****
--- 10,16 ----
#include "math.h"
+ /* T.Grill - include SIMD functionality */
+ #include "m_simd.h"
+
/* -------------------------- sig~ ------------------------------ */
static t_class *sig_tilde_class;
***************
*** 54,57 ****
--- 57,63 ----
dsp_add(sig_tilde_perform, 3, in, out, n);
else
+ if(SIMD_CHECK1(n,out))
+ dsp_add(sig_tilde_perf_simd, 3, in, out, n);
+ else
dsp_add(sig_tilde_perf8, 3, in, out, n);
}
***************
*** 64,68 ****
static void sig_tilde_dsp(t_sig *x, t_signal **sp)
{
! dsp_add(sig_tilde_perform, 3, &x->x_f, sp[0]->s_vec, sp[0]->s_n);
}
--- 70,76 ----
static void sig_tilde_dsp(t_sig *x, t_signal **sp)
{
! /* dsp_add(sig_tilde_perform, 3, &x->x_f, sp[0]->s_vec, sp[0]->s_n); */
! /* T.Grill - use chance of unrolling */
! dsp_add_scalarcopy(&x->x_f, sp[0]->s_vec, sp[0]->s_n);
}
Index: d_array.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/d_array.c,v
retrieving revision 1.3
retrieving revision 1.3.4.1
diff -C2 -d -r1.3 -r1.3.4.1
*** d_array.c 6 Sep 2004 20:20:33 -0000 1.3
--- d_array.c 5 Nov 2004 13:39:55 -0000 1.3.4.1
***************
*** 8,12 ****
#include "m_pd.h"
!
/* ------------------------- tabwrite~ -------------------------- */
--- 8,12 ----
#include "m_pd.h"
! #include "m_simd.h"
/* ------------------------- tabwrite~ -------------------------- */
***************
*** 504,508 ****
#define int32 long /* a data type that has 32 bits */
#else
! #ifdef MSW
/* little-endian; most significant byte is at highest address */
#define HIOFFSET 1
--- 504,508 ----
#define int32 long /* a data type that has 32 bits */
#else
! #if defined(MSW) || defined(__CYGWIN__)
/* little-endian; most significant byte is at highest address */
#define HIOFFSET 1
***************
*** 732,735 ****
--- 732,736 ----
if (!x->x_vec) goto bad;
+ if(n&7)
while (n--)
{
***************
*** 739,742 ****
--- 740,748 ----
*dest++ = f;
}
+ else if(SIMD_CHECK2(n,in,dest))
+ testcopyvec_simd(dest,in,n);
+ else
+ testcopyvec_8(dest,in,n);
+
if (!i--)
{
***************
*** 807,810 ****
--- 813,838 ----
}
+ static t_int *tabreceive_perf8(t_int *w)
+ {
+ t_tabreceive *x = (t_tabreceive *)(w[1]);
+ t_float *from = x->x_vec;
+ if (from)
+ copyvec_8((t_float *)(w[2]),from,w[3]);
+ else
+ zerovec_8((t_float *)(w[2]),w[3]);
+ return (w+4);
+ }
+
+ static t_int *tabreceive_perfsimd(t_int *w)
+ {
+ t_tabreceive *x = (t_tabreceive *)(w[1]);
+ t_float *from = x->x_vec;
+ if(from)
+ copyvec_simd((t_float *)(w[2]),from,w[3]);
+ else
+ zerovec_simd((t_float *)(w[2]),w[3]);
+ return (w+4);
+ }
+
static void tabreceive_dsp(t_tabreceive *x, t_signal **sp)
{
***************
*** 824,828 ****
--- 852,862 ----
if (n < vecsize) vecsize = n;
garray_usedindsp(a);
+ if(vecsize&7)
dsp_add(tabreceive_perform, 3, x, sp[0]->s_vec, vecsize);
+ else if(SIMD_CHECK1(vecsize,sp[0]->s_vec))
+ /* the array is aligned in any case */
+ dsp_add(tabreceive_perfsimd, 3, x, sp[0]->s_vec, vecsize);
+ else
+ dsp_add(tabreceive_perf8, 3, x, sp[0]->s_vec, vecsize);
}
}
- Previous message: [PD-cvs] pd/src s_midi_alsa.c,NONE,1.1.2.1 s_audio_asio.cpp,NONE,1.1.4.1 m_simd.c,NONE,1.1.4.1 m_simd.h,NONE,1.1.4.1 m_simd_def.h,NONE,1.1.4.1 m_simd_sse_gcc.c,NONE,1.1.4.1 m_simd_sse_gcc.h,NONE,1.1.4.1 m_simd_sse_vc.c,NONE,1.1.4.1 m_simd_sse_vc.h,NONE,1.1.4.1 m_simd_ve_gcc.c,NONE,1.1.4.1 m_simd_ve_gcc.h,NONE,1.1.4.1
- Next message: [PD-cvs] pd/src d_fft.c,1.2,1.2.4.1 d_global.c,1.2,1.2.4.1 d_math.c,1.2,1.2.4.1 d_osc.c,1.2,1.2.4.1 d_soundfile.c,1.4,1.4.4.1 d_ugen.c,1.3,1.3.4.1 g_array.c,1.3,1.3.4.1
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the Pd-cvs
mailing list