[PD-cvs] pd/src s_audio_asio.cpp,1.1.4.9,1.1.4.10 d_math.c,1.2.4.1,1.2.4.2 m_simd_sse_gcc.c,1.1.4.2,1.1.4.3 m_simd_sse_gcc.h,1.1.4.1,1.1.4.2 m_simd_sse_vc.c,1.1.4.1,1.1.4.2 m_simd_sse_vc.h,1.1.4.1,1.1.4.2
Tim Blechmann
timblech at users.sourceforge.net
Wed Nov 10 22:53:05 CET 2004
Update of /cvsroot/pure-data/pd/src
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv8421
Modified Files:
Tag: devel_0_38
s_audio_asio.cpp d_math.c m_simd_sse_gcc.c m_simd_sse_gcc.h
m_simd_sse_vc.c m_simd_sse_vc.h
Log Message:
simd optimized clipping before sending audio vector to asio driver
Index: s_audio_asio.cpp
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/s_audio_asio.cpp,v
retrieving revision 1.1.4.9
retrieving revision 1.1.4.10
diff -C2 -d -r1.1.4.9 -r1.1.4.10
*** s_audio_asio.cpp 10 Nov 2004 13:47:09 -0000 1.1.4.9
--- s_audio_asio.cpp 10 Nov 2004 21:53:02 -0000 1.1.4.10
***************
*** 158,161 ****
--- 158,165 ----
#define MAXNDEV 20
+ /* for clipping: */
+ extern "C" t_int *clip_perform(t_int *w);
+
+
/* open asio interface */
/* todo: some more error messages */
***************
*** 417,421 ****
// the DMA buffers would be played past ASIOStop
// -> clear output buffers and notify driver
! #if 0
if(asio_ringbuffer)
{
--- 421,425 ----
// the DMA buffers would be played past ASIOStop
// -> clear output buffers and notify driver
! #if 0
if(asio_ringbuffer)
{
***************
*** 429,433 ****
pthread_cond_wait(&asio_ringbuf_cond, &asio_ringbuf_mutex);
}
! #else
// direct method - clear both hardware buffers
if(asio_bufferinfo && asio_out_samplewidth)
--- 433,437 ----
pthread_cond_wait(&asio_ringbuf_cond, &asio_ringbuf_mutex);
}
! #else
// direct method - clear both hardware buffers
if(asio_bufferinfo && asio_out_samplewidth)
***************
*** 441,445 ****
// notify driver
status = ASIOOutputReady();
! #endif
}
--- 445,449 ----
// notify driver
status = ASIOOutputReady();
! #endif
}
***************
*** 521,527 ****
#endif
! /* called on every dac~ send
! * todo:
! * - function pointer to avoid segfaults */
int asio_send_dacs(void)
{
--- 525,529 ----
#endif
! /* called on every dac~ send */
int asio_send_dacs(void)
{
***************
*** 544,548 ****
for (i = 0; i < sys_outchannels; i++)
{
! copyblock(asio_ringbuffer[i] + asio_ringbuffer_inoffset, sp);
zeroblock(sp);
sp+=sys_dacblocksize;
--- 546,560 ----
for (i = 0; i < sys_outchannels; i++)
{
! /* clipping here, we are sure, we can use simd instructions */
! t_int lo = -1;
! t_int hi = 1;
! t_int clipargs[6];
! clipargs[1] = (t_int)sp;
! clipargs[2] = (t_int)(asio_ringbuffer[i] + asio_ringbuffer_inoffset);
! clipargs[3] = (t_int)&lo;
! clipargs[4] = (t_int)&hi;
! clipargs[5] = (t_int)DEFDACBLKSIZE;
!
! clip_perf_simd(clipargs);
zeroblock(sp);
sp+=sys_dacblocksize;
***************
*** 955,959 ****
{
// TODO: do some dithering!!
- // TODO: how about clipping?
float o = *(in++) * SCALE_INT16;
--- 967,970 ----
***************
*** 986,991 ****
while (frames--)
{
- // TODO: how about clipping?
-
float o = *(in++) * SCALE_INT24;
#ifdef __GNUC__
--- 997,1000 ----
***************
*** 1017,1022 ****
while (frames--)
{
- // TODO: how about clipping?
-
float o = (float)*(in++) * SCALE_INT32;
#ifdef __GNUC__
--- 1026,1029 ----
***************
*** 1049,1054 ****
{
// TODO: do some dithering!!
- // TODO: how about clipping?
-
float o = (float)*(in++) * SCALE_INT16;
#ifdef __GNUC__
--- 1056,1059 ----
***************
*** 1080,1085 ****
while (frames--)
{
- // TODO: how about clipping?
-
float o = (float)*(in++) * SCALE_INT24;
#ifdef __GNUC__
--- 1085,1088 ----
***************
*** 1121,1126 ****
while (frames--)
{
- // TODO: how about clipping?
-
float o = (float)*(in++) * SCALE_INT32;
#ifdef __GNUC__
--- 1124,1127 ----
Index: m_simd_sse_vc.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_vc.c,v
retrieving revision 1.1.4.1
retrieving revision 1.1.4.2
diff -C2 -d -r1.1.4.1 -r1.1.4.2
*** m_simd_sse_vc.c 5 Nov 2004 13:33:19 -0000 1.1.4.1
--- m_simd_sse_vc.c 10 Nov 2004 21:53:02 -0000 1.1.4.2
***************
*** 835,838 ****
--- 835,895 ----
}
+ /* tb: fast clipping, out[i] = min(max(in[i], lo), hi); w[1]=in, w[2]=out, w[3]=&lo, w[4]=&hi, w[5]=n */
+ t_int *clip_perf_simd(t_int *w)
+ {
+ __asm {
+ mov esi,dword ptr [w]
+ /* in/out are accessed with movaps, so both vectors must be 16-byte aligned */
+ mov ebx,dword ptr [esi + 1*TYPE t_int] /* in */
+ /* prefetcht0 [ebx] */
+ mov edx,dword ptr [esi + 2*TYPE t_int] /* out */
+
+ /* load the scalar limits and broadcast each into all four lanes ... this is not very clean.. */
+ mov eax,dword ptr [esi + 3*TYPE t_int] /* lo */
+ movss xmm0,xmmword ptr [eax]
+ shufps xmm0,xmm0,0
+
+ mov eax,dword ptr [esi + 4*TYPE t_int] /* hi */
+ movss xmm1,xmmword ptr [eax]
+ shufps xmm1,xmm1,0
+ /* n is w[5] (w[4] is the hi pointer); the loop runs n/16 times, so n must be a nonzero multiple of 16 */
+ mov ecx,dword ptr [esi + 5*TYPE t_int] /* n */
+ shr ecx,4
+
+ /* prefetcht0 [ebx+8*TYPE t_float] */
+
+ loopa:
+ /* prefetcht0 [ebx+16*TYPE t_float] */
+
+ movaps xmm2,xmmword ptr[ebx]
+ maxps xmm2,xmm0
+ minps xmm2,xmm1
+ movaps xmmword ptr[edx],xmm2
+
+ movaps xmm3,xmmword ptr[ebx+4*TYPE t_float]
+ maxps xmm3,xmm0
+ minps xmm3,xmm1
+ movaps xmmword ptr[edx+4*TYPE t_float],xmm3
+
+ /* prefetcht0 [ebx+24*TYPE t_float] */
+
+ movaps xmm4,xmmword ptr[ebx+8*TYPE t_float]
+ maxps xmm4,xmm0
+ minps xmm4,xmm1
+ movaps xmmword ptr[edx+8*TYPE t_float],xmm4
+
+ movaps xmm5,xmmword ptr[ebx+12*TYPE t_float]
+ maxps xmm5,xmm0
+ minps xmm5,xmm1
+ movaps xmmword ptr[edx+12*TYPE t_float],xmm5
+
+ add ebx,16*TYPE t_float
+ add edx,16*TYPE t_float
+ loop loopa
+ }
+ return (w+5); /* NOTE(review): w[1]..w[5] are consumed; a DSP-chain caller would presumably expect w+6 - confirm */
+ }
+
+
/* TB: runtime check */
int simd_runtime_check()
Index: m_simd_sse_gcc.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_gcc.h,v
retrieving revision 1.1.4.1
retrieving revision 1.1.4.2
diff -C2 -d -r1.1.4.1 -r1.1.4.2
*** m_simd_sse_gcc.h 5 Nov 2004 13:33:19 -0000 1.1.4.1
--- m_simd_sse_gcc.h 10 Nov 2004 21:53:02 -0000 1.1.4.2
***************
*** 34,38 ****
t_int *min_perf_simd(t_int *w);
t_int *scalarmin_perf_simd(t_int *w);
- t_int *clip_perf_simd(t_int *w);
t_int *sigwrap_perf_simd(t_int *w);
t_int *sigsqrt_perf_simd(t_int *w);
--- 34,37 ----
***************
*** 40,44 ****
/* functions in d_math.c */
! #define clip_perf_simd clip_perform /* SIMD not implemented */
#define sigwrap_perf_simd sigwrap_perform /* SIMD not implemented */
#define sigsqrt_perf_simd sigsqrt_perform /* SIMD not implemented */
--- 39,44 ----
/* functions in d_math.c */
! t_int *clip_perf_simd(t_int *w);
!
#define sigwrap_perf_simd sigwrap_perform /* SIMD not implemented */
#define sigsqrt_perf_simd sigsqrt_perform /* SIMD not implemented */
Index: m_simd_sse_vc.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_vc.h,v
retrieving revision 1.1.4.1
retrieving revision 1.1.4.2
diff -C2 -d -r1.1.4.1 -r1.1.4.2
*** m_simd_sse_vc.h 5 Nov 2004 13:33:19 -0000 1.1.4.1
--- m_simd_sse_vc.h 10 Nov 2004 21:53:03 -0000 1.1.4.2
***************
*** 34,38 ****
t_int *min_perf_simd(t_int *w);
t_int *scalarmin_perf_simd(t_int *w);
- t_int *clip_perf_simd(t_int *w);
t_int *sigwrap_perf_simd(t_int *w);
t_int *sigsqrt_perf_simd(t_int *w);
--- 34,37 ----
***************
*** 40,44 ****
/* functions in d_math.c */
! #define clip_perf_simd clip_perform /* SIMD not implemented */
#define sigwrap_perf_simd sigwrap_perform /* SIMD not implemented */
#define sigsqrt_perf_simd sigsqrt_perform /* SIMD not implemented */
--- 39,44 ----
/* functions in d_math.c */
! t_int *clip_perf_simd(t_int *w);
!
#define sigwrap_perf_simd sigwrap_perform /* SIMD not implemented */
#define sigsqrt_perf_simd sigsqrt_perform /* SIMD not implemented */
Index: m_simd_sse_gcc.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_gcc.c,v
retrieving revision 1.1.4.2
retrieving revision 1.1.4.3
diff -C2 -d -r1.1.4.2 -r1.1.4.3
*** m_simd_sse_gcc.c 10 Nov 2004 07:26:07 -0000 1.1.4.2
--- m_simd_sse_gcc.c 10 Nov 2004 21:53:02 -0000 1.1.4.3
***************
*** 687,690 ****
--- 687,733 ----
}
+ /* SSE clip: out[i] = min(max(lo, in[i]), hi); w[1]=in, w[2]=out, w[3]=&lo, w[4]=&hi, w[5]=n (nonzero multiple of 16) */
+ t_int* clip_perf_simd(t_int *w)
+ {
+ t_float *in = (t_float *)w[1], *out = (t_float *)w[2]; /* movaps: both must be 16-byte aligned */
+ t_float lo = *(t_float *)w[3], hi = *(t_float *)w[4];
+ t_int n = w[5];
+ asm volatile( /* volatile: the outputs are unused afterwards, the stores to out are the real effect */
+ ".set T_FLOAT,4 \n"
+ "shufps $0, %2, %2 \n" /* lo, broadcast to all four lanes */
+ "shufps $0, %3, %3 \n" /* hi, broadcast to all four lanes */
+ "shrl $4, %4 \n" /* divide by 16 */
+ "1: \n"
+ "movaps (%0), %%xmm2 \n"
+ "maxps %2, %%xmm2 \n"
+ "minps %3, %%xmm2 \n"
+ "movaps %%xmm2, (%1) \n"
+
+ "movaps 4*T_FLOAT(%0), %%xmm3 \n"
+ "maxps %2, %%xmm3 \n"
+ "minps %3, %%xmm3 \n"
+ "movaps %%xmm3, 4*T_FLOAT(%1) \n"
+
+ "movaps 8*T_FLOAT(%0), %%xmm4 \n"
+ "maxps %2, %%xmm4 \n"
+ "minps %3, %%xmm4 \n"
+ "movaps %%xmm4, 8*T_FLOAT(%1) \n"
+
+ "movaps 12*T_FLOAT(%0), %%xmm5 \n"
+ "maxps %2, %%xmm5 \n"
+ "minps %3, %%xmm5 \n"
+ "movaps %%xmm5, 12*T_FLOAT(%1) \n"
+ "addl $16*T_FLOAT, %0 \n"
+ "addl $16*T_FLOAT, %1 \n"
+ "loop 1b \n"
+ /* in, out, lo, hi, n: all are modified by the asm, so they are read-write copies, not inputs */
+ :"+r"(in),"+r"(out),"+x"(lo),"+x"(hi),"+c"(n)
+ :
+ :"%xmm2","%xmm3","%xmm4","%xmm5","memory","cc"
+ );
+ return w+5; /* NOTE(review): w[1]..w[5] are consumed; a DSP-chain caller would presumably expect w+6 - confirm */
+ }
+
+
/* TB: runtime check */
int simd_runtime_check()
Index: d_math.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/d_math.c,v
retrieving revision 1.2.4.1
retrieving revision 1.2.4.2
diff -C2 -d -r1.2.4.1 -r1.2.4.2
*** d_math.c 5 Nov 2004 13:55:58 -0000 1.2.4.1
--- d_math.c 10 Nov 2004 21:53:02 -0000 1.2.4.2
***************
*** 38,42 ****
/* T.Grill - changed function interface so that class pointer needn't be passed */
! static t_int *clip_perform(t_int *w)
{
t_float *in = (t_float *)(w[1]);
--- 38,42 ----
/* T.Grill - changed function interface so that class pointer needn't be passed */
! t_int *clip_perform(t_int *w)
{
t_float *in = (t_float *)(w[1]);
More information about the Pd-cvs
mailing list