[PD-cvs] pd/src s_audio_asio.cpp,1.1.4.9,1.1.4.10 d_math.c,1.2.4.1,1.2.4.2 m_simd_sse_gcc.c,1.1.4.2,1.1.4.3 m_simd_sse_gcc.h,1.1.4.1,1.1.4.2 m_simd_sse_vc.c,1.1.4.1,1.1.4.2 m_simd_sse_vc.h,1.1.4.1,1.1.4.2

Tim Blechmann timblech at users.sourceforge.net
Wed Nov 10 22:53:05 CET 2004


Update of /cvsroot/pure-data/pd/src
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv8421

Modified Files:
      Tag: devel_0_38
	s_audio_asio.cpp d_math.c m_simd_sse_gcc.c m_simd_sse_gcc.h 
	m_simd_sse_vc.c m_simd_sse_vc.h 
Log Message:
simd optimized clipping before sending audio vector to asio driver

Index: s_audio_asio.cpp
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/s_audio_asio.cpp,v
retrieving revision 1.1.4.9
retrieving revision 1.1.4.10
diff -C2 -d -r1.1.4.9 -r1.1.4.10
*** s_audio_asio.cpp	10 Nov 2004 13:47:09 -0000	1.1.4.9
--- s_audio_asio.cpp	10 Nov 2004 21:53:02 -0000	1.1.4.10
***************
*** 158,161 ****
--- 158,165 ----
  #define MAXNDEV   20
  
+ /* for clipping: */
+ extern "C" t_int *clip_perform(t_int *w);
+ 
+ 
  /* open asio interface */
  /* todo: some more error messages */
***************
*** 417,421 ****
              // the DMA buffers would be played past ASIOStop
              // -> clear output buffers and notify driver
!     #if 0
              if(asio_ringbuffer)
              {
--- 421,425 ----
              // the DMA buffers would be played past ASIOStop
              // -> clear output buffers and notify driver
! #if 0
              if(asio_ringbuffer)
              {
***************
*** 429,433 ****
  	            pthread_cond_wait(&asio_ringbuf_cond, &asio_ringbuf_mutex);  
              }
!     #else
              // direct method - clear both hardware buffers
              if(asio_bufferinfo && asio_out_samplewidth)
--- 433,437 ----
  	            pthread_cond_wait(&asio_ringbuf_cond, &asio_ringbuf_mutex);  
              }
! #else
              // direct method - clear both hardware buffers
              if(asio_bufferinfo && asio_out_samplewidth)
***************
*** 441,445 ****
              // notify driver
  		    status = ASIOOutputReady();
!     #endif
          }
  
--- 445,449 ----
              // notify driver
  		    status = ASIOOutputReady();
! #endif
          }
  
***************
*** 521,527 ****
  #endif
  
! /* called on every dac~ send
!  * todo: 
!  * - function pointer to avoid segfaults */      
  int asio_send_dacs(void)
  {
--- 525,529 ----
  #endif
  
! /* called on every dac~ send */      
  int asio_send_dacs(void)
  {
***************
*** 544,548 ****
      for (i = 0; i < sys_outchannels; i++)
  	{
! 		copyblock(asio_ringbuffer[i] + asio_ringbuffer_inoffset, sp);
         	zeroblock(sp);
          sp+=sys_dacblocksize;
--- 546,560 ----
      for (i = 0; i < sys_outchannels; i++)
  	{
! 		/* clipping here, we are sure, we can use simd instructions */
! 		t_int lo = -1;
! 		t_int hi = 1;
! 		t_int clipargs[6];
! 		clipargs[1] = (t_int)sp;
! 		clipargs[2] = (t_int)(asio_ringbuffer[i] + asio_ringbuffer_inoffset);
! 		clipargs[3] = (t_int)&lo;
! 		clipargs[4] = (t_int)&hi;
! 		clipargs[5] = (t_int)DEFDACBLKSIZE;
! 		
! 		clip_perf_simd(clipargs);
         	zeroblock(sp);
          sp+=sys_dacblocksize;
***************
*** 955,959 ****
  	{
          // TODO: do some dithering!!
-         // TODO: how about clipping?
  
  		float o = *(in++) * SCALE_INT16;
--- 967,970 ----
***************
*** 986,991 ****
  	while (frames--)
  	{
-         // TODO: how about clipping?
- 
          float o = *(in++) * SCALE_INT24;
  #ifdef __GNUC__
--- 997,1000 ----
***************
*** 1017,1022 ****
  	while (frames--)
  	{
-         // TODO: how about clipping?
- 
          float o = (float)*(in++) * SCALE_INT32;
  #ifdef __GNUC__
--- 1026,1029 ----
***************
*** 1049,1054 ****
  	{
          // TODO: do some dithering!!
-         // TODO: how about clipping?
- 
  		float o = (float)*(in++) * SCALE_INT16;
  #ifdef __GNUC__
--- 1056,1059 ----
***************
*** 1080,1085 ****
  	while (frames--)
  	{
-         // TODO: how about clipping?
- 
          float o = (float)*(in++) * SCALE_INT24;
  #ifdef __GNUC__
--- 1085,1088 ----
***************
*** 1121,1126 ****
  	while (frames--)
  	{
-         // TODO: how about clipping?
- 
          float o = (float)*(in++) * SCALE_INT32;
  #ifdef __GNUC__
--- 1124,1127 ----

Index: m_simd_sse_vc.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_vc.c,v
retrieving revision 1.1.4.1
retrieving revision 1.1.4.2
diff -C2 -d -r1.1.4.1 -r1.1.4.2
*** m_simd_sse_vc.c	5 Nov 2004 13:33:19 -0000	1.1.4.1
--- m_simd_sse_vc.c	10 Nov 2004 21:53:02 -0000	1.1.4.2
***************
*** 835,838 ****
--- 835,895 ----
  }
  
+ /* tb: fast clipping: out = min(max(in, lo), hi). movaps needs 16-byte aligned in/out; n must be a nonzero multiple of 16. */
+ t_int *clip_perf_simd(t_int *w)
+ {
+ 	__asm {
+ 		mov		esi,dword ptr [w]
+ 		/* w[1] = in, w[2] = out, w[3] = &lo, w[4] = &hi, w[5] = n */
+ 		mov		ebx,dword ptr [esi + 1*TYPE t_int] /* in */
+ /*		prefetcht0 [ebx] */
+ 		mov		edx,dword ptr [esi + 2*TYPE t_int] /* out */
+ 
+ 		/* broadcast the scalar bounds into all four lanes: xmm0 = lo, xmm1 = hi */
+ 		mov		eax,dword ptr [esi + 3*TYPE t_int] /* lo */
+ 		movss	xmm0,xmmword ptr [eax]
+ 		shufps	xmm0,xmm0,0
+ 
+ 		mov		eax,dword ptr [esi + 4*TYPE t_int] /* hi */
+ 		movss	xmm1,xmmword ptr [eax]
+ 		shufps	xmm1,xmm1,0
+ 
+ 		mov		ecx,dword ptr [esi + 5*TYPE t_int] /* n: slot 5 (slot 4 holds the hi pointer) */
+ 		shr		ecx,4	/* one loop pass handles 16 floats */
+ 
+ /*		prefetcht0 [ebx+8*TYPE t_float] */
+ 
+ loopa:
+ /*		prefetcht0 [ebx+16*TYPE t_float] */
+ 
+ 		movaps	xmm2,xmmword ptr[ebx]
+ 		maxps	xmm2,xmm0
+ 		minps	xmm2,xmm1
+ 		movaps	xmmword ptr[edx],xmm2
+ 
+ 		movaps	xmm3,xmmword ptr[ebx+4*TYPE t_float]
+ 		maxps	xmm3,xmm0
+ 		minps	xmm3,xmm1
+ 		movaps	xmmword ptr[edx+4*TYPE t_float],xmm3
+ 
+ /*		prefetcht0 [ebx+24*TYPE t_float] */
+ 
+ 		movaps	xmm4,xmmword ptr[ebx+8*TYPE t_float]
+ 		maxps	xmm4,xmm0
+ 		minps	xmm4,xmm1
+ 		movaps	xmmword ptr[edx+8*TYPE t_float],xmm4
+ 
+ 		movaps	xmm5,xmmword ptr[ebx+12*TYPE t_float]
+ 		maxps	xmm5,xmm0
+ 		minps	xmm5,xmm1
+ 		movaps	xmmword ptr[edx+12*TYPE t_float],xmm5
+ 
+ 		add		ebx,16*TYPE t_float
+ 		add		edx,16*TYPE t_float
+ 		loop	loopa	/* dec ecx, repeat while nonzero */
+ 	}
+     return (w+6);	/* step past w[0] and the five arguments w[1]..w[5] */
+ }
+ 
+ 
  /* TB: runtime check */
  int simd_runtime_check()

Index: m_simd_sse_gcc.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_gcc.h,v
retrieving revision 1.1.4.1
retrieving revision 1.1.4.2
diff -C2 -d -r1.1.4.1 -r1.1.4.2
*** m_simd_sse_gcc.h	5 Nov 2004 13:33:19 -0000	1.1.4.1
--- m_simd_sse_gcc.h	10 Nov 2004 21:53:02 -0000	1.1.4.2
***************
*** 34,38 ****
  t_int *min_perf_simd(t_int *w);
  t_int *scalarmin_perf_simd(t_int *w);
- t_int *clip_perf_simd(t_int *w);
  t_int *sigwrap_perf_simd(t_int *w);
  t_int *sigsqrt_perf_simd(t_int *w);
--- 34,37 ----
***************
*** 40,44 ****
  
  /* functions in d_math.c */
! #define clip_perf_simd             clip_perform     /* SIMD not implemented */
  #define sigwrap_perf_simd          sigwrap_perform  /* SIMD not implemented */
  #define sigsqrt_perf_simd          sigsqrt_perform  /* SIMD not implemented */
--- 39,44 ----
  
  /* functions in d_math.c */
! t_int *clip_perf_simd(t_int *w);
! 
  #define sigwrap_perf_simd          sigwrap_perform  /* SIMD not implemented */
  #define sigsqrt_perf_simd          sigsqrt_perform  /* SIMD not implemented */

Index: m_simd_sse_vc.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_vc.h,v
retrieving revision 1.1.4.1
retrieving revision 1.1.4.2
diff -C2 -d -r1.1.4.1 -r1.1.4.2
*** m_simd_sse_vc.h	5 Nov 2004 13:33:19 -0000	1.1.4.1
--- m_simd_sse_vc.h	10 Nov 2004 21:53:03 -0000	1.1.4.2
***************
*** 34,38 ****
  t_int *min_perf_simd(t_int *w);
  t_int *scalarmin_perf_simd(t_int *w);
- t_int *clip_perf_simd(t_int *w);
  t_int *sigwrap_perf_simd(t_int *w);
  t_int *sigsqrt_perf_simd(t_int *w);
--- 34,37 ----
***************
*** 40,44 ****
  
  /* functions in d_math.c */
! #define clip_perf_simd          clip_perform  /* SIMD not implemented */
  #define sigwrap_perf_simd       sigwrap_perform  /* SIMD not implemented */
  #define sigsqrt_perf_simd       sigsqrt_perform  /* SIMD not implemented */
--- 39,44 ----
  
  /* functions in d_math.c */
! t_int *clip_perf_simd(t_int *w);
! 
  #define sigwrap_perf_simd       sigwrap_perform  /* SIMD not implemented */
  #define sigsqrt_perf_simd       sigsqrt_perform  /* SIMD not implemented */

Index: m_simd_sse_gcc.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_gcc.c,v
retrieving revision 1.1.4.2
retrieving revision 1.1.4.3
diff -C2 -d -r1.1.4.2 -r1.1.4.3
*** m_simd_sse_gcc.c	10 Nov 2004 07:26:07 -0000	1.1.4.2
--- m_simd_sse_gcc.c	10 Nov 2004 21:53:02 -0000	1.1.4.3
***************
*** 687,690 ****
--- 687,733 ----
  }
  
+ t_int* clip_perf_simd(t_int *w)
+ {
+     /* out[i] = min(max(in[i], lo), hi): w[1]=in, w[2]=out, w[3]=&lo, w[4]=&hi, w[5]=n. */
+     /* movaps needs 16-byte aligned in/out; n must be a nonzero multiple of 16. */
+     t_float *in = (t_float *)w[1];
+     t_float *out = (t_float *)w[2];
+     t_float lo = *(t_float *)w[3], hi = *(t_float *)w[4];
+     t_int n = w[5];
+     asm volatile(
+ 	".set T_FLOAT,4                            \n"
+     "shufps    $0, %2, %2                      \n" /* broadcast lo to all 4 lanes */
+     "shufps    $0, %3, %3                      \n" /* broadcast hi to all 4 lanes */
+ 	"shrl      $4, %4                          \n" /* divide by 16 */
+ 	/* loop: *out = min ( max (lo, *in), hi ), 16 floats per pass */
+ 	"1:                                        \n"
+ 	"movaps    (%0), %%xmm2                    \n"
+ 	"maxps     %2, %%xmm2                      \n"
+ 	"minps     %3, %%xmm2                      \n"
+ 	"movaps    %%xmm2, (%1)                    \n"
+ 	"movaps    4*T_FLOAT(%0), %%xmm3           \n"
+ 	"maxps     %2, %%xmm3                      \n"
+ 	"minps     %3, %%xmm3                      \n"
+ 	"movaps    %%xmm3, 4*T_FLOAT(%1)           \n"
+ 	"movaps    8*T_FLOAT(%0), %%xmm4           \n"
+ 	"maxps     %2, %%xmm4                      \n"
+ 	"minps     %3, %%xmm4                      \n"
+ 	"movaps    %%xmm4, 8*T_FLOAT(%1)           \n"
+ 	"movaps    12*T_FLOAT(%0), %%xmm5          \n"
+ 	"maxps     %2, %%xmm5                      \n"
+ 	"minps     %3, %%xmm5                      \n"
+ 	"movaps    %%xmm5, 12*T_FLOAT(%1)          \n"
+ 	"addl      $16*T_FLOAT, %0                 \n"
+ 	"addl      $16*T_FLOAT, %1                 \n"
+ 	"loop      1b                              \n" /* dec ecx, repeat while nonzero */
+ 	/* in, out, lo, hi, n: all read-write, because the asm advances or overwrites each one */
+ 	:"+r"(in),"+r"(out),"+x"(lo),"+x"(hi),"+c"(n)
+ 	: /* no input-only operands */
+ 	:"%xmm2","%xmm3","%xmm4","%xmm5","memory","cc" /* stores through out; shrl/addl set flags */
+ 	);
+     return w+6; /* step past w[0] and the five arguments w[1]..w[5] */
+ }
+ 
+ 
  /* TB: runtime check */
  int simd_runtime_check()

Index: d_math.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/d_math.c,v
retrieving revision 1.2.4.1
retrieving revision 1.2.4.2
diff -C2 -d -r1.2.4.1 -r1.2.4.2
*** d_math.c	5 Nov 2004 13:55:58 -0000	1.2.4.1
--- d_math.c	10 Nov 2004 21:53:02 -0000	1.2.4.2
***************
*** 38,42 ****
  
  /* T.Grill - changed function interface so that class pointer needn't be passed */
! static t_int *clip_perform(t_int *w)
  {
      t_float *in = (t_float *)(w[1]);
--- 38,42 ----
  
  /* T.Grill - changed function interface so that class pointer needn't be passed */
! t_int *clip_perform(t_int *w)
  {
      t_float *in = (t_float *)(w[1]);





More information about the Pd-cvs mailing list