[PD-cvs] pd/src m_simd_sse_gcc.c,1.1.4.6,1.1.4.7 m_simd_sse_gcc.h,1.1.4.2,1.1.4.3 m_simd_sse_vc.h,1.1.4.2,1.1.4.3 m_simd_sse_vc.c,1.1.4.4,1.1.4.5

Tim Blechmann timblech at users.sourceforge.net
Mon Nov 29 19:11:40 CET 2004


Update of /cvsroot/pure-data/pd/src
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv17176

Modified Files:
      Tag: devel_0_38
	m_simd_sse_gcc.c m_simd_sse_gcc.h m_simd_sse_vc.h 
	m_simd_sse_vc.c 
Log Message:
sqrt and rsqrt sse code

Index: m_simd_sse_vc.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_vc.h,v
retrieving revision 1.1.4.2
retrieving revision 1.1.4.3
diff -C2 -d -r1.1.4.2 -r1.1.4.3
*** m_simd_sse_vc.h	10 Nov 2004 21:53:03 -0000	1.1.4.2
--- m_simd_sse_vc.h	29 Nov 2004 18:11:38 -0000	1.1.4.3
***************
*** 35,47 ****
  t_int *scalarmin_perf_simd(t_int *w);
  t_int *sigwrap_perf_simd(t_int *w);
- t_int *sigsqrt_perf_simd(t_int *w);
- t_int *sigrsqrt_perf_simd(t_int *w);
  
  /* functions in d_math.c */
  t_int *clip_perf_simd(t_int *w);
  
  #define sigwrap_perf_simd       sigwrap_perform  /* SIMD not implemented */
! #define sigsqrt_perf_simd       sigsqrt_perform  /* SIMD not implemented */
! #define sigrsqrt_perf_simd      sigrsqrt_perform /* SIMD not implemented */
  
  #endif /* __M_SIMD_SSE_VC_H */
--- 35,46 ----
  t_int *scalarmin_perf_simd(t_int *w);
  t_int *sigwrap_perf_simd(t_int *w);
  
  /* functions in d_math.c */
  t_int *clip_perf_simd(t_int *w);
+ t_int *sigsqrt_perf_simd(t_int *w);
+ t_int *sigrsqrt_perf_simd(t_int *w);
  
  #define sigwrap_perf_simd       sigwrap_perform  /* SIMD not implemented */
! 
  
  #endif /* __M_SIMD_SSE_VC_H */

Index: m_simd_sse_vc.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_vc.c,v
retrieving revision 1.1.4.4
retrieving revision 1.1.4.5
diff -C2 -d -r1.1.4.4 -r1.1.4.5
*** m_simd_sse_vc.c	18 Nov 2004 22:25:06 -0000	1.1.4.4
--- m_simd_sse_vc.c	29 Nov 2004 18:11:38 -0000	1.1.4.5
***************
*** 835,839 ****
  }
  
! /* tb: fast clipping */
  t_int *clip_perf_simd(t_int *w)
  {
--- 835,840 ----
  }
  
! /* tb { */
! /* fast clipping */
  t_int *clip_perf_simd(t_int *w)
  {
***************
*** 892,896 ****
  
  
! /* TB: runtime check */
  int simd_runtime_check()
  {
--- 893,983 ----
  
  
! t_int *sigsqrt_perf_simd(t_int *w) /* out[i] = sqrt(in[i]) with SSE sqrtps, 16 floats per loop pass */
! {
! 	__asm {
! 		mov		esi,dword ptr [w]
! /* esi = w; the three arguments are read from w[1], w[2] and w[3] */
! 		mov		ebx,dword ptr [esi + 1*TYPE t_int] /* in: source pointer, w[1] */
! /*      prefetcht0 [ebx] prefetch first cache line */
! 		mov		edx,dword ptr [esi + 2*TYPE t_int] /* out: destination pointer, w[2] */
! 
! 		mov		ecx,dword ptr [esi + 3*TYPE t_int] /* n: sample count, w[3] */
! 		shr		ecx,4 /* ecx = n/16 = number of loop passes */
! /* NOTE(review): if n < 16, ecx is 0 here and "loop" would wrap around instead of skipping -- presumably callers only pass n >= 16; confirm */
! /*		prefetcht0 [ebx+8*TYPE t_float] */
! /* NOTE(review): movaps is the aligned move, so in/out must be 16-byte aligned -- presumably guaranteed by the caller; confirm */
! loopa:
! /*		prefetcht0 [ebx+16*TYPE t_float] */
! 
! 		movaps	xmm0,xmmword ptr[ebx] /* floats 0..3: load, sqrt, store */
! 		sqrtps	xmm0,xmm0
! 		movaps	xmmword ptr[edx],xmm0
! /* floats 4..7 */
! 		movaps	xmm1,xmmword ptr[ebx+4*TYPE t_float]
! 		sqrtps	xmm1,xmm1
! 		movaps	xmmword ptr[edx+4*TYPE t_float],xmm1
! 
! /*		prefetcht0 [ebx+24*TYPE t_float] */
! /* floats 8..11 */
! 		movaps	xmm2,xmmword ptr[ebx+8*TYPE t_float]
! 		sqrtps	xmm2,xmm2
! 		movaps	xmmword ptr[edx+8*TYPE t_float],xmm2
! /* floats 12..15 */
! 		movaps	xmm3,xmmword ptr[ebx+12*TYPE t_float]
! 		sqrtps	xmm3,xmm3
! 		movaps	xmmword ptr[edx+12*TYPE t_float],xmm3
! /* advance both pointers by the 16 floats just processed */
! 		add		ebx,16*TYPE t_float
! 		add		edx,16*TYPE t_float
! 		loop	loopa /* decrement ecx, repeat while it is non-zero */
! 	}
!     return (w+4); /* w advanced past w[0]..w[3] */
! }
! 
! 
! t_int *sigrsqrt_perf_simd(t_int *w) /* out[i] ~= 1/sqrt(in[i]) with SSE rsqrtps, 16 floats per loop pass */
! {
! 	__asm {
! 		mov		esi,dword ptr [w]
! /* esi = w; the three arguments are read from w[1], w[2] and w[3] */
! 		mov		ebx,dword ptr [esi + 1*TYPE t_int] /* in: source pointer, w[1] */
! /*      prefetcht0 [ebx] prefetch first cache line */
! 		mov		edx,dword ptr [esi + 2*TYPE t_int] /* out: destination pointer, w[2] */
! 
! 		mov		ecx,dword ptr [esi + 3*TYPE t_int] /* n: sample count, w[3] */
! 		shr		ecx,4 /* ecx = n/16 = number of loop passes */
! /* NOTE(review): if n < 16, ecx is 0 here and "loop" would wrap around instead of skipping -- presumably callers only pass n >= 16; confirm */
! /*		prefetcht0 [ebx+8*TYPE t_float] */
! /* NOTE(review): movaps is the aligned move, so in/out must be 16-byte aligned -- presumably guaranteed by the caller; confirm */
! loopa:
! /*		prefetcht0 [ebx+16*TYPE t_float] */
! /* NOTE(review): rsqrtps is the approximate reciprocal square root (see Intel manual), not a full-precision 1/sqrt */
! 		movaps	xmm0,xmmword ptr[ebx] /* floats 0..3: load, rsqrt, store */
! 		rsqrtps	xmm0,xmm0
! 		movaps	xmmword ptr[edx],xmm0
! /* floats 4..7 */
! 		movaps	xmm1,xmmword ptr[ebx+4*TYPE t_float]
! 		rsqrtps	xmm1,xmm1
! 		movaps	xmmword ptr[edx+4*TYPE t_float],xmm1
! 
! /*		prefetcht0 [ebx+24*TYPE t_float] */
! /* floats 8..11 */
! 		movaps	xmm2,xmmword ptr[ebx+8*TYPE t_float]
! 		rsqrtps	xmm2,xmm2
! 		movaps	xmmword ptr[edx+8*TYPE t_float],xmm2
! /* floats 12..15 */
! 		movaps	xmm3,xmmword ptr[ebx+12*TYPE t_float]
! 		rsqrtps	xmm3,xmm3
! 		movaps	xmmword ptr[edx+12*TYPE t_float],xmm3
! /* advance both pointers by the 16 floats just processed */
! 		add		ebx,16*TYPE t_float
! 		add		edx,16*TYPE t_float
! 		loop	loopa /* decrement ecx, repeat while it is non-zero */
! 	}
!     return (w+4); /* w advanced past w[0]..w[3] */
! }
! 
! 
! /* runtime check */
  int simd_runtime_check()
  {
***************
*** 904,907 ****
--- 991,995 ----
      return (0x2000000 & redx);
  }
+ /* } tb */
  
  #endif

Index: m_simd_sse_gcc.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_gcc.c,v
retrieving revision 1.1.4.6
retrieving revision 1.1.4.7
diff -C2 -d -r1.1.4.6 -r1.1.4.7
*** m_simd_sse_gcc.c	24 Nov 2004 14:31:11 -0000	1.1.4.6
--- m_simd_sse_gcc.c	29 Nov 2004 18:11:34 -0000	1.1.4.7
***************
*** 731,734 ****
--- 731,808 ----
  
  
+ /* DSP perform routine: out[i] = sqrt(in[i]) using packed SSE sqrtps.
+  * w[1] = in, w[2] = out, w[3] = n (sample count); returns w+4.
+  * Only whole groups of 16 floats are processed; movaps needs 16-byte
+  * aligned buffers -- presumably ensured by the caller (TODO confirm). */
+ t_int* sigsqrt_perf_simd(t_int *w)
+ {
+     /* local copies: the asm advances both pointers and counts ecx down,
+        so they must be read-write ("+") operands rather than inputs */
+     t_int in = w[1], out = w[2], n = w[3] >> 4; /* n: 16-float passes */
+     if (n > 0) { /* "loop" with a zero count would wrap to 2^32 passes */
+     __asm__ __volatile__(
+ 	".set T_FLOAT,4                            \n"
+ 	"1:                                        \n" /* *out = sqrt(*in) */
+ 	"movaps    (%0), %%xmm0                    \n"
+ 	"sqrtps    %%xmm0, %%xmm0                  \n"
+ 	"movaps    %%xmm0, (%1)                    \n"
+ 	"movaps    4*T_FLOAT(%0), %%xmm1           \n"
+ 	"sqrtps    %%xmm1, %%xmm1                  \n"
+ 	"movaps    %%xmm1, 4*T_FLOAT(%1)           \n"
+ 	"movaps    8*T_FLOAT(%0), %%xmm2           \n"
+ 	"sqrtps    %%xmm2, %%xmm2                  \n"
+ 	"movaps    %%xmm2, 8*T_FLOAT(%1)           \n"
+ 	"movaps    12*T_FLOAT(%0), %%xmm3          \n"
+ 	"sqrtps    %%xmm3, %%xmm3                  \n"
+ 	"movaps    %%xmm3, 12*T_FLOAT(%1)          \n"
+ 	"addl      $16*T_FLOAT, %0                 \n"
+ 	"addl      $16*T_FLOAT, %1                 \n"
+ 	"loop      1b                              \n"
+ 	:"+r"(in),"+r"(out),"+c"(n)
+ 	: /* "memory": in[] is read and out[] written behind the compiler */
+ 	:"%xmm0","%xmm1","%xmm2","%xmm3","memory","cc"
+ 	);
+     }
+     return w+4;
+ }
+ 
+ 
+ /* DSP perform routine: out[i] ~= 1/sqrt(in[i]) using packed SSE rsqrtps,
+  * a fast approximation (roughly 12 bits of precision per the Intel manual).
+  * w[1] = in, w[2] = out, w[3] = n (sample count); returns w+4.  Whole groups
+  * of 16 floats only; buffers presumably 16-byte aligned (TODO confirm). */
+ t_int* sigrsqrt_perf_simd(t_int *w)
+ {
+     /* local copies: the asm advances both pointers and counts ecx down,
+        so they must be read-write ("+") operands rather than inputs */
+     t_int in = w[1], out = w[2], n = w[3] >> 4; /* n: 16-float passes */
+     if (n > 0) { /* "loop" with a zero count would wrap to 2^32 passes */
+     __asm__ __volatile__(
+ 	".set T_FLOAT,4                            \n"
+ 	"1:                                        \n" /* *out = 1/sqrt(*in) */
+ 	"movaps    (%0), %%xmm0                    \n"
+ 	"rsqrtps   %%xmm0, %%xmm0                  \n"
+ 	"movaps    %%xmm0, (%1)                    \n"
+ 	"movaps    4*T_FLOAT(%0), %%xmm1           \n"
+ 	"rsqrtps   %%xmm1, %%xmm1                  \n"
+ 	"movaps    %%xmm1, 4*T_FLOAT(%1)           \n"
+ 	"movaps    8*T_FLOAT(%0), %%xmm2           \n"
+ 	"rsqrtps   %%xmm2, %%xmm2                  \n"
+ 	"movaps    %%xmm2, 8*T_FLOAT(%1)           \n"
+ 	"movaps    12*T_FLOAT(%0), %%xmm3          \n"
+ 	"rsqrtps   %%xmm3, %%xmm3                  \n"
+ 	"movaps    %%xmm3, 12*T_FLOAT(%1)          \n"
+ 	"addl      $16*T_FLOAT, %0                 \n"
+ 	"addl      $16*T_FLOAT, %1                 \n"
+ 	"loop      1b                              \n"
+ 	:"+r"(in),"+r"(out),"+c"(n)
+ 	: /* "memory": in[] is read and out[] written behind the compiler */
+ 	:"%xmm0","%xmm1","%xmm2","%xmm3","memory","cc"
+ 	);
+     }
+     return w+4;
+ }
+ 
+ 
  /* TB: runtime check */
  int simd_runtime_check()

Index: m_simd_sse_gcc.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_gcc.h,v
retrieving revision 1.1.4.2
retrieving revision 1.1.4.3
diff -C2 -d -r1.1.4.2 -r1.1.4.3
*** m_simd_sse_gcc.h	10 Nov 2004 21:53:02 -0000	1.1.4.2
--- m_simd_sse_gcc.h	29 Nov 2004 18:11:38 -0000	1.1.4.3
***************
*** 34,47 ****
  t_int *min_perf_simd(t_int *w);
  t_int *scalarmin_perf_simd(t_int *w);
- t_int *sigwrap_perf_simd(t_int *w);
- t_int *sigsqrt_perf_simd(t_int *w);
- t_int *sigrsqrt_perf_simd(t_int *w);
  
  /* functions in d_math.c */
  t_int *clip_perf_simd(t_int *w);
  
  #define sigwrap_perf_simd          sigwrap_perform  /* SIMD not implemented */
- #define sigsqrt_perf_simd          sigsqrt_perform  /* SIMD not implemented */
- #define sigrsqrt_perf_simd         sigrsqrt_perform /* SIMD not implemented */
  
  #endif /* __M_SIMD_SSE_GCC_H */
--- 34,45 ----
  t_int *min_perf_simd(t_int *w);
  t_int *scalarmin_perf_simd(t_int *w);
  
  /* functions in d_math.c */
  t_int *clip_perf_simd(t_int *w);
+ t_int *sigwrap_perf_simd(t_int *w);
+ t_int *sigsqrt_perf_simd(t_int *w);
+ t_int *sigrsqrt_perf_simd(t_int *w);
  
  #define sigwrap_perf_simd          sigwrap_perform  /* SIMD not implemented */
  
  #endif /* __M_SIMD_SSE_GCC_H */





More information about the Pd-cvs mailing list