[PD-cvs] pd/src m_simd_sse_gcc.h,1.1.2.9,1.1.2.10 m_simd_sse_gcc.c,1.1.2.5,1.1.2.6
Tim Blechmann
timblech at users.sourceforge.net
Sat Oct 9 09:44:32 CEST 2004
- Previous message: [PD-cvs] pd/src makefile.in,1.1.1.3.2.18,1.1.1.3.2.19 configure.in,1.1.1.4.2.24,1.1.1.4.2.25 s_audio.c,1.1.1.1.2.9,1.1.1.1.2.10 s_audio_alsa.c,1.1.1.1.2.7,1.1.1.1.2.8 s_loader.c,1.1.1.2.2.2,1.1.1.2.2.3 s_main.c,1.1.1.4.2.18,1.1.1.4.2.19
- Next message: [PD-cvs] pd/src m_memory.c,1.1.1.2.2.3,1.1.1.2.2.4 m_simd.h,1.1.2.8,1.1.2.9
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Update of /cvsroot/pure-data/pd/src
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv31156
Modified Files:
Tag: devel_0_37
m_simd_sse_gcc.h m_simd_sse_gcc.c
Log Message:
port of thomas' sse optimized code to gcc inline assembly
Index: m_simd_sse_gcc.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_gcc.c,v
retrieving revision 1.1.2.5
retrieving revision 1.1.2.6
diff -C2 -d -r1.1.2.5 -r1.1.2.6
*** m_simd_sse_gcc.c 2 Sep 2004 14:23:01 -0000 1.1.2.5
--- m_simd_sse_gcc.c 9 Oct 2004 07:44:29 -0000 1.1.2.6
***************
*** 10,123 ****
! /* TB: adapted from the xxx_sse_vc routines
! since gcc can't access the c code as vc can, the functions itself have
! been adapted to assembler */
/* zero_perf_sse_gcc (t_int * w)*/
! asm(
! ".set T_FLOAT,4 \n" /* sizeof(t_float) */
! ".set T_INT,4 \n" /* sizeof(t_int) */
! /* header */
! ".text \n"
! ".align 4 \n" /* alignment */
! ".globl zero_perf_sse_gcc \n"
! "zero_perf_sse_gcc: \n"
! /* head of function */
! "pushl %ebp \n"
! "movl %esp, %ebp \n"
! "pushl %esi \n"
! "movl %esi, -4(%ebp) \n"
! /* get arguments */
! "movl 8(%ebp), %esi \n" /* argument pointer */
! "movl T_INT(%esi), %edx \n" /* out */
! "movl 2*T_INT(%esi), %ecx \n" /* n */
! "xorps %xmm0, %xmm0 \n" /* load zero */
! "shrl $4, %ecx \n" /* divide by 16 */
! /* loop: *out=0 */
! "zpsg_loop: \n"
! "movaps %xmm0, (%edx) \n"
! "movaps %xmm0, 4*T_FLOAT(%edx) \n"
! "movaps %xmm0, 8*T_FLOAT(%edx) \n"
! "movaps %xmm0, 12*T_FLOAT(%edx) \n"
! "addl $64, %edx \n" /* out+=16 */
! "loop zpsg_loop \n"
! /* return w+3; */
! "movl -4(%ebp), %esi \n"
! "movl 8(%ebp), %eax \n"
! "addl $12, %eax \n"
! "movl %ebp, %esp \n"
! "popl %ebp \n"
! "ret \n"
! ".align 4 \n" /* alignment */
! ".type zero_perf_sse_gcc, @function \n"
! );
- /* copy_perf_sse_gcc (t_int * w)*/
- asm(
- ".set T_FLOAT,4 \n"
- ".set T_INT,4 \n"
! /* header */
! ".text \n"
! ".align 4 \n" /* alignment */
! ".globl copy_perf_sse_gcc \n"
! "copy_perf_sse_gcc: \n"
! /* head of function */
! "pushl %ebp \n"
! "movl %esp, %ebp \n"
! "subl $8, %esp \n"
! "movl %ebx, -4(%ebp) \n"
! "movl %esi, -8(%ebp) \n"
! /* get arguments */
! "movl 8(%ebp), %esi \n" /* argument pointer */
! "movl 1*T_INT(%esi), %ebx \n" /* in1 */
! "movl 2*T_INT(%esi), %edx \n" /* out */
! "movl 3*T_INT(%esi), %ecx \n" /* n */
! "shrl $4, %ecx \n" /* divide by 16 */
! /* loop: *out = *in */
! "cpsg_loop: \n"
! "movaps (%ebx), %xmm0 \n"
! "movaps %xmm0, (%edx) \n"
! "movaps 4*T_FLOAT(%ebx), %xmm1 \n"
! "movaps %xmm1, 4*T_FLOAT(%edx) \n"
! "movaps 8*T_FLOAT(%ebx), %xmm2 \n"
! "movaps %xmm2, 8*T_FLOAT(%edx) \n"
! "movaps 12*T_FLOAT(%ebx), %xmm3 \n"
! "movaps %xmm3, 12*T_FLOAT(%edx) \n"
! "addl $64, %ebx \n" /* in1 +=16 */
! "addl $64, %edx \n" /* out +=16 */
! "loop cpsg_loop \n"
! /* return w+4; */
! "movl -4(%ebp), %ebx \n"
! "movl -8(%ebp), %esi \n"
! "movl 8(%ebp), %eax \n"
! "addl $16, %eax \n"
! "movl %ebp, %esp \n"
! "popl %ebp \n"
! "ret \n"
! ".align 4 \n" /* alignment */
! ".type copy_perf_sse_gcc, @function \n"
! );
--- 10,211 ----
! /* TB: adapted from the xxx_sse_vc routines */
!
! /* Zero-fill a float vector with SSE stores.
!  *
!  * dst is assumed to be 16-byte aligned (movaps requires it).
!  * The vector is processed in chunks of 16 floats: n>>4 iterations are
!  * executed, so a remainder of n%16 floats is not written, and nothing
!  * at all is written when n < 16.
!  */
! static void zerovec_sse_gcc(t_float *dst,int n)
! {
!     asm volatile(
!         ".set T_FLOAT,4 \n" /* sizeof(t_float) */
!         "xorps %%xmm0, %%xmm0 \n" /* zero value */
!         "shr $4, %0 \n" /* number of 16-float chunks */
!         "jz 2f \n" /* 'loop' with a zero count would wrap around */
!
!         /* should we do more loop unrolling? */
!         "1: \n"
!         "movaps %%xmm0, (%1) \n"
!         "movaps %%xmm0, 4*T_FLOAT(%1) \n"
!         "movaps %%xmm0, 8*T_FLOAT(%1) \n"
!         "movaps %%xmm0, 12*T_FLOAT(%1) \n"
!
!         "addl $64,%1 \n" /* dst += 16 floats */
!         "loop 1b \n" /* counts down in %ecx, hence the "c" constraint */
!         "2: \n"
!         /* n and dst are changed by the asm, so they must be declared as
!            read-write operands rather than as plain inputs */
!         :"+c"(n),"+r"(dst)
!         :
!         /* "memory": the stores through dst are not visible as operands */
!         :"%xmm0","memory","cc");
! }
!
! /* Fill a float vector with the constant v using SSE stores.
!  *
!  * dst is assumed to be 16-byte aligned (movaps requires it).
!  * The vector is processed in chunks of 16 floats: n>>4 iterations are
!  * executed, so a remainder of n%16 floats is not written, and nothing
!  * at all is written when n < 16.
!  */
! void setvec_sse_gcc(t_float *dst,t_float v,int n)
! {
!     asm volatile(
!         ".set T_FLOAT,4 \n" /* sizeof(t_float) */
!         "shufps $0,%2,%2 \n" /* broadcast v to all four lanes */
!         "shr $4,%0 \n" /* number of 16-float chunks */
!         "jz 2f \n" /* 'loop' with a zero count would wrap around */
!
!         /* should we do more loop unrolling? */
!         "1: \n"
!         "movaps %2, (%1) \n"
!         "movaps %2, 4*T_FLOAT(%1) \n"
!         "movaps %2, 8*T_FLOAT(%1) \n"
!         "movaps %2, 12*T_FLOAT(%1) \n"
!
!         "addl $64,%1 \n" /* dst += 16 floats */
!         "loop 1b \n" /* counts down in %ecx, hence the "c" constraint */
!         "2: \n"
!         /* n, dst and the register holding v are all overwritten by the
!            asm, so they must be read-write operands, not plain inputs */
!         :"+c"(n),"+r"(dst),"+x"(v)
!         :
!         /* "memory": the stores through dst are not visible as operands */
!         :"memory","cc"
!         );
! }
!
!
! /* Copy a float vector from src to dst using SSE loads and stores.
!  *
!  * dst and src are assumed to be 16-byte aligned (movaps requires it).
!  * The vectors are processed in chunks of 16 floats: n>>4 iterations are
!  * executed, so a remainder of n%16 floats is not copied, and nothing
!  * at all is copied when n < 16.
!  */
! void copyvec_sse_gcc(t_float *dst,const t_float *src,int n)
! {
!     asm volatile(
!         ".set T_FLOAT,4 \n" /* sizeof(t_float) */
!         "shr $4, %0 \n" /* number of 16-float chunks */
!         "jz 2f \n" /* 'loop' with a zero count would wrap around */
!
!         /* loop: *out = *in */
!         "1: \n"
!         "movaps (%1), %%xmm0 \n"
!         "movaps 4*T_FLOAT(%1), %%xmm1 \n"
!         "movaps 8*T_FLOAT(%1), %%xmm2 \n"
!         "movaps 12*T_FLOAT(%1), %%xmm3 \n" /* floats 12..15 of the chunk */
!         "movaps %%xmm0, (%2) \n"
!         "movaps %%xmm1, 4*T_FLOAT(%2) \n"
!         "movaps %%xmm2, 8*T_FLOAT(%2) \n"
!         "movaps %%xmm3, 12*T_FLOAT(%2) \n"
!
!         "addl $64,%1 \n" /* src += 16 floats */
!         "addl $64,%2 \n" /* dst += 16 floats */
!         "loop 1b \n" /* counts down in %ecx, hence the "c" constraint */
!         "2: \n"
!         /* n, src and dst are changed by the asm, so they must be declared
!            as read-write operands rather than as plain inputs */
!         :"+c"(n),"+r"(src),"+r"(dst)
!         :
!         /* "memory": loads/stores through src/dst are not visible as operands */
!         :"%xmm0","%xmm1","%xmm2","%xmm3","memory","cc");
! }
!
! /* Perform routine: zero the vector at w[1], whose length is w[2].
!    Returns the address three words past w. */
! t_int *zero_perf_sse_gcc(t_int *w)
! {
!     t_float *out = (t_float *)w[1];
!     int n = w[2];
!
!     zerovec_sse_gcc(out,n);
!     return &w[3];
! }
!
! /* Perform routine: copy the vector at w[1] to the vector at w[2];
!    w[3] is the length. Returns the address four words past w. */
! t_int *copy_perf_sse_gcc(t_int *w)
! {
!     const t_float *in = (const t_float *)w[1];
!     t_float *out = (t_float *)w[2];
!     int n = w[3];
!
!     copyvec_sse_gcc(out,in,n);
!     return &w[4];
! }
!
! /* t_int *sig_tilde_perf_sse_gcc(t_int *w) */
! /* { */
! /* setvec_sse_gcc((t_float *)w[2],*(const t_float *)w[1],w[3]); */
! /* return w+4; */
! /* } */
/* zero_perf_sse_gcc (t_int * w)*/
! /* asm( */
! /* ".set T_FLOAT,4 \n" /\* sizeof(t_float) *\/ */
! /* ".set T_INT,4 \n" /\* sizeof(t_int) *\/ */
! /* /\* header *\/ */
! /* ".text \n" */
! /* ".align 4 \n" /\* alignment *\/ */
! /* ".globl zero_perf_sse_gcc \n" */
! /* "zero_perf_sse_gcc: \n" */
! /* /\* head of function *\/ */
! /* "pushl %ebp \n" */
! /* "movl %esp, %ebp \n" */
! /* "pushl %esi \n" */
! /* "movl %esi, -4(%ebp) \n" */
! /* /\* get arguments *\/ */
! /* "movl 8(%ebp), %esi \n" /\* argument pointer *\/ */
! /* "movl T_INT(%esi), %edx \n" /\* out *\/ */
! /* "movl 2*T_INT(%esi), %ecx \n" /\* n *\/ */
! /* "xorps %xmm0, %xmm0 \n" /\* load zero *\/ */
! /* "shrl $4, %ecx \n" /\* divide by 16 *\/ */
! /* /\* loop: *out=0 *\/ */
! /* "zpsg_loop: \n" */
! /* "movaps %xmm0, (%edx) \n" */
! /* "movaps %xmm0, 4*T_FLOAT(%edx) \n" */
! /* "movaps %xmm0, 8*T_FLOAT(%edx) \n" */
! /* "movaps %xmm0, 12*T_FLOAT(%edx) \n" */
! /* "addl $64, %edx \n" /\* out+=16 *\/ */
! /* "loop zpsg_loop \n" */
! /* /\* return w+3; *\/ */
! /* "movl -4(%ebp), %esi \n" */
! /* "movl 8(%ebp), %eax \n" */
! /* "addl $12, %eax \n" */
! /* "movl %ebp, %esp \n" */
! /* "popl %ebp \n" */
! /* "ret \n" */
! /* ".align 4 \n" /\* alignment *\/ */
! /* ".type zero_perf_sse_gcc, @function \n" */
! /* ); */
! /* /\* copy_perf_sse_gcc (t_int * w)*\/ */
! /* asm( */
! /* ".set T_FLOAT,4 \n" */
! /* ".set T_INT,4 \n" */
!
! /* /\* header *\/ */
! /* ".text \n" */
! /* ".align 4 \n" /\* alignment *\/ */
! /* ".globl copy_perf_sse_gcc \n" */
! /* "copy_perf_sse_gcc: \n" */
! /* /\* head of function *\/ */
! /* "pushl %ebp \n" */
! /* "movl %esp, %ebp \n" */
! /* "subl $8, %esp \n" */
! /* "movl %ebx, -4(%ebp) \n" */
! /* "movl %esi, -8(%ebp) \n" */
! /* /\* get arguments *\/ */
! /* "movl 8(%ebp), %esi \n" /\* argument pointer *\/ */
! /* "movl 1*T_INT(%esi), %ebx \n" /\* in1 *\/ */
! /* "movl 2*T_INT(%esi), %edx \n" /\* out *\/ */
! /* "movl 3*T_INT(%esi), %ecx \n" /\* n *\/ */
! /* "shrl $4, %ecx \n" /\* divide by 16 *\/ */
! /* /\* loop: *out = *in *\/ */
! /* "cpsg_loop: \n" */
! /* "movaps (%ebx), %xmm0 \n" */
! /* "movaps %xmm0, (%edx) \n" */
! /* "movaps 4*T_FLOAT(%ebx), %xmm1 \n" */
! /* "movaps %xmm1, 4*T_FLOAT(%edx) \n" */
! /* "movaps 8*T_FLOAT(%ebx), %xmm2 \n" */
! /* "movaps %xmm2, 8*T_FLOAT(%edx) \n" */
! /* "movaps 12*T_FLOAT(%ebx), %xmm3 \n" */
! /* "movaps %xmm3, 12*T_FLOAT(%edx) \n" */
! /* "addl $64, %ebx \n" /\* in1 +=16 *\/ */
! /* "addl $64, %edx \n" /\* out +=16 *\/ */
! /* "loop cpsg_loop \n" */
! /* /\* return w+4; *\/ */
! /* "movl -4(%ebp), %ebx \n" */
! /* "movl -8(%ebp), %esi \n" */
! /* "movl 8(%ebp), %eax \n" */
! /* "addl $16, %eax \n" */
! /* "movl %ebp, %esp \n" */
! /* "popl %ebp \n" */
! /* "ret \n" */
! /* ".align 4 \n" /\* alignment *\/ */
! /* ".type copy_perf_sse_gcc, @function \n" */
! /* ); */
***************
*** 157,161 ****
"movaps %xmm0, 16(%edx) \n"
"movaps %xmm0, 32(%edx) \n"
! "movaps %xmm0, 48(%edx) \n"
"addl $64, %edx \n" /* out+=16 */
"loop sigtpsg_loop \n"
--- 245,249 ----
"movaps %xmm0, 16(%edx) \n"
"movaps %xmm0, 32(%edx) \n"
! "movaps %xmm0, 48(%edx) \n"
"addl $64, %edx \n" /* out+=16 */
"loop sigtpsg_loop \n"
Index: m_simd_sse_gcc.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_gcc.h,v
retrieving revision 1.1.2.9
retrieving revision 1.1.2.10
diff -C2 -d -r1.1.2.9 -r1.1.2.10
*** m_simd_sse_gcc.h 7 Oct 2004 17:56:38 -0000 1.1.2.9
--- m_simd_sse_gcc.h 9 Oct 2004 07:44:29 -0000 1.1.2.10
***************
*** 11,16 ****
/* SIMD functions for SSE with GCC */
//void zerovec_sse_gcc(t_float *dst,int n);
! //void setvec_sse_gcc(t_float *dst,t_float v,int n);
! //void copyvec_sse_gcc(t_float *dst,const t_float *src,int n);
//void addvec_sse_gcc(t_float *dst,const t_float *src,int n);
//void testcopyvec_sse_gcc(t_float *dst,const t_float *src,int n);
--- 11,16 ----
/* SIMD functions for SSE with GCC */
//void zerovec_sse_gcc(t_float *dst,int n);
! void setvec_sse_gcc(t_float *dst,t_float v,int n);
! void copyvec_sse_gcc(t_float *dst,const t_float *src,int n);
//void addvec_sse_gcc(t_float *dst,const t_float *src,int n);
//void testcopyvec_sse_gcc(t_float *dst,const t_float *src,int n);
***************
*** 39,45 ****
! #define zerovec zerovec_8 /* SIMD not implemented */
! #define setvec setvec_8 /* SIMD not implemented */
! #define copyvec copyvec_8 /* SIMD not implemented */
#define addvec addvec_8 /* SIMD not implemented */
#define testcopyvec testcopyvec_8 /* SIMD not implemented */
--- 39,45 ----
! #define zerovec zerovec_8 //sse_gcc
! #define setvec setvec_sse_gcc
! #define copyvec copyvec_sse_gcc
#define addvec addvec_8 /* SIMD not implemented */
#define testcopyvec testcopyvec_8 /* SIMD not implemented */
- Previous message: [PD-cvs] pd/src makefile.in,1.1.1.3.2.18,1.1.1.3.2.19 configure.in,1.1.1.4.2.24,1.1.1.4.2.25 s_audio.c,1.1.1.1.2.9,1.1.1.1.2.10 s_audio_alsa.c,1.1.1.1.2.7,1.1.1.1.2.8 s_loader.c,1.1.1.2.2.2,1.1.1.2.2.3 s_main.c,1.1.1.4.2.18,1.1.1.4.2.19
- Next message: [PD-cvs] pd/src m_memory.c,1.1.1.2.2.3,1.1.1.2.2.4 m_simd.h,1.1.2.8,1.1.2.9
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the Pd-cvs
mailing list