[PD-cvs] pd/src m_simd_sse_gcc.c,1.1.2.6,1.1.2.7 m_simd_sse_gcc.h,1.1.2.10,1.1.2.11

Tim Blechmann timblech at users.sourceforge.net
Sat Oct 9 13:56:18 CEST 2004


Update of /cvsroot/pure-data/pd/src
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv14242

Modified Files:
      Tag: devel_0_37
	m_simd_sse_gcc.c m_simd_sse_gcc.h 
Log Message:
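Further update of the SSE/GCC SIMD routines: export zerovec_sse_gcc and
add addvec_sse_gcc (both now enabled in the header), fix the
copyvec_sse_gcc offsets and loop-counter constraint, replace the
hard-coded $64 strides with 16*T_FLOAT, and rewrite the sig~, plus and
scalarplus perform routines as C functions using inline asm.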

Index: m_simd_sse_gcc.c
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_gcc.c,v
retrieving revision 1.1.2.6
retrieving revision 1.1.2.7
diff -C2 -d -r1.1.2.6 -r1.1.2.7
*** m_simd_sse_gcc.c	9 Oct 2004 07:44:29 -0000	1.1.2.6
--- m_simd_sse_gcc.c	9 Oct 2004 11:56:16 -0000	1.1.2.7
***************
*** 13,17 ****
  
  /* dst is assumed to be aligned */
! static void zerovec_sse_gcc(t_float *dst,int n)
  {
      asm(
--- 13,17 ----
  
  /* dst is assumed to be aligned */
! void zerovec_sse_gcc(t_float *dst,int n)
  {
      asm(
***************
*** 21,25 ****
  
  	/* should we do more loop unrolling? */
! 	"1:                                       \n"
  	"movaps    %%xmm0, (%1)                    \n"
  	"movaps    %%xmm0, 4*T_FLOAT(%1)           \n"
--- 21,26 ----
  
  	/* should we do more loop unrolling? */
! 	/* *dst = 0 */
! 	"1:                                        \n"
  	"movaps    %%xmm0, (%1)                    \n"
  	"movaps    %%xmm0, 4*T_FLOAT(%1)           \n"
***************
*** 27,31 ****
  	"movaps    %%xmm0, 12*T_FLOAT(%1)          \n"
  
! 	"addl      $64,%1                          \n"
  	"loop      1b                              \n"
  	:
--- 28,32 ----
  	"movaps    %%xmm0, 12*T_FLOAT(%1)          \n"
  
! 	"addl      $16*T_FLOAT,%1                  \n"
  	"loop      1b                              \n"
  	:
***************
*** 43,56 ****
  
  	/* should we do more loop unrolling? */
  	"1:                                        \n"
! 	"movaps    %2, (%1)                    \n"
! 	"movaps    %2, 4*T_FLOAT(%1)           \n"
! 	"movaps    %2, 8*T_FLOAT(%1)           \n"
! 	"movaps    %2, 12*T_FLOAT(%1)          \n"
  
! 	"addl      $64,%1                          \n"
  	"loop      1b                              \n"
  	:
! 	:"c"(n),"r"(dst),"x"(v)
  	);
  }
--- 44,58 ----
  
  	/* should we do more loop unrolling? */
+ 	/* *dst = v */
  	"1:                                        \n"
! 	"movaps    %2, (%1)                        \n"
! 	"movaps    %2, 4*T_FLOAT(%1)               \n"
! 	"movaps    %2, 8*T_FLOAT(%1)               \n"
! 	"movaps    %2, 12*T_FLOAT(%1)              \n"
  
! 	"addl      $16*T_FLOAT,%1                  \n"
  	"loop      1b                              \n"
  	:
! 	:"c"(n),"r"(dst),"x"((t_float)v)
  	);
  }
***************
*** 64,84 ****
  	"shr       $4, %0                          \n"
  
! 	/* loop: *out = *in */
  	"1:                                        \n"
  	"movaps    (%1), %%xmm0                    \n"
  	"movaps    4*T_FLOAT(%1), %%xmm1           \n"
  	"movaps    8*T_FLOAT(%1), %%xmm2           \n"
! 	"movaps    16*T_FLOAT(%1), %%xmm3          \n"
  	"movaps    %%xmm0, (%2)                    \n"
  	"movaps    %%xmm1, 4*T_FLOAT(%2)           \n"
  	"movaps    %%xmm2, 8*T_FLOAT(%2)           \n"
! 	"movaps    %%xmm3, 16*T_FLOAT(%2)          \n"
  
! 	"addl      $64,%1                          \n"
! 	"addl      $64,%2                          \n"
  	"loop      1b                              \n"
  	:
! 	:"r"(n),"r"(src),"r"(dst)
! 	:"%xmm0","%xmm1","%xmm2","%xmm3","%eax");
  }
  
--- 66,123 ----
  	"shr       $4, %0                          \n"
  
! 	/* loop: *dst = *src */
  	"1:                                        \n"
  	"movaps    (%1), %%xmm0                    \n"
  	"movaps    4*T_FLOAT(%1), %%xmm1           \n"
  	"movaps    8*T_FLOAT(%1), %%xmm2           \n"
! 	"movaps    12*T_FLOAT(%1), %%xmm3          \n"
  	"movaps    %%xmm0, (%2)                    \n"
  	"movaps    %%xmm1, 4*T_FLOAT(%2)           \n"
  	"movaps    %%xmm2, 8*T_FLOAT(%2)           \n"
! 	"movaps    %%xmm3, 12*T_FLOAT(%2)          \n"
  
! 	"addl      $16*T_FLOAT,%1                  \n"
! 	"addl      $16*T_FLOAT,%2                  \n"
  	"loop      1b                              \n"
  	:
! 	:"c"(n),"r"(src),"r"(dst)
! 	:"%xmm0","%xmm1","%xmm2","%xmm3");
! }
! 
! 
! /* dst and src are assumed to be aligned */
! void addvec_sse_gcc(t_float *dst,const t_float *src,int n)
! {
!     asm(
! 	".set T_FLOAT,4                            \n" /* sizeof(t_float) */
! 	"shr       $4, %0                          \n"
! 
! 	/* loop: *dst += *src */
! 	"1:                                        \n"
! 	"movaps    (%2,%3),%%xmm0                  \n"
! 	"movaps    (%1,%3),%%xmm1                  \n"
! 	"addps     %%xmm0,%%xmm1                   \n"
! 	"movaps    %%xmm0,(%2,%3)                  \n"
! 
! 	"movaps    4*T_FLOAT(%2,%3),%%xmm0         \n"
! 	"movaps    4*T_FLOAT(%1,%3),%%xmm1         \n"
! 	"addps     %%xmm0,%%xmm1                   \n"
! 	"movaps    %%xmm0,4*T_FLOAT(%2,%3)         \n"
! 
! 	"movaps    8*T_FLOAT(%2,%3),%%xmm0         \n"
! 	"movaps    8*T_FLOAT(%1,%3),%%xmm1         \n"
! 	"addps     %%xmm0,%%xmm1                   \n"
! 	"movaps    %%xmm0,8*T_FLOAT(%2,%3)         \n"
! 
! 	"movaps    12*T_FLOAT(%2,%3),%%xmm0        \n"
! 	"movaps    12*T_FLOAT(%1,%3),%%xmm1        \n"
! 	"addps     %%xmm0,%%xmm1                   \n"
! 	"movaps    %%xmm0,12*T_FLOAT(%2,%3)        \n"
! 
! 	"addl      $16*T_FLOAT,%3                  \n"
! 	"loop      1b                              \n"
! 	:
! 	: "c"(n),"r"(src),"r"(dst),"r"(0)
! 	: "%xmm0","%xmm1","%xmm2","%xmm3","%xmm4","%xmm5","%xmm6","%xmm7");
  }
  
***************
*** 95,157 ****
  }
  
! /* t_int *sig_tilde_perf_sse_gcc(t_int *w) */
! /* { */
! /*     setvec_sse_gcc((t_float *)w[2],*(const t_float *)w[1],w[3]); */
! /*     return w+4; */
! /* } */
! 
! /* zero_perf_sse_gcc (t_int * w)*/
! /* asm( */
! /*     ".set T_FLOAT,4                               \n" /\* sizeof(t_float) *\/ */
! /*     ".set T_INT,4                                 \n" /\* sizeof(t_int)   *\/ */
! 
! /*     /\* header *\/ */
! /*     ".text                                        \n" */
! /*     ".align    4                                  \n" /\* alignment *\/ */
  
- /*     ".globl    zero_perf_sse_gcc                  \n" */
- /*     "zero_perf_sse_gcc:                           \n" */
-     
- /*     /\* head of function *\/ */
- /*     "pushl     %ebp                               \n" */
- /*     "movl      %esp, %ebp                         \n" */
- /*     "pushl     %esi                               \n" */
- /*     "movl      %esi, -4(%ebp)                     \n" */
  
! /*     /\* get arguments *\/ */
! /*     "movl      8(%ebp), %esi                      \n" /\* argument pointer *\/ */
! /*     "movl      T_INT(%esi), %edx                  \n" /\* out *\/ */
! /*     "movl      2*T_INT(%esi), %ecx                \n" /\* n *\/ */
! 	 
! /*     "xorps     %xmm0, %xmm0                       \n" /\* load zero *\/ */
! /*     "shrl      $4, %ecx                           \n" /\* divide by 16 *\/ */
!     
! /*     /\* loop: *out=0 *\/ */
! /*     "zpsg_loop:                                   \n" */
! /*     "movaps    %xmm0, (%edx)                      \n" */
! /*     "movaps    %xmm0, 4*T_FLOAT(%edx)             \n" */
! /*     "movaps    %xmm0, 8*T_FLOAT(%edx)             \n" */
! /*     "movaps    %xmm0, 12*T_FLOAT(%edx)            \n" */
!     
! /*     "addl      $64, %edx                          \n" /\* out+=16 *\/ */
! /*     "loop      zpsg_loop                          \n" */
!     
      
! /*     /\* return w+3; *\/ */
! /*     "movl      -4(%ebp), %esi                     \n" */
! /*     "movl      8(%ebp), %eax                      \n" */
! /*     "addl      $12, %eax                          \n" */
! /*     "movl      %ebp, %esp                         \n" */
! /*     "popl      %ebp                               \n" */
! /*     "ret                                          \n" */
  
  
! /*     ".align    4                                  \n" /\* alignment *\/ */
! /*     ".type     zero_perf_sse_gcc, @function       \n" */
  
! /*     ); */
  
  
! /* /\* copy_perf_sse_gcc (t_int * w)*\/ */
  /* asm( */
  /*     ".set T_FLOAT,4                               \n" */
--- 134,222 ----
  }
  
! t_int *sig_tilde_perf_sse_gcc(t_int *w)
! {
!     setvec_sse_gcc((t_float *)w[2],*(const t_float *)w[1],w[3]);
!     return w+4;
! }
  
  
! t_int * plus_perf_sse_gcc (t_int * w)
! {
!     asm(
! 	".set T_FLOAT,4                            \n"
! 	
! 	"shrl      $4, %3                          \n" /* divide by 16 */
      
! 	/* loop: *out = *in1 + *in2 */
! 	"1:                                        \n"
! 	"movaps    (%0,%4), %%xmm0                 \n"
! 	"movaps    (%1,%4), %%xmm1                 \n"
! 	"addps     %%xmm1, %%xmm0                  \n"
! 	"movaps    %%xmm0, (%2,%4)                 \n"
! 	
! 	"movaps    4*T_FLOAT(%0,%4), %%xmm2        \n"
! 	"movaps    4*T_FLOAT(%1,%4), %%xmm3        \n"
! 	"addps     %%xmm3, %%xmm2                  \n"
! 	"movaps    %%xmm2, 4*T_FLOAT(%2,%4)        \n"
! 	
! 	"movaps    8*T_FLOAT(%0,%4), %%xmm4        \n"
! 	"movaps    8*T_FLOAT(%1,%4), %%xmm5        \n"
! 	"addps     %%xmm5, %%xmm4                  \n"
! 	"movaps    %%xmm4, 8*T_FLOAT(%2,%4)        \n"
! 	
! 	"movaps    12*T_FLOAT(%0,%4), %%xmm6       \n"
! 	"movaps    12*T_FLOAT(%1,%4), %%xmm7       \n"
! 	"addps     %%xmm7, %%xmm6                  \n"
! 	"movaps    %%xmm6, 12*T_FLOAT(%2,%4)       \n"
! 	
! 	"addl      $16*T_FLOAT, %4                 \n"
! 	"loop      1b                              \n"
! 	:
! 	/* in1, in2, out, n */
! 	:"r"(w[1]),"r"(w[2]),"r"(w[3]),"c"(w[4]),"r"(0)
! 	);
!     return w+5;
! }
  
  
! t_int *scalarplus_perf_sse_gcc(t_int *w)
! {
!     asm(
! 	".set T_FLOAT,4                            \n"
! 	
!         "shufps    $0, %1, %1                      \n"
! 	"shrl      $4, %3                          \n" /* divide by 16 */
  
! 	/* loop: *out = *in + value */
! 	"1:                                        \n"
! 	"movaps    (%0), %%xmm1                    \n"
! 	"addps     %1, %%xmm1                      \n"
! 	"movaps    %%xmm1, (%2)                    \n"
!     
! 	"movaps    4*T_FLOAT(%0), %%xmm2           \n"
! 	"addps     %1, %%xmm2                      \n"
! 	"movaps    %%xmm2, 4*T_FLOAT(%2)           \n"
! 	
! 	"movaps    8*T_FLOAT(%0), %%xmm3           \n"
! 	"addps     %1, %%xmm3                      \n"
! 	"movaps    %%xmm3, 8*T_FLOAT(%2)           \n"
! 	
! 	"movaps    12*T_FLOAT(%0), %%xmm4          \n"
! 	"addps     %1, %%xmm4                      \n"
! 	"movaps    %%xmm4, 12*T_FLOAT(%2)          \n"
! 	
! 	"addl      $16*T_FLOAT, %0                 \n"
! 	"addl      $16*T_FLOAT, %2                 \n"
! 	"loop      1b                              \n"
! 	:
! 	/* in, value, out, n */
! 	:"r"(w[1]),"x"((t_float)w[2]),"r"(w[3]),"c"(w[4])
! 	:"%xmm1","%xmm2","%xmm3","%xmm4"
! 	);
!     return w+5;
! }
  
  
! /* /\* scalarplus_perf_sse_gcc(t_int *w) *\/ */
  /* asm( */
  /*     ".set T_FLOAT,4                               \n" */
***************
*** 162,167 ****
  /*     ".align    4                                  \n" /\* alignment *\/ */
      
! /*     ".globl    copy_perf_sse_gcc                  \n" */
! /*     "copy_perf_sse_gcc:                           \n" */
      
  /*     /\* head of function *\/ */
--- 227,232 ----
  /*     ".align    4                                  \n" /\* alignment *\/ */
      
! /*     ".globl    scalarplus_perf_sse_gcc            \n" */
! /*     "scalarplus_perf_sse_gcc:                     \n" */
      
  /*     /\* head of function *\/ */
***************
*** 174,402 ****
  /*     /\* get arguments *\/ */
  /*     "movl      8(%ebp), %esi                      \n" /\* argument pointer *\/ */
! /*     "movl      1*T_INT(%esi), %ebx                \n" /\* in1 *\/ */
! /*     "movl      2*T_INT(%esi), %edx                \n" /\* out *\/ */
! /*     "movl      3*T_INT(%esi), %ecx                \n" /\* n *\/ */
  /*     "shrl      $4, %ecx                           \n" /\* divide by 16 *\/ */
  
! /*     /\* loop: *out = *in *\/ */
! /*     "cpsg_loop:                                   \n" */
! /*     "movaps    (%ebx), %xmm0                      \n" */
! /*     "movaps    %xmm0, (%edx)                      \n" */
! /*     "movaps    4*T_FLOAT(%ebx), %xmm1             \n" */
! /*     "movaps    %xmm1, 4*T_FLOAT(%edx)             \n" */
      
! /*     "movaps    8*T_FLOAT(%ebx), %xmm2             \n" */
! /*     "movaps    %xmm2, 8*T_FLOAT(%edx)             \n" */
! /*     "movaps    12*T_FLOAT(%ebx), %xmm3            \n" */
! /*     "movaps    %xmm3, 12*T_FLOAT(%edx)            \n" */
!      
! /*     "addl      $64, %ebx                          \n" /\* in1 +=16 *\/ */
! /*     "addl      $64, %edx                          \n" /\* out +=16 *\/ */
! /*     "loop      cpsg_loop                          \n" */
      
! /*     /\* return w+4; *\/ */
  /*     "movl      -4(%ebp), %ebx                     \n" */
  /*     "movl      -8(%ebp), %esi                     \n" */
  /*     "movl      8(%ebp), %eax                      \n" */
! /*     "addl      $16, %eax                          \n" */
  /*     "movl      %ebp, %esp                         \n" */
  /*     "popl      %ebp                               \n" */
  /*     "ret                                          \n" */
  
- 
  /*     ".align    4                                  \n" /\* alignment *\/ */
! /*     ".type     copy_perf_sse_gcc, @function       \n" */
! /*     ); */
! 
! 
! /* sig_tilde_perf_sse_gcc(t_int * w) */
! asm(
!     ".set T_FLOAT,4                               \n"
!     ".set T_INT,4                                 \n"
!     
!     /* header */
!     ".text                                        \n"
!     ".align    4                                  \n" /* alignment */
!     
!     ".globl    sig_tilde_perf_sse_gcc             \n"
!     "sig_tilde_perf_sse_gcc:                      \n"
!     
!     /* head of function */
!     "pushl     %ebp                               \n"
!     "movl      %esp, %ebp                         \n"
!     "pushl     %esi                               \n"
!     "movl      %esi, -4(%ebp)                     \n"
! 
!     /* get arguments */
!     "movl      8(%ebp), %esi                      \n" /* argument pointer */
!     "movl      2*T_INT(%esi), %edx                \n" /* out */
!     "movl      1*T_INT(%esi), %eax                \n" /* f */
!     "movl      3*T_INT(%esi), %ecx                \n" /* n */
! 
!     /* set registers to f */
!     "movss     (%eax), %xmm0                      \n"
!     "shufps    $0, %xmm0, %xmm0                   \n"
!     "movl      12(%esi), %ecx                     \n"
!     "shrl      $4, %ecx                           \n"  /* divide by 16 */
!     
!     /* loop: *out = f */
!     "sigtpsg_loop:                                \n"
!     "movaps    %xmm0, (%edx)                      \n"
!     "movaps    %xmm0, 16(%edx)                    \n"
!     "movaps    %xmm0, 32(%edx)                    \n"
!     "movaps    %xmm0, 48(%edx)                    \n"
!     "addl      $64, %edx                          \n" /* out+=16 */
!     "loop      sigtpsg_loop                       \n"
! 
!     /* return w+4; */
!     "movl      -4(%ebp), %esi                     \n"
!     "movl      8(%ebp), %eax                      \n"
!     "addl      $16, %eax                          \n"
!     "movl      %ebp, %esp                         \n"
!     "popl      %ebp                               \n"
!     "ret                                          \n"
! 
!     ".align    4                                  \n" /* alignment */
!     ".type     sig_tilde_perf_sse_gcc, @function  \n"
! 
!     );
!     
! 
! 
! /* plus_perf_sse_gcc (t_int * w)*/
! asm(
!     ".set T_FLOAT,4                               \n"
!     ".set T_INT,4                                 \n"
! 
!     /* header */
!     ".text                                        \n"
!     ".align    4                                  \n" /* alignment */
!     
!     ".globl    plus_perf_sse_gcc                  \n"
!     "plus_perf_sse_gcc:                           \n"
!     
!     /* head of function */
!     "pushl     %ebp                               \n"
!     "movl      %esp, %ebp                         \n"
!     "subl      $8, %esp                           \n"
!     "movl      %ebx, -4(%ebp)                     \n"
!     "movl      %esi, -8(%ebp)                     \n"
! 
!     /* get arguments */
!     "movl      8(%ebp), %esi                      \n" /* argument vector */
!     
!     "movl      1*T_INT(%esi), %eax                \n" /* in1 */
!     "movl      2*T_INT(%esi), %ebx                \n" /* in2 */
!     "movl      3*T_INT(%esi), %edx                \n" /* out */
!     "movl      4*T_INT(%esi), %ecx                \n" /* n */
!     "shrl      $4, %ecx                           \n" /* divide by 16 */
!     "xorl      %esi, %esi                         \n" /* reset index */
!     
!     /* loop: *out = *in1 + *in2 */
!     "ppsg_loop:                                   \n"
!     "movaps    (%eax,%esi), %xmm0                 \n"
!     "movaps    (%ebx,%esi), %xmm1                 \n"
!     "addps     %xmm1, %xmm0                       \n"
!     "movaps    %xmm0, (%edx,%esi)                 \n"
!     
!     "movaps    4*T_FLOAT(%eax,%esi), %xmm2        \n"
!     "movaps    4*T_FLOAT(%ebx,%esi), %xmm3        \n"
!     "addps     %xmm3, %xmm2                       \n"
!     "movaps    %xmm2, 4*T_FLOAT(%edx,%esi)        \n"
!     
!     "movaps    8*T_FLOAT(%eax,%esi), %xmm4        \n"
!     "movaps    8*T_FLOAT(%ebx,%esi), %xmm5        \n"
!     "addps     %xmm5, %xmm4                       \n"
!     "movaps    %xmm4, 8*T_FLOAT(%edx,%esi)        \n"
!     
!     "movaps    12*T_FLOAT(%eax,%esi), %xmm6       \n"
!     "movaps    12*T_FLOAT(%ebx,%esi), %xmm7       \n"
!     "addps     %xmm7, %xmm6                       \n"
!     "movaps    %xmm6, 12*T_FLOAT(%edx,%esi)       \n"
! 
!     "addl      $64, %esi                          \n" /* out+=16; */
!     "loop      ppsg_loop                          \n"
! 
!     /* return w+5; */
!     "movl      -4(%ebp), %ebx                     \n"
!     "movl      -8(%ebp), %esi                     \n"
!     "movl      8(%ebp), %eax                      \n"
!     "addl      $20, %eax                          \n"
!     "movl      %ebp, %esp                         \n"
!     "popl      %ebp                               \n"
!     "ret                                          \n"
! 
! 
!     ".align    4                                  \n" /* alignment */
!     ".type     plus_perf_sse_gcc, @function       \n"
! 
!     );
! 
! /* scalarplus_perf_sse_gcc(t_int *w) */
! asm(
!     ".set T_FLOAT,4                               \n"
!     ".set T_INT,4                                 \n"
! 
!     /* header */
!     ".text                                        \n"
!     ".align    4                                  \n" /* alignment */
!     
!     ".globl    scalarplus_perf_sse_gcc            \n"
!     "scalarplus_perf_sse_gcc:                     \n"
!     
!     /* head of function */
!     "pushl     %ebp                               \n"
!     "movl      %esp, %ebp                         \n"
!     "subl      $8, %esp                           \n"
!     "movl      %ebx, -4(%ebp)                     \n"
!     "movl      %esi, -8(%ebp)                     \n"
! 
!     /* get arguments */
!     "movl      8(%ebp), %esi                      \n" /* argument pointer */
!     "movl      T_INT(%esi), %ebx                  \n" /* in */
!     "movl      3*T_INT(%esi), %edx                \n" /* out */
!     "movl      2*T_INT(%esi), %eax                \n" /* value */
!     "movl      4*T_INT(%esi), %ecx                \n" /* n */
!     
!     "movss     (%eax), %xmm0                      \n"
!     "shufps    $0, %xmm0, %xmm0                   \n"
!     "shrl      $4, %ecx                           \n" /* divide by 16 */
! 
!     /* loop: *out = *in + value */
!     "sppsg_loop:                                  \n"
!     
!     "movaps    (%ebx), %xmm1                      \n"
!     "addps     %xmm0, %xmm1                       \n"
!     "movaps    %xmm1, (%edx)                      \n"
!     
!     "movaps    4*T_FLOAT(%ebx), %xmm2             \n"
!     "addps     %xmm0, %xmm2                       \n"
!     "movaps    %xmm2, 4*T_FLOAT(%edx)             \n"
! 
!     "movaps    8*T_FLOAT(%ebx), %xmm3             \n"
!     "addps     %xmm0, %xmm3                       \n"
!     "movaps    %xmm3, 8*T_FLOAT(%edx)             \n"
! 
!     "movaps    12*T_FLOAT(%ebx), %xmm4            \n"
!     "addps     %xmm0, %xmm4                       \n"
!     "movaps    %xmm4, 12*T_FLOAT(%edx)            \n"
! 
!     "addl      $64, %ebx                          \n" /* in += 16 */
!     "addl      $64, %edx                          \n" /* out += 16 */
!     "loop      sppsg_loop                         \n"
! 
!     /* return w+5; */
!     "movl      -4(%ebp), %ebx                     \n"
!     "movl      -8(%ebp), %esi                     \n"
!     "movl      8(%ebp), %eax                      \n"
!     "addl      $20, %eax                          \n"
!     "movl      %ebp, %esp                         \n"
!     "popl      %ebp                               \n"
!     "ret                                          \n"
! 
!     ".align    4                                  \n" /* alignment */
!     ".type     scalarplus_perf_sse_gcc, @function \n"
  
!     );
  
  
--- 239,287 ----
  /*     /\* get arguments *\/ */
  /*     "movl      8(%ebp), %esi                      \n" /\* argument pointer *\/ */
! /*     "movl      T_INT(%esi), %ebx                  \n" /\* in *\/ */
! /*     "movl      3*T_INT(%esi), %edx                \n" /\* out *\/ */
! /*     "movl      2*T_INT(%esi), %eax                \n" /\* value *\/ */
! /*     "movl      4*T_INT(%esi), %ecx                \n" /\* n *\/ */
!     
! /*     "movss     (%eax), %xmm0                      \n" */
! /*     "shufps    $0, %xmm0, %xmm0                   \n" */
  /*     "shrl      $4, %ecx                           \n" /\* divide by 16 *\/ */
  
! /*     /\* loop: *out = *in + value *\/ */
! /*     "sppsg_loop:                                  \n" */
      
! /*     "movaps    (%ebx), %xmm1                      \n" */
! /*     "addps     %xmm0, %xmm1                       \n" */
! /*     "movaps    %xmm1, (%edx)                      \n" */
      
! /*     "movaps    4*T_FLOAT(%ebx), %xmm2             \n" */
! /*     "addps     %xmm0, %xmm2                       \n" */
! /*     "movaps    %xmm2, 4*T_FLOAT(%edx)             \n" */
! 
! /*     "movaps    8*T_FLOAT(%ebx), %xmm3             \n" */
! /*     "addps     %xmm0, %xmm3                       \n" */
! /*     "movaps    %xmm3, 8*T_FLOAT(%edx)             \n" */
! 
! /*     "movaps    12*T_FLOAT(%ebx), %xmm4            \n" */
! /*     "addps     %xmm0, %xmm4                       \n" */
! /*     "movaps    %xmm4, 12*T_FLOAT(%edx)            \n" */
! 
! /*     "addl      $64, %ebx                          \n" /\* in += 16 *\/ */
! /*     "addl      $64, %edx                          \n" /\* out += 16 *\/ */
! /*     "loop      sppsg_loop                         \n" */
! 
! /*     /\* return w+5; *\/ */
  /*     "movl      -4(%ebp), %ebx                     \n" */
  /*     "movl      -8(%ebp), %esi                     \n" */
  /*     "movl      8(%ebp), %eax                      \n" */
! /*     "addl      $20, %eax                          \n" */
  /*     "movl      %ebp, %esp                         \n" */
  /*     "popl      %ebp                               \n" */
  /*     "ret                                          \n" */
  
  /*     ".align    4                                  \n" /\* alignment *\/ */
! /*     ".type     scalarplus_perf_sse_gcc, @function \n" */
  
! /*     ); */
  
  

Index: m_simd_sse_gcc.h
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/m_simd_sse_gcc.h,v
retrieving revision 1.1.2.10
retrieving revision 1.1.2.11
diff -C2 -d -r1.1.2.10 -r1.1.2.11
*** m_simd_sse_gcc.h	9 Oct 2004 07:44:29 -0000	1.1.2.10
--- m_simd_sse_gcc.h	9 Oct 2004 11:56:16 -0000	1.1.2.11
***************
*** 10,14 ****
  
  /* SIMD functions for SSE with GCC */
! //void zerovec_sse_gcc(t_float *dst,int n);
  void setvec_sse_gcc(t_float *dst,t_float v,int n);
  void copyvec_sse_gcc(t_float *dst,const t_float *src,int n);
--- 10,14 ----
  
  /* SIMD functions for SSE with GCC */
! void zerovec_sse_gcc(t_float *dst,int n);
  void setvec_sse_gcc(t_float *dst,t_float v,int n);
  void copyvec_sse_gcc(t_float *dst,const t_float *src,int n);
***************
*** 39,46 ****
  
  
! #define zerovec                 zerovec_8 //sse_gcc
  #define setvec                  setvec_sse_gcc
  #define copyvec                 copyvec_sse_gcc
! #define addvec                  addvec_8 /* SIMD not implemented */
  #define testcopyvec             testcopyvec_8 /* SIMD not implemented */
  #define testaddvec              testaddvec_8 /* SIMD not implemented */
--- 39,46 ----
  
  
! #define zerovec                 zerovec_sse_gcc
  #define setvec                  setvec_sse_gcc
  #define copyvec                 copyvec_sse_gcc
! #define addvec                  addvec_sse_gcc
  #define testcopyvec             testcopyvec_8 /* SIMD not implemented */
  #define testaddvec              testaddvec_8 /* SIMD not implemented */





More information about the Pd-cvs mailing list