[PD-cvs] externals/grill/flext/source flsimd.cpp,1.11,1.12
Thomas Grill
xovo at users.sourceforge.net
Fri Apr 2 04:21:22 CEST 2004
Update of /cvsroot/pure-data/externals/grill/flext/source
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv5343/source
Modified Files:
flsimd.cpp
Log Message:
""
Index: flsimd.cpp
===================================================================
RCS file: /cvsroot/pure-data/externals/grill/flext/source/flsimd.cpp,v
retrieving revision 1.11
retrieving revision 1.12
diff -C2 -d -r1.11 -r1.12
*** flsimd.cpp 30 Dec 2003 03:32:46 -0000 1.11
--- flsimd.cpp 2 Apr 2004 02:21:20 -0000 1.12
***************
*** 507,510 ****
--- 507,660 ----
}
+ #if defined(FLEXT_USE_SIMD) && FLEXT_CPU == FLEXT_CPU_PPC && defined(__ALTIVEC__)
+ // because of some frame code, Altivec stuff should be in separate functions....
+
+ static const vector float zero = (vector float)(0);
+
+ static void SetAltivec(t_sample *dst,int cnt,t_sample s)
+ {
+ vector float svec = LoadValue(s);
+ int n = cnt>>4;
+ cnt -= n<<4;
+
+ while(n--) {
+ vec_st(svec,0,dst);
+ vec_st(svec,16,dst);
+ vec_st(svec,32,dst);
+ vec_st(svec,48,dst);
+ dst += 16;
+ }
+
+ while(cnt--) *(dst++) = s;
+ }
+
+ static void MulAltivec(t_sample *dst,const t_sample *src,t_sample op,int cnt)
+ {
+ const vector float arg = LoadValue(op);
+ int n = cnt>>4;
+ cnt -= n<<4;
+
+ for(; n--; src += 16,dst += 16) {
+ vector float a1 = vec_ld( 0,src);
+ vector float a2 = vec_ld(16,src);
+ vector float a3 = vec_ld(32,src);
+ vector float a4 = vec_ld(48,src);
+
+ a1 = vec_madd(a1,arg,zero);
+ a2 = vec_madd(a2,arg,zero);
+ a3 = vec_madd(a3,arg,zero);
+ a4 = vec_madd(a4,arg,zero);
+
+ vec_st(a1, 0,dst);
+ vec_st(a2,16,dst);
+ vec_st(a3,32,dst);
+ vec_st(a4,48,dst);
+ }
+
+ while(cnt--) *(dst++) = *(src++)*op;
+ }
+
+ static void MulAltivec(t_sample *dst,const t_sample *src,const t_sample *op,int cnt)
+ {
+ int n = cnt>>4;
+ cnt -= n<<4;
+
+ for(; n--; src += 16,op += 16,dst += 16) {
+ vector float a1 = vec_ld( 0,src),b1 = vec_ld( 0,op);
+ vector float a2 = vec_ld(16,src),b2 = vec_ld(16,op);
+ vector float a3 = vec_ld(32,src),b3 = vec_ld(32,op);
+ vector float a4 = vec_ld(48,src),b4 = vec_ld(48,op);
+
+ a1 = vec_madd(a1,b1,zero);
+ a2 = vec_madd(a2,b2,zero);
+ a3 = vec_madd(a3,b3,zero);
+ a4 = vec_madd(a4,b4,zero);
+
+ vec_st(a1, 0,dst);
+ vec_st(a2,16,dst);
+ vec_st(a3,32,dst);
+ vec_st(a4,48,dst);
+ }
+ while(cnt--) *(dst++) = *(src++) * *(op++);
+ }
+
+ static void AddAltivec(t_sample *dst,const t_sample *src,t_sample op,int cnt)
+ {
+ const vector float arg = LoadValue(op);
+ int n = cnt>>4;
+ cnt -= n<<4;
+
+ for(; n--; src += 16,dst += 16) {
+ vector float a1 = vec_ld( 0,src);
+ vector float a2 = vec_ld(16,src);
+ vector float a3 = vec_ld(32,src);
+ vector float a4 = vec_ld(48,src);
+
+ a1 = vec_add(a1,arg);
+ a2 = vec_add(a2,arg);
+ a3 = vec_add(a3,arg);
+ a4 = vec_add(a4,arg);
+
+ vec_st(a1, 0,dst);
+ vec_st(a2,16,dst);
+ vec_st(a3,32,dst);
+ vec_st(a4,48,dst);
+ }
+
+ while(cnt--) *(dst++) = *(src++)+op;
+ }
+
+ static void AddAltivec(t_sample *dst,const t_sample *src,const t_sample *op,int cnt)
+ {
+ int n = cnt>>4;
+ cnt -= n<<4;
+
+ for(; n--; src += 16,op += 16,dst += 16) {
+ vector float a1 = vec_ld( 0,src),b1 = vec_ld( 0,op);
+ vector float a2 = vec_ld(16,src),b2 = vec_ld(16,op);
+ vector float a3 = vec_ld(32,src),b3 = vec_ld(32,op);
+ vector float a4 = vec_ld(48,src),b4 = vec_ld(48,op);
+
+ a1 = vec_add(a1,b1);
+ a2 = vec_add(a2,b2);
+ a3 = vec_add(a3,b3);
+ a4 = vec_add(a4,b4);
+
+ vec_st(a1, 0,dst);
+ vec_st(a2,16,dst);
+ vec_st(a3,32,dst);
+ vec_st(a4,48,dst);
+ }
+ while(cnt--) *(dst++) = *(src++) + *(op++);
+ }
+
+ static void ScaleAltivec(t_sample *dst,const t_sample *src,t_sample opmul,t_sample opadd,int cnt)
+ {
+ const vector float argmul = LoadValue(opmul);
+ const vector float argadd = LoadValue(opadd);
+ int n = cnt>>4;
+ cnt -= n<<4;
+
+ for(; n--; src += 16,dst += 16) {
+ vector float a1 = vec_ld( 0,src);
+ vector float a2 = vec_ld(16,src);
+ vector float a3 = vec_ld(32,src);
+ vector float a4 = vec_ld(48,src);
+
+ a1 = vec_madd(a1,argmul,argadd);
+ a2 = vec_madd(a2,argmul,argadd);
+ a3 = vec_madd(a3,argmul,argadd);
+ a4 = vec_madd(a4,argmul,argadd);
+
+ vec_st(a1, 0,dst);
+ vec_st(a2,16,dst);
+ vec_st(a3,32,dst);
+ vec_st(a4,48,dst);
+ }
+
+ while(cnt--) *(dst++) = *(src++)*opmul+opadd;
+ }
+ #endif
+
void flext::SetSamples(t_sample *dst,int cnt,t_sample s)
{
***************
*** 564,582 ****
else
#elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__ALTIVEC__)
! if(GetSIMDCapabilities()&simd_altivec && IsVectorAligned(dst)) {
! vector float svec = LoadValue(s);
! int n = cnt>>4;
! cnt -= n<<4;
!
! while(n--) {
! vec_st(svec,0,dst);
! vec_st(svec,16,dst);
! vec_st(svec,32,dst);
! vec_st(svec,48,dst);
! dst += 16;
! }
!
! while(cnt--) *(dst++) = s;
! }
else
#endif
--- 714,719 ----
else
#elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__ALTIVEC__)
! if(GetSIMDCapabilities()&simd_altivec && IsVectorAligned(dst))
! SetAltivec(dst,cnt,s);
else
#endif
***************
*** 699,727 ****
else
#elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__ALTIVEC__)
! if(GetSIMDCapabilities()&simd_altivec && IsVectorAligned(src) && IsVectorAligned(dst)) {
! const vector float arg = LoadValue(op);
! const vector float zero = (vector float)(0);
! int n = cnt>>4;
! cnt -= n<<4;
!
! for(; n--; src += 16,dst += 16) {
! vector float a1 = vec_ld( 0,src);
! vector float a2 = vec_ld(16,src);
! vector float a3 = vec_ld(32,src);
! vector float a4 = vec_ld(48,src);
!
! a1 = vec_madd(a1,arg,zero);
! a2 = vec_madd(a2,arg,zero);
! a3 = vec_madd(a3,arg,zero);
! a4 = vec_madd(a4,arg,zero);
!
! vec_st(a1, 0,dst);
! vec_st(a2,16,dst);
! vec_st(a3,32,dst);
! vec_st(a4,48,dst);
! }
!
! while(cnt--) *(dst++) = *(src++)*op;
! }
else
#endif // _MSC_VER
--- 836,841 ----
else
#elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__ALTIVEC__)
! if(GetSIMDCapabilities()&simd_altivec && IsVectorAligned(src) && IsVectorAligned(dst))
! MulAltivec(dst,src,op,cnt);
else
#endif // _MSC_VER
***************
*** 949,975 ****
else
#elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__ALTIVEC__)
! if(GetSIMDCapabilities()&simd_altivec && IsVectorAligned(src) && IsVectorAligned(op) && IsVectorAligned(dst)) {
! const vector float zero = (vector float)(0);
! int n = cnt>>4;
! cnt -= n<<4;
!
! for(; n--; src += 16,op += 16,dst += 16) {
! vector float a1 = vec_ld( 0,src),b1 = vec_ld( 0,op);
! vector float a2 = vec_ld(16,src),b2 = vec_ld(16,op);
! vector float a3 = vec_ld(32,src),b3 = vec_ld(32,op);
! vector float a4 = vec_ld(48,src),b4 = vec_ld(48,op);
!
! a1 = vec_madd(a1,b1,zero);
! a2 = vec_madd(a2,b2,zero);
! a3 = vec_madd(a3,b3,zero);
! a4 = vec_madd(a4,b4,zero);
!
! vec_st(a1, 0,dst);
! vec_st(a2,16,dst);
! vec_st(a3,32,dst);
! vec_st(a4,48,dst);
! }
! while(cnt--) *(dst++) = *(src++) * *(op++);
! }
else
#endif // _MSC_VER
--- 1063,1068 ----
else
#elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__ALTIVEC__)
! if(GetSIMDCapabilities()&simd_altivec && IsVectorAligned(src) && IsVectorAligned(op) && IsVectorAligned(dst))
! MulAltivec(dst,src,op,cnt);
else
#endif // _MSC_VER
***************
*** 1100,1127 ****
else
#elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__ALTIVEC__)
! if(GetSIMDCapabilities()&simd_altivec && IsVectorAligned(src) && IsVectorAligned(dst)) {
! const vector float arg = LoadValue(op);
! int n = cnt>>4;
! cnt -= n<<4;
!
! for(; n--; src += 16,dst += 16) {
! vector float a1 = vec_ld( 0,src);
! vector float a2 = vec_ld(16,src);
! vector float a3 = vec_ld(32,src);
! vector float a4 = vec_ld(48,src);
!
! a1 = vec_add(a1,arg);
! a2 = vec_add(a2,arg);
! a3 = vec_add(a3,arg);
! a4 = vec_add(a4,arg);
!
! vec_st(a1, 0,dst);
! vec_st(a2,16,dst);
! vec_st(a3,32,dst);
! vec_st(a4,48,dst);
! }
!
! while(cnt--) *(dst++) = *(src++)+op;
! }
else
#endif // _MSC_VER
--- 1193,1198 ----
else
#elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__ALTIVEC__)
! if(GetSIMDCapabilities()&simd_altivec && IsVectorAligned(src) && IsVectorAligned(dst))
! AddAltivec(dst,src,op,cnt);
else
#endif // _MSC_VER
***************
*** 1350,1375 ****
else
#elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__ALTIVEC__)
! if(GetSIMDCapabilities()&simd_altivec && IsVectorAligned(src) && IsVectorAligned(op) && IsVectorAligned(dst)) {
! int n = cnt>>4;
! cnt -= n<<4;
!
! for(; n--; src += 16,op += 16,dst += 16) {
! vector float a1 = vec_ld( 0,src),b1 = vec_ld( 0,op);
! vector float a2 = vec_ld(16,src),b2 = vec_ld(16,op);
! vector float a3 = vec_ld(32,src),b3 = vec_ld(32,op);
! vector float a4 = vec_ld(48,src),b4 = vec_ld(48,op);
!
! a1 = vec_add(a1,b1);
! a2 = vec_add(a2,b2);
! a3 = vec_add(a3,b3);
! a4 = vec_add(a4,b4);
!
! vec_st(a1, 0,dst);
! vec_st(a2,16,dst);
! vec_st(a3,32,dst);
! vec_st(a4,48,dst);
! }
! while(cnt--) *(dst++) = *(src++) + *(op++);
! }
else
#endif // _MSC_VER
--- 1421,1426 ----
else
#elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__ALTIVEC__)
! if(GetSIMDCapabilities()&simd_altivec && IsVectorAligned(src) && IsVectorAligned(op) && IsVectorAligned(dst))
! AddAltivec(dst,src,op,cnt);
else
#endif // _MSC_VER
***************
*** 1512,1540 ****
else
#elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__ALTIVEC__)
! if(GetSIMDCapabilities()&simd_altivec && IsVectorAligned(src) && IsVectorAligned(dst)) {
! const vector float argmul = LoadValue(opmul);
! const vector float argadd = LoadValue(opadd);
! int n = cnt>>4;
! cnt -= n<<4;
!
! for(; n--; src += 16,dst += 16) {
! vector float a1 = vec_ld( 0,src);
! vector float a2 = vec_ld(16,src);
! vector float a3 = vec_ld(32,src);
! vector float a4 = vec_ld(48,src);
!
! a1 = vec_madd(a1,argmul,argadd);
! a2 = vec_madd(a2,argmul,argadd);
! a3 = vec_madd(a3,argmul,argadd);
! a4 = vec_madd(a4,argmul,argadd);
!
! vec_st(a1, 0,dst);
! vec_st(a2,16,dst);
! vec_st(a3,32,dst);
! vec_st(a4,48,dst);
! }
!
! while(cnt--) *(dst++) = *(src++)*opmul+opadd;
! }
else
#endif // _MSC_VER
--- 1563,1568 ----
else
#elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__ALTIVEC__)
! if(GetSIMDCapabilities()&simd_altivec && IsVectorAligned(src) && IsVectorAligned(dst))
! ScaleAltivec(dst,src,opmul,opadd,cnt);
else
#endif // _MSC_VER
More information about the Pd-cvs
mailing list