[PD-cvs] pd/src s_audio_asio.cpp,1.1.4.4,1.1.4.5

Thomas Grill xovo at users.sourceforge.net
Sun Nov 7 18:01:44 CET 2004


Update of /cvsroot/pure-data/pd/src
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3643/src

Modified Files:
      Tag: devel_0_38
	s_audio_asio.cpp 
Log Message:
aligned buffers and SIMD copying/zeroing
more code cleanups

Index: s_audio_asio.cpp
===================================================================
RCS file: /cvsroot/pure-data/pd/src/Attic/s_audio_asio.cpp,v
retrieving revision 1.1.4.4
retrieving revision 1.1.4.5
diff -C2 -d -r1.1.4.4 -r1.1.4.5
*** s_audio_asio.cpp	7 Nov 2004 13:47:05 -0000	1.1.4.4
--- s_audio_asio.cpp	7 Nov 2004 17:01:34 -0000	1.1.4.5
***************
*** 23,26 ****
--- 23,27 ----
  extern "C" {
  #include "s_stuff.h"
+ #include "m_simd.h"
  }
  
***************
*** 82,89 ****
  static void Int32tofloat32_S(void* inbuffer, void* outbuffer, long frames);
  
- /* Swap LSB to MSB and vice versa */
- #define SWAPLONG(v) ((((v)>>24)&0xFF)|(((v)>>8)&0xFF00)|(((v)&0xFF00)<<8)|(((v)&0xFF)<<24))
- #define SWAPSHORT(v) ((((v)>>8)&0xFF)|(((v)&0xFF)<<8))
- 
  typedef void converter_t(void* inbuffer, void* outbuffer, long frames);
  
--- 83,86 ----
***************
*** 358,365 ****
  
  	/* calculate ringbuffer length */
- 	asio_ringbuffer_length = asio_bufsize * DEFDACBLKSIZE;
- 	
  	/* a strange way to find the least common multiple, 
  	 * but works, since DEFDACBLKSIZE (expt 2 x)        */
  	while ( !(asio_ringbuffer_length % DEFDACBLKSIZE) && 
  			!(asio_ringbuffer_length % asio_bufsize))
--- 355,362 ----
  
  	/* calculate ringbuffer length */
  	/* a strange way to find the least common multiple, 
  	 * but works, since DEFDACBLKSIZE (expt 2 x)        */
+ 
+ 	asio_ringbuffer_length = asio_bufsize * DEFDACBLKSIZE;
  	while ( !(asio_ringbuffer_length % DEFDACBLKSIZE) && 
  			!(asio_ringbuffer_length % asio_bufsize))
***************
*** 378,382 ****
  	for (i = 0; i != channels; ++i)
  	{	
! 		asio_ringbuffer[i] = (t_sample*)getbytes(asio_ringbuffer_length * sizeof (t_sample));
  		if (!asio_ringbuffer[i])
  			error("ASIO: couldn't allocate ASIO ringbuffer");
--- 375,379 ----
  	for (i = 0; i != channels; ++i)
  	{	
! 		asio_ringbuffer[i] = (t_sample*)getalignedbytes(asio_ringbuffer_length * sizeof (t_sample));
  		if (!asio_ringbuffer[i])
  			error("ASIO: couldn't allocate ASIO ringbuffer");
***************
*** 384,397 ****
  	}
  	
! 
! 	/* initialize ringbuffer stuff */
  	asio_ringbuffer_inoffset = asio_ringbuffer_outoffset = 0;
  
  	if (ASIOStart() == ASE_OK)
  		post("ASIO: started");
  	else
  		post("ASIO: couldn't start");
- 	
- 	return;
  }
  
--- 381,392 ----
  	}
  	
! 	/* initialize ringbuffer pointers */
  	asio_ringbuffer_inoffset = asio_ringbuffer_outoffset = 0;
  
+ 
  	if (ASIOStart() == ASE_OK)
  		post("ASIO: started");
  	else
  		post("ASIO: couldn't start");
  }
  
***************
*** 409,418 ****
      if(asio_useoutputready) 
      {
!         // the DMA buffers would be playing past ASIOStop
          // -> clear output buffers and notify driver
  #if 0
!         // slow method
  	    for(i = 0; i != sys_outchannels; ++i)
! 		    memset(asio_ringbuffer[i], 0, asio_ringbuffer_length * sizeof (t_sample));
          // wait for bufferswitch to process silence (twice)
  	    pthread_cond_wait(&asio_ringbuf_cond, &asio_ringbuf_mutex);  
--- 404,413 ----
      if(asio_useoutputready) 
      {
!         // the DMA buffers would be played past ASIOStop
          // -> clear output buffers and notify driver
  #if 0
!         // slow, blocking method
  	    for(i = 0; i != sys_outchannels; ++i)
! 		    zerovec_simd(asio_ringbuffer[i], asio_ringbuffer_length);
          // wait for bufferswitch to process silence (twice)
  	    pthread_cond_wait(&asio_ringbuf_cond, &asio_ringbuf_mutex);  
***************
*** 442,446 ****
  		
  		for (i = 0; i != channels; i++)
! 			freebytes(asio_ringbuffer[i], 
  					  asio_ringbuffer_length * sizeof (t_sample));
  		freebytes(asio_ringbuffer, channels * sizeof (t_sample *));
--- 437,441 ----
  		
  		for (i = 0; i != channels; i++)
! 			freealignedbytes(asio_ringbuffer[i], 
  					  asio_ringbuffer_length * sizeof (t_sample));
  		freebytes(asio_ringbuffer, channels * sizeof (t_sample *));
***************
*** 468,472 ****
  		asio_driver = NULL;
  	}
- 	return;
  }
  
--- 463,466 ----
***************
*** 489,497 ****
  }
  
! 
  
  /* called on every dac~ send
   * todo: 
-  * - use vectorized functions 
   * - function pointer to avoid segfaults */      
  int asio_send_dacs(void)
--- 483,496 ----
  }
  
! #if SIMD_CHKCNT(DEFDACBLKSIZE)
! inline void copyblock(t_sample *dst,t_sample *src) { copyvec_simd(dst,src,DEFDACBLKSIZE); }
! inline void zeroblock(t_sample *dst) { zerovec_simd(dst,DEFDACBLKSIZE); }
! #else
! inline void copyblock(t_sample *dst,t_sample *src) { memcpy(dst,src,DEFDACBLKSIZE*sizeof(t_sample)); }
! inline void zeroblock(t_sample *dst) { memset(dst,0,DEFDACBLKSIZE*sizeof(t_sample)); }
! #endif
  
  /* called on every dac~ send
   * todo: 
   * - function pointer to avoid segfaults */      
  int asio_send_dacs(void)
***************
*** 511,521 ****
  	/* send sound to ringbuffer */
  	sp = sys_soundout;
! 	for (i = 0; i < sys_outchannels; i++)
  	{
! 		memcpy(asio_ringbuffer[i] + asio_ringbuffer_inoffset, sp,
! 			   DEFDACBLKSIZE*sizeof(t_sample));
! 		memset(sp, 0, DEFDACBLKSIZE*sizeof(t_sample));
! 		sp+=DEFDACBLKSIZE;
! 	}
  	
  	/* get sound from ringbuffer */
--- 510,519 ----
  	/* send sound to ringbuffer */
  	sp = sys_soundout;
!     for (i = 0; i < sys_outchannels; i++)
  	{
! 		copyblock(asio_ringbuffer[i] + asio_ringbuffer_inoffset, sp);
!        	zeroblock(sp);
!         sp+=DEFDACBLKSIZE;
!     }
  	
  	/* get sound from ringbuffer */
***************
*** 529,542 ****
   		int offset = 2 * asio_bufsize;
   		if (asio_ringbuffer_inoffset <=  offset )
! 			memcpy(sp, asio_ringbuffer[i+j] + asio_ringbuffer_length +
!  				   asio_ringbuffer_inoffset - offset ,
!  				   DEFDACBLKSIZE*sizeof(t_sample));
   		else
!  			memcpy(sp, asio_ringbuffer[i+j] + asio_ringbuffer_inoffset - offset,
!  				   DEFDACBLKSIZE*sizeof(t_sample));
! #else /* working but higer latency */
! 		memcpy(sp, asio_ringbuffer[i+j] + asio_ringbuffer_inoffset,
! 			   DEFDACBLKSIZE*sizeof(t_sample));
! 		
  #endif
   		sp+=DEFDACBLKSIZE;
--- 527,536 ----
   		int offset = 2 * asio_bufsize;
   		if (asio_ringbuffer_inoffset <=  offset )
! 			copyblock(sp, asio_ringbuffer[i+j] + asio_ringbuffer_length +
!  				   asio_ringbuffer_inoffset - offset);
   		else
!  			copyblock(sp, asio_ringbuffer[i+j] + asio_ringbuffer_inoffset - offset);
! #else /* working but higher latency */
! 		copyblock(sp, asio_ringbuffer[i+j] + asio_ringbuffer_inoffset);
  #endif
   		sp+=DEFDACBLKSIZE;
***************
*** 545,549 ****
  	asio_ringbuffer_inoffset += DEFDACBLKSIZE;
  
- 	
  	if (asio_ringbuffer_inoffset >= asio_ringbuffer_outoffset + asio_bufsize)
  	{
--- 539,542 ----
***************
*** 590,603 ****
  	{
  	case kAsioSelectorSupported:
! 		return 1L;
  	case kAsioEngineVersion:
  		return ASIOVERSION;
  	case kAsioResetRequest:
  		/* how to handle this without changing the dsp scheduler? */
  		return 1L;
  	case kAsioBufferSizeChange:
  		/* todo */
  		return 0L; /* should be 1 */
  	case kAsioResyncRequest:
  		return 0L;
  	case kAsioLatenciesChanged: 
--- 583,604 ----
  	{
  	case kAsioSelectorSupported:
!         if(
!             value == kAsioResetRequest || 
!             value == kAsioSupportsTimeInfo
!         )
!             return 1L;
! 		return 0L;
  	case kAsioEngineVersion:
  		return ASIOVERSION;
  	case kAsioResetRequest:
  		/* how to handle this without changing the dsp scheduler? */
+         post("ASIO: Reset request");
  		return 1L;
  	case kAsioBufferSizeChange:
  		/* todo */
+         post("ASIO: Buffer size changed");
  		return 0L; /* should be 1 */
  	case kAsioResyncRequest:
+         post("ASIO: Resync request");
  		return 0L;
  	case kAsioLatenciesChanged: 
***************
*** 650,653 ****
--- 651,655 ----
  		}
  	}
+ 
  	pthread_cond_broadcast(&asio_ringbuf_cond);
  
***************
*** 745,749 ****
  		return Int32tofloat32_S;
  	case ASIOSTFloat32MSB:		// IEEE 754 32 bit float, as found on Intel x86 architecture
! 		float32tofloat32_S;
  	case ASIOSTFloat32LSB:		// IEEE 754 32 bit float, as found on Intel x86 architecture
          return float32tofloat32;
--- 747,751 ----
  		return Int32tofloat32_S;
  	case ASIOSTFloat32MSB:		// IEEE 754 32 bit float, as found on Intel x86 architecture
! 		return float32tofloat32_S;
  	case ASIOSTFloat32LSB:		// IEEE 754 32 bit float, as found on Intel x86 architecture
          return float32tofloat32;
***************
*** 849,855 ****
  #define SCALE_INT32 2147483647.f  /* (- (expt 2 31) 1) */
  
  static void float32tofloat32(void* inbuffer, void* outbuffer, long frames)
  {
!     memcpy (outbuffer, inbuffer, frames* sizeof (float));
  }
  
--- 851,872 ----
  #define SCALE_INT32 2147483647.f  /* (- (expt 2 31) 1) */
  
+ 
+ /* Swap LSB to MSB and vice versa */
+ inline __int32 swaplong(__int32 v) 
+ {
+     return ((v>>24)&0xFF)|((v>>8)&0xFF00)|((v&0xFF00)<<8)|((v&0xFF)<<24);
+ }
+ 
+ inline __int16 swapshort(__int16 v) 
+ {
+     return ((v>>8)&0xFF)|((v&0xFF)<<8);
+ }
+ 
  static void float32tofloat32(void* inbuffer, void* outbuffer, long frames)
  {
!     if(SIMD_CHECK2(frames,inbuffer,outbuffer))
!         copyvec_simd((float *)outbuffer,(float *)inbuffer,frames);
!     else
!         memcpy (outbuffer, inbuffer, frames* sizeof (float));
  }
  
***************
*** 859,871 ****
  	__int32* out = (__int32*)outbuffer;
  	while (frames--)
! 	{
! 		__int32 reverse = *(in++);
!         *out++ = SWAPLONG(reverse);
! 	}
  }
  
  static void float32tofloat64(void* inbuffer, void* outbuffer, long frames)
  {
!     float *in = (float *)inbuffer;
  	double* out = (double*)outbuffer;
  	while (frames--) *(out++) = *(in++);
--- 876,885 ----
  	__int32* out = (__int32*)outbuffer;
  	while (frames--)
!         *out++ = swaplong(*(in++));
  }
  
  static void float32tofloat64(void* inbuffer, void* outbuffer, long frames)
  {
!     const float *in = (const float *)inbuffer;
  	double* out = (double*)outbuffer;
  	while (frames--) *(out++) = *(in++);
***************
*** 874,878 ****
  static void float64tofloat32(void* inbuffer, void* outbuffer, long frames)
  {
!     double *in = (double *)inbuffer;
  	float *out = (float *)outbuffer;
  	while (frames--) *(out++) = *(in++);
--- 888,892 ----
  static void float64tofloat32(void* inbuffer, void* outbuffer, long frames)
  {
!     const double *in = (const double *)inbuffer;
  	float *out = (float *)outbuffer;
  	while (frames--) *(out++) = *(in++);
***************
*** 881,885 ****
  static void float32toInt16(void* inbuffer, void* outbuffer, long frames)
  {
!     float *in = (float *)inbuffer;
  	__int16* out = (__int16*)outbuffer;
  	while (frames--)
--- 895,899 ----
  static void float32toInt16(void* inbuffer, void* outbuffer, long frames)
  {
!     const float *in = (const float *)inbuffer;
  	__int16* out = (__int16*)outbuffer;
  	while (frames--)
***************
*** 897,901 ****
  			fld o
  			fistp srt
! 		};
  #endif
  		*out++ = srt;
--- 911,915 ----
  			fld o
  			fistp srt
! 		}
  #endif
  		*out++ = srt;
***************
*** 905,909 ****
  static void Int16tofloat32(void* inbuffer, void* outbuffer, long frames)
  {
! 	__int16* in = (__int16*)inbuffer;
      float *out = (float *)outbuffer;
  	while (frames--)
--- 919,923 ----
  static void Int16tofloat32(void* inbuffer, void* outbuffer, long frames)
  {
! 	const __int16* in = (const __int16*)inbuffer;
      float *out = (float *)outbuffer;
  	while (frames--)
***************
*** 913,917 ****
  static void float32toInt24(void* inbuffer, void* outbuffer, long frames)
  {
!     float *in = (float *)inbuffer;
  	__int32* out = (__int32*)outbuffer;
  	while (frames--)
--- 927,931 ----
  static void float32toInt24(void* inbuffer, void* outbuffer, long frames)
  {
!     const float *in = (const float *)inbuffer;
  	__int32* out = (__int32*)outbuffer;
  	while (frames--)
***************
*** 921,925 ****
          float o = *(in++) * SCALE_INT24;
  #ifdef __GNUC__
! 		*out++ = (__int32)lrintf(o);
  #else
  		__int32 intg;
--- 935,939 ----
          float o = *(in++) * SCALE_INT24;
  #ifdef __GNUC__
! 		__int32 intg = (__int32)lrintf(o);
  #else
  		__int32 intg;
***************
*** 928,934 ****
  			fld o
  			fistp intg
! 		};
! 		*out++ = intg;
  #endif
  	}
  }
--- 942,948 ----
  			fld o
  			fistp intg
! 		}
  #endif
+ 		*(out++) = intg;
  	}
  }
***************
*** 936,940 ****
  static void Int24tofloat32(void* inbuffer, void* outbuffer, long frames)
  {
! 	__int32* in = (__int32*)inbuffer;
      float *out = (float *)outbuffer;
  	while (frames--)
--- 950,954 ----
  static void Int24tofloat32(void* inbuffer, void* outbuffer, long frames)
  {
! 	const __int32* in = (const __int32*)inbuffer;
      float *out = (float *)outbuffer;
  	while (frames--)
***************
*** 944,948 ****
  static void float32toInt32(void* inbuffer, void* outbuffer, long frames)
  {
!     float *in = (float *)inbuffer;
  	__int32* out = (__int32*)outbuffer;
  	while (frames--)
--- 958,962 ----
  static void float32toInt32(void* inbuffer, void* outbuffer, long frames)
  {
!     const float *in = (const float *)inbuffer;
  	__int32* out = (__int32*)outbuffer;
  	while (frames--)
***************
*** 959,963 ****
  			fld o
  			fistp lng
! 		};
  		*out++ = lng;
  #endif
--- 973,977 ----
  			fld o
  			fistp lng
! 		}
  		*out++ = lng;
  #endif
***************
*** 967,971 ****
  static void Int32tofloat32(void* inbuffer, void* outbuffer, long frames)
  {
! 	__int32* in = (__int32*)inbuffer;
      float *out = (float *)outbuffer;
  	while (frames--)
--- 981,985 ----
  static void Int32tofloat32(void* inbuffer, void* outbuffer, long frames)
  {
! 	const __int32* in = (const __int32*)inbuffer;
      float *out = (float *)outbuffer;
  	while (frames--)
***************
*** 975,979 ****
  static void float32toInt16_S(void* inbuffer, void* outbuffer, long frames)
  {
!     float *in = (float *)inbuffer;
  	__int16* out = (__int16*)outbuffer;
  	while (frames--)
--- 989,993 ----
  static void float32toInt16_S(void* inbuffer, void* outbuffer, long frames)
  {
!     const float *in = (const float *)inbuffer;
  	__int16* out = (__int16*)outbuffer;
  	while (frames--)
***************
*** 991,997 ****
  			fld o
  			fistp reverse
! 		};
  #endif
!         *out++ = SWAPSHORT(reverse);
  	}
  }
--- 1005,1011 ----
  			fld o
  			fistp reverse
! 		}
  #endif
!         *out++ = swapshort(reverse);
  	}
  }
***************
*** 999,1014 ****
  static void Int16tofloat32_S(void* inbuffer, void* outbuffer, long frames)
  {
! 	__int16* in = (__int16*)inbuffer;
      float *out = (float *)outbuffer;
  	while (frames--)
! 	{
!         __int16 shin = *in++;
! 		*(out++) = (float)SWAPSHORT(shin) * (1.f / SCALE_INT16);
! 	}
  }
  
  static void float32toInt24_S(void* inbuffer, void* outbuffer, long frames)
  {
!     float *in = (float *)inbuffer;
  	char* out = (char*)outbuffer;
  	while (frames--)
--- 1013,1025 ----
  static void Int16tofloat32_S(void* inbuffer, void* outbuffer, long frames)
  {
! 	const __int16* in = (const __int16*)inbuffer;
      float *out = (float *)outbuffer;
  	while (frames--)
! 		*(out++) = (float)swapshort(*(in++)) * (1.f / SCALE_INT16);
  }
  
  static void float32toInt24_S(void* inbuffer, void* outbuffer, long frames)
  {
!     const float *in = (const float *)inbuffer;
  	char* out = (char*)outbuffer;
  	while (frames--)
***************
*** 1025,1029 ****
  			fld o
  			fistp reverse
! 		};
  #endif
          out[2] = ((char *)&reverse)[0];
--- 1036,1040 ----
  			fld o
  			fistp reverse
! 		}
  #endif
          out[2] = ((char *)&reverse)[0];
***************
*** 1036,1040 ****
  static void Int24tofloat32_S(void* inbuffer, void* outbuffer, long frames)
  {
! 	char* in = (char*)inbuffer;
      float *out = (float *)outbuffer;
      __int32 d = 0;
--- 1047,1051 ----
  static void Int24tofloat32_S(void* inbuffer, void* outbuffer, long frames)
  {
! 	const char* in = (const char*)inbuffer;
      float *out = (float *)outbuffer;
      __int32 d = 0;
***************
*** 1051,1055 ****
  static void float32toInt32_S(void* inbuffer, void* outbuffer, long frames)
  {
!     float *in = (float *)inbuffer;
  	__int32* out = (__int32*)outbuffer;
  	while (frames--)
--- 1062,1066 ----
  static void float32toInt32_S(void* inbuffer, void* outbuffer, long frames)
  {
!     const float *in = (const float *)inbuffer;
  	__int32* out = (__int32*)outbuffer;
  	while (frames--)
***************
*** 1066,1072 ****
  			fld o
  			fistp reverse
! 		};
  #endif
!         *out++ = SWAPLONG(reverse);
  	}
  }
--- 1077,1083 ----
  			fld o
  			fistp reverse
! 		}
  #endif
!         *out++ = swaplong(reverse);
  	}
  }
***************
*** 1074,1084 ****
  static void Int32tofloat32_S(void* inbuffer, void* outbuffer, long frames)
  {
! 	__int32* in = (__int32*)inbuffer;
      float *out = (float *)outbuffer;
  	while (frames--)
! 	{
!         __int32 lgin = *in++;
! 		*(out++) = (float)SWAPLONG(lgin) * (1.f / SCALE_INT32);
! 	}
  }
  
--- 1085,1092 ----
  static void Int32tofloat32_S(void* inbuffer, void* outbuffer, long frames)
  {
! 	const __int32* in = (const __int32*)inbuffer;
      float *out = (float *)outbuffer;
  	while (frames--)
! 		*(out++) = (float)swaplong(*(in++)) * (1.f / SCALE_INT32);
  }
  





More information about the Pd-cvs mailing list