56 #ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H
57 #define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H
64 #include <smmintrin.h>
/*
 * SSE4.1 kernel: for each complex 16-bit input sample, take the first 16-bit
 * component, convert it to float, multiply by (1 / scalar) and write it to
 * iBuffer.
 *
 *   iBuffer       - output floats, one per input sample
 *   complexVector - input samples (lv_16sc_t)
 *   scalar        - divisor; the code multiplies by its reciprocal
 *   num_points    - number of complex samples to process
 *
 * The vector loop uses _mm_load_si128 / _mm_store_ps, which are the aligned
 * load/store intrinsics, so both buffers have to be 16-byte aligned.
 *
 * NOTE(review): this listing carries its original line numbers as prefixes and
 * those numbers skip, so some lines (braces, the declarations of
 * complexVectorPtr and iFloatValue, possibly others) are not visible here.
 * Confirm against the upstream header before relying on these notes.
 */
67 volk_16ic_s32f_deinterleave_real_32f_a_sse4_1(
float* iBuffer,
const lv_16sc_t* complexVector,
68 const float scalar,
unsigned int num_points)
70 float* iBufferPtr = iBuffer;
72 unsigned int number = 0;
/* Each vector iteration handles four complex samples (one 16-byte load). */
73 const unsigned int quarterPoints = num_points / 4;
/* 1.0 is a double literal: the reciprocal is computed in double, then narrowed
 * to float. No check for scalar == 0 is visible. */
77 const float iScalar= 1.0 / scalar;
78 __m128 invScalar = _mm_set_ps1(iScalar);
79 __m128i complexVal, iIntVal;
/* Shuffle mask (arguments run from byte 15 down to byte 0): gathers source
 * bytes 0-1, 4-5, 8-9 and 12-13 -- the low 16-bit half of each 32-bit sample --
 * into the low 8 bytes; 0x80 selectors zero the upper 8 bytes. */
82 __m128i moveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
84 for(;number < quarterPoints; number++){
/* complexVectorPtr advances by 16 per iteration; presumably a byte pointer over
 * complexVector -- its declaration is not visible in this listing. */
85 complexVal = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 16;
86 complexVal = _mm_shuffle_epi8(complexVal, moveMask);
/* Sign-extend the four packed int16 values to int32, then convert to float. */
88 iIntVal = _mm_cvtepi16_epi32(complexVal);
89 iFloatValue = _mm_cvtepi32_ps(iIntVal);
91 iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
93 _mm_store_ps(iBufferPtr, iFloatValue);
/* NOTE(review): no advance of iBufferPtr is visible after the store; the
 * embedded numbering jumps from 93 to 98, so that line (and the loop's closing
 * brace) may simply be missing from this listing -- verify. */
/* Scalar tail for the remaining num_points % 4 samples. */
98 number = quarterPoints * 4;
99 int16_t* sixteenTComplexVectorPtr = (
int16_t*)&complexVector[number];
100 for(; number < num_points; number++){
/* Read the first int16 of the sample, then step over the second one. */
101 *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
102 sixteenTComplexVectorPtr++;
109 #include <xmmintrin.h>
/*
 * SSE kernel: converts one 16-bit component of each complex input sample to
 * float, multiplies it by (1 / scalar) and writes it to iBuffer.
 *
 *   iBuffer       - output floats, one per input sample
 *   complexVector - input samples (lv_16sc_t)
 *   scalar        - divisor; the code multiplies by its reciprocal
 *   num_points    - number of complex samples to process
 *
 * The int16 -> float conversion is done with scalar casts into a staging
 * buffer; only the multiply and the store are vectorized. _mm_load_ps and
 * _mm_store_ps are the aligned variants, so floatBuffer and iBuffer have to be
 * 16-byte aligned.
 *
 * NOTE(review): this listing carries its original line numbers as prefixes and
 * those numbers skip, so some lines (braces, the declarations of
 * complexVectorPtr, floatBuffer and iValue, possibly others) are not visible
 * here. Confirm against the upstream header.
 */
112 volk_16ic_s32f_deinterleave_real_32f_a_sse(
float* iBuffer,
const lv_16sc_t* complexVector,
113 const float scalar,
unsigned int num_points)
115 float* iBufferPtr = iBuffer;
117 unsigned int number = 0;
/* Four samples are processed per vector iteration. */
118 const unsigned int quarterPoints = num_points / 4;
/* 1.0 is a double literal: the reciprocal is computed in double, then narrowed
 * to float. No check for scalar == 0 is visible. */
121 const float iScalar = 1.0/scalar;
122 __m128 invScalar = _mm_set_ps1(iScalar);
127 for(;number < quarterPoints; number++){
/* Take every second element: complexVectorPtr steps by 2 per sample, which
 * presumably means it is an int16_t pointer (see the cast in the tail below) --
 * its declaration is not visible in this listing. */
128 floatBuffer[0] = (float)(*complexVectorPtr); complexVectorPtr += 2;
129 floatBuffer[1] = (float)(*complexVectorPtr); complexVectorPtr += 2;
130 floatBuffer[2] = (float)(*complexVectorPtr); complexVectorPtr += 2;
131 floatBuffer[3] = (float)(*complexVectorPtr); complexVectorPtr += 2;
133 iValue = _mm_load_ps(floatBuffer);
135 iValue = _mm_mul_ps(iValue, invScalar);
137 _mm_store_ps(iBufferPtr, iValue);
/* NOTE(review): no advance of iBufferPtr is visible after the store; the
 * embedded numbering jumps from 137 to 142, so that line (and the loop's
 * closing brace) may be missing from this listing -- verify. */
/* Scalar tail for the remaining num_points % 4 samples. */
142 number = quarterPoints * 4;
143 complexVectorPtr = (
int16_t*)&complexVector[number];
144 for(; number < num_points; number++){
/* NOTE(review): as shown, the input pointer advances by only one int16 per
 * output, whereas the vector loop above advances by two. The embedded numbering
 * jumps from 145 to 152, so a second increment may have been dropped from this
 * listing -- verify against upstream. */
145 *iBufferPtr++ = ((float)(*complexVectorPtr++)) * iScalar;
152 #ifdef LV_HAVE_GENERIC
/*
 * Portable (non-SIMD) kernel: reads 16-bit values from the complex input,
 * converts each to float, multiplies by (1 / scalar) and writes one float per
 * sample to iBuffer.
 *
 *   iBuffer       - output floats, num_points entries written
 *   complexVector - input samples (lv_16sc_t)
 *   scalar        - divisor; the code multiplies by its reciprocal
 *   num_points    - number of complex samples to process
 *
 * NOTE(review): this listing carries its original line numbers as prefixes and
 * those numbers skip, so some lines (braces, the declaration of
 * complexVectorPtr, possibly others) are not visible here. Confirm against the
 * upstream header.
 */
154 volk_16ic_s32f_deinterleave_real_32f_generic(
float* iBuffer,
const lv_16sc_t* complexVector,
155 const float scalar,
unsigned int num_points)
157 unsigned int number = 0;
159 float* iBufferPtr = iBuffer;
/* 1.0 is a double literal: the reciprocal is computed in double, then narrowed
 * to float. No check for scalar == 0 is visible. */
160 const float invScalar = 1.0 / scalar;
161 for(number = 0; number < num_points; number++){
/* NOTE(review): only a single post-increment of complexVectorPtr is visible per
 * output. If it is an int16_t pointer (as in the SIMD variants), a second
 * increment is needed to skip the other component of each sample; that line may
 * be missing from this listing -- verify against upstream. */
162 *iBufferPtr++ = ((float)(*complexVectorPtr++)) * invScalar;
short complex lv_16sc_t
Definition: volk_complex.h:53
signed short int16_t
Definition: stdint.h:76
signed char int8_t
Definition: stdint.h:75
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:27