64 #ifndef INCLUDED_volk_32i_s32f_convert_32f_u_H
65 #define INCLUDED_volk_32i_s32f_convert_32f_u_H
71 #include <emmintrin.h>
/*!
  \brief Converts 32-bit signed integers to floats and divides each by scalar (SSE2, unaligned buffers)
  \param outputVector Destination buffer; receives num_points floats
  \param inputVector Source buffer holding num_points int32_t values
  \param scalar Divisor for every converted value (applied as a multiply by 1/scalar)
  \param num_points Number of elements to convert
*/
static inline void
volk_32i_s32f_convert_32f_u_sse2(float* outputVector,
                                 const int32_t* inputVector,
                                 const float scalar,
                                 unsigned int num_points)
{
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  float* outputVectorPtr = outputVector;
  const int32_t* inputPtr = inputVector;

  /* Take the reciprocal once so the loops only need a multiply. */
  const float iScalar = 1.0f / scalar;
  const __m128 invScalar = _mm_set_ps1(iScalar);

  for (; number < quarterPoints; number++) {
    /* Unaligned load of four 32-bit integers. */
    const __m128i inputVal = _mm_loadu_si128((const __m128i*)inputPtr);

    __m128 ret = _mm_cvtepi32_ps(inputVal);
    ret = _mm_mul_ps(ret, invScalar);

    _mm_storeu_ps(outputVectorPtr, ret);

    /* Step both cursors to the next group of four; without this every
       iteration would reprocess the first four elements. */
    inputPtr += 4;
    outputVectorPtr += 4;
  }

  /* Scalar tail for the last num_points % 4 elements. */
  number = quarterPoints * 4;
  for (; number < num_points; number++) {
    outputVector[number] = ((float)(inputVector[number])) * iScalar;
  }
}
108 #ifdef LV_HAVE_GENERIC
/*!
  \brief Converts 32-bit signed integers to floats and divides each by scalar (portable C version)
  \param outputVector Destination buffer; receives num_points floats
  \param inputVector Source buffer holding num_points int32_t values
  \param scalar Divisor for every converted value (applied as a multiply by 1/scalar)
  \param num_points Number of elements to convert
*/
static inline void
volk_32i_s32f_convert_32f_generic(float* outputVector,
                                  const int32_t* inputVector,
                                  const float scalar,
                                  unsigned int num_points)
{
  float* outputVectorPtr = outputVector;
  const int32_t* inputVectorPtr = inputVector;

  /* Take the reciprocal once so the loop only needs a multiply. */
  const float iScalar = 1.0f / scalar;

  for (unsigned int number = 0; number < num_points; number++) {
    *outputVectorPtr++ = ((float)(*inputVectorPtr++)) * iScalar;
  }
}
129 #ifndef INCLUDED_volk_32i_s32f_convert_32f_a_H
130 #define INCLUDED_volk_32i_s32f_convert_32f_a_H
136 #include <emmintrin.h>
/*!
  \brief Converts 32-bit signed integers to floats and divides each by scalar (SSE2, aligned buffers)
  \param outputVector Destination buffer; receives num_points floats. Written with aligned SSE stores, so it must be 16-byte aligned.
  \param inputVector Source buffer holding num_points int32_t values. Read with aligned SSE loads, so it must be 16-byte aligned.
  \param scalar Divisor for every converted value (applied as a multiply by 1/scalar)
  \param num_points Number of elements to convert
*/
static inline void
volk_32i_s32f_convert_32f_a_sse2(float* outputVector,
                                 const int32_t* inputVector,
                                 const float scalar,
                                 unsigned int num_points)
{
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  float* outputVectorPtr = outputVector;
  const int32_t* inputPtr = inputVector;

  /* Take the reciprocal once so the loops only need a multiply. */
  const float iScalar = 1.0f / scalar;
  const __m128 invScalar = _mm_set_ps1(iScalar);

  for (; number < quarterPoints; number++) {
    /* Aligned load of four 32-bit integers. */
    const __m128i inputVal = _mm_load_si128((const __m128i*)inputPtr);

    __m128 ret = _mm_cvtepi32_ps(inputVal);
    ret = _mm_mul_ps(ret, invScalar);

    _mm_store_ps(outputVectorPtr, ret);

    /* Step BOTH cursors; advancing only the output pointer would convert
       the first four inputs over and over. */
    inputPtr += 4;
    outputVectorPtr += 4;
  }

  /* Scalar tail for the last num_points % 4 elements. */
  number = quarterPoints * 4;
  for (; number < num_points; number++) {
    outputVector[number] = ((float)(inputVector[number])) * iScalar;
  }
}
173 #ifdef LV_HAVE_GENERIC
/*!
  \brief Converts 32-bit signed integers to floats and divides each by scalar (portable C version of the aligned kernel)
  \param outputVector Destination buffer; receives num_points floats
  \param inputVector Source buffer holding num_points int32_t values
  \param scalar Divisor for every converted value (applied as a multiply by 1/scalar)
  \param num_points Number of elements to convert
*/
static inline void
volk_32i_s32f_convert_32f_a_generic(float* outputVector,
                                    const int32_t* inputVector,
                                    const float scalar,
                                    unsigned int num_points)
{
  float* outputVectorPtr = outputVector;
  const int32_t* inputVectorPtr = inputVector;

  /* Take the reciprocal once so the loop only needs a multiply. */
  const float iScalar = 1.0f / scalar;

  for (unsigned int number = 0; number < num_points; number++) {
    *outputVectorPtr++ = ((float)(*inputVectorPtr++)) * iScalar;
  }
}
/* Cross-reference: int32_t is a typedef for signed int (definition: stdint.h:77). */