69 #ifndef INCLUDED_volk_32f_s32f_convert_16i_u_H
70 #define INCLUDED_volk_32f_s32f_convert_16i_u_H
77 #include <immintrin.h>
/*
 * Unaligned AVX kernel: multiplies each input float by `scalar`, clamps the
 * product to [-32768, 32767] and writes it out as int16_t.
 *
 * outputVector  destination for num_points int16_t values
 * inputVector   source of num_points floats (read with unaligned loads)
 * scalar        factor applied to every input sample before conversion
 * num_points    number of samples to convert
 *
 * NOTE(review): the declarations of inputVal, ret, intInputVal and r, the
 * return type and the braces are not visible in this excerpt - confirm
 * against the complete file.
 */
80 volk_32f_s32f_convert_16i_u_avx(
int16_t* outputVector,
const float* inputVector,
81 const float scalar,
unsigned int num_points)
83 unsigned int number = 0;
/* Number of full 8-sample groups handled by the vector loop. */
85 const unsigned int eighthPoints = num_points / 8;
87 const float* inputVectorPtr = (
const float*)inputVector;
88 int16_t* outputVectorPtr = outputVector;
/* Clamp limits: the representable range of int16_t. */
90 float min_val = -32768;
91 float max_val = 32767;
/* Broadcast the scalar and the clamp limits to all 8 lanes. */
94 __m256 vScalar = _mm256_set1_ps(scalar);
97 __m128i intInputVal1, intInputVal2;
98 __m256 vmin_val = _mm256_set1_ps(min_val);
99 __m256 vmax_val = _mm256_set1_ps(max_val);
101 for(;number < eighthPoints; number++){
/* Unaligned load of 8 floats, then advance the read pointer. */
102 inputVal = _mm256_loadu_ps(inputVectorPtr); inputVectorPtr += 8;
/* Scale, then clamp: min() applies the upper limit, max() the lower. */
105 ret = _mm256_max_ps(_mm256_min_ps(_mm256_mul_ps(inputVal, vScalar), vmax_val), vmin_val);
/* Convert the 8 clamped floats to 32-bit integers. */
107 intInputVal = _mm256_cvtps_epi32(ret);
/* Split into low (0) and high (1) 128-bit halves for the SSE pack below. */
109 intInputVal1 = _mm256_extractf128_si256(intInputVal, 0);
110 intInputVal2 = _mm256_extractf128_si256(intInputVal, 1);
/* Narrow 2 x 4 int32 to 8 int16 with signed saturation. */
112 intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
/* Unaligned store of the 8 results, then advance the write pointer. */
114 _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
115 outputVectorPtr += 8;
/* Scalar tail: resume at the first sample the vector loop did not cover. */
118 number = eighthPoints * 8;
119 for(; number < num_points; number++){
/* NOTE(review): only the scaling step is visible here; the clamp,
 * conversion and store of r are on lines missing from this excerpt. */
120 r = inputVector[number] * scalar;
132 #include <emmintrin.h>
/*
 * Unaligned SSE2 kernel: multiplies each input float by `scalar`, clamps the
 * product to [-32768, 32767] and writes it out as int16_t, 8 samples per
 * vector iteration (two 4-float registers).
 *
 * outputVector  destination for num_points int16_t values
 * inputVector   source of num_points floats (read with unaligned loads)
 * scalar        factor applied to every input sample before conversion
 * num_points    number of samples to convert
 *
 * NOTE(review): the declarations of ret1, ret2 and r, the return type and
 * the braces are not visible in this excerpt - confirm against the file.
 */
135 volk_32f_s32f_convert_16i_u_sse2(
int16_t* outputVector,
const float* inputVector,
136 const float scalar,
unsigned int num_points)
138 unsigned int number = 0;
/* Number of full 8-sample groups handled by the vector loop. */
140 const unsigned int eighthPoints = num_points / 8;
142 const float* inputVectorPtr = (
const float*)inputVector;
143 int16_t* outputVectorPtr = outputVector;
/* Clamp limits: the representable range of int16_t. */
145 float min_val = -32768;
146 float max_val = 32767;
/* Broadcast the scalar and the clamp limits to all 4 lanes. */
149 __m128 vScalar = _mm_set_ps1(scalar);
150 __m128 inputVal1, inputVal2;
151 __m128i intInputVal1, intInputVal2;
153 __m128 vmin_val = _mm_set_ps1(min_val);
154 __m128 vmax_val = _mm_set_ps1(max_val);
156 for(;number < eighthPoints; number++){
/* Two unaligned loads of 4 floats each; the pointer advances by 8 total. */
157 inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
158 inputVal2 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
/* Scale, then clamp: min() applies the upper limit, max() the lower. */
161 ret1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
162 ret2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
/* Convert each group of 4 clamped floats to 32-bit integers. */
164 intInputVal1 = _mm_cvtps_epi32(ret1);
165 intInputVal2 = _mm_cvtps_epi32(ret2);
/* Narrow 2 x 4 int32 to 8 int16 with signed saturation. */
167 intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
/* Unaligned store of the 8 results, then advance the write pointer. */
169 _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
170 outputVectorPtr += 8;
/* Scalar tail: resume at the first sample the vector loop did not cover. */
173 number = eighthPoints * 8;
174 for(; number < num_points; number++){
/* NOTE(review): only the scaling step is visible here; the clamp,
 * conversion and store of r are on lines missing from this excerpt. */
175 r = inputVector[number] * scalar;
187 #include <xmmintrin.h>
/*
 * Unaligned SSE kernel: multiplies each input float by `scalar`, clamps the
 * product to [-32768, 32767] in a vector register, then converts the four
 * lanes to int16_t one at a time with rintf().
 *
 * outputVector  destination for num_points int16_t values
 * inputVector   source of num_points floats (read with unaligned loads)
 * scalar        factor applied to every input sample before conversion
 * num_points    number of samples to convert
 *
 * NOTE(review): the declarations of ret, outputFloatBuffer and r, the return
 * type and the braces are not visible in this excerpt - confirm against the
 * complete file.
 */
190 volk_32f_s32f_convert_16i_u_sse(
int16_t* outputVector,
const float* inputVector,
191 const float scalar,
unsigned int num_points)
193 unsigned int number = 0;
/* Number of full 4-sample groups handled by the vector loop. */
195 const unsigned int quarterPoints = num_points / 4;
197 const float* inputVectorPtr = (
const float*)inputVector;
198 int16_t* outputVectorPtr = outputVector;
/* Clamp limits: the representable range of int16_t. */
200 float min_val = -32768;
201 float max_val = 32767;
/* Broadcast the scalar and the clamp limits to all 4 lanes. */
204 __m128 vScalar = _mm_set_ps1(scalar);
206 __m128 vmin_val = _mm_set_ps1(min_val);
207 __m128 vmax_val = _mm_set_ps1(max_val);
211 for(;number < quarterPoints; number++){
/* Unaligned load of 4 floats.
 * NOTE(review): no advance of inputVectorPtr is visible inside this loop in
 * the excerpt - verify it happens on a line that is missing here. */
212 ret = _mm_loadu_ps(inputVectorPtr);
/* Scale, then clamp: min() applies the upper limit, max() the lower. */
216 ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
/* Aligned store into the temporary float buffer; _mm_store_ps requires a
 * 16-byte aligned destination (buffer declaration not visible - confirm). */
218 _mm_store_ps(outputFloatBuffer, ret);
/* Round each clamped lane to an integral value and narrow it to int16_t. */
219 *outputVectorPtr++ = (
int16_t)
rintf(outputFloatBuffer[0]);
220 *outputVectorPtr++ = (
int16_t)
rintf(outputFloatBuffer[1]);
221 *outputVectorPtr++ = (
int16_t)
rintf(outputFloatBuffer[2]);
222 *outputVectorPtr++ = (
int16_t)
rintf(outputFloatBuffer[3]);
/* Scalar tail: resume at the first sample the vector loop did not cover. */
225 number = quarterPoints * 4;
226 for(; number < num_points; number++){
/* NOTE(review): only the scaling step is visible here; the clamp,
 * conversion and store of r are on lines missing from this excerpt. */
227 r = inputVector[number] * scalar;
238 #ifdef LV_HAVE_GENERIC
/*
 * Portable scalar kernel: walks the input one float at a time and multiplies
 * each sample by `scalar`.
 *
 * outputVector  destination for the converted int16_t values
 * inputVector   source of num_points floats
 * scalar        factor applied to every input sample
 * num_points    number of samples to process
 *
 * NOTE(review): the loop body is cut off after the multiply in this excerpt;
 * presumably r is then limited to [min_val, max_val] and written through
 * outputVectorPtr - confirm against the complete file. The declaration of r
 * is not visible either.
 */
241 volk_32f_s32f_convert_16i_generic(
int16_t* outputVector,
const float* inputVector,
242 const float scalar,
unsigned int num_points)
244 int16_t* outputVectorPtr = outputVector;
245 const float* inputVectorPtr = inputVector;
246 unsigned int number = 0;
/* Limits of the int16_t range. */
247 float min_val = -32768;
248 float max_val = 32767;
251 for(number = 0; number < num_points; number++){
/* Read the next sample, advance the read pointer, and scale the value. */
252 r = *inputVectorPtr++ * scalar;
264 #ifndef INCLUDED_volk_32f_s32f_convert_16i_a_H
265 #define INCLUDED_volk_32f_s32f_convert_16i_a_H
273 #include <immintrin.h>
/*
 * Aligned AVX kernel: multiplies each input float by `scalar`, clamps the
 * product to [-32768, 32767] and writes it out as int16_t.
 *
 * The vector loop uses _mm256_load_ps and _mm_store_si128, which require a
 * 32-byte aligned input address and a 16-byte aligned output address.
 *
 * outputVector  destination for num_points int16_t values
 * inputVector   source of num_points floats
 * scalar        factor applied to every input sample before conversion
 * num_points    number of samples to convert
 *
 * NOTE(review): the declarations of intInputVal and r, the return type and
 * the braces are not visible in this excerpt - confirm against the file.
 */
276 volk_32f_s32f_convert_16i_a_avx(
int16_t* outputVector,
const float* inputVector,
277 const float scalar,
unsigned int num_points)
279 unsigned int number = 0;
/* Number of full 8-sample groups handled by the vector loop. */
281 const unsigned int eighthPoints = num_points / 8;
283 const float* inputVectorPtr = (
const float*)inputVector;
284 int16_t* outputVectorPtr = outputVector;
/* Clamp limits: the representable range of int16_t. */
286 float min_val = -32768;
287 float max_val = 32767;
/* Broadcast the scalar and the clamp limits to all 8 lanes. */
290 __m256 vScalar = _mm256_set1_ps(scalar);
291 __m256 inputVal, ret;
293 __m128i intInputVal1, intInputVal2;
294 __m256 vmin_val = _mm256_set1_ps(min_val);
295 __m256 vmax_val = _mm256_set1_ps(max_val);
297 for(;number < eighthPoints; number++){
/* Aligned load of 8 floats, then advance the read pointer. */
298 inputVal = _mm256_load_ps(inputVectorPtr); inputVectorPtr += 8;
/* Scale, then clamp: min() applies the upper limit, max() the lower. */
301 ret = _mm256_max_ps(_mm256_min_ps(_mm256_mul_ps(inputVal, vScalar), vmax_val), vmin_val);
/* Convert the 8 clamped floats to 32-bit integers. */
303 intInputVal = _mm256_cvtps_epi32(ret);
/* Split into low (0) and high (1) 128-bit halves for the SSE pack below. */
305 intInputVal1 = _mm256_extractf128_si256(intInputVal, 0);
306 intInputVal2 = _mm256_extractf128_si256(intInputVal, 1);
/* Narrow 2 x 4 int32 to 8 int16 with signed saturation. */
308 intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
/* Aligned store of the 8 results, then advance the write pointer. */
310 _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
311 outputVectorPtr += 8;
/* Scalar tail: resume at the first sample the vector loop did not cover. */
314 number = eighthPoints * 8;
315 for(; number < num_points; number++){
/* NOTE(review): only the scaling step is visible here; the clamp,
 * conversion and store of r are on lines missing from this excerpt. */
316 r = inputVector[number] * scalar;
327 #include <emmintrin.h>
/*
 * Aligned SSE2 kernel: multiplies each input float by `scalar`, clamps the
 * product to [-32768, 32767] and writes it out as int16_t, 8 samples per
 * vector iteration (two 4-float registers).
 *
 * The vector loop uses _mm_load_ps and _mm_store_si128, which require
 * 16-byte aligned input and output addresses.
 *
 * outputVector  destination for num_points int16_t values
 * inputVector   source of num_points floats
 * scalar        factor applied to every input sample before conversion
 * num_points    number of samples to convert
 *
 * NOTE(review): the declarations of ret1, ret2 and r, the return type and
 * the braces are not visible in this excerpt - confirm against the file.
 */
330 volk_32f_s32f_convert_16i_a_sse2(
int16_t* outputVector,
const float* inputVector,
331 const float scalar,
unsigned int num_points)
333 unsigned int number = 0;
/* Number of full 8-sample groups handled by the vector loop. */
335 const unsigned int eighthPoints = num_points / 8;
337 const float* inputVectorPtr = (
const float*)inputVector;
338 int16_t* outputVectorPtr = outputVector;
/* Clamp limits: the representable range of int16_t. */
340 float min_val = -32768;
341 float max_val = 32767;
/* Broadcast the scalar and the clamp limits to all 4 lanes. */
344 __m128 vScalar = _mm_set_ps1(scalar);
345 __m128 inputVal1, inputVal2;
346 __m128i intInputVal1, intInputVal2;
348 __m128 vmin_val = _mm_set_ps1(min_val);
349 __m128 vmax_val = _mm_set_ps1(max_val);
351 for(;number < eighthPoints; number++){
/* Two aligned loads of 4 floats each; the pointer advances by 8 total. */
352 inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
353 inputVal2 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
/* Scale, then clamp: min() applies the upper limit, max() the lower. */
356 ret1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
357 ret2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
/* Convert each group of 4 clamped floats to 32-bit integers. */
359 intInputVal1 = _mm_cvtps_epi32(ret1);
360 intInputVal2 = _mm_cvtps_epi32(ret2);
/* Narrow 2 x 4 int32 to 8 int16 with signed saturation. */
362 intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
/* Aligned store of the 8 results, then advance the write pointer. */
364 _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
365 outputVectorPtr += 8;
/* Scalar tail: resume at the first sample the vector loop did not cover. */
368 number = eighthPoints * 8;
369 for(; number < num_points; number++){
/* NOTE(review): only the scaling step is visible here; the clamp,
 * conversion and store of r are on lines missing from this excerpt. */
370 r = inputVector[number] * scalar;
382 #include <xmmintrin.h>
/*
 * Aligned SSE kernel: multiplies each input float by `scalar`, clamps the
 * product to [-32768, 32767] in a vector register, then converts the four
 * lanes to int16_t one at a time with rintf().
 *
 * The vector loop reads with _mm_load_ps, which requires a 16-byte aligned
 * input address.
 *
 * outputVector  destination for num_points int16_t values
 * inputVector   source of num_points floats
 * scalar        factor applied to every input sample before conversion
 * num_points    number of samples to convert
 *
 * NOTE(review): the declarations of ret, outputFloatBuffer and r, the return
 * type and the braces are not visible in this excerpt - confirm against the
 * complete file.
 */
385 volk_32f_s32f_convert_16i_a_sse(
int16_t* outputVector,
const float* inputVector,
386 const float scalar,
unsigned int num_points)
388 unsigned int number = 0;
/* Number of full 4-sample groups handled by the vector loop. */
390 const unsigned int quarterPoints = num_points / 4;
392 const float* inputVectorPtr = (
const float*)inputVector;
393 int16_t* outputVectorPtr = outputVector;
/* Clamp limits: the representable range of int16_t. */
395 float min_val = -32768;
396 float max_val = 32767;
/* Broadcast the scalar and the clamp limits to all 4 lanes. */
399 __m128 vScalar = _mm_set_ps1(scalar);
401 __m128 vmin_val = _mm_set_ps1(min_val);
402 __m128 vmax_val = _mm_set_ps1(max_val);
406 for(;number < quarterPoints; number++){
/* Aligned load of 4 floats.
 * NOTE(review): no advance of inputVectorPtr is visible inside this loop in
 * the excerpt - verify it happens on a line that is missing here. */
407 ret = _mm_load_ps(inputVectorPtr);
/* Scale, then clamp: min() applies the upper limit, max() the lower. */
411 ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
/* Aligned store into the temporary float buffer; _mm_store_ps requires a
 * 16-byte aligned destination (buffer declaration not visible - confirm). */
413 _mm_store_ps(outputFloatBuffer, ret);
/* Round each clamped lane to an integral value and narrow it to int16_t. */
414 *outputVectorPtr++ = (
int16_t)
rintf(outputFloatBuffer[0]);
415 *outputVectorPtr++ = (
int16_t)
rintf(outputFloatBuffer[1]);
416 *outputVectorPtr++ = (
int16_t)
rintf(outputFloatBuffer[2]);
417 *outputVectorPtr++ = (
int16_t)
rintf(outputFloatBuffer[3]);
/* Scalar tail: resume at the first sample the vector loop did not cover. */
420 number = quarterPoints * 4;
421 for(; number < num_points; number++){
/* NOTE(review): only the scaling step is visible here; the clamp,
 * conversion and store of r are on lines missing from this excerpt. */
422 r = inputVector[number] * scalar;
433 #ifdef LV_HAVE_GENERIC
/*
 * Portable scalar kernel (the "_a" variant): walks the input one float at a
 * time and multiplies each sample by `scalar`. The visible code uses no
 * alignment-dependent operations.
 *
 * outputVector  destination for the converted int16_t values
 * inputVector   source of num_points floats
 * scalar        factor applied to every input sample
 * num_points    number of samples to process
 *
 * NOTE(review): the loop body is cut off after the multiply in this excerpt;
 * presumably r is then limited to [min_val, max_val] and written through
 * outputVectorPtr - confirm against the complete file. The declaration of r
 * is not visible either.
 */
436 volk_32f_s32f_convert_16i_a_generic(
int16_t* outputVector,
const float* inputVector,
437 const float scalar,
unsigned int num_points)
439 int16_t* outputVectorPtr = outputVector;
440 const float* inputVectorPtr = inputVector;
441 unsigned int number = 0;
/* Limits of the int16_t range. */
442 float min_val = -32768;
443 float max_val = 32767;
446 for(number = 0; number < num_points; number++){
/* Read the next sample, advance the read pointer, and scale the value. */
447 r = *inputVectorPtr++ * scalar;
signed short int16_t
Definition: stdint.h:76
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:27
static float rintf(float x)
Definition: volk/cmake/msvc/config.h:30