GNU Radio Manual and C++ API Reference  3.7.7
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
volk_16i_s32f_convert_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 /*!
24  * \page volk_16i_s32f_convert_32f
25  *
26  * \b Overview
27  *
28  * Converts 16-bit shorts to scaled 32-bit floating point values.
29  *
30  * <b>Dispatcher Prototype</b>
31  * \code
32  * void volk_16i_s32f_convert_32f(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points);
33  * \endcode
34  *
35  * \b Inputs
36  * \li inputVector: The input vector of 16-bit shorts.
37  * \li scalar: The value that each input point is divided by.
38  * \li num_points: The number of data points.
39  *
40  * \b Outputs
41  * \li outputVector: The output vector of 32-bit floats.
42  *
43  * \b Example
44  * \code
45  * int N = 10000;
46  *
47  * volk_16i_s32f_convert_32f(t, x, 32768.f, N); // t: float output buffer, x: int16_t input buffer, both holding N points
48  *
49  * volk_free(x);
50  * volk_free(t);
51  * \endcode
52  */
53 
54 #ifndef INCLUDED_volk_16i_s32f_convert_32f_u_H
55 #define INCLUDED_volk_16i_s32f_convert_32f_u_H
56 
57 #include <inttypes.h>
58 #include <stdio.h>
59 
60 #ifdef LV_HAVE_AVX
61 #include <immintrin.h>
62 
/*
 * Unaligned AVX kernel: converts 16-bit signed integers to floats and scales
 * them by 1/scalar.
 *
 * outputVector: receives num_points floats (unaligned 256-bit stores).
 * inputVector:  num_points 16-bit signed samples (unaligned 128-bit loads).
 * scalar:       divisor applied to every sample.
 * num_points:   number of samples to convert.
 *
 * Samples are processed eight at a time; the vector path multiplies by a
 * reciprocal computed once up front, while the leftover (num_points % 8)
 * samples are divided by scalar directly.
 */
static inline void
volk_16i_s32f_convert_32f_u_avx(float* outputVector, const int16_t* inputVector,
                                const float scalar, unsigned int num_points)
{
  const unsigned int blocks = num_points / 8;
  const __m128 recip = _mm_set_ps1(1.0/scalar);
  const int16_t* src = inputVector;
  float* dst = outputVector;
  unsigned int i;

  for(i = 0; i < blocks; i++){
    // Fetch eight 16-bit samples in one 128-bit load
    const __m128i packed = _mm_loadu_si128((const __m128i*)src);

    // Sign-extend samples 0..3 and (after an 8-byte shift) samples 4..7
    const __m128i lo32 = _mm_cvtepi16_epi32(packed);
    const __m128i hi32 = _mm_cvtepi16_epi32(_mm_srli_si128(packed, 8));

    // Convert each half to float and apply the scale
    const __m128 lo = _mm_mul_ps(_mm_cvtepi32_ps(lo32), recip);
    const __m128 hi = _mm_mul_ps(_mm_cvtepi32_ps(hi32), recip);

    // Assemble the two halves into one 256-bit register and store it
    __m256 wide = _mm256_insertf128_ps(_mm256_setzero_ps(), lo, 0);
    wide = _mm256_insertf128_ps(wide, hi, 1);
    _mm256_storeu_ps(dst, wide);

    src += 8;
    dst += 8;
  }

  // Scalar tail for the samples that do not fill a whole block
  for(i = blocks * 8; i < num_points; i++){
    outputVector[i] = ((float)(inputVector[i])) / scalar;
  }
}
111 #endif /* LV_HAVE_AVX */
112 
113 #ifdef LV_HAVE_SSE4_1
114 #include <smmintrin.h>
115 
/*
 * Unaligned SSE4.1 kernel: converts 16-bit signed integers to floats and
 * scales them by 1/scalar.
 *
 * outputVector: receives num_points floats (unaligned stores).
 * inputVector:  num_points 16-bit signed samples (unaligned loads).
 * scalar:       divisor applied to every sample.
 * num_points:   number of samples to convert.
 *
 * Eight samples are handled per iteration as two groups of four; the vector
 * path multiplies by a precomputed reciprocal, the scalar tail divides.
 */
static inline void
volk_16i_s32f_convert_32f_u_sse4_1(float* outputVector, const int16_t* inputVector,
                                   const float scalar, unsigned int num_points)
{
  const unsigned int blocks = num_points / 8;
  const __m128 recip = _mm_set_ps1(1.0/scalar);
  const int16_t* src = inputVector;
  float* dst = outputVector;
  unsigned int i;

  for(i = 0; i < blocks; i++){
    // One 128-bit load brings in eight 16-bit samples
    const __m128i packed = _mm_loadu_si128((const __m128i*)src);

    // Samples 0..3: sign-extend, convert, scale, store
    const __m128i lo32 = _mm_cvtepi16_epi32(packed);
    _mm_storeu_ps(dst, _mm_mul_ps(_mm_cvtepi32_ps(lo32), recip));

    // Samples 4..7: move the upper 8 bytes down first, then do the same
    const __m128i hi32 = _mm_cvtepi16_epi32(_mm_srli_si128(packed, 8));
    _mm_storeu_ps(dst + 4, _mm_mul_ps(_mm_cvtepi32_ps(hi32), recip));

    src += 8;
    dst += 8;
  }

  // Scalar tail for the samples that do not fill a whole block
  for(i = blocks * 8; i < num_points; i++){
    outputVector[i] = ((float)(inputVector[i])) / scalar;
  }
}
161 #endif /* LV_HAVE_SSE4_1 */
162 
163 #ifdef LV_HAVE_SSE
164 #include <xmmintrin.h>
165 
/*
 * Unaligned SSE kernel: converts 16-bit signed integers to floats and scales
 * them by 1/scalar.
 *
 * outputVector: receives num_points floats (unaligned stores).
 * inputVector:  num_points 16-bit signed samples.
 * scalar:       divisor applied to every sample.
 * num_points:   number of samples to convert.
 *
 * The integer-to-float conversion is done with scalar casts; only the
 * scaling and the store are vectorised, four samples at a time. The vector
 * path multiplies by a precomputed reciprocal, the scalar tail divides.
 */
static inline void
volk_16i_s32f_convert_32f_u_sse(float* outputVector, const int16_t* inputVector,
                                const float scalar, unsigned int num_points)
{
  const unsigned int groups = num_points / 4;
  const __m128 recip = _mm_set_ps1(1.0/scalar);
  unsigned int g;
  unsigned int i;

  for(g = 0; g < groups; g++){
    const int16_t* src = inputVector + 4 * g;

    // Pack four converted samples in memory order (lane 0 = src[0])
    const __m128 vals = _mm_setr_ps((float)(src[0]), (float)(src[1]),
                                    (float)(src[2]), (float)(src[3]));

    _mm_storeu_ps(outputVector + 4 * g, _mm_mul_ps(vals, recip));
  }

  // Scalar tail for the samples that do not fill a whole group
  for(i = groups * 4; i < num_points; i++){
    outputVector[i] = (float)(inputVector[i]) / scalar;
  }
}
193 #endif /* LV_HAVE_SSE */
194 
195 #ifdef LV_HAVE_GENERIC
196 
/*
 * Portable kernel: converts each 16-bit signed sample to float and divides
 * it by scalar.
 *
 * outputVector: receives num_points floats.
 * inputVector:  num_points 16-bit signed samples.
 * scalar:       divisor applied to every sample.
 * num_points:   number of samples to convert.
 */
static inline void
volk_16i_s32f_convert_32f_generic(float* outputVector, const int16_t* inputVector,
                                  const float scalar, unsigned int num_points)
{
  unsigned int idx;

  for(idx = 0; idx < num_points; idx++){
    outputVector[idx] = ((float)(inputVector[idx])) / scalar;
  }
}
209 #endif /* LV_HAVE_GENERIC */
210 
#ifdef LV_HAVE_NEON
#include <arm_neon.h>

/*
 * NEON kernel: converts 16-bit signed integers to floats and scales them by
 * 1/scalar.
 *
 * outputVector: receives num_points floats.
 * inputVector:  num_points 16-bit signed samples.
 * scalar:       divisor applied to every sample.
 * num_points:   number of samples to convert.
 *
 * Eight samples are handled per iteration. vld2_s16 de-interleaves them into
 * even/odd lanes and vst2q_f32 re-interleaves on store, so the output keeps
 * the input order. The vector path multiplies by a precomputed reciprocal,
 * the scalar tail divides by scalar directly.
 */
static inline void
volk_16i_s32f_convert_32f_neon(float* outputVector, const int16_t* inputVector,
                               const float scalar, unsigned int num_points)
{
  float* outputPtr = outputVector;
  const int16_t* inputPtr = inputVector;
  unsigned int number = 0;
  unsigned int eighth_points = num_points / 8;

  int16x4x2_t input16;
  int32x4_t input32_0, input32_1;
  float32x4_t input_float_0, input_float_1;
  float32x4x2_t output_float;
  float32x4_t inv_scale;

  inv_scale = vdupq_n_f32(1.0/scalar);

  // Original author's performance note:
  // the generic disassembles to a 128-bit load
  // and duplicates every instruction to operate on 64-bits
  // at a time. This is only possible with lanes, which is faster
  // than just doing a vld1_s16, but still slower.
  for(number = 0; number < eighth_points; number++){
    input16 = vld2_s16(inputPtr);
    // widen 16-bit int to 32-bit int
    input32_0 = vmovl_s16(input16.val[0]);
    input32_1 = vmovl_s16(input16.val[1]);
    // convert 32-bit int to float, then apply the scale
    input_float_0 = vcvtq_f32_s32(input32_0);
    input_float_1 = vcvtq_f32_s32(input32_1);
    output_float.val[0] = vmulq_f32(input_float_0, inv_scale);
    output_float.val[1] = vmulq_f32(input_float_1, inv_scale);
    vst2q_f32(outputPtr, output_float);
    inputPtr += 8;
    outputPtr += 8;
  }

  // Scalar tail; the pointers already sit just past the last full block
  for(number = eighth_points*8; number < num_points; number++){
    *outputPtr++ = ((float)(*inputPtr++)) / scalar;
  }
}
#endif /* LV_HAVE_NEON */
254 
255 
256 #endif /* INCLUDED_volk_16i_s32f_convert_32f_u_H */
257 #ifndef INCLUDED_volk_16i_s32f_convert_32f_a_H
258 #define INCLUDED_volk_16i_s32f_convert_32f_a_H
259 
260 #include <inttypes.h>
261 #include <stdio.h>
262 
263 #ifdef LV_HAVE_AVX
264 #include <immintrin.h>
265 
/*
 * Aligned AVX kernel: converts 16-bit signed integers to floats and scales
 * them by 1/scalar.
 *
 * outputVector: receives num_points floats; written with _mm256_store_ps,
 *               which requires 32-byte alignment.
 * inputVector:  num_points 16-bit signed samples; read with _mm_load_si128,
 *               which requires 16-byte alignment.
 * scalar:       divisor applied to every sample.
 * num_points:   number of samples to convert.
 *
 * Samples are processed eight at a time; the vector path multiplies by a
 * reciprocal computed once up front, while the leftover (num_points % 8)
 * samples are divided by scalar directly.
 */
static inline void
volk_16i_s32f_convert_32f_a_avx(float* outputVector, const int16_t* inputVector,
                                const float scalar, unsigned int num_points)
{
  const unsigned int blocks = num_points / 8;
  const __m128 recip = _mm_set_ps1(1.0/scalar);
  const int16_t* src = inputVector;
  float* dst = outputVector;
  unsigned int i;

  for(i = 0; i < blocks; i++){
    // Fetch eight 16-bit samples with one aligned 128-bit load
    const __m128i packed = _mm_load_si128((const __m128i*)src);

    // Sign-extend samples 0..3 and (after an 8-byte shift) samples 4..7
    const __m128i lo32 = _mm_cvtepi16_epi32(packed);
    const __m128i hi32 = _mm_cvtepi16_epi32(_mm_srli_si128(packed, 8));

    // Convert each half to float and apply the scale
    const __m128 lo = _mm_mul_ps(_mm_cvtepi32_ps(lo32), recip);
    const __m128 hi = _mm_mul_ps(_mm_cvtepi32_ps(hi32), recip);

    // Assemble the two halves into one 256-bit register and store it
    __m256 wide = _mm256_insertf128_ps(_mm256_setzero_ps(), lo, 0);
    wide = _mm256_insertf128_ps(wide, hi, 1);
    _mm256_store_ps(dst, wide);

    src += 8;
    dst += 8;
  }

  // Scalar tail for the samples that do not fill a whole block
  for(i = blocks * 8; i < num_points; i++){
    outputVector[i] = ((float)(inputVector[i])) / scalar;
  }
}
314 #endif /* LV_HAVE_AVX */
315 
316 #ifdef LV_HAVE_SSE4_1
317 #include <smmintrin.h>
318 
/*
 * SSE4.1 kernel from the aligned kernel set: converts 16-bit signed
 * integers to floats and scales them by 1/scalar.
 *
 * outputVector: receives num_points floats.
 * inputVector:  num_points 16-bit signed samples.
 * scalar:       divisor applied to every sample.
 * num_points:   number of samples to convert.
 *
 * Note: despite the _a_ name, this kernel uses the unaligned load/store
 * intrinsics (_mm_loadu_si128 / _mm_storeu_ps).
 *
 * Eight samples are handled per iteration as two groups of four; the vector
 * path multiplies by a precomputed reciprocal, the scalar tail divides.
 */
static inline void
volk_16i_s32f_convert_32f_a_sse4_1(float* outputVector, const int16_t* inputVector,
                                   const float scalar, unsigned int num_points)
{
  const unsigned int blocks = num_points / 8;
  const __m128 recip = _mm_set_ps1(1.0/scalar);
  const int16_t* src = inputVector;
  float* dst = outputVector;
  unsigned int i;

  for(i = 0; i < blocks; i++){
    // One 128-bit load brings in eight 16-bit samples
    const __m128i packed = _mm_loadu_si128((const __m128i*)src);

    // Samples 0..3: sign-extend, convert, scale, store
    const __m128i lo32 = _mm_cvtepi16_epi32(packed);
    _mm_storeu_ps(dst, _mm_mul_ps(_mm_cvtepi32_ps(lo32), recip));

    // Samples 4..7: move the upper 8 bytes down first, then do the same
    const __m128i hi32 = _mm_cvtepi16_epi32(_mm_srli_si128(packed, 8));
    _mm_storeu_ps(dst + 4, _mm_mul_ps(_mm_cvtepi32_ps(hi32), recip));

    src += 8;
    dst += 8;
  }

  // Scalar tail for the samples that do not fill a whole block
  for(i = blocks * 8; i < num_points; i++){
    outputVector[i] = ((float)(inputVector[i])) / scalar;
  }
}
364 #endif /* LV_HAVE_SSE4_1 */
365 
366 #ifdef LV_HAVE_SSE
367 #include <xmmintrin.h>
368 
/*
 * SSE kernel from the aligned kernel set: converts 16-bit signed integers
 * to floats and scales them by 1/scalar.
 *
 * outputVector: receives num_points floats.
 * inputVector:  num_points 16-bit signed samples.
 * scalar:       divisor applied to every sample.
 * num_points:   number of samples to convert.
 *
 * Note: despite the _a_ name, results are written with the unaligned store
 * intrinsic (_mm_storeu_ps).
 *
 * The integer-to-float conversion is done with scalar casts; only the
 * scaling and the store are vectorised, four samples at a time. The vector
 * path multiplies by a precomputed reciprocal, the scalar tail divides.
 */
static inline void
volk_16i_s32f_convert_32f_a_sse(float* outputVector, const int16_t* inputVector,
                                const float scalar, unsigned int num_points)
{
  const unsigned int groups = num_points / 4;
  const __m128 recip = _mm_set_ps1(1.0/scalar);
  const int16_t* src = inputVector;
  float* dst = outputVector;
  unsigned int remaining = groups;
  unsigned int i;

  while(remaining > 0){
    // Pack four converted samples in memory order (lane 0 = src[0])
    const __m128 vals = _mm_setr_ps((float)(src[0]), (float)(src[1]),
                                    (float)(src[2]), (float)(src[3]));

    _mm_storeu_ps(dst, _mm_mul_ps(vals, recip));

    src += 4;
    dst += 4;
    remaining--;
  }

  // Scalar tail for the samples that do not fill a whole group
  for(i = groups * 4; i < num_points; i++){
    outputVector[i] = (float)(inputVector[i]) / scalar;
  }
}
396 #endif /* LV_HAVE_SSE */
397 
398 #ifdef LV_HAVE_GENERIC
399 
/*
 * Portable kernel from the aligned kernel set: converts each 16-bit signed
 * sample to float and divides it by scalar. The body is plain C and performs
 * the same computation as the non-aligned generic kernel.
 *
 * outputVector: receives num_points floats.
 * inputVector:  num_points 16-bit signed samples.
 * scalar:       divisor applied to every sample.
 * num_points:   number of samples to convert.
 */
static inline void
volk_16i_s32f_convert_32f_a_generic(float* outputVector, const int16_t* inputVector,
                                    const float scalar, unsigned int num_points)
{
  const int16_t* src = inputVector;
  float* dst = outputVector;
  unsigned int remaining = num_points;

  while(remaining > 0){
    *dst = ((float)(*src)) / scalar;
    dst++;
    src++;
    remaining--;
  }
}
412 #endif /* LV_HAVE_GENERIC */
413 
414 #endif /* INCLUDED_volk_16i_s32f_convert_32f_a_H */
signed short int16_t
Definition: stdint.h:76