GNU Radio Manual and C++ API Reference  3.7.7
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
volk_32fc_magnitude_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 /*!
24  * \page volk_32fc_magnitude_32f
25  *
26  * \b Overview
27  *
28  * Calculates the magnitude of the complexVector and stores the
29  * results in the magnitudeVector.
30  *
31  * <b>Dispatcher Prototype</b>
32  * \code
33  * void volk_32fc_magnitude_32f(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points)
34  * \endcode
35  *
36  * \b Inputs
37  * \li complexVector: The complex input vector.
38  * \li num_points: The number of samples.
39  *
40  * \b Outputs
41  * \li magnitudeVector: The output value.
42  *
43  * \b Example
44  * Calculate the magnitude of \f$x^2 + x\f$ for points around the unit circle.
45  * \code
46  * int N = 10;
47  * unsigned int alignment = volk_get_alignment();
48  * lv_32fc_t* in = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
49  * float* magnitude = (float*)volk_malloc(sizeof(float)*N, alignment);
50  *
51  * for(unsigned int ii = 0; ii < N/2; ++ii){
52  * float real = 2.f * ((float)ii / (float)N) - 1.f;
53  * float imag = std::sqrt(1.f - real * real);
54  * in[ii] = lv_cmake(real, imag);
55  * in[ii] = in[ii] * in[ii] + in[ii];
56  * in[N-ii] = lv_cmake(real, imag);
57  * in[N-ii] = in[N-ii] * in[N-ii] + in[N-ii];
58  * }
59  *
60  * volk_32fc_magnitude_32f(magnitude, in, N);
61  *
62  * for(unsigned int ii = 0; ii < N; ++ii){
63  * printf("out(%i) = %+.1f\n", ii, magnitude[ii]);
64  * }
65  *
66  * volk_free(in);
67  * volk_free(magnitude);
68  * \endcode
69  */
70 
71 #ifndef INCLUDED_volk_32fc_magnitude_32f_u_H
72 #define INCLUDED_volk_32fc_magnitude_32f_u_H
73 
74 #include <inttypes.h>
75 #include <stdio.h>
76 #include <math.h>
77 
78 #ifdef LV_HAVE_AVX
79 #include <immintrin.h>
80 static inline void
81 volk_32fc_magnitude_32f_u_avx(float* magnitudeVector, const lv_32fc_t* complexVector,
82  unsigned int num_points)
83 {
84  unsigned int number = 0;
85  const unsigned int eighthPoints = num_points / 8;
86 
87  const float* complexVectorPtr = (float*)complexVector;
88  float* magnitudeVectorPtr = magnitudeVector;
89 
90  __m256 cplxValue1, cplxValue2, complex1, complex2, result;
91  for(;number < eighthPoints; number++){
92  cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
93  complexVectorPtr += 8;
94 
95  cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
96  complexVectorPtr += 8;
97 
98  cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
99  cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
100 
101  complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
102  complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
103 
104  result = _mm256_hadd_ps(complex1, complex2); // Add the I2 and Q2 values
105 
106  result = _mm256_sqrt_ps(result);
107 
108  _mm256_storeu_ps(magnitudeVectorPtr, result);
109  magnitudeVectorPtr += 8;
110  }
111 
112  number = eighthPoints * 8;
113  for(; number < num_points; number++){
114  float val1Real = *complexVectorPtr++;
115  float val1Imag = *complexVectorPtr++;
116  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
117  }
118 }
119 #endif /* LV_HAVE_AVX */
120 
121 #ifdef LV_HAVE_SSE3
122 #include <pmmintrin.h>
123 
124 static inline void
125 volk_32fc_magnitude_32f_u_sse3(float* magnitudeVector, const lv_32fc_t* complexVector,
126  unsigned int num_points)
127 {
128  unsigned int number = 0;
129  const unsigned int quarterPoints = num_points / 4;
130 
131  const float* complexVectorPtr = (float*)complexVector;
132  float* magnitudeVectorPtr = magnitudeVector;
133 
134  __m128 cplxValue1, cplxValue2, result;
135  for(;number < quarterPoints; number++){
136  cplxValue1 = _mm_loadu_ps(complexVectorPtr);
137  complexVectorPtr += 4;
138 
139  cplxValue2 = _mm_loadu_ps(complexVectorPtr);
140  complexVectorPtr += 4;
141 
142  cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
143  cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
144 
145  result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
146 
147  result = _mm_sqrt_ps(result);
148 
149  _mm_storeu_ps(magnitudeVectorPtr, result);
150  magnitudeVectorPtr += 4;
151  }
152 
153  number = quarterPoints * 4;
154  for(; number < num_points; number++){
155  float val1Real = *complexVectorPtr++;
156  float val1Imag = *complexVectorPtr++;
157  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
158  }
159 }
160 #endif /* LV_HAVE_SSE3 */
161 
162 
163 #ifdef LV_HAVE_SSE
164 #include <xmmintrin.h>
165 
166 static inline void
167 volk_32fc_magnitude_32f_u_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points)
168 {
169  unsigned int number = 0;
170  const unsigned int quarterPoints = num_points / 4;
171 
172  const float* complexVectorPtr = (float*)complexVector;
173  float* magnitudeVectorPtr = magnitudeVector;
174 
175  __m128 cplxValue1, cplxValue2, iValue, qValue, result;
176  for(;number < quarterPoints; number++){
177  cplxValue1 = _mm_loadu_ps(complexVectorPtr);
178  complexVectorPtr += 4;
179 
180  cplxValue2 = _mm_loadu_ps(complexVectorPtr);
181  complexVectorPtr += 4;
182 
183  // Arrange in i1i2i3i4 format
184  iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
185  // Arrange in q1q2q3q4 format
186  qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
187 
188  iValue = _mm_mul_ps(iValue, iValue); // Square the I values
189  qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
190 
191  result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
192 
193  result = _mm_sqrt_ps(result);
194 
195  _mm_storeu_ps(magnitudeVectorPtr, result);
196  magnitudeVectorPtr += 4;
197  }
198 
199  number = quarterPoints * 4;
200  for(; number < num_points; number++){
201  float val1Real = *complexVectorPtr++;
202  float val1Imag = *complexVectorPtr++;
203  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
204  }
205 }
206 #endif /* LV_HAVE_SSE */
207 
208 
209 #ifdef LV_HAVE_GENERIC
210 
211 static inline void
212 volk_32fc_magnitude_32f_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points)
213 {
214  const float* complexVectorPtr = (float*)complexVector;
215  float* magnitudeVectorPtr = magnitudeVector;
216  unsigned int number = 0;
217  for(number = 0; number < num_points; number++){
218  const float real = *complexVectorPtr++;
219  const float imag = *complexVectorPtr++;
220  *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag));
221  }
222 }
223 #endif /* LV_HAVE_GENERIC */
224 
225 
226 
227 #endif /* INCLUDED_volk_32fc_magnitude_32f_u_H */
228 #ifndef INCLUDED_volk_32fc_magnitude_32f_a_H
229 #define INCLUDED_volk_32fc_magnitude_32f_a_H
230 
231 #include <inttypes.h>
232 #include <stdio.h>
233 #include <math.h>
234 
235 #ifdef LV_HAVE_AVX
236 #include <immintrin.h>
237 
238 static inline void
239 volk_32fc_magnitude_32f_a_avx(float* magnitudeVector, const lv_32fc_t* complexVector,
240  unsigned int num_points)
241 {
242  unsigned int number = 0;
243  const unsigned int eighthPoints = num_points / 8;
244 
245  const float* complexVectorPtr = (float*)complexVector;
246  float* magnitudeVectorPtr = magnitudeVector;
247 
248  __m256 cplxValue1, cplxValue2, complex1, complex2, result;
249  for(;number < eighthPoints; number++){
250  cplxValue1 = _mm256_load_ps(complexVectorPtr);
251  complexVectorPtr += 8;
252 
253  cplxValue2 = _mm256_load_ps(complexVectorPtr);
254  complexVectorPtr += 8;
255 
256  cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
257  cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
258 
259  complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
260  complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
261 
262  result = _mm256_hadd_ps(complex1, complex2); // Add the I2 and Q2 values
263 
264  result = _mm256_sqrt_ps(result);
265 
266  _mm256_store_ps(magnitudeVectorPtr, result);
267  magnitudeVectorPtr += 8;
268  }
269 
270  number = eighthPoints * 8;
271  for(; number < num_points; number++){
272  float val1Real = *complexVectorPtr++;
273  float val1Imag = *complexVectorPtr++;
274  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
275  }
276 }
277 #endif /* LV_HAVE_AVX */
278 
279 #ifdef LV_HAVE_SSE3
280 #include <pmmintrin.h>
281 
282 static inline void
283 volk_32fc_magnitude_32f_a_sse3(float* magnitudeVector, const lv_32fc_t* complexVector,
284  unsigned int num_points)
285 {
286  unsigned int number = 0;
287  const unsigned int quarterPoints = num_points / 4;
288 
289  const float* complexVectorPtr = (float*)complexVector;
290  float* magnitudeVectorPtr = magnitudeVector;
291 
292  __m128 cplxValue1, cplxValue2, result;
293  for(;number < quarterPoints; number++){
294  cplxValue1 = _mm_load_ps(complexVectorPtr);
295  complexVectorPtr += 4;
296 
297  cplxValue2 = _mm_load_ps(complexVectorPtr);
298  complexVectorPtr += 4;
299 
300  cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
301  cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
302 
303  result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
304 
305  result = _mm_sqrt_ps(result);
306 
307  _mm_store_ps(magnitudeVectorPtr, result);
308  magnitudeVectorPtr += 4;
309  }
310 
311  number = quarterPoints * 4;
312  for(; number < num_points; number++){
313  float val1Real = *complexVectorPtr++;
314  float val1Imag = *complexVectorPtr++;
315  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
316  }
317 }
318 #endif /* LV_HAVE_SSE3 */
319 
320 #ifdef LV_HAVE_SSE
321 #include <xmmintrin.h>
322 
323 static inline void
324 volk_32fc_magnitude_32f_a_sse(float* magnitudeVector, const lv_32fc_t* complexVector,
325  unsigned int num_points)
326 {
327  unsigned int number = 0;
328  const unsigned int quarterPoints = num_points / 4;
329 
330  const float* complexVectorPtr = (float*)complexVector;
331  float* magnitudeVectorPtr = magnitudeVector;
332 
333  __m128 cplxValue1, cplxValue2, iValue, qValue, result;
334  for(;number < quarterPoints; number++){
335  cplxValue1 = _mm_load_ps(complexVectorPtr);
336  complexVectorPtr += 4;
337 
338  cplxValue2 = _mm_load_ps(complexVectorPtr);
339  complexVectorPtr += 4;
340 
341  // Arrange in i1i2i3i4 format
342  iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
343  // Arrange in q1q2q3q4 format
344  qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
345 
346  iValue = _mm_mul_ps(iValue, iValue); // Square the I values
347  qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
348 
349  result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
350 
351  result = _mm_sqrt_ps(result);
352 
353  _mm_store_ps(magnitudeVectorPtr, result);
354  magnitudeVectorPtr += 4;
355  }
356 
357  number = quarterPoints * 4;
358  for(; number < num_points; number++){
359  float val1Real = *complexVectorPtr++;
360  float val1Imag = *complexVectorPtr++;
361  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
362  }
363 }
364 #endif /* LV_HAVE_SSE */
365 
366 
367 #ifdef LV_HAVE_GENERIC
368 
369 static inline void
370 volk_32fc_magnitude_32f_a_generic(float* magnitudeVector, const lv_32fc_t* complexVector,
371  unsigned int num_points)
372 {
373  const float* complexVectorPtr = (float*)complexVector;
374  float* magnitudeVectorPtr = magnitudeVector;
375  unsigned int number = 0;
376  for(number = 0; number < num_points; number++){
377  const float real = *complexVectorPtr++;
378  const float imag = *complexVectorPtr++;
379  *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag));
380  }
381 }
382 #endif /* LV_HAVE_GENERIC */
383 
384 
385 #ifdef LV_HAVE_NEON
386 #include <arm_neon.h>
387 
388 static inline void
389 volk_32fc_magnitude_32f_neon(float* magnitudeVector, const lv_32fc_t* complexVector,
390  unsigned int num_points)
391 {
392  unsigned int number;
393  unsigned int quarter_points = num_points / 4;
394  const float* complexVectorPtr = (float*)complexVector;
395  float* magnitudeVectorPtr = magnitudeVector;
396 
397  float32x4x2_t complex_vec;
398  float32x4_t magnitude_vec;
399  for(number = 0; number < quarter_points; number++){
400  complex_vec = vld2q_f32(complexVectorPtr);
401  complex_vec.val[0] = vmulq_f32(complex_vec.val[0], complex_vec.val[0]);
402  magnitude_vec = vmlaq_f32(complex_vec.val[0], complex_vec.val[1], complex_vec.val[1]);
403  magnitude_vec = vrsqrteq_f32(magnitude_vec);
404  magnitude_vec = vrecpeq_f32( magnitude_vec ); // no plain ol' sqrt
405  vst1q_f32(magnitudeVectorPtr, magnitude_vec);
406 
407  complexVectorPtr += 8;
408  magnitudeVectorPtr += 4;
409  }
410 
411  for(number = quarter_points*4; number < num_points; number++){
412  const float real = *complexVectorPtr++;
413  const float imag = *complexVectorPtr++;
414  *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag));
415  }
416 }
417 #endif /* LV_HAVE_NEON */
418 
419 
420 #ifdef LV_HAVE_NEON
421 /*!
422  \brief Calculates the magnitude of the complexVector and stores the results in the magnitudeVector
423 
424  This is an approximation from "Streamlining Digital Signal Processing" by
425  Richard Lyons. Apparently max error is about 1% and mean error is about 0.6%.
426  The basic idea is to do a weighted sum of the abs. value of imag and real parts
427  where weight A is always assigned to max(imag, real) and B is always min(imag,real).
428  There are two pairs of cofficients chosen based on whether min <= 0.4142 max.
429  This method is called equiripple-error magnitude estimation proposed by Filip in '73
430 
431  \param complexVector The vector containing the complex input values
432  \param magnitudeVector The vector containing the real output values
433  \param num_points The number of complex values in complexVector to be calculated and stored into cVector
434 */
435 static inline void
436 volk_32fc_magnitude_32f_neon_fancy_sweet(float* magnitudeVector, const lv_32fc_t* complexVector,
437  unsigned int num_points)
438 {
439  unsigned int number;
440  unsigned int quarter_points = num_points / 4;
441  const float* complexVectorPtr = (float*)complexVector;
442  float* magnitudeVectorPtr = magnitudeVector;
443 
444  const float threshold = 0.4142135;
445 
446  float32x4_t a_vec, b_vec, a_high, a_low, b_high, b_low;
447  a_high = vdupq_n_f32( 0.84 );
448  b_high = vdupq_n_f32( 0.561);
449  a_low = vdupq_n_f32( 0.99 );
450  b_low = vdupq_n_f32( 0.197);
451 
452  uint32x4_t comp0, comp1;
453 
454  float32x4x2_t complex_vec;
455  float32x4_t min_vec, max_vec, magnitude_vec;
456  float32x4_t real_abs, imag_abs;
457  for(number = 0; number < quarter_points; number++){
458  complex_vec = vld2q_f32(complexVectorPtr);
459 
460  real_abs = vabsq_f32(complex_vec.val[0]);
461  imag_abs = vabsq_f32(complex_vec.val[1]);
462 
463  min_vec = vminq_f32(real_abs, imag_abs);
464  max_vec = vmaxq_f32(real_abs, imag_abs);
465 
466  // effective branch to choose coefficient pair.
467  comp0 = vcgtq_f32(min_vec, vmulq_n_f32(max_vec, threshold));
468  comp1 = vcleq_f32(min_vec, vmulq_n_f32(max_vec, threshold));
469 
470  // and 0s or 1s with coefficients from previous effective branch
471  a_vec = (float32x4_t)vaddq_s32(vandq_s32((int32x4_t)comp0, (int32x4_t)a_high),
472  vandq_s32((int32x4_t)comp1, (int32x4_t)a_low));
473  b_vec = (float32x4_t)vaddq_s32(vandq_s32((int32x4_t)comp0, (int32x4_t)b_high),
474  vandq_s32((int32x4_t)comp1, (int32x4_t)b_low));
475 
476  // coefficients chosen, do the weighted sum
477  min_vec = vmulq_f32(min_vec, b_vec);
478  max_vec = vmulq_f32(max_vec, a_vec);
479 
480  magnitude_vec = vaddq_f32(min_vec, max_vec);
481  vst1q_f32(magnitudeVectorPtr, magnitude_vec);
482 
483  complexVectorPtr += 8;
484  magnitudeVectorPtr += 4;
485  }
486 
487  for(number = quarter_points*4; number < num_points; number++){
488  const float real = *complexVectorPtr++;
489  const float imag = *complexVectorPtr++;
490  *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag));
491  }
492 }
493 #endif /* LV_HAVE_NEON */
494 
495 
496 #ifdef LV_HAVE_ORC
497 
498 extern void
499 volk_32fc_magnitude_32f_a_orc_impl(float* magnitudeVector, const lv_32fc_t* complexVector,
500  unsigned int num_points);
501 
502 static inline void
503 volk_32fc_magnitude_32f_u_orc(float* magnitudeVector, const lv_32fc_t* complexVector,
504  unsigned int num_points)
505 {
506  volk_32fc_magnitude_32f_a_orc_impl(magnitudeVector, complexVector, num_points);
507 }
508 #endif /* LV_HAVE_ORC */
509 
510 
511 #endif /* INCLUDED_volk_32fc_magnitude_32f_a_H */
float complex lv_32fc_t
Definition: volk_complex.h:56