GNU Radio Manual and C++ API Reference  3.7.7
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
volk_32fc_s32f_atan2_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 /*!
24  * \page volk_32fc_s32f_atan2_32f
25  *
26  * \b Overview
27  *
28  * Computes atan2(imag, real), i.e. the phase angle, of each value in a
29  * complex vector and divides the result by a normalization factor.
30  *
31  * <b>Dispatcher Prototype</b>
32  * \code
33  * void volk_32fc_s32f_atan2_32f(float* outputVector, const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points)
34  * \endcode
35  *
36  * \b Inputs
37  * \li complexVector: The byte-aligned input vector containing interleaved IQ data (I = cos, Q = sin).
38  * \li normalizeFactor: The atan results are divided by this normalization factor.
39  * \li num_points: The number of complex values in \p complexVector.
40  *
41  * \b Outputs
42  * \li outputVector: The vector where the results will be stored.
43  *
44  * \b Example
45  * Calculate the arctangent of points around the unit circle.
46  * \code
47  * int N = 10;
48  * unsigned int alignment = volk_get_alignment();
49  * lv_32fc_t* in = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
50  * float* out = (float*)volk_malloc(sizeof(float)*N, alignment);
51  * float scale = 1.f; // we want unit circle
52  *
53  * for(unsigned int ii = 0; ii < N/2; ++ii){
54  * // Generate points around the unit circle
55  * float real = -4.f * ((float)ii / (float)N) + 1.f;
56  * float imag = std::sqrt(1.f - real * real);
57  * in[ii] = lv_cmake(real, imag);
58  * in[ii+N/2] = lv_cmake(-real, -imag);
59  * }
60  *
61  * volk_32fc_s32f_atan2_32f(out, in, scale, N);
62  *
63  * for(unsigned int ii = 0; ii < N; ++ii){
64  * printf("atan2(%1.2f, %1.2f) = %1.2f\n",
65  * lv_cimag(in[ii]), lv_creal(in[ii]), out[ii]);
66  * }
67  *
68  * volk_free(in);
69  * volk_free(out);
70  * \endcode
71  */
72 
73 
74 #ifndef INCLUDED_volk_32fc_s32f_atan2_32f_a_H
75 #define INCLUDED_volk_32fc_s32f_atan2_32f_a_H
76 
77 #include <inttypes.h>
78 #include <stdio.h>
79 #include <math.h>
80 
81 #ifdef LV_HAVE_SSE4_1
82 #include <smmintrin.h>
83 
84 #ifdef LV_HAVE_LIB_SIMDMATH
85 #include <simdmath.h>
86 #endif /* LV_HAVE_LIB_SIMDMATH */
87 
88 static inline void volk_32fc_s32f_atan2_32f_a_sse4_1(float* outputVector, const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){
89  const float* complexVectorPtr = (float*)complexVector;
90  float* outPtr = outputVector;
91 
92  unsigned int number = 0;
93  const float invNormalizeFactor = 1.0 / normalizeFactor;
94 
95 #ifdef LV_HAVE_LIB_SIMDMATH
96  const unsigned int quarterPoints = num_points / 4;
97  __m128 testVector = _mm_set_ps1(2*M_PI);
98  __m128 correctVector = _mm_set_ps1(M_PI);
99  __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
100  __m128 phase;
101  __m128 complex1, complex2, iValue, qValue;
102  __m128 keepMask;
103 
104  for (; number < quarterPoints; number++) {
105  // Load IQ data:
106  complex1 = _mm_load_ps(complexVectorPtr);
107  complexVectorPtr += 4;
108  complex2 = _mm_load_ps(complexVectorPtr);
109  complexVectorPtr += 4;
110  // Deinterleave IQ data:
111  iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2,0,2,0));
112  qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3,1,3,1));
113  // Arctan to get phase:
114  phase = atan2f4(qValue, iValue);
115  // When Q = 0 and I < 0, atan2f4 sucks and returns 2pi vice pi.
116  // Compare to 2pi:
117  keepMask = _mm_cmpneq_ps(phase,testVector);
118  phase = _mm_blendv_ps(correctVector, phase, keepMask);
119  // done with above correction.
120  phase = _mm_mul_ps(phase, vNormalizeFactor);
121  _mm_store_ps((float*)outPtr, phase);
122  outPtr += 4;
123  }
124  number = quarterPoints * 4;
125 #endif /* LV_HAVE_SIMDMATH_H */
126 
127  for (; number < num_points; number++) {
128  const float real = *complexVectorPtr++;
129  const float imag = *complexVectorPtr++;
130  *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
131  }
132 }
133 #endif /* LV_HAVE_SSE4_1 */
134 
135 
136 #ifdef LV_HAVE_SSE
137 #include <xmmintrin.h>
138 
139 #ifdef LV_HAVE_LIB_SIMDMATH
140 #include <simdmath.h>
141 #endif /* LV_HAVE_LIB_SIMDMATH */
142 
143 static inline void volk_32fc_s32f_atan2_32f_a_sse(float* outputVector, const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){
144  const float* complexVectorPtr = (float*)complexVector;
145  float* outPtr = outputVector;
146 
147  unsigned int number = 0;
148  const float invNormalizeFactor = 1.0 / normalizeFactor;
149 
150 #ifdef LV_HAVE_LIB_SIMDMATH
151  const unsigned int quarterPoints = num_points / 4;
152  __m128 testVector = _mm_set_ps1(2*M_PI);
153  __m128 correctVector = _mm_set_ps1(M_PI);
154  __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
155  __m128 phase;
156  __m128 complex1, complex2, iValue, qValue;
157  __m128 mask;
158  __m128 keepMask;
159 
160  for (; number < quarterPoints; number++) {
161  // Load IQ data:
162  complex1 = _mm_load_ps(complexVectorPtr);
163  complexVectorPtr += 4;
164  complex2 = _mm_load_ps(complexVectorPtr);
165  complexVectorPtr += 4;
166  // Deinterleave IQ data:
167  iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2,0,2,0));
168  qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3,1,3,1));
169  // Arctan to get phase:
170  phase = atan2f4(qValue, iValue);
171  // When Q = 0 and I < 0, atan2f4 sucks and returns 2pi vice pi.
172  // Compare to 2pi:
173  keepMask = _mm_cmpneq_ps(phase,testVector);
174  phase = _mm_and_ps(phase, keepMask);
175  mask = _mm_andnot_ps(keepMask, correctVector);
176  phase = _mm_or_ps(phase, mask);
177  // done with above correction.
178  phase = _mm_mul_ps(phase, vNormalizeFactor);
179  _mm_store_ps((float*)outPtr, phase);
180  outPtr += 4;
181  }
182  number = quarterPoints * 4;
183 #endif /* LV_HAVE_SIMDMATH_H */
184 
185  for (; number < num_points; number++) {
186  const float real = *complexVectorPtr++;
187  const float imag = *complexVectorPtr++;
188  *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
189  }
190 }
191 #endif /* LV_HAVE_SSE */
192 
193 #ifdef LV_HAVE_GENERIC
194 
195 static inline void volk_32fc_s32f_atan2_32f_generic(float* outputVector, const lv_32fc_t* inputVector, const float normalizeFactor, unsigned int num_points){
196  float* outPtr = outputVector;
197  const float* inPtr = (float*)inputVector;
198  const float invNormalizeFactor = 1.0 / normalizeFactor;
199  unsigned int number;
200  for ( number = 0; number < num_points; number++) {
201  const float real = *inPtr++;
202  const float imag = *inPtr++;
203  *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
204  }
205 }
206 #endif /* LV_HAVE_GENERIC */
207 
208 
209 
210 
211 #endif /* INCLUDED_volk_32fc_s32f_atan2_32f_a_H */
#define M_PI
Definition: volk/cmake/msvc/config.h:42
float complex lv_32fc_t
Definition: volk_complex.h:56