GNU Radio Manual and C++ API Reference  3.7.7
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
volk_16ic_s32f_magnitude_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 /*!
24  * \page volk_16ic_s32f_magnitude_32f
25  *
26  * \b Overview
27  *
28  * Computes the magnitude of the complexVector and stores the results
29  * in the magnitudeVector as a scaled floating point number.
30  *
31  * <b>Dispatcher Prototype</b>
32  * \code
33  * void volk_16ic_s32f_magnitude_32f(float* magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned int num_points)
34  * \endcode
35  *
36  * \b Inputs
37  * \li complexVector: The complex input vector of complex 16-bit shorts.
38  * \li scalar: The value by which each sample of the input complex vector is divided before its magnitude is computed.
39  * \li num_points: The number of samples.
40  *
41  * \b Outputs
42  * \li magnitudeVector: The magnitude of the complex values.
43  *
44  * \b Example
45  * \code
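46  * unsigned int N = 10000;
47  * lv_16sc_t* x = (lv_16sc_t*)volk_malloc(sizeof(lv_16sc_t) * N, volk_get_alignment());
48  * float* t = (float*)volk_malloc(sizeof(float) * N, volk_get_alignment());
49  * volk_16ic_s32f_magnitude_32f(t, x, 32768.f, N); // fill x with N samples before this call
50  * volk_free(x);
51  * volk_free(t);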
52  * \endcode
53  */
54 
55 #ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_a_H
56 #define INCLUDED_volk_16ic_s32f_magnitude_32f_a_H
57 
58 #include <volk/volk_common.h>
59 #include <inttypes.h>
60 #include <stdio.h>
61 #include <math.h>
62 
63 #ifdef LV_HAVE_SSE3
64 #include <pmmintrin.h>
65 
66 static inline void
67 volk_16ic_s32f_magnitude_32f_a_sse3(float* magnitudeVector, const lv_16sc_t* complexVector,
68  const float scalar, unsigned int num_points)
69 {
70  unsigned int number = 0;
71  const unsigned int quarterPoints = num_points / 4;
72 
73  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
74  float* magnitudeVectorPtr = magnitudeVector;
75 
76  __m128 invScalar = _mm_set_ps1(1.0/scalar);
77 
78  __m128 cplxValue1, cplxValue2, result;
79 
80  __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8];
81 
82  for(;number < quarterPoints; number++){
83 
84  inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
85  inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
86  inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
87  inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
88 
89  inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
90  inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
91  inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
92  inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
93 
94  cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
95  cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
96 
97  complexVectorPtr += 8;
98 
99  cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
100  cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
101 
102  cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
103  cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
104 
105  result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
106 
107  result = _mm_sqrt_ps(result); // Square root the values
108 
109  _mm_store_ps(magnitudeVectorPtr, result);
110 
111  magnitudeVectorPtr += 4;
112  }
113 
114  number = quarterPoints * 4;
115  magnitudeVectorPtr = &magnitudeVector[number];
116  complexVectorPtr = (const int16_t*)&complexVector[number];
117  for(; number < num_points; number++){
118  float val1Real = (float)(*complexVectorPtr++) / scalar;
119  float val1Imag = (float)(*complexVectorPtr++) / scalar;
120  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
121  }
122 }
123 #endif /* LV_HAVE_SSE3 */
124 
125 #ifdef LV_HAVE_SSE
126 #include <xmmintrin.h>
127 
128 static inline void
129 volk_16ic_s32f_magnitude_32f_a_sse(float* magnitudeVector, const lv_16sc_t* complexVector,
130  const float scalar, unsigned int num_points)
131 {
132  unsigned int number = 0;
133  const unsigned int quarterPoints = num_points / 4;
134 
135  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
136  float* magnitudeVectorPtr = magnitudeVector;
137 
138  const float iScalar = 1.0 / scalar;
139  __m128 invScalar = _mm_set_ps1(iScalar);
140 
141  __m128 cplxValue1, cplxValue2, result, re, im;
142 
143  __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8];
144 
145  for(;number < quarterPoints; number++){
146  inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
147  inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
148  inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
149  inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
150 
151  inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
152  inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
153  inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
154  inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
155 
156  cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
157  cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
158 
159  re = _mm_shuffle_ps(cplxValue1, cplxValue2, 0x88);
160  im = _mm_shuffle_ps(cplxValue1, cplxValue2, 0xdd);
161 
162  complexVectorPtr += 8;
163 
164  cplxValue1 = _mm_mul_ps(re, invScalar);
165  cplxValue2 = _mm_mul_ps(im, invScalar);
166 
167  cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
168  cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
169 
170  result = _mm_add_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
171 
172  result = _mm_sqrt_ps(result); // Square root the values
173 
174  _mm_store_ps(magnitudeVectorPtr, result);
175 
176  magnitudeVectorPtr += 4;
177  }
178 
179  number = quarterPoints * 4;
180  magnitudeVectorPtr = &magnitudeVector[number];
181  complexVectorPtr = (const int16_t*)&complexVector[number];
182  for(; number < num_points; number++){
183  float val1Real = (float)(*complexVectorPtr++) * iScalar;
184  float val1Imag = (float)(*complexVectorPtr++) * iScalar;
185  *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
186  }
187 }
188 
189 
190 #endif /* LV_HAVE_SSE */
191 
192 #ifdef LV_HAVE_GENERIC
193 
194 static inline void
195 volk_16ic_s32f_magnitude_32f_generic(float* magnitudeVector, const lv_16sc_t* complexVector,
196  const float scalar, unsigned int num_points)
197 {
198  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
199  float* magnitudeVectorPtr = magnitudeVector;
200  unsigned int number = 0;
201  const float invScalar = 1.0 / scalar;
202  for(number = 0; number < num_points; number++){
203  float real = ( (float) (*complexVectorPtr++)) * invScalar;
204  float imag = ( (float) (*complexVectorPtr++)) * invScalar;
205  *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag));
206  }
207 }
208 #endif /* LV_HAVE_GENERIC */
209 
210 #ifdef LV_HAVE_ORC_DISABLED
211 
212 extern void
213 volk_16ic_s32f_magnitude_32f_a_orc_impl(float* magnitudeVector, const lv_16sc_t* complexVector,
214  const float scalar, unsigned int num_points);
215 
216 static inline void
217 volk_16ic_s32f_magnitude_32f_u_orc(float* magnitudeVector, const lv_16sc_t* complexVector,
218  const float scalar, unsigned int num_points)
219 {
220  volk_16ic_s32f_magnitude_32f_a_orc_impl(magnitudeVector, complexVector, scalar, num_points);
221 }
222 #endif /* LV_HAVE_ORC_DISABLED */
223 
224 
225 #endif /* INCLUDED_volk_16ic_s32f_magnitude_32f_a_H */
short complex lv_16sc_t
Definition: volk_complex.h:53
signed short int16_t
Definition: stdint.h:76
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:27