GNU Radio Manual and C++ API Reference  3.7.7
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
volk_32fc_x2_s32f_square_dist_scalar_mult_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 /*!
24  * \page volk_32fc_x2_s32f_square_dist_scalar_mult_32f
25  *
26  * \b Overview
27  *
28  * Calculates the square distance between a single complex input and each
29  * point in a complex vector, scaled by a scalar value.
30  *
31  * <b>Dispatcher Prototype</b>
32  * \code
33  * void volk_32fc_x2_s32f_square_dist_scalar_mult_32f(float* target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int num_points)
34  * \endcode
35  *
36  * \b Inputs
37  * \li src0: The complex input. Only the first point is used.
38  * \li points: A complex vector of reference points.
39  * \li scalar: A float to scale the distances by
40  * \li num_points: The number of data points.
41  *
42  * \b Outputs
43  * \li target: A vector of distances between src0 and the vector of points.
44  *
45  * \b Example
46  * Calculate the distance between an input and reference points in a square
47  * 16-qam constellation. Normalize distances by the area of the constellation.
48  * \code
49  * int N = 16;
50  * unsigned int alignment = volk_get_alignment();
51  * lv_32fc_t* constellation = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
52  * lv_32fc_t* rx = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
53  * float* out = (float*)volk_malloc(sizeof(float)*N, alignment);
54  * float const_vals[] = {-3, -1, 1, 3};
55  *
56  * unsigned int jj = 0;
57  * for(unsigned int ii = 0; ii < N; ++ii){
58  * constellation[ii] = lv_cmake(const_vals[ii%4], const_vals[jj]);
59  * if((ii+1)%4 == 0) ++jj;
60  * }
61  *
62  * *rx = lv_cmake(0.5f, 2.f);
63  * float scale = 1.f/64.f; // 1 / constellation area
64  *
65  * volk_32fc_x2_s32f_square_dist_scalar_mult_32f(out, rx, constellation, scale, N);
66  *
67  * printf("Distance from each constellation point:\n");
68  * for(unsigned int ii = 0; ii < N; ++ii){
69  * printf("%.4f ", out[ii]);
70  * if((ii+1)%4 == 0) printf("\n");
71  * }
72  *
73  * volk_free(rx);
74  * volk_free(constellation);
75  * volk_free(out);
76  * \endcode
77  */
78 
79 #ifndef INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_H
80 #define INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_H
81 
82 #include<inttypes.h>
83 #include<stdio.h>
84 #include<volk/volk_complex.h>
85 #include <string.h>
86 
87 #ifdef LV_HAVE_SSE3
88 #include<xmmintrin.h>
89 #include<pmmintrin.h>
90 
91 static inline void
92 volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points,
93  float scalar, unsigned int num_points)
94 {
95  const unsigned int num_bytes = num_points*8;
96 
97  __m128 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
98 
99  lv_32fc_t diff;
100  memset(&diff, 0x0, 2*sizeof(float));
101 
102  float sq_dist = 0.0;
103  int bound = num_bytes >> 5;
104  int leftovers0 = (num_bytes >> 4) & 1;
105  int leftovers1 = (num_bytes >> 3) & 1;
106  int i = 0;
107 
108  xmm1 = _mm_setzero_ps();
109  xmm1 = _mm_loadl_pi(xmm1, (__m64*)src0);
110  xmm2 = _mm_load_ps((float*)&points[0]);
111  xmm8 = _mm_load1_ps(&scalar);
112  xmm1 = _mm_movelh_ps(xmm1, xmm1);
113  xmm3 = _mm_load_ps((float*)&points[2]);
114 
115  for(; i < bound - 1; ++i) {
116  xmm4 = _mm_sub_ps(xmm1, xmm2);
117  xmm5 = _mm_sub_ps(xmm1, xmm3);
118  points += 4;
119  xmm6 = _mm_mul_ps(xmm4, xmm4);
120  xmm7 = _mm_mul_ps(xmm5, xmm5);
121 
122  xmm2 = _mm_load_ps((float*)&points[0]);
123 
124  xmm4 = _mm_hadd_ps(xmm6, xmm7);
125 
126  xmm3 = _mm_load_ps((float*)&points[2]);
127 
128  xmm4 = _mm_mul_ps(xmm4, xmm8);
129 
130  _mm_store_ps(target, xmm4);
131 
132  target += 4;
133  }
134 
135  xmm4 = _mm_sub_ps(xmm1, xmm2);
136  xmm5 = _mm_sub_ps(xmm1, xmm3);
137 
138  points += 4;
139  xmm6 = _mm_mul_ps(xmm4, xmm4);
140  xmm7 = _mm_mul_ps(xmm5, xmm5);
141 
142  xmm4 = _mm_hadd_ps(xmm6, xmm7);
143 
144  xmm4 = _mm_mul_ps(xmm4, xmm8);
145 
146  _mm_store_ps(target, xmm4);
147 
148  target += 4;
149 
150  for(i = 0; i < leftovers0; ++i) {
151  xmm2 = _mm_load_ps((float*)&points[0]);
152 
153  xmm4 = _mm_sub_ps(xmm1, xmm2);
154 
155  points += 2;
156 
157  xmm6 = _mm_mul_ps(xmm4, xmm4);
158 
159  xmm4 = _mm_hadd_ps(xmm6, xmm6);
160 
161  xmm4 = _mm_mul_ps(xmm4, xmm8);
162 
163  _mm_storeh_pi((__m64*)target, xmm4);
164 
165  target += 2;
166  }
167 
168  for(i = 0; i < leftovers1; ++i) {
169 
170  diff = src0[0] - points[0];
171 
172  sq_dist = scalar * (lv_creal(diff) * lv_creal(diff) + lv_cimag(diff) * lv_cimag(diff));
173 
174  target[0] = sq_dist;
175  }
176 }
177 
178 #endif /*LV_HAVE_SSE3*/
179 
180 
181 #ifdef LV_HAVE_GENERIC
182 static inline void
183 volk_32fc_x2_s32f_square_dist_scalar_mult_32f_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points,
184  float scalar, unsigned int num_points)
185 {
186  const unsigned int num_bytes = num_points*8;
187 
188  lv_32fc_t diff;
189  float sq_dist;
190  unsigned int i = 0;
191 
192  for(; i < num_bytes >> 3; ++i) {
193  diff = src0[0] - points[i];
194 
195  sq_dist = scalar * (lv_creal(diff) * lv_creal(diff) + lv_cimag(diff) * lv_cimag(diff));
196 
197  target[i] = sq_dist;
198  }
199 }
200 
201 #endif /*LV_HAVE_GENERIC*/
202 
203 
204 #endif /*INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_H*/
float complex lv_32fc_t
Definition: volk_complex.h:56
#define lv_creal(x)
Definition: volk_complex.h:76
#define lv_cimag(x)
Definition: volk_complex.h:78