GNU Radio Manual and C++ API Reference  3.7.7
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
volk_32fc_x2_square_dist_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 /*!
24  * \page volk_32fc_x2_square_dist_32f
25  *
26  * \b Overview
27  *
28  * Calculates the square distance between a single complex input and each
29  * point in a complex vector.
30  *
31  * <b>Dispatcher Prototype</b>
32  * \code
33  * void volk_32fc_x2_square_dist_32f(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_points);
34  * \endcode
35  *
36  * \b Inputs
37  * \li src0: The complex input. Only the first point is used.
38  * \li points: A complex vector of reference points.
39  * \li num_points: The number of data points.
40  *
41  * \b Outputs
42  * \li target: A vector of squared distances between src0 and each point in the vector of points.
43  *
44  * \b Example
45  * Calculate the distance between an input and reference points in a square
46  * 16-qam constellation.
47  * \code
48  * int N = 16;
49  * unsigned int alignment = volk_get_alignment();
50  * lv_32fc_t* constellation = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
51  * lv_32fc_t* rx = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
52  * float* out = (float*)volk_malloc(sizeof(float)*N, alignment);
53  * float const_vals[] = {-3, -1, 1, 3};
54  *
55  * // Generate 16-QAM constellation points
56  * unsigned int jj = 0;
57  * for(unsigned int ii = 0; ii < N; ++ii){
58  * constellation[ii] = lv_cmake(const_vals[ii%4], const_vals[jj]);
59  * if((ii+1)%4 == 0) ++jj;
60  * }
61  *
62  * *rx = lv_cmake(0.5f, 2.f);
63  *
64  * volk_32fc_x2_square_dist_32f(out, rx, constellation, N);
65  *
66  * printf("Distance from each constellation point:\n");
67  * for(unsigned int ii = 0; ii < N; ++ii){
68  * printf("%.4f ", out[ii]);
69  * if((ii+1)%4 == 0) printf("\n");
70  * }
71  *
72  * volk_free(rx);
73  * volk_free(constellation);
74  * volk_free(out);
75  * \endcode
76  */
77 
78 #ifndef INCLUDED_volk_32fc_x2_square_dist_32f_a_H
79 #define INCLUDED_volk_32fc_x2_square_dist_32f_a_H
80 
81 #include<inttypes.h>
82 #include<stdio.h>
83 #include<volk/volk_complex.h>
84 
85 #ifdef LV_HAVE_SSE3
86 #include<xmmintrin.h>
87 #include<pmmintrin.h>
88 
89 static inline void
90 volk_32fc_x2_square_dist_32f_a_sse3(float* target, lv_32fc_t* src0, lv_32fc_t* points,
91  unsigned int num_points)
92 {
93  const unsigned int num_bytes = num_points*8;
94 
95  __m128 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
96 
97  lv_32fc_t diff;
98  float sq_dist;
99  int bound = num_bytes >> 5;
100  int leftovers0 = (num_bytes >> 4) & 1;
101  int leftovers1 = (num_bytes >> 3) & 1;
102  int i = 0;
103 
104  xmm1 = _mm_setzero_ps();
105  xmm1 = _mm_loadl_pi(xmm1, (__m64*)src0);
106  xmm2 = _mm_load_ps((float*)&points[0]);
107  xmm1 = _mm_movelh_ps(xmm1, xmm1);
108  xmm3 = _mm_load_ps((float*)&points[2]);
109 
110  for(; i < bound - 1; ++i) {
111  xmm4 = _mm_sub_ps(xmm1, xmm2);
112  xmm5 = _mm_sub_ps(xmm1, xmm3);
113  points += 4;
114  xmm6 = _mm_mul_ps(xmm4, xmm4);
115  xmm7 = _mm_mul_ps(xmm5, xmm5);
116 
117  xmm2 = _mm_load_ps((float*)&points[0]);
118 
119  xmm4 = _mm_hadd_ps(xmm6, xmm7);
120 
121  xmm3 = _mm_load_ps((float*)&points[2]);
122 
123  _mm_store_ps(target, xmm4);
124 
125  target += 4;
126  }
127 
128  xmm4 = _mm_sub_ps(xmm1, xmm2);
129  xmm5 = _mm_sub_ps(xmm1, xmm3);
130 
131  points += 4;
132  xmm6 = _mm_mul_ps(xmm4, xmm4);
133  xmm7 = _mm_mul_ps(xmm5, xmm5);
134 
135  xmm4 = _mm_hadd_ps(xmm6, xmm7);
136 
137  _mm_store_ps(target, xmm4);
138 
139  target += 4;
140 
141  for(i = 0; i < leftovers0; ++i) {
142 
143  xmm2 = _mm_load_ps((float*)&points[0]);
144 
145  xmm4 = _mm_sub_ps(xmm1, xmm2);
146 
147  points += 2;
148 
149  xmm6 = _mm_mul_ps(xmm4, xmm4);
150 
151  xmm4 = _mm_hadd_ps(xmm6, xmm6);
152 
153  _mm_storeh_pi((__m64*)target, xmm4);
154 
155  target += 2;
156  }
157 
158  for(i = 0; i < leftovers1; ++i) {
159 
160  diff = src0[0] - points[0];
161 
162  sq_dist = lv_creal(diff) * lv_creal(diff) + lv_cimag(diff) * lv_cimag(diff);
163 
164  target[0] = sq_dist;
165  }
166 }
167 
168 #endif /*LV_HAVE_SSE3*/
169 
170 
171 #ifdef LV_HAVE_NEON
172 #include <arm_neon.h>
/*!
 * \brief Squared distance between src0[0] and every entry of \p points
 *        (NEON kernel).
 *
 * \param target     Output; receives num_points floats.
 * \param src0       Reference sample; only src0[0] is read.
 * \param points     Complex points to compare against src0[0].
 * \param num_points Number of entries in \p points and \p target.
 */
static inline void
volk_32fc_x2_square_dist_32f_neon(float* target, lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_points)
{
  /* Real and imaginary parts of the reference sample, one per lane. */
  const float32x4_t ref_re = vdupq_n_f32( lv_creal(src0[0]) );
  const float32x4_t ref_im = vdupq_n_f32( lv_cimag(src0[0]) );

  unsigned int blocks_left = num_points / 4;
  unsigned int tail_left = num_points % 4;

  /* Vector part: four points per pass. */
  for(; blocks_left > 0; --blocks_left) {
    /* The two-way structure load puts the values used as real parts in
     * val[0] and those used as imaginary parts in val[1]. */
    const float32x4x2_t pts = vld2q_f32((float*)points);

    const float32x4_t delta_re = vsubq_f32(ref_re, pts.val[0]);
    const float32x4_t delta_im = vsubq_f32(ref_im, pts.val[1]);

    const float32x4_t re_sq = vmulq_f32(delta_re, delta_re);
    const float32x4_t im_sq = vmulq_f32(delta_im, delta_im);

    const float32x4_t sum_sq = vaddq_f32(re_sq, im_sq);
    vst1q_f32(target, sum_sq);

    points += 4;
    target += 4;
  }

  /* Scalar part: the up-to-three points left over. */
  for(; tail_left > 0; --tail_left) {
    lv_32fc_t delta = src0[0] - *points++;
    *target++ = lv_creal(delta) * lv_creal(delta) + lv_cimag(delta) * lv_cimag(delta);
  }
}
201 #endif /* LV_HAVE_NEON */
202 
203 
204 #ifdef LV_HAVE_GENERIC
205 static inline void
206 volk_32fc_x2_square_dist_32f_generic(float* target, lv_32fc_t* src0, lv_32fc_t* points,
207  unsigned int num_points)
208 {
209  const unsigned int num_bytes = num_points*8;
210 
211  lv_32fc_t diff;
212  float sq_dist;
213  unsigned int i = 0;
214 
215  for(; i < num_bytes >> 3; ++i) {
216  diff = src0[0] - points[i];
217 
218  sq_dist = lv_creal(diff) * lv_creal(diff) + lv_cimag(diff) * lv_cimag(diff);
219 
220  target[i] = sq_dist;
221  }
222 }
223 
224 #endif /*LV_HAVE_GENERIC*/
225 
226 
227 #endif /*INCLUDED_volk_32fc_x2_square_dist_32f_a_H*/
float complex lv_32fc_t
Definition: volk_complex.h:56
#define lv_creal(x)
Definition: volk_complex.h:76
#define lv_cimag(x)
Definition: volk_complex.h:78