GNU Radio Manual and C++ API Reference  3.7.7
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
volk_32f_s32f_stddev_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 /*!
24  * \page volk_32f_s32f_stddev_32f
25  *
26  * \b Overview
27  *
28  * Computes the standard deviation of the input buffer using the supplied mean.
29  *
30  * <b>Dispatcher Prototype</b>
31  * \code
32  * void volk_32f_s32f_stddev_32f(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points)
33  * \endcode
34  *
35  * \b Inputs
36  * \li inputBuffer: The input vector of floats.
37  * \li mean: The mean of the input buffer.
38  * \li num_points: The number of data points.
39  *
40  * \b Outputs
41  * \li stddev: Pointer to a single float that receives the computed standard deviation.
42  *
43  * \b Example
44  * Calculate the standard deviation from numbers generated with c++11's normal generator
45  * \code
46  * int N = 1000;
47  * unsigned int alignment = volk_get_alignment();
48  * float* increasing = (float*)volk_malloc(sizeof(float)*N, alignment);
49  * float mean = 0.0f;
50  * float* stddev = (float*)volk_malloc(sizeof(float), alignment);
51  *
52  * // Use a normal generator with 0 mean, stddev = 1
53  * std::default_random_engine generator;
54  * std::normal_distribution<float> distribution(mean,1);
55  *
56  * for(unsigned int ii = 0; ii < N; ++ii){
57  * increasing[ii] = distribution(generator);
58  * }
59  *
60  * volk_32f_s32f_stddev_32f(stddev, increasing, mean, N);
61  *
62  * printf("std. dev. = %f\n", *stddev);
63  *
64  * volk_free(increasing); volk_free(stddev);
65  * \endcode
66  */
67 
68 #ifndef INCLUDED_volk_32f_s32f_stddev_32f_a_H
69 #define INCLUDED_volk_32f_s32f_stddev_32f_a_H
70 
71 #include <volk/volk_common.h>
72 #include <inttypes.h>
73 #include <stdio.h>
74 #include <math.h>
75 
76 #ifdef LV_HAVE_SSE4_1
77 #include <smmintrin.h>
78 
79 static inline void
80 volk_32f_s32f_stddev_32f_a_sse4_1(float* stddev, const float* inputBuffer,
81  const float mean, unsigned int num_points)
82 {
83  float returnValue = 0;
84  if(num_points > 0){
85  unsigned int number = 0;
86  const unsigned int sixteenthPoints = num_points / 16;
87 
88  const float* aPtr = inputBuffer;
89 
90  __VOLK_ATTR_ALIGNED(16) float squareBuffer[4];
91 
92  __m128 squareAccumulator = _mm_setzero_ps();
93  __m128 aVal1, aVal2, aVal3, aVal4;
94  __m128 cVal1, cVal2, cVal3, cVal4;
95  for(;number < sixteenthPoints; number++) {
96  aVal1 = _mm_load_ps(aPtr); aPtr += 4;
97  cVal1 = _mm_dp_ps(aVal1, aVal1, 0xF1);
98 
99  aVal2 = _mm_load_ps(aPtr); aPtr += 4;
100  cVal2 = _mm_dp_ps(aVal2, aVal2, 0xF2);
101 
102  aVal3 = _mm_load_ps(aPtr); aPtr += 4;
103  cVal3 = _mm_dp_ps(aVal3, aVal3, 0xF4);
104 
105  aVal4 = _mm_load_ps(aPtr); aPtr += 4;
106  cVal4 = _mm_dp_ps(aVal4, aVal4, 0xF8);
107 
108  cVal1 = _mm_or_ps(cVal1, cVal2);
109  cVal3 = _mm_or_ps(cVal3, cVal4);
110  cVal1 = _mm_or_ps(cVal1, cVal3);
111 
112  squareAccumulator = _mm_add_ps(squareAccumulator, cVal1); // squareAccumulator += x^2
113  }
114  _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container
115  returnValue = squareBuffer[0];
116  returnValue += squareBuffer[1];
117  returnValue += squareBuffer[2];
118  returnValue += squareBuffer[3];
119 
120  number = sixteenthPoints * 16;
121  for(;number < num_points; number++){
122  returnValue += (*aPtr) * (*aPtr);
123  aPtr++;
124  }
125  returnValue /= num_points;
126  returnValue -= (mean * mean);
127  returnValue = sqrtf(returnValue);
128  }
129  *stddev = returnValue;
130 }
131 
132 #endif /* LV_HAVE_SSE4_1 */
133 
134 
135 #ifdef LV_HAVE_SSE
136 #include <xmmintrin.h>
137 
138 static inline void
139 volk_32f_s32f_stddev_32f_a_sse(float* stddev, const float* inputBuffer,
140  const float mean, unsigned int num_points)
141 {
142  float returnValue = 0;
143  if(num_points > 0){
144  unsigned int number = 0;
145  const unsigned int quarterPoints = num_points / 4;
146 
147  const float* aPtr = inputBuffer;
148 
149  __VOLK_ATTR_ALIGNED(16) float squareBuffer[4];
150 
151  __m128 squareAccumulator = _mm_setzero_ps();
152  __m128 aVal = _mm_setzero_ps();
153  for(;number < quarterPoints; number++) {
154  aVal = _mm_load_ps(aPtr); // aVal = x
155  aVal = _mm_mul_ps(aVal, aVal); // squareAccumulator += x^2
156  squareAccumulator = _mm_add_ps(squareAccumulator, aVal);
157  aPtr += 4;
158  }
159  _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container
160  returnValue = squareBuffer[0];
161  returnValue += squareBuffer[1];
162  returnValue += squareBuffer[2];
163  returnValue += squareBuffer[3];
164 
165  number = quarterPoints * 4;
166  for(;number < num_points; number++){
167  returnValue += (*aPtr) * (*aPtr);
168  aPtr++;
169  }
170  returnValue /= num_points;
171  returnValue -= (mean * mean);
172  returnValue = sqrtf(returnValue);
173  }
174  *stddev = returnValue;
175 }
176 #endif /* LV_HAVE_SSE */
177 
178 
179 #ifdef LV_HAVE_GENERIC
180 
181 static inline void
182 volk_32f_s32f_stddev_32f_generic(float* stddev, const float* inputBuffer,
183  const float mean, unsigned int num_points)
184 {
185  float returnValue = 0;
186  if(num_points > 0){
187  const float* aPtr = inputBuffer;
188  unsigned int number = 0;
189 
190  for(number = 0; number < num_points; number++){
191  returnValue += (*aPtr) * (*aPtr);
192  aPtr++;
193  }
194 
195  returnValue /= num_points;
196  returnValue -= (mean * mean);
197  returnValue = sqrtf(returnValue);
198  }
199  *stddev = returnValue;
200 }
201 
202 #endif /* LV_HAVE_GENERIC */
203 
204 
205 #endif /* INCLUDED_volk_32f_s32f_stddev_32f_a_H */
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:27