/*
 * Constants for the fast exponential approximation used by the SIMD kernels
 * in this file. The kernels compute
 *
 *     bits = (int32) ((A / Mln2) * x + (B - C))
 *
 * and reinterpret `bits` as an IEEE-754 single-precision float.
 *
 *   Mln2  ln(2).
 *   A     2^23: moves the integer part of x / ln(2) into the float exponent
 *         field, which starts at bit 23.
 *   B     127 * 2^23 (0x3F800000), the bit pattern of 1.0f, i.e. the
 *         exponent bias already shifted into place.
 *   C     Empirical correction subtracted from B to reduce approximation error.
 *
 * NOTE(review): A and C are referenced by the kernels but their definitions
 * were not visible alongside Mln2 and B. A follows from B == 127 * A; the
 * value of C should be confirmed against the upstream header. Both are
 * guarded so that an existing definition wins.
 */
#define Mln2 0.6931471805f
#ifndef A
#define A 8388608.0f
#endif
#define B 1065353216.0f
#ifndef C
#define C 60801.0f
#endif
75 #ifndef INCLUDED_volk_32f_expfast_32f_a_H
76 #define INCLUDED_volk_32f_expfast_32f_a_H
80 #include <immintrin.h>
83 volk_32f_expfast_32f_a_avx(
float* bVector,
const float* aVector,
unsigned int num_points)
85 float* bPtr = bVector;
86 const float* aPtr = aVector;
88 unsigned int number = 0;
89 const unsigned int eighthPoints = num_points / 8;
91 __m256 aVal, bVal, a, b;
93 a = _mm256_set1_ps(
A/
Mln2);
94 b = _mm256_set1_ps(
B-
C);
96 for(;number < eighthPoints; number++){
97 aVal = _mm256_load_ps(aPtr);
98 exp = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(a,aVal), b));
99 bVal = _mm256_castsi256_ps(exp);
101 _mm256_store_ps(bPtr, bVal);
106 number = eighthPoints * 8;
107 for(;number < num_points; number++){
108 *bPtr++ = expf(*aPtr++);
114 #ifdef LV_HAVE_SSE4_1
115 #include <smmintrin.h>
118 volk_32f_expfast_32f_a_sse4_1(
float* bVector,
const float* aVector,
unsigned int num_points)
120 float* bPtr = bVector;
121 const float* aPtr = aVector;
123 unsigned int number = 0;
124 const unsigned int quarterPoints = num_points / 4;
126 __m128 aVal, bVal, a, b;
128 a = _mm_set1_ps(
A/
Mln2);
129 b = _mm_set1_ps(
B-
C);
131 for(;number < quarterPoints; number++){
132 aVal = _mm_load_ps(aPtr);
133 exp = _mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(a,aVal), b));
134 bVal = _mm_castsi128_ps(exp);
136 _mm_store_ps(bPtr, bVal);
141 number = quarterPoints * 4;
142 for(;number < num_points; number++){
143 *bPtr++ = expf(*aPtr++);
153 #ifndef INCLUDED_volk_32f_expfast_32f_u_H
154 #define INCLUDED_volk_32f_expfast_32f_u_H
157 #include <immintrin.h>
160 volk_32f_expfast_32f_u_avx(
float* bVector,
const float* aVector,
unsigned int num_points)
162 float* bPtr = bVector;
163 const float* aPtr = aVector;
165 unsigned int number = 0;
166 const unsigned int eighthPoints = num_points / 8;
168 __m256 aVal, bVal, a, b;
170 a = _mm256_set1_ps(
A/
Mln2);
171 b = _mm256_set1_ps(
B-
C);
173 for(;number < eighthPoints; number++){
174 aVal = _mm256_loadu_ps(aPtr);
175 exp = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(a,aVal), b));
176 bVal = _mm256_castsi256_ps(exp);
178 _mm256_storeu_ps(bPtr, bVal);
183 number = eighthPoints * 8;
184 for(;number < num_points; number++){
185 *bPtr++ = expf(*aPtr++);
192 #ifdef LV_HAVE_SSE4_1
193 #include <smmintrin.h>
196 volk_32f_expfast_32f_u_sse4_1(
float* bVector,
const float* aVector,
unsigned int num_points)
198 float* bPtr = bVector;
199 const float* aPtr = aVector;
201 unsigned int number = 0;
202 const unsigned int quarterPoints = num_points / 4;
204 __m128 aVal, bVal, a, b;
206 a = _mm_set1_ps(
A/
Mln2);
207 b = _mm_set1_ps(
B-
C);
209 for(;number < quarterPoints; number++){
210 aVal = _mm_loadu_ps(aPtr);
211 exp = _mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(a,aVal), b));
212 bVal = _mm_castsi128_ps(exp);
214 _mm_storeu_ps(bPtr, bVal);
219 number = quarterPoints * 4;
220 for(;number < num_points; number++){
221 *bPtr++ = expf(*aPtr++);
228 #ifdef LV_HAVE_GENERIC
231 volk_32f_expfast_32f_generic(
float* bVector,
const float* aVector,
unsigned int num_points)
233 float* bPtr = bVector;
234 const float* aPtr = aVector;
235 unsigned int number = 0;
237 for(number = 0; number < num_points; number++){
238 *bPtr++ = expf(*aPtr++);
/*
 * Macros referenced by the kernels above and where each is defined:
 *   Mln2 - volk_32f_expfast_32f.h:69
 *   A    - volk_32f_expfast_32f.h:70
 *   B    - volk_32f_expfast_32f.h:71
 *   C    - volk_32f_expfast_32f.h:72
 */