65 #ifndef INCLUDED_volk_32f_sqrt_32f_a_H
66 #define INCLUDED_volk_32f_sqrt_32f_a_H
73 #include <xmmintrin.h>
76 volk_32f_sqrt_32f_a_sse(
float* cVector,
const float* aVector,
unsigned int num_points)
78 unsigned int number = 0;
79 const unsigned int quarterPoints = num_points / 4;
81 float* cPtr = cVector;
82 const float* aPtr = aVector;
85 for(;number < quarterPoints; number++) {
86 aVal = _mm_load_ps(aPtr);
88 cVal = _mm_sqrt_ps(aVal);
90 _mm_store_ps(cPtr,cVal);
96 number = quarterPoints * 4;
97 for(;number < num_points; number++) {
98 *cPtr++ = sqrtf(*aPtr++);
106 #include <arm_neon.h>
/**
 * Computes an element-wise square root of aVector into cVector using NEON.
 *
 * Groups of four floats are processed as 1 / (1 / sqrt(x)) using the
 * vrsqrteq_f32 and vrecpeq_f32 estimate intrinsics, so the vectorized results
 * are approximations. Any remaining (num_points % 4) elements are computed
 * with sqrtf().
 *
 * @param cVector    Output buffer receiving num_points results.
 * @param aVector    Input buffer holding num_points values.
 * @param num_points Number of floats to process.
 */
static inline void
volk_32f_sqrt_32f_neon(float* cVector, const float* aVector, unsigned int num_points)
{
  float* cPtr = cVector;
  const float* aPtr = aVector;
  unsigned int number = 0;
  unsigned int quarter_points = num_points / 4;
  float32x4_t in_vec, out_vec;

  for(number = 0; number < quarter_points; number++) {
    in_vec = vld1q_f32(aPtr);
    /* Reciprocal estimate of the reciprocal-square-root estimate.
     * NOTE(review): both steps are estimates with no refinement, so accuracy
     * is lower than the sqrtf() tail below -- confirm this is acceptable. */
    out_vec = vrecpeq_f32(vrsqrteq_f32(in_vec));
    vst1q_f32(cPtr, out_vec);

    /* Step past the four lanes just handled so the next iteration, and the
     * scalar tail below, continue from the first unprocessed element. */
    aPtr += 4;
    cPtr += 4;
  }

  /* Scalar tail for the elements that do not fill a whole vector. */
  for(number = quarter_points * 4; number < num_points; number++) {
    *cPtr++ = sqrtf(*aPtr++);
  }
}
134 #ifdef LV_HAVE_GENERIC
137 volk_32f_sqrt_32f_generic(
float* cVector,
const float* aVector,
unsigned int num_points)
139 float* cPtr = cVector;
140 const float* aPtr = aVector;
141 unsigned int number = 0;
143 for(number = 0; number < num_points; number++) {
144 *cPtr++ = sqrtf(*aPtr++);
/* Implemented outside this header; declared here so the wrapper below can
 * call it. Arguments are (output, input, element count), matching the order
 * the wrapper passes them in. */
extern void
volk_32f_sqrt_32f_a_orc_impl(float *, const float*, unsigned int);

/**
 * Forwards the square-root request unchanged to volk_32f_sqrt_32f_a_orc_impl.
 *
 * NOTE(review): this wrapper is named "_u_" while the implementation it calls
 * is named "_a_" -- confirm the implementation has no alignment requirement
 * that callers of this wrapper could violate.
 *
 * @param cVector    Output buffer receiving num_points results.
 * @param aVector    Input buffer holding num_points values.
 * @param num_points Number of floats to process.
 */
static inline void
volk_32f_sqrt_32f_u_orc(float* cVector, const float* aVector, unsigned int num_points)
{
  volk_32f_sqrt_32f_a_orc_impl(cVector, aVector, num_points);
}