56 #ifndef INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H
57 #define INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H
64 #include <smmintrin.h>
/*
 * SSE4.1 kernel: the vector loop below forms, for each pair of 8-bit complex
 * inputs, the product a * conj(b) in 32-bit integer arithmetic, converts it
 * to float, multiplies by 1/scalar and stores it as interleaved
 * (real, imag) floats. Four complex points are handled per loop iteration;
 * the leftover (num_points % 4) points are handled by a scalar tail loop.
 *
 * cVector    - output buffer; written as consecutive floats (real, imag).
 * aVector    - first 8-bit complex input (presumably the source of `a` and
 *              `a8Ptr`; their initialisation is not visible here - confirm).
 * bVector    - second 8-bit complex input (presumably the source of `b` and
 *              `b8Ptr`; their initialisation is not visible here - confirm).
 * scalar     - divisor applied to every output component.
 * num_points - number of complex points to process.
 *
 * NOTE(review): this listing does not show the return type, the braces, the
 * declarations of `a`, `b`, `c`, `ret`, `a8Ptr`, `b8Ptr` and `temp`, or any
 * advance of `a`, `b`, `c` inside the vector loop. They appear to have been
 * lost when the listing was extracted - verify against the original header.
 */
67 volk_8ic_x2_s32f_multiply_conjugate_32fc_a_sse4_1(
lv_32fc_t* cVector,
const lv_8sc_t* aVector,
68 const lv_8sc_t* bVector,
const float scalar,
69 unsigned int num_points)
71 unsigned int number = 0;
// Each vector iteration consumes four complex points.
72 const unsigned int quarterPoints = num_points / 4;
74 __m128i x, y, realz, imagz;
// _mm_set_epi16 lists lanes from high to low, so the even (real) lanes get +1
// and the odd (imaginary) lanes get -1.
79 __m128i conjugateSign = _mm_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1);
// Reciprocal computed once so the loop multiplies instead of divides.
81 __m128 invScalar = _mm_set_ps1(1.0/scalar);
83 for(;number < quarterPoints; number++){
// Load 8 bytes from each input and sign-extend them to eight 16-bit lanes.
85 x = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)a));
86 y = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)b));
// madd multiplies lane-wise and sums adjacent pairs: ar*br + ai*bi,
// which is the real part of a * conj(b).
89 realz = _mm_madd_epi16(x,y);
// Negate the imaginary lanes of y: (br, bi) -> (br, -bi).
92 y = _mm_sign_epi16(y, conjugateSign);
// Swap the lanes within each 16-bit pair: (br, -bi) -> (-bi, br).
95 y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, _MM_SHUFFLE(2,3,0,1) ), _MM_SHUFFLE(2,3,0,1));
// ar*(-bi) + ai*br, which is the imaginary part of a * conj(b).
98 imagz = _mm_madd_epi16(x,y);
// Interleave the low two real/imag 32-bit results and convert to float.
101 ret = _mm_cvtepi32_ps(_mm_unpacklo_epi32(realz, imagz));
// Apply the 1/scalar normalisation.
104 ret = _mm_mul_ps(ret, invScalar);
// _mm_store_ps is the aligned store: the destination must be 16-byte aligned.
107 _mm_store_ps((
float*)c, ret);
// Same for the high two real/imag 32-bit results.
111 ret = _mm_cvtepi32_ps(_mm_unpackhi_epi32(realz, imagz));
114 ret = _mm_mul_ps(ret, invScalar);
// NOTE(review): no advance of `c` is visible between this store and the
// previous one; as shown, both stores target the same address - confirm
// against the original source.
117 _mm_store_ps((
float*)c, ret);
// Scalar tail: resume at the first point not covered by the vector loop.
124 number = quarterPoints * 4;
125 float* cFloatPtr = (
float*)&cVector[number];
128 for(; number < num_points; number++){
// Each complex input is read as two consecutive 8-bit values (real, imag).
129 float aReal = (float)*a8Ptr++;
130 float aImag = (float)*a8Ptr++;
132 float bReal = (float)*b8Ptr++;
133 float bImag = (float)*b8Ptr++;
// `temp` is not defined in the visible lines; presumably it is a * conj(b)
// built from the four floats above - confirm.
// The tail divides by scalar directly rather than multiplying by the
// reciprocal used in the vector loop.
137 *cFloatPtr++ =
lv_creal(temp) / scalar;
138 *cFloatPtr++ =
lv_cimag(temp) / scalar;
144 #ifdef LV_HAVE_GENERIC
/*
 * Generic (non-SIMD) kernel: for every point, reads one 8-bit complex value
 * from each input as two consecutive bytes, and writes the real and imaginary
 * parts of `temp`, each multiplied by 1/scalar, to the output as floats.
 *
 * cVector    - output buffer; written as consecutive floats (real, imag).
 * aVector    - first 8-bit complex input (presumably the source of `a8Ptr`;
 *              its initialisation is not visible here - confirm).
 * bVector    - second 8-bit complex input (presumably the source of `b8Ptr`;
 *              its initialisation is not visible here - confirm).
 * scalar     - divisor applied (as a reciprocal multiply) to every output.
 * num_points - number of complex points to process.
 *
 * NOTE(review): this listing does not show the return type, the braces, the
 * declarations of `a8Ptr` and `b8Ptr`, or the computation of `temp`.
 * Judging by the function name, `temp` is presumably a * conj(b) - verify
 * against the original header.
 */
147 volk_8ic_x2_s32f_multiply_conjugate_32fc_generic(
lv_32fc_t* cVector,
const lv_8sc_t* aVector,
148 const lv_8sc_t* bVector,
const float scalar,
149 unsigned int num_points)
151 unsigned int number = 0;
// View the complex output as a flat array of floats.
152 float* cPtr = (
float*)cVector;
// 1.0 is a double literal, so the division is done in double and then
// narrowed to float on assignment.
153 const float invScalar = 1.0 / scalar;
156 for(number = 0; number < num_points; number++){
// Each complex input is read as two consecutive 8-bit values (real, imag).
157 float aReal = (float)*a8Ptr++;
158 float aImag = (float)*a8Ptr++;
160 float bReal = (float)*b8Ptr++;
161 float bImag = (float)*b8Ptr++;
// Write the scaled real part, then the scaled imaginary part.
165 *cPtr++ = (
lv_creal(temp) * invScalar);
166 *cPtr++ = (
lv_cimag(temp) * invScalar);
#define lv_cmake(r, i)
Definition: volk_complex.h:59
signed char int8_t
Definition: stdint.h:75
float complex lv_32fc_t
Definition: volk_complex.h:56
#define lv_creal(x)
Definition: volk_complex.h:76
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:52
#define lv_cimag(x)
Definition: volk_complex.h:78