54 #ifndef INCLUDED_volk_8ic_deinterleave_16i_x2_a_H
55 #define INCLUDED_volk_8ic_deinterleave_16i_x2_a_H
61 #include <smmintrin.h>
64 volk_8ic_deinterleave_16i_x2_a_sse4_1(
int16_t* iBuffer,
int16_t* qBuffer,
65 const lv_8sc_t* complexVector,
unsigned int num_points)
67 unsigned int number = 0;
71 __m128i iMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
72 __m128i qMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 15, 13, 11, 9, 7, 5, 3, 1);
73 __m128i complexVal, iOutputVal, qOutputVal;
75 unsigned int eighthPoints = num_points / 8;
77 for(number = 0; number < eighthPoints; number++){
78 complexVal = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 16;
80 iOutputVal = _mm_shuffle_epi8(complexVal, iMoveMask);
81 qOutputVal = _mm_shuffle_epi8(complexVal, qMoveMask);
83 iOutputVal = _mm_cvtepi8_epi16(iOutputVal);
84 iOutputVal = _mm_slli_epi16(iOutputVal, 8);
86 qOutputVal = _mm_cvtepi8_epi16(qOutputVal);
87 qOutputVal = _mm_slli_epi16(qOutputVal, 8);
89 _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);
90 _mm_store_si128((__m128i*)qBufferPtr, qOutputVal);
96 number = eighthPoints * 8;
97 for(; number < num_points; number++){
98 *iBufferPtr++ = ((
int16_t)*complexVectorPtr++) * 256;
99 *qBufferPtr++ = ((
int16_t)*complexVectorPtr++) * 256;
106 #include <immintrin.h>
109 volk_8ic_deinterleave_16i_x2_a_avx(
int16_t* iBuffer,
int16_t* qBuffer,
110 const lv_8sc_t* complexVector,
unsigned int num_points)
112 unsigned int number = 0;
113 const int8_t* complexVectorPtr = (
int8_t*)complexVector;
116 __m128i iMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
117 __m128i qMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 15, 13, 11, 9, 7, 5, 3, 1);
118 __m256i complexVal, iOutputVal, qOutputVal;
119 __m128i complexVal1, complexVal0;
120 __m128i iOutputVal1, iOutputVal0, qOutputVal1, qOutputVal0;
122 unsigned int sixteenthPoints = num_points / 16;
124 for(number = 0; number < sixteenthPoints; number++){
125 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr); complexVectorPtr += 32;
128 complexVal1 = _mm256_extractf128_si256(complexVal, 1);
129 complexVal0 = _mm256_extractf128_si256(complexVal, 0);
131 iOutputVal1 = _mm_shuffle_epi8(complexVal1, iMoveMask);
132 iOutputVal0 = _mm_shuffle_epi8(complexVal0, iMoveMask);
133 qOutputVal1 = _mm_shuffle_epi8(complexVal1, qMoveMask);
134 qOutputVal0 = _mm_shuffle_epi8(complexVal0, qMoveMask);
136 iOutputVal1 = _mm_cvtepi8_epi16(iOutputVal1);
137 iOutputVal1 = _mm_slli_epi16(iOutputVal1, 8);
138 iOutputVal0 = _mm_cvtepi8_epi16(iOutputVal0);
139 iOutputVal0 = _mm_slli_epi16(iOutputVal0, 8);
141 qOutputVal1 = _mm_cvtepi8_epi16(qOutputVal1);
142 qOutputVal1 = _mm_slli_epi16(qOutputVal1, 8);
143 qOutputVal0 = _mm_cvtepi8_epi16(qOutputVal0);
144 qOutputVal0 = _mm_slli_epi16(qOutputVal0, 8);
147 __m256i dummy = _mm256_setzero_si256();
148 iOutputVal = _mm256_insertf128_si256(dummy, iOutputVal0, 0);
149 iOutputVal = _mm256_insertf128_si256(iOutputVal, iOutputVal1, 1);
150 qOutputVal = _mm256_insertf128_si256(dummy, qOutputVal0, 0);
151 qOutputVal = _mm256_insertf128_si256(qOutputVal, qOutputVal1, 1);
153 _mm256_store_si256((__m256i*)iBufferPtr, iOutputVal);
154 _mm256_store_si256((__m256i*)qBufferPtr, qOutputVal);
160 number = sixteenthPoints * 16;
161 for(; number < num_points; number++){
162 *iBufferPtr++ = ((
int16_t)*complexVectorPtr++) * 256;
163 *qBufferPtr++ = ((
int16_t)*complexVectorPtr++) * 256;
#ifdef LV_HAVE_GENERIC

/*
 * Deinterleaves 8-bit complex samples into separate 16-bit I and Q buffers,
 * multiplying every component by 256 (portable scalar implementation).
 *
 * iBuffer        output, receives num_points in-phase values
 * qBuffer        output, receives num_points quadrature values
 * complexVector  input, num_points interleaved (I, Q) pairs of 8-bit values
 * num_points     number of complex samples to convert
 *
 * NOTE(review): the return type / storage class was not visible in the
 * listing this was recovered from; "static inline void" is assumed because
 * the definition lives in a header and never returns a value -- confirm.
 */
static inline void
volk_8ic_deinterleave_16i_x2_generic(int16_t* iBuffer, int16_t* qBuffer,
                                     const lv_8sc_t* complexVector,
                                     unsigned int num_points)
{
  const int8_t* complexVectorPtr = (const int8_t*)complexVector;
  int16_t* iBufferPtr = iBuffer;
  int16_t* qBufferPtr = qBuffer;
  unsigned int number;

  for (number = 0; number < num_points; number++) {
    /* The cast binds tighter than '*': widen the 8-bit sample first, then scale. */
    *iBufferPtr++ = (int16_t)(*complexVectorPtr++) * 256;
    *qBufferPtr++ = (int16_t)(*complexVectorPtr++) * 256;
  }
}

#endif /* LV_HAVE_GENERIC */
/*
 * Referenced definitions:
 *   int16_t  - signed short (stdint.h:76)
 *   int8_t   - signed char (stdint.h:75)
 *   lv_8sc_t - char complex (volk_complex.h:52); volk_complex.h provides
 *              typedefs and operators for all complex types in C and C++.
 */