53 #ifndef INCLUDED_volk_16u_byteswap_u_H
54 #define INCLUDED_volk_16u_byteswap_u_H
60 #include <emmintrin.h>
/*!
 * \brief Byte-swaps every 16-bit value of a buffer in place (SSE2, unaligned).
 *
 * \param intsToSwap  Buffer of 16-bit values; overwritten with the swapped values.
 *                    No alignment is required (unaligned load/store are used).
 * \param num_points  Number of uint16_t elements in \p intsToSwap.
 */
static inline void volk_16u_byteswap_u_sse2(uint16_t* intsToSwap, unsigned int num_points){
  unsigned int number = 0;
  uint16_t* inputPtr = intsToSwap;
  __m128i input, left, right, output;

  /* Vector part: eight 16-bit values per 128-bit register. */
  const unsigned int eighthPoints = num_points / 8;
  for(; number < eighthPoints; number++){
    input = _mm_loadu_si128((__m128i*)inputPtr);
    /* Per 16-bit lane: low byte moves up, high byte moves down, then merge. */
    left = _mm_slli_epi16(input, 8);
    right = _mm_srli_epi16(input, 8);
    output = _mm_or_si128(left, right);
    _mm_storeu_si128((__m128i*)inputPtr, output);
    inputPtr += 8;
  }

  /* Scalar tail: the remaining (num_points % 8) values. */
  number = eighthPoints * 8;
  for(; number < num_points; number++){
    uint16_t outputVal = *inputPtr;
    outputVal = (((outputVal >> 8) & 0xff) | ((outputVal << 8) & 0xff00));
    *inputPtr = outputVal;
    inputPtr++;
  }
}
92 #ifdef LV_HAVE_GENERIC
/*!
 * \brief Byte-swaps every 16-bit value of a buffer in place (portable C).
 *
 * \param intsToSwap  Buffer of 16-bit values; overwritten with the swapped values.
 * \param num_points  Number of uint16_t elements in \p intsToSwap.
 */
static inline void volk_16u_byteswap_generic(uint16_t* intsToSwap, unsigned int num_points){
  unsigned int point;
  uint16_t* inputPtr = intsToSwap;

  for(point = 0; point < num_points; point++){
    uint16_t output = *inputPtr;
    /* Exchange the high and low bytes. */
    output = (((output >> 8) & 0xff) | ((output << 8) & 0xff00));
    *inputPtr = output;
    inputPtr++;
  }
}
107 #ifndef INCLUDED_volk_16u_byteswap_a_H
108 #define INCLUDED_volk_16u_byteswap_a_H
114 #include <emmintrin.h>
/*!
 * \brief Byte-swaps every 16-bit value of a buffer in place (SSE2, aligned).
 *
 * \param intsToSwap  Buffer of 16-bit values; overwritten with the swapped values.
 *                    Must be 16-byte aligned because _mm_load_si128 /
 *                    _mm_store_si128 are used.
 * \param num_points  Number of uint16_t elements in \p intsToSwap.
 */
static inline void volk_16u_byteswap_a_sse2(uint16_t* intsToSwap, unsigned int num_points){
  unsigned int number = 0;
  uint16_t* inputPtr = intsToSwap;
  __m128i input, left, right, output;

  /* Vector part: eight 16-bit values per 128-bit register. */
  const unsigned int eighthPoints = num_points / 8;
  for(; number < eighthPoints; number++){
    input = _mm_load_si128((__m128i*)inputPtr);
    /* Per 16-bit lane: low byte moves up, high byte moves down, then merge. */
    left = _mm_slli_epi16(input, 8);
    right = _mm_srli_epi16(input, 8);
    output = _mm_or_si128(left, right);
    _mm_store_si128((__m128i*)inputPtr, output);
    inputPtr += 8;
  }

  /* Scalar tail: the remaining (num_points % 8) values. */
  number = eighthPoints * 8;
  for(; number < num_points; number++){
    uint16_t outputVal = *inputPtr;
    outputVal = (((outputVal >> 8) & 0xff) | ((outputVal << 8) & 0xff00));
    *inputPtr = outputVal;
    inputPtr++;
  }
}
148 #include <arm_neon.h>
/*!
 * \brief Byte-swaps every 16-bit value of a buffer in place (ARM NEON).
 *
 * \param intsToSwap  Buffer of 16-bit values; overwritten with the swapped values.
 * \param num_points  Number of uint16_t elements in \p intsToSwap.
 */
static inline void volk_16u_byteswap_neon(uint16_t* intsToSwap, unsigned int num_points){
  unsigned int number;
  unsigned int eighth_points = num_points / 8;
  uint16x8_t input, output;
  uint16_t* inputPtr = intsToSwap;

  /* Vector part: eight 16-bit values per 128-bit register. */
  for(number = 0; number < eighth_points; number++) {
    input = vld1q_u16(inputPtr);
    /* Plain shift for the first half so no uninitialised register is read:
       output = input >> 8 (old high byte now in the low byte). */
    output = vshrq_n_u16(input, 8);
    /* Shift-left-insert: writes input << 8 into the high byte and keeps the
       low byte of output, completing the swap. */
    output = vsliq_n_u16(output, input, 8);
    vst1q_u16(inputPtr, output);
    inputPtr += 8;
  }

  /* Scalar tail: the remaining (num_points % 8) values. */
  for(number = eighth_points * 8; number < num_points; number++){
    uint16_t swapped = *inputPtr;
    swapped = (((swapped >> 8) & 0xff) | ((swapped << 8) & 0xff00));
    *inputPtr = swapped;
    inputPtr++;
  }
}
174 #include <arm_neon.h>
/*!
 * \brief Byte-swaps every 16-bit value of a buffer in place (ARM NEON,
 *        table-lookup variant processing 16 values per iteration).
 *
 * \param intsToSwap  Buffer of 16-bit values; overwritten with the swapped values.
 * \param num_points  Number of uint16_t elements in \p intsToSwap.
 */
static inline void volk_16u_byteswap_neon_table(uint16_t* intsToSwap, unsigned int num_points){
  uint16_t* inputPtr = intsToSwap;
  unsigned int number = 0;
  unsigned int n16points = num_points / 16;

  uint8x8x4_t input_table;
  uint8x8_t int_lookup01, int_lookup23, int_lookup45, int_lookup67;
  uint8x8_t swapped_int01, swapped_int23, swapped_int45, swapped_int67;

  /* Pre-computed byte indices into the 32-byte table filled by vld4_u8.
     vld4_u8 de-interleaves with stride 4, so table index (8*k + j) holds
     source byte (4*j + k).  Listed low byte first, the indices are:
       int_lookup01: { 8, 0, 24, 16,  9, 1, 25, 17} -> source bytes 1,0,3,2,5,4,7,6
       int_lookup23: {10, 2, 26, 18, 11, 3, 27, 19} -> source bytes 9,8,...,15,14
       int_lookup45: {12, 4, 28, 20, 13, 5, 29, 21} -> source bytes 17,16,...,23,22
       int_lookup67: {14, 6, 30, 22, 15, 7, 31, 23} -> source bytes 25,24,...,31,30 */
  int_lookup01 = vcreate_u8(0x1119010910180008ULL);
  int_lookup23 = vcreate_u8(0x131B030B121A020AULL);
  int_lookup45 = vcreate_u8(0x151D050D141C040CULL);
  int_lookup67 = vcreate_u8(0x171F070F161E060EULL);

  for(number = 0; number < n16points; ++number){
    /* Load 32 bytes (16 values), then gather them back in swapped order. */
    input_table = vld4_u8((uint8_t*) inputPtr);
    swapped_int01 = vtbl4_u8(input_table, int_lookup01);
    swapped_int23 = vtbl4_u8(input_table, int_lookup23);
    swapped_int45 = vtbl4_u8(input_table, int_lookup45);
    swapped_int67 = vtbl4_u8(input_table, int_lookup67);
    /* Each 8-byte result covers four consecutive uint16_t values. */
    vst1_u8((uint8_t*)inputPtr, swapped_int01);
    vst1_u8((uint8_t*)(inputPtr+4), swapped_int23);
    vst1_u8((uint8_t*)(inputPtr+8), swapped_int45);
    vst1_u8((uint8_t*)(inputPtr+12), swapped_int67);

    inputPtr += 16;
  }

  /* Scalar tail: the remaining (num_points % 16) values. */
  for(number = n16points * 16; number < num_points; ++number){
    uint16_t output = *inputPtr;
    output = (((output >> 8) & 0xff) | ((output << 8) & 0xff00));
    *inputPtr = output;
    inputPtr++;
  }
}
221 #ifdef LV_HAVE_GENERIC
/*!
 * \brief Byte-swaps every 16-bit value of a buffer in place (portable C,
 *        entry point of the aligned kernel set).
 *
 * \param intsToSwap  Buffer of 16-bit values; overwritten with the swapped values.
 * \param num_points  Number of uint16_t elements in \p intsToSwap.
 */
static inline void volk_16u_byteswap_a_generic(uint16_t* intsToSwap, unsigned int num_points){
  unsigned int point;
  uint16_t* inputPtr = intsToSwap;

  for(point = 0; point < num_points; point++){
    uint16_t output = *inputPtr;
    /* Exchange the high and low bytes. */
    output = (((output >> 8) & 0xff) | ((output << 8) & 0xff00));
    *inputPtr = output;
    inputPtr++;
  }
}
/* Implemented outside this header; the name suggests ORC-generated code --
   NOTE(review): confirm where the definition lives. */
extern void volk_16u_byteswap_a_orc_impl(uint16_t* intsToSwap, unsigned int num_points);

/*!
 * \brief Thin wrapper that forwards to volk_16u_byteswap_a_orc_impl().
 *
 * \param intsToSwap  Buffer of 16-bit values, passed through unchanged.
 * \param num_points  Number of uint16_t elements in \p intsToSwap, passed through unchanged.
 */
static inline void volk_16u_byteswap_u_orc(uint16_t* intsToSwap, unsigned int num_points){
  volk_16u_byteswap_a_orc_impl(intsToSwap, num_points);
}
unsigned short uint16_t
Definition: stdint.h:79
unsigned char uint8_t
Definition: stdint.h:78