64 #ifndef INCLUDED_volk_32f_index_max_16u_a_H
65 #define INCLUDED_volk_32f_index_max_16u_a_H
/*
 * SSE4.1 variant: walks num_points floats starting at src0, four at a time,
 * tracking a per-lane running maximum and the index it came from, then
 * stores the winning index in target[0].
 *
 * target     - output; only target[0] is written in the lines shown
 * src0       - input samples; read with _mm_load_ps, which requires a
 *              16-byte aligned address
 * num_points - number of floats to examine
 *
 * NOTE(review): this listing is missing lines (return type, braces, and the
 * declarations of max, index, currentValues, maxValuesBuffer and
 * maxIndexesBuffer). Their types and initial values cannot be confirmed
 * from here.
 */
76 volk_32f_index_max_16u_a_sse4_1(
unsigned int* target,
const float* src0,
unsigned int num_points)
79 unsigned int number = 0;
/* Number of complete groups of four floats handled by the vector loop. */
80 const unsigned int quarterPoints = num_points / 4;
/* The cast drops const so the pointer can be advanced; it is only read from
 * in the lines shown. */
82 float* inputPtr = (
float*)src0;
/* Every lane's index advances by 4 per vector iteration. */
84 __m128 indexIncrementValues = _mm_set1_ps(4);
/* _mm_set_ps takes the highest lane first, so lanes 0..3 hold -4,-3,-2,-1.
 * The first add of 4 inside the loop turns them into 0,1,2,3. */
85 __m128 currentIndexes = _mm_set_ps(-1,-2,-3,-4);
/* Broadcast the scalar running maximum (max is declared on a line not shown). */
89 __m128 maxValues = _mm_set1_ps(max);
/* Per-lane index of the best value so far; every lane starts at 0. */
90 __m128 maxValuesIndex = _mm_setzero_ps();
91 __m128 compareResults;
/* Vector loop: one aligned load of four samples per iteration. */
97 for(;number < quarterPoints; number++){
99 currentValues = _mm_load_ps(inputPtr); inputPtr += 4;
100 currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
/* Mask is all-ones in lanes where the stored maximum is strictly greater
 * than the new sample. */
102 compareResults = _mm_cmpgt_ps(maxValues, currentValues);
/* _mm_blendv_ps picks the second operand where the mask is set (keep the old
 * index/value) and the first operand elsewhere (take the new one).
 * NOTE(review): on equality the mask is clear, so within a lane the later
 * index replaces the earlier one - confirm this tie-breaking is intended. */
104 maxValuesIndex = _mm_blendv_ps(currentIndexes, maxValuesIndex, compareResults);
105 maxValues = _mm_blendv_ps(currentValues, maxValues, compareResults);
/* Spill the four lane results to scalar buffers (declared on lines not shown). */
109 _mm_store_ps(maxValuesBuffer, maxValues);
110 _mm_store_ps(maxIndexesBuffer, maxValuesIndex);
/* Reduce the four lanes: a lane wins only if its value is strictly greater
 * than the current max. */
112 for(number = 0; number < 4; number++){
113 if(maxValuesBuffer[number] > max){
/* NOTE(review): lane indexes are carried as single-precision floats, which
 * represent integers exactly only up to 2^24 - confirm inputs stay below
 * that, and check how index is typed on the line not shown. */
114 index = maxIndexesBuffer[number];
115 max = maxValuesBuffer[number];
/* Scalar tail: the num_points % 4 samples the vector loop did not cover.
 * The body of the if below is not visible in this listing. */
119 number = quarterPoints * 4;
120 for(;number < num_points; number++){
121 if(src0[number] > max){
/* Publish the result as an unsigned integer. */
126 target[0] = (
unsigned int)index;
135 #include<xmmintrin.h>
/*
 * SSE variant: walks num_points floats starting at src0, four at a time,
 * tracking a per-lane running maximum and the index it came from, then
 * stores the winning index in target[0]. Lane selection is done with
 * and/andnot/or masks rather than a blend instruction.
 *
 * target     - output; only target[0] is written in the lines shown
 * src0       - input samples; read with _mm_load_ps, which requires a
 *              16-byte aligned address
 * num_points - number of floats to examine
 *
 * NOTE(review): this listing is missing lines (return type, braces, and the
 * declarations of max, index, maxValuesBuffer and maxIndexesBuffer). Their
 * types and initial values cannot be confirmed from here.
 */
138 volk_32f_index_max_16u_a_sse(
unsigned int* target,
const float* src0,
unsigned int num_points)
141 unsigned int number = 0;
/* Number of complete groups of four floats handled by the vector loop. */
142 const unsigned int quarterPoints = num_points / 4;
/* The cast drops const so the pointer can be advanced; it is only read from
 * in the lines shown. */
144 float* inputPtr = (
float*)src0;
/* Every lane's index advances by 4 per vector iteration. */
146 __m128 indexIncrementValues = _mm_set1_ps(4);
/* _mm_set_ps takes the highest lane first, so lanes 0..3 hold -4,-3,-2,-1.
 * The first add of 4 inside the loop turns them into 0,1,2,3. */
147 __m128 currentIndexes = _mm_set_ps(-1,-2,-3,-4);
/* Broadcast the scalar running maximum (max is declared on a line not shown). */
151 __m128 maxValues = _mm_set1_ps(max);
/* Per-lane index of the best value so far; every lane starts at 0. */
152 __m128 maxValuesIndex = _mm_setzero_ps();
153 __m128 compareResults;
154 __m128 currentValues;
/* Vector loop: one aligned load of four samples per iteration. */
159 for(;number < quarterPoints; number++){
161 currentValues = _mm_load_ps(inputPtr); inputPtr += 4;
162 currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
/* Mask is all-ones in lanes where the stored maximum is strictly greater
 * than the new sample. */
164 compareResults = _mm_cmpgt_ps(maxValues, currentValues);
/* Bitwise select: (mask & old) | (~mask & new). Lanes with the mask set keep
 * the old index/value; the others take the new one.
 * NOTE(review): on equality the mask is clear, so within a lane the later
 * index replaces the earlier one - confirm this tie-breaking is intended. */
166 maxValuesIndex = _mm_or_ps(_mm_and_ps(compareResults, maxValuesIndex) , _mm_andnot_ps(compareResults, currentIndexes));
168 maxValues = _mm_or_ps(_mm_and_ps(compareResults, maxValues) , _mm_andnot_ps(compareResults, currentValues));
/* Spill the four lane results to scalar buffers (declared on lines not shown). */
172 _mm_store_ps(maxValuesBuffer, maxValues);
173 _mm_store_ps(maxIndexesBuffer, maxValuesIndex);
/* Reduce the four lanes: a lane wins only if its value is strictly greater
 * than the current max. */
175 for(number = 0; number < 4; number++){
176 if(maxValuesBuffer[number] > max){
/* NOTE(review): lane indexes are carried as single-precision floats, which
 * represent integers exactly only up to 2^24 - confirm inputs stay below
 * that, and check how index is typed on the line not shown. */
177 index = maxIndexesBuffer[number];
178 max = maxValuesBuffer[number];
/* Scalar tail: the num_points % 4 samples the vector loop did not cover.
 * The body of the if below is not visible in this listing. */
182 number = quarterPoints * 4;
183 for(;number < num_points; number++){
184 if(src0[number] > max){
/* Publish the result as an unsigned integer. */
189 target[0] = (
unsigned int)index;
196 #ifdef LV_HAVE_GENERIC
199 volk_32f_index_max_16u_generic(
unsigned int* target,
const float* src0,
unsigned int num_points)
203 unsigned int index = 0;
207 for(; i < num_points; ++i) {
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:27