GNU Radio Manual and C++ API Reference  3.7.7
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
volk_32i_x2_and_32i.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 /*!
24  * \page volk_32i_x2_and_32i
25  *
26  * \b Overview
27  *
28  * Computes the bitwise AND of two input 32-bit integer vectors, element by element.
29  *
30  * <b>Dispatcher Prototype</b>
31  * \code
32  * void volk_32i_x2_and_32i(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points)
33  * \endcode
34  *
35  * \b Inputs
36  * \li aVector: Input vector of samples.
37  * \li bVector: Input vector of samples.
38  * \li num_points: The number of values.
39  *
40  * \b Outputs
41  * \li cVector: The output vector.
42  *
43  * \b Example
44  * This example generates a Karnaugh map for the lower two bits of x AND y.
45  * \code
46  * int N = 1<<4;
47  * unsigned int alignment = volk_get_alignment();
48  *
49  * int32_t* x = (int32_t*)volk_malloc(N*sizeof(int32_t), alignment);
50  * int32_t* y = (int32_t*)volk_malloc(N*sizeof(int32_t), alignment);
51  * int32_t* z = (int32_t*)volk_malloc(N*sizeof(int32_t), alignment);
52  * int32_t in_seq[] = {0,1,3,2};
53  * unsigned int jj=0;
54  * for(unsigned int ii=0; ii<N; ++ii){
55  * x[ii] = in_seq[ii%4];
56  * y[ii] = in_seq[jj];
57  * if(((ii+1) % 4) == 0) jj++;
58  * }
59  *
60  * volk_32i_x2_and_32i(z, x, y, N);
61  *
62  * printf("Karnaugh map for x AND y\n");
63  * printf("y\\x|");
64  * for(unsigned int ii=0; ii<4; ++ii){
65  * printf(" %.2x ", in_seq[ii]);
66  * }
67  * printf("\n---|---------------\n");
68  * jj = 0;
69  * for(unsigned int ii=0; ii<N; ++ii){
70  * if(((ii+1) % 4) == 1){
71  * printf("%.2x | ", in_seq[jj++]);
72  * }
73  * printf("%.2x ", z[ii]);
74  * if(!((ii+1) % 4)){
75  * printf("\n");
76  * }
77  * }
78  * \endcode
79  */
80 
81 #ifndef INCLUDED_volk_32i_x2_and_32i_a_H
82 #define INCLUDED_volk_32i_x2_and_32i_a_H
83 
84 #include <inttypes.h>
85 #include <stdio.h>
86 
87 #ifdef LV_HAVE_SSE
88 #include <xmmintrin.h>
89 
/*!
 * \brief SSE implementation of the element-wise bitwise AND of two int32 vectors.
 *
 * Processes four 32-bit values per iteration with _mm_and_ps, then finishes
 * the remaining (num_points % 4) values with scalar code.
 *
 * The buffers are accessed with _mm_load_ps / _mm_store_ps, which require
 * 16-byte aligned addresses; callers must pass suitably aligned pointers.
 *
 * \param cVector    Output vector; receives aVector[i] & bVector[i].
 * \param aVector    First input vector.
 * \param bVector    Second input vector.
 * \param num_points Number of 32-bit elements to process.
 */
static inline void
volk_32i_x2_and_32i_a_sse(int32_t* cVector, const int32_t* aVector,
                          const int32_t* bVector, unsigned int num_points)
{
  const unsigned int quarterPoints = num_points / 4;
  unsigned int number;

  /* The SSE1 load/store intrinsics take float pointers. The data is only
   * moved and AND-ed bit for bit, never interpreted as floating point.
   * The casts keep the const qualifier of the input vectors. */
  float* cPtr = (float*)cVector;
  const float* aPtr = (const float*)aVector;
  const float* bPtr = (const float*)bVector;

  for(number = 0; number < quarterPoints; number++){
    const __m128 aVal = _mm_load_ps(aPtr);
    const __m128 bVal = _mm_load_ps(bPtr);
    const __m128 cVal = _mm_and_ps(aVal, bVal);

    _mm_store_ps(cPtr, cVal); /* write four results to the output vector */

    aPtr += 4;
    bPtr += 4;
    cPtr += 4;
  }

  /* Scalar tail for the elements that do not fill a whole 128-bit register. */
  for(number = quarterPoints * 4; number < num_points; number++){
    cVector[number] = aVector[number] & bVector[number];
  }
}
121 #endif /* LV_HAVE_SSE */
122 
123 
124 #ifdef LV_HAVE_NEON
125 #include <arm_neon.h>
126 
/*!
 * \brief NEON implementation of the element-wise bitwise AND of two int32 vectors.
 *
 * Four elements at a time are loaded with vld1q_s32, combined with
 * vandq_s32 and written back with vst1q_s32; the remaining
 * (num_points % 4) elements are handled one by one.
 *
 * \param cVector    Output vector; receives aVector[i] & bVector[i].
 * \param aVector    First input vector.
 * \param bVector    Second input vector.
 * \param num_points Number of 32-bit elements to process.
 */
static inline void
volk_32i_x2_and_32i_neon(int32_t* cVector, const int32_t* aVector, const int32_t* bVector, unsigned int num_points)
{
  /* Number of elements covered by whole 4-lane registers. */
  const unsigned int vector_len = (num_points / 4) * 4;
  unsigned int idx;

  for(idx = 0; idx < vector_len; idx += 4){
    const int32x4_t lhs = vld1q_s32(aVector + idx);
    const int32x4_t rhs = vld1q_s32(bVector + idx);
    vst1q_s32(cVector + idx, vandq_s32(lhs, rhs));
  }

  /* Scalar tail: idx continues where the vector loop stopped. */
  for(; idx < num_points; ++idx){
    cVector[idx] = aVector[idx] & bVector[idx];
  }
}
152 #endif /* LV_HAVE_NEON */
153 
154 
155 #ifdef LV_HAVE_GENERIC
156 
/*!
 * \brief Portable scalar implementation of the element-wise bitwise AND.
 *
 * Writes aVector[i] & bVector[i] to cVector[i] for every i in
 * [0, num_points), in ascending order.
 *
 * \param cVector    Output vector.
 * \param aVector    First input vector.
 * \param bVector    Second input vector.
 * \param num_points Number of 32-bit elements to process.
 */
static inline void
volk_32i_x2_and_32i_generic(int32_t* cVector, const int32_t* aVector,
                            const int32_t* bVector, unsigned int num_points)
{
  unsigned int idx;

  for(idx = 0; idx < num_points; ++idx){
    cVector[idx] = aVector[idx] & bVector[idx];
  }
}
170 #endif /* LV_HAVE_GENERIC */
171 
172 
173 #ifdef LV_HAVE_ORC
/* ORC kernel; declared here, defined outside this header. */
extern void volk_32i_x2_and_32i_a_orc_impl(int32_t* cVector,
                                           const int32_t* aVector,
                                           const int32_t* bVector,
                                           unsigned int num_points);

/*!
 * \brief ORC entry point: forwards all arguments unchanged to
 *        volk_32i_x2_and_32i_a_orc_impl.
 *
 * \param cVector    Output vector.
 * \param aVector    First input vector.
 * \param bVector    Second input vector.
 * \param num_points Number of 32-bit elements to process.
 */
static inline void volk_32i_x2_and_32i_u_orc(int32_t* cVector,
                                             const int32_t* aVector,
                                             const int32_t* bVector,
                                             unsigned int num_points)
{
  volk_32i_x2_and_32i_a_orc_impl(cVector, aVector, bVector, num_points);
}
184 #endif /* LV_HAVE_ORC */
185 
186 
187 #endif /* INCLUDED_volk_32i_x2_and_32i_a_H */
signed int int32_t
Definition: stdint.h:77