GNU Radio Manual and C++ API Reference  3.7.7
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
volk_8u_x4_conv_k7_r2_8u.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 /*!
24  * \page volk_8u_x4_conv_k7_r2_8u
25  *
26  * \b Overview
27  *
28  * Performs convolutional decoding for a K=7, rate 1/2 convolutional
29  * code. The polynomials are user defined.
30  *
31  * <b>Dispatcher Prototype</b>
32  * \code
33  * void volk_8u_x4_conv_k7_r2_8u(unsigned char* Y, unsigned char* X, unsigned char* syms, unsigned char* dec, unsigned int framebits, unsigned int excess, unsigned char* Branchtab)
34  * \endcode
35  *
36  * \b Inputs
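37  * \li X: current ("old") state metrics, 64 bytes (one per state); also used as working storage by the kernel.
38  * \li syms: received symbols, two 8-bit symbols per bit to decode.
39  * \li dec: decision storage, 8 bytes (one decision bit per state) for each processed bit.
40  * \li framebits: size of the frame to decode in bits.
41  * \li excess: number of additional bits processed after the frame; framebits + excess bits are processed in total.
42  * \li Branchtab: branch table of 64 bytes (32 entries for each of the two symbols of a bit) that is XORed with the received symbols.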
43  *
44  * \b Outputs
45  * \li Y: The decoded output bits.
46  *
47  * \b Example
48  * \code
49  * int N = 10000;
50  *
51  * volk_8u_x4_conv_k7_r2_8u();
52  *
53  * volk_free(x);
54  * \endcode
55  */
56 
57 #ifndef INCLUDED_volk_8u_x4_conv_k7_r2_8u_H
58 #define INCLUDED_volk_8u_x4_conv_k7_r2_8u_H
59 
/*
 * Decision storage for one processed bit: one decision bit for each of the
 * 64 states (NUMSTATES), i.e. 8 bytes, viewable as bytes (t, c), 16-bit
 * words (s) or 32-bit words (w).
 *
 * On compilers other than MSVC the type additionally requests 16-byte
 * alignment.
 */
typedef union {
    unsigned char t[64 / 8];   /* NUMSTATES / DECISIONTYPE_BITSIZE, DECISIONTYPE is unsigned char */
    unsigned int w[64 / 32];   /* NUMSTATES / 32 */
    unsigned short s[64 / 16]; /* NUMSTATES / 16 */
    unsigned char c[64 / 8];   /* NUMSTATES / 8 */
#ifndef _MSC_VER
} decision_t __attribute__((aligned(16)));
#else
} decision_t;
#endif
70 
71 
/*!
 * Rebases the 64 state metrics in X so that the smallest one becomes zero.
 *
 * \param X          array of 64 (NUMSTATES) metrics, modified in place.
 * \param threshold  not used: the comparison it was intended for is disabled,
 *                   so the rebasing is carried out on every call.
 */
static inline void
renormalize(unsigned char* X, unsigned char threshold)
{
    enum { STATE_COUNT = 64 };
    unsigned char *const end = X + STATE_COUNT;
    unsigned char *p;
    unsigned char lowest = X[0];

    (void)threshold;

    /* Pass 1: locate the smallest metric. */
    for (p = X + 1; p != end; ++p) {
        if (*p < lowest) {
            lowest = *p;
        }
    }

    /* Pass 2: shift every metric down by that amount. */
    for (p = X; p != end; ++p) {
        *p -= lowest;
    }
}
87 
88 
89 //helper BFLY for GENERIC version
90 static inline void
91 BFLY(int i, int s, unsigned char * syms, unsigned char *Y,
92  unsigned char *X, decision_t * d, unsigned char* Branchtab)
93 {
94  int j, decision0, decision1;
95  unsigned char metric,m0,m1,m2,m3;
96 
97  int NUMSTATES = 64;
98  int RATE = 2;
99  int METRICSHIFT = 1;
100  int PRECISIONSHIFT = 2;
101 
102  metric =0;
103  for(j=0;j<RATE;j++)
104  metric += (Branchtab[i+j*NUMSTATES/2] ^ syms[s*RATE+j])>>METRICSHIFT;
105  metric=metric>>PRECISIONSHIFT;
106 
107  unsigned char max = ((RATE*((256 -1)>>METRICSHIFT))>>PRECISIONSHIFT);
108 
109  m0 = X[i] + metric;
110  m1 = X[i+NUMSTATES/2] + (max - metric);
111  m2 = X[i] + (max - metric);
112  m3 = X[i+NUMSTATES/2] + metric;
113 
114  decision0 = (signed int)(m0-m1) > 0;
115  decision1 = (signed int)(m2-m3) > 0;
116 
117  Y[2*i] = decision0 ? m1 : m0;
118  Y[2*i+1] = decision1 ? m3 : m2;
119 
120  d->w[i/(sizeof(unsigned int)*8/2)+s*(sizeof(decision_t)/sizeof(unsigned int))] |=
121  (decision0|decision1<<1) << ((2*i)&(sizeof(unsigned int)*8-1));
122 }
123 
124 
125 #if LV_HAVE_SSE3
126 
127 #include <pmmintrin.h>
128 #include <emmintrin.h>
129 #include <xmmintrin.h>
130 #include <mmintrin.h>
131 #include <stdio.h>
132 
/*!
 * SSE implementation of the K=7, rate 1/2 metric update.
 *
 * Every pass of the main loop handles two bits: it consumes four bytes of
 * syms and stores eight 16-bit decision words into dec. The first half of
 * the loop body reads the metrics in X and writes the updated metrics to Y,
 * the second half reads Y and writes back to X. The loop runs
 * (framebits + excess) / 2 times. When framebits + excess is odd, the
 * remaining bit is processed after the loop with the scalar BFLY helper,
 * which leaves its metrics in Y.
 *
 * X, Y and Branchtab are each accessed as four consecutive __m128i values
 * (64 bytes) through plain pointer dereferences.
 * NOTE(review): this presumably requires 16-byte aligned buffers -- confirm
 * against the callers.
 */
133 static inline void
134 volk_8u_x4_conv_k7_r2_8u_spiral(unsigned char* Y, unsigned char* X,
135  unsigned char* syms, unsigned char* dec,
136  unsigned int framebits, unsigned int excess,
137  unsigned char* Branchtab)
138 {
139  unsigned int i9;
140  for(i9 = 0; i9 < ((framebits + excess) >> 1); i9++) {
141  unsigned char a75, a81;
142  int a73, a92;
143  short int s20, s21, s26, s27;
144  unsigned char *a74, *a80, *b6;
145  short int *a110, *a111, *a91, *a93, *a94;
146  __m128i *a102, *a112, *a113, *a71, *a72, *a77, *a83
147  , *a95, *a96, *a97, *a98, *a99;
148  __m128i a105, a106, a86, a87;
149  __m128i a100, a101, a103, a104, a107, a108, a109
150  , a76, a78, a79, a82, a84, a85, a88, a89
151  , a90, d10, d11, d12, d9, m23, m24, m25
152  , m26, m27, m28, m29, m30, s18, s19, s22
153  , s23, s24, s25, s28, s29, t13, t14, t15
154  , t16, t17, t18;
/* First bit of the pair: metrics are read from X and written to Y.
   Uses symbols syms[4*i9] and syms[4*i9+1]; the decisions go to the
   16-bit words 8*i9 .. 8*i9+3 of dec. */
155  a71 = ((__m128i *) X);
156  s18 = *(a71);
157  a72 = (a71 + 2);
158  s19 = *(a72);
159  a73 = (4 * i9);
160  a74 = (syms + a73);
161  a75 = *(a74);
162  a76 = _mm_set1_epi8(a75);
163  a77 = ((__m128i *) Branchtab);
164  a78 = *(a77);
165  a79 = _mm_xor_si128(a76, a78);
166  b6 = (a73 + syms);
167  a80 = (b6 + 1);
168  a81 = *(a80);
169  a82 = _mm_set1_epi8(a81);
170  a83 = (a77 + 2);
171  a84 = *(a83);
172  a85 = _mm_xor_si128(a82, a84);
/* Branch metric t14: byte-wise average of the two XOR terms, shifted right
   by 2 and masked to 6 bits; t15 is its complement 63 - t14. */
173  t13 = _mm_avg_epu8(a79,a85);
174  a86 = ((__m128i ) t13);
175  a87 = _mm_srli_epi16(a86, 2);
176  a88 = ((__m128i ) a87);
177  t14 = _mm_and_si128(a88, _mm_set_epi8(63, 63, 63, 63, 63, 63, 63
178  , 63, 63, 63, 63, 63, 63, 63, 63
179  , 63));
180  t15 = _mm_subs_epu8(_mm_set_epi8(63, 63, 63, 63, 63, 63, 63
181  , 63, 63, 63, 63, 63, 63, 63, 63
182  , 63), t14);
/* Saturating add of the metrics, then keep the smaller candidate; a
   decision byte is all-ones where the second candidate equals the minimum. */
183  m23 = _mm_adds_epu8(s18, t14);
184  m24 = _mm_adds_epu8(s19, t15);
185  m25 = _mm_adds_epu8(s18, t15);
186  m26 = _mm_adds_epu8(s19, t14);
187  a89 = _mm_min_epu8(m24, m23);
188  d9 = _mm_cmpeq_epi8(a89, m24);
189  a90 = _mm_min_epu8(m26, m25);
190  d10 = _mm_cmpeq_epi8(a90, m26);
191  s20 = _mm_movemask_epi8(_mm_unpacklo_epi8(d9,d10));
192  a91 = ((short int *) dec);
193  a92 = (8 * i9);
194  a93 = (a91 + a92);
195  *(a93) = s20;
196  s21 = _mm_movemask_epi8(_mm_unpackhi_epi8(d9,d10));
197  a94 = (a93 + 1);
198  *(a94) = s21;
199  s22 = _mm_unpacklo_epi8(a89, a90);
200  s23 = _mm_unpackhi_epi8(a89, a90);
201  a95 = ((__m128i *) Y);
202  *(a95) = s22;
203  a96 = (a95 + 1);
204  *(a96) = s23;
205  a97 = (a71 + 1);
206  s24 = *(a97);
207  a98 = (a71 + 3);
208  s25 = *(a98);
209  a99 = (a77 + 1);
210  a100 = *(a99);
211  a101 = _mm_xor_si128(a76, a100);
212  a102 = (a77 + 3);
213  a103 = *(a102);
214  a104 = _mm_xor_si128(a82, a103);
215  t16 = _mm_avg_epu8(a101,a104);
216  a105 = ((__m128i ) t16);
217  a106 = _mm_srli_epi16(a105, 2);
218  a107 = ((__m128i ) a106);
219  t17 = _mm_and_si128(a107, _mm_set_epi8(63, 63, 63, 63, 63, 63, 63
220  , 63, 63, 63, 63, 63, 63, 63, 63
221  , 63));
222  t18 = _mm_subs_epu8(_mm_set_epi8(63, 63, 63, 63, 63, 63, 63
223  , 63, 63, 63, 63, 63, 63, 63, 63
224  , 63), t17);
225  m27 = _mm_adds_epu8(s24, t17);
226  m28 = _mm_adds_epu8(s25, t18);
227  m29 = _mm_adds_epu8(s24, t18);
228  m30 = _mm_adds_epu8(s25, t17);
229  a108 = _mm_min_epu8(m28, m27);
230  d11 = _mm_cmpeq_epi8(a108, m28);
231  a109 = _mm_min_epu8(m30, m29);
232  d12 = _mm_cmpeq_epi8(a109, m30);
233  s26 = _mm_movemask_epi8(_mm_unpacklo_epi8(d11,d12));
234  a110 = (a93 + 2);
235  *(a110) = s26;
236  s27 = _mm_movemask_epi8(_mm_unpackhi_epi8(d11,d12));
237  a111 = (a93 + 3);
238  *(a111) = s27;
239  s28 = _mm_unpacklo_epi8(a108, a109);
240  s29 = _mm_unpackhi_epi8(a108, a109);
241  a112 = (a95 + 2);
242  *(a112) = s28;
243  a113 = (a95 + 3);
244  *(a113) = s29;
/* If the first metric in Y exceeds 210, find the smallest of the 64 metrics
   (horizontal minimum, broadcast into every byte of m6) and subtract it from
   all of them with a saturating subtract. */
245  if ((((unsigned char *) Y)[0]>210)) {
246  __m128i m5, m6;
247  m5 = ((__m128i *) Y)[0];
248  m5 = _mm_min_epu8(m5, ((__m128i *) Y)[1]);
249  m5 = _mm_min_epu8(m5, ((__m128i *) Y)[2]);
250  m5 = _mm_min_epu8(m5, ((__m128i *) Y)[3]);
251  __m128i m7;
252  m7 = _mm_min_epu8(_mm_srli_si128(m5, 8), m5);
253  m7 = ((__m128i ) _mm_min_epu8(((__m128i ) _mm_srli_epi64(m7, 32)), ((__m128i ) m7)));
254  m7 = ((__m128i ) _mm_min_epu8(((__m128i ) _mm_srli_epi64(m7, 16)), ((__m128i ) m7)));
255  m7 = ((__m128i ) _mm_min_epu8(((__m128i ) _mm_srli_epi64(m7, 8)), ((__m128i ) m7)));
256  m7 = _mm_unpacklo_epi8(m7, m7);
257  m7 = _mm_shufflelo_epi16(m7, _MM_SHUFFLE(0, 0, 0, 0));
258  m6 = _mm_unpacklo_epi64(m7, m7);
259  ((__m128i *) Y)[0] = _mm_subs_epu8(((__m128i *) Y)[0], m6);
260  ((__m128i *) Y)[1] = _mm_subs_epu8(((__m128i *) Y)[1], m6);
261  ((__m128i *) Y)[2] = _mm_subs_epu8(((__m128i *) Y)[2], m6);
262  ((__m128i *) Y)[3] = _mm_subs_epu8(((__m128i *) Y)[3], m6);
263  }
264  unsigned char a188, a194;
265  int a186, a205;
266  short int s48, s49, s54, s55;
267  unsigned char *a187, *a193, *b15;
268  short int *a204, *a206, *a207, *a223, *a224, *b16;
269  __m128i *a184, *a185, *a190, *a196, *a208, *a209, *a210
270  , *a211, *a212, *a215, *a225, *a226;
271  __m128i a199, a200, a218, a219;
272  __m128i a189, a191, a192, a195, a197, a198, a201
273  , a202, a203, a213, a214, a216, a217, a220, a221
274  , a222, d17, d18, d19, d20, m39, m40, m41
275  , m42, m43, m44, m45, m46, s46, s47, s50
276  , s51, s52, s53, s56, s57, t25, t26, t27
277  , t28, t29, t30;
/* Second bit of the pair: same computation with the roles swapped, metrics
   are read from Y and written to X. Uses symbols syms[4*i9+2] and
   syms[4*i9+3]; the decisions go to the 16-bit words 8*i9+4 .. 8*i9+7 of dec. */
278  a184 = ((__m128i *) Y);
279  s46 = *(a184);
280  a185 = (a184 + 2);
281  s47 = *(a185);
282  a186 = (4 * i9);
283  b15 = (a186 + syms);
284  a187 = (b15 + 2);
285  a188 = *(a187);
286  a189 = _mm_set1_epi8(a188);
287  a190 = ((__m128i *) Branchtab);
288  a191 = *(a190);
289  a192 = _mm_xor_si128(a189, a191);
290  a193 = (b15 + 3);
291  a194 = *(a193);
292  a195 = _mm_set1_epi8(a194);
293  a196 = (a190 + 2);
294  a197 = *(a196);
295  a198 = _mm_xor_si128(a195, a197);
296  t25 = _mm_avg_epu8(a192,a198);
297  a199 = ((__m128i ) t25);
298  a200 = _mm_srli_epi16(a199, 2);
299  a201 = ((__m128i ) a200);
300  t26 = _mm_and_si128(a201, _mm_set_epi8(63, 63, 63, 63, 63, 63, 63
301  , 63, 63, 63, 63, 63, 63, 63, 63
302  , 63));
303  t27 = _mm_subs_epu8(_mm_set_epi8(63, 63, 63, 63, 63, 63, 63
304  , 63, 63, 63, 63, 63, 63, 63, 63
305  , 63), t26);
306  m39 = _mm_adds_epu8(s46, t26);
307  m40 = _mm_adds_epu8(s47, t27);
308  m41 = _mm_adds_epu8(s46, t27);
309  m42 = _mm_adds_epu8(s47, t26);
310  a202 = _mm_min_epu8(m40, m39);
311  d17 = _mm_cmpeq_epi8(a202, m40);
312  a203 = _mm_min_epu8(m42, m41);
313  d18 = _mm_cmpeq_epi8(a203, m42);
314  s48 = _mm_movemask_epi8(_mm_unpacklo_epi8(d17,d18));
315  a204 = ((short int *) dec);
316  a205 = (8 * i9);
317  b16 = (a204 + a205);
318  a206 = (b16 + 4);
319  *(a206) = s48;
320  s49 = _mm_movemask_epi8(_mm_unpackhi_epi8(d17,d18));
321  a207 = (b16 + 5);
322  *(a207) = s49;
323  s50 = _mm_unpacklo_epi8(a202, a203);
324  s51 = _mm_unpackhi_epi8(a202, a203);
325  a208 = ((__m128i *) X);
326  *(a208) = s50;
327  a209 = (a208 + 1);
328  *(a209) = s51;
329  a210 = (a184 + 1);
330  s52 = *(a210);
331  a211 = (a184 + 3);
332  s53 = *(a211);
333  a212 = (a190 + 1);
334  a213 = *(a212);
335  a214 = _mm_xor_si128(a189, a213);
336  a215 = (a190 + 3);
337  a216 = *(a215);
338  a217 = _mm_xor_si128(a195, a216);
339  t28 = _mm_avg_epu8(a214,a217);
340  a218 = ((__m128i ) t28);
341  a219 = _mm_srli_epi16(a218, 2);
342  a220 = ((__m128i ) a219);
343  t29 = _mm_and_si128(a220, _mm_set_epi8(63, 63, 63, 63, 63, 63, 63
344  , 63, 63, 63, 63, 63, 63, 63, 63
345  , 63));
346  t30 = _mm_subs_epu8(_mm_set_epi8(63, 63, 63, 63, 63, 63, 63
347  , 63, 63, 63, 63, 63, 63, 63, 63
348  , 63), t29);
349  m43 = _mm_adds_epu8(s52, t29);
350  m44 = _mm_adds_epu8(s53, t30);
351  m45 = _mm_adds_epu8(s52, t30);
352  m46 = _mm_adds_epu8(s53, t29);
353  a221 = _mm_min_epu8(m44, m43);
354  d19 = _mm_cmpeq_epi8(a221, m44);
355  a222 = _mm_min_epu8(m46, m45);
356  d20 = _mm_cmpeq_epi8(a222, m46);
357  s54 = _mm_movemask_epi8(_mm_unpacklo_epi8(d19,d20));
358  a223 = (b16 + 6);
359  *(a223) = s54;
360  s55 = _mm_movemask_epi8(_mm_unpackhi_epi8(d19,d20));
361  a224 = (b16 + 7);
362  *(a224) = s55;
363  s56 = _mm_unpacklo_epi8(a221, a222);
364  s57 = _mm_unpackhi_epi8(a221, a222);
365  a225 = (a208 + 2);
366  *(a225) = s56;
367  a226 = (a208 + 3);
368  *(a226) = s57;
/* Same conditional renormalization as above, this time applied to X. */
369  if ((((unsigned char *) X)[0]>210)) {
370  __m128i m12, m13;
371  m12 = ((__m128i *) X)[0];
372  m12 = _mm_min_epu8(m12, ((__m128i *) X)[1]);
373  m12 = _mm_min_epu8(m12, ((__m128i *) X)[2]);
374  m12 = _mm_min_epu8(m12, ((__m128i *) X)[3]);
375  __m128i m14;
376  m14 = _mm_min_epu8(_mm_srli_si128(m12, 8), m12);
377  m14 = ((__m128i ) _mm_min_epu8(((__m128i ) _mm_srli_epi64(m14, 32)), ((__m128i ) m14)));
378  m14 = ((__m128i ) _mm_min_epu8(((__m128i ) _mm_srli_epi64(m14, 16)), ((__m128i ) m14)));
379  m14 = ((__m128i ) _mm_min_epu8(((__m128i ) _mm_srli_epi64(m14, 8)), ((__m128i ) m14)));
380  m14 = _mm_unpacklo_epi8(m14, m14);
381  m14 = _mm_shufflelo_epi16(m14, _MM_SHUFFLE(0, 0, 0, 0));
382  m13 = _mm_unpacklo_epi64(m14, m14);
383  ((__m128i *) X)[0] = _mm_subs_epu8(((__m128i *) X)[0], m13);
384  ((__m128i *) X)[1] = _mm_subs_epu8(((__m128i *) X)[1], m13);
385  ((__m128i *) X)[2] = _mm_subs_epu8(((__m128i *) X)[2], m13);
386  ((__m128i *) X)[3] = _mm_subs_epu8(((__m128i *) X)[3], m13);
387  }
388  }
389 
/* Scalar renormalization of X once the vector loop is done. It is performed
   unconditionally: renormalize() does not use its threshold argument. */
390  renormalize(X, 210);
391 
392  /*int ch;
393  for(ch = 0; ch < 64; ch++) {
394  printf("%d,", X[ch]);
395  }
396  printf("\n");*/
397 
/* Tail: the loop body below runs once when framebits + excess is odd and not
   at all otherwise. The last bit is processed with the scalar BFLY helper,
   reading X and writing Y. BFLY ORs its decision bits into dec. */
398  unsigned int j;
399  for(j=0; j < (framebits + excess) % 2; ++j) {
400  int i;
401  for(i=0;i<64/2;i++){
402  BFLY(i, (((framebits+excess) >> 1) << 1) + j , syms, Y, X, (decision_t *)dec, Branchtab);
403  }
404 
405 
406  renormalize(Y, 210);
407 
408  /*printf("\n");
409  for(ch = 0; ch < 64; ch++) {
410  printf("%d,", Y[ch]);
411  }
412  printf("\n");*/
413 
414  }
415  /*skip*/
416 }
417 
418 #endif /*LV_HAVE_SSE3*/
419 
420 
421 #if LV_HAVE_GENERIC
422 
423 static inline void
424 volk_8u_x4_conv_k7_r2_8u_generic(unsigned char* Y, unsigned char* X,
425  unsigned char* syms, unsigned char* dec,
426  unsigned int framebits, unsigned int excess,
427  unsigned char* Branchtab)
428 {
429  int nbits = framebits + excess;
430  int NUMSTATES = 64;
431  int RENORMALIZE_THRESHOLD = 210;
432 
433  int s,i;
434  for (s=0;s<nbits;s++){
435  void *tmp;
436  for(i=0;i<NUMSTATES/2;i++){
437  BFLY(i, s, syms, Y, X, (decision_t *)dec, Branchtab);
438  }
439 
440  renormalize(Y, RENORMALIZE_THRESHOLD);
441 
442  /// Swap pointers to old and new metrics
443  tmp = (void *)X;
444  X = Y;
445  Y = (unsigned char*)tmp;
446  }
447 }
448 
449 #endif /* LV_HAVE_GENERIC */
450 
451 #endif /*INCLUDED_volk_8u_x4_conv_k7_r2_8u_H*/
float min(float a, float b)
unsigned int * w
Definition: cc_common.h:36
static void renormalize(unsigned char *X, unsigned char threshold)
Definition: volk_8u_x4_conv_k7_r2_8u.h:73
Definition: cc_common.h:33
static void BFLY(int i, int s, unsigned char *syms, unsigned char *Y, unsigned char *X, decision_t *d, unsigned char *Branchtab)
Definition: volk_8u_x4_conv_k7_r2_8u.h:91