Vector Optimized Library of Kernels 3.1.2
Architecture-tuned implementations of math kernels
 
Loading...
Searching...
No Matches
volk_8u_conv_k7_r2puppet_8u.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10#ifndef INCLUDED_volk_8u_conv_k7_r2puppet_8u_H
11#define INCLUDED_volk_8u_conv_k7_r2puppet_8u_H
12
13#include <string.h>
14#include <volk/volk.h>
16
/*
 * Two pointer views onto the same decision storage: `t` addresses it
 * byte-wise, `w` addresses it as unsigned int words.
 */
typedef union {
    // decision_t is a BIT vector
    unsigned char* t;
    unsigned int* w;
} p_decision_t;
22
/*
 * Return the parity (0 = even, 1 = odd number of set bits) of the low
 * 32 bits of x.
 *
 * x      value whose bit parity is wanted
 * Partab 256-entry table where Partab[b] is the parity of byte b
 *
 * The word is folded down to one byte by XOR-ing its halves together,
 * then the table resolves the remaining 8 bits. The fold is done in
 * unsigned arithmetic and the result is masked to 8 bits so the lookup
 * can never index outside the 256-entry table.
 */
static inline int parity(int x, unsigned char* Partab)
{
    unsigned int folded = (unsigned int)x;

    folded ^= (folded >> 16);
    folded ^= (folded >> 8);
    return Partab[folded & 0xFFu];
}
29
/*
 * Read the decision bit for `state` from one trellis step's decision
 * record. The record is treated as an array of unsigned int words in
 * native byte order; memcpy is used so the read is valid regardless of
 * the buffer's alignment or declared type.
 */
static inline unsigned int chainback_decision_bit(const unsigned char* step,
                                                  unsigned int state)
{
    unsigned int word;

    memcpy(&word, step + (state / 32) * sizeof(word), sizeof(word));
    return (word >> (state % 32)) & 1u;
}

/*
 * Trace back through the stored decisions and write one decoded bit
 * (0 or 1) per byte into data[].
 *
 * data      output; receives nbits bytes, each 0 or 1
 * nbits     number of bits to recover
 * endstate  state to start the traceback from (reduced modulo 64)
 * tailsize  number of decision records to skip at the start of decisions
 * decisions decision records, 8 bytes (64 bits) per trellis step
 *
 * Returns the state reached after the first pass, which covers the last
 * K-1 = 6 steps. If that pass does not run (nbits < 6), the normalised
 * starting state is returned instead of an indeterminate value.
 */
static inline int chainback_viterbi(unsigned char* data,
                                    unsigned int nbits,
                                    unsigned int endstate,
                                    unsigned int tailsize,
                                    unsigned char* decisions)
{
    enum {
        ADDSHIFT = 0,                  /* state is already byte-sized */
        NUMSTATES = 1 << 6,            /* 64 states for K = 7 */
        DECISION_BYTES = NUMSTATES / 8, /* one bit per state per step */
        K = 7
    };

    const unsigned int framebits = nbits;
    /* Offset of the output index relative to the step index. */
    const unsigned int dif = tailsize - (K - 1);
    /* Look past tail */
    const unsigned char* d = decisions + (size_t)tailsize * DECISION_BYTES;

    endstate = (endstate % NUMSTATES) << ADDSHIFT;
    int retval = (int)endstate;

    /* First pass: the final K-1 steps; remember the state it ends in.
     * The comparison is unsigned, so for nbits < K-1 the bound wraps
     * and this loop is skipped entirely. */
    while (nbits-- > framebits - (K - 1)) {
        const unsigned int k = chainback_decision_bit(
            d + (size_t)nbits * DECISION_BYTES, endstate >> ADDSHIFT);

        endstate = (endstate >> 1) | (k << (K - 2 + ADDSHIFT));
        data[(nbits + dif) % framebits] = (unsigned char)k;

        retval = (int)endstate;
    }
    /* Undo the decrement performed by the failed loop test above. */
    nbits += 1;

    /* Second pass: all remaining steps down to step 0. */
    while (nbits-- != 0) {
        const unsigned int k = chainback_decision_bit(
            d + (size_t)nbits * DECISION_BYTES, endstate >> ADDSHIFT);

        endstate = (endstate >> 1) | (k << (K - 2 + ADDSHIFT));
        data[(nbits + dif) % framebits] = (unsigned char)k;
    }

    return retval >> ADDSHIFT;
}
90
91
92#if LV_HAVE_SSE3
93
94#include <emmintrin.h>
95#include <mmintrin.h>
96#include <pmmintrin.h>
97#include <stdio.h>
98#include <xmmintrin.h>
99
/*
 * Puppet wrapper that drives volk_8u_x4_conv_k7_r2_8u_spiral with the
 * three-argument signature: it owns the metric, branch and decision
 * buffers, runs the kernel, then chains back into dec.
 *
 * dec       output; receives framebits / 2 - 6 bytes, each 0 or 1
 * syms      input symbols handed unchanged to the kernel
 *           (required length is defined by the kernel, not visible here)
 * framebits frame size; frames shorter than 12 are ignored
 *
 * The working buffers are function-local statics that persist between
 * calls, so concurrent calls share them. The decision buffer is regrown
 * whenever a call needs more room than a previous call allocated. If an
 * allocation fails the function returns without touching dec.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_spiral(unsigned char* dec,
                                                      unsigned char* syms,
                                                      unsigned int framebits)
{
    if (framebits < 12) {
        return;
    }

    const int d_numstates = (1 << 6);
    const int rate = 2;
    const unsigned int excess = 6;
    const int d_polys[2] = { 79, 109 };

    static int tables_ready = 0;
    static unsigned char* X;         /* old metrics; Y follows directly */
    static unsigned char* Y;         /* new metrics */
    static unsigned char* Branchtab; /* rate * numstates / 2 entries */
    static unsigned char Partab[256];
    static unsigned char* D;         /* decision records */
    static size_t D_capacity = 0;    /* bytes currently allocated for D */

    if (!tables_ready) {
        X = (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
        if (X == NULL) {
            return;
        }
        Y = X + d_numstates;

        Branchtab =
            (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
        if (Branchtab == NULL) {
            volk_free(X);
            X = NULL;
            return;
        }

        /* Initialize parity lookup table */
        for (int byte = 0; byte < 256; byte++) {
            int ones = 0;
            for (int rest = byte; rest; rest >>= 1) {
                ones += rest & 1;
            }
            Partab[byte] = ones & 1;
        }

        /* Initialize the branch table */
        for (int s = 0; s < d_numstates / 2; s++) {
            for (int p = 0; p < rate; p++) {
                Branchtab[p * d_numstates / 2 + s] =
                    parity((2 * s) & d_polys[p], Partab) ? 255 : 0;
            }
        }

        tables_ready = 1;
    }

    /* Size the decision buffer for THIS frame, growing it if an earlier
     * call allocated less. */
    const size_t D_needed = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);
    if (D_needed > D_capacity) {
        if (D != NULL) {
            volk_free(D);
        }
        D = (unsigned char*)volk_malloc(D_needed, volk_get_alignment());
        if (D == NULL) {
            D_capacity = 0;
            return;
        }
        D_capacity = D_needed;
    }

    // unbias the old_metrics
    memset(X, 31, d_numstates);

    // initialize decisions
    memset(D, 0, D_needed);

    volk_8u_x4_conv_k7_r2_8u_spiral(
        Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest metric. */
    unsigned int min = X[0];
    int state = 0;
    for (int i = 0; i < d_numstates; ++i) {
        if (X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);
}
176
177#endif /*LV_HAVE_SSE3*/
178
179
180#if LV_HAVE_NEON
181
/*
 * Puppet wrapper that drives volk_8u_x4_conv_k7_r2_8u_neonspiral with
 * the three-argument signature: it owns the metric, branch and decision
 * buffers, runs the kernel, then chains back into dec.
 *
 * dec       output; receives framebits / 2 - 6 bytes, each 0 or 1
 * syms      input symbols handed unchanged to the kernel
 *           (required length is defined by the kernel, not visible here)
 * framebits frame size; frames shorter than 12 are ignored
 *
 * The working buffers are function-local statics that persist between
 * calls, so concurrent calls share them. The decision buffer is regrown
 * whenever a call needs more room than a previous call allocated. If an
 * allocation fails the function returns without touching dec.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_neonspiral(unsigned char* dec,
                                                          unsigned char* syms,
                                                          unsigned int framebits)
{
    if (framebits < 12) {
        return;
    }

    const int d_numstates = (1 << 6);
    const int rate = 2;
    const unsigned int excess = 6;
    const int d_polys[2] = { 79, 109 };

    static int tables_ready = 0;
    static unsigned char* X;         /* old metrics; Y follows directly */
    static unsigned char* Y;         /* new metrics */
    static unsigned char* Branchtab; /* rate * numstates / 2 entries */
    static unsigned char Partab[256];
    static unsigned char* D;         /* decision records */
    static size_t D_capacity = 0;    /* bytes currently allocated for D */

    if (!tables_ready) {
        X = (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
        if (X == NULL) {
            return;
        }
        Y = X + d_numstates;

        Branchtab =
            (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
        if (Branchtab == NULL) {
            volk_free(X);
            X = NULL;
            return;
        }

        /* Initialize parity lookup table */
        for (int byte = 0; byte < 256; byte++) {
            int ones = 0;
            for (int rest = byte; rest; rest >>= 1) {
                ones += rest & 1;
            }
            Partab[byte] = ones & 1;
        }

        /* Initialize the branch table */
        for (int s = 0; s < d_numstates / 2; s++) {
            for (int p = 0; p < rate; p++) {
                Branchtab[p * d_numstates / 2 + s] =
                    parity((2 * s) & d_polys[p], Partab) ? 255 : 0;
            }
        }

        tables_ready = 1;
    }

    /* Size the decision buffer for THIS frame, growing it if an earlier
     * call allocated less. */
    const size_t D_needed = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);
    if (D_needed > D_capacity) {
        if (D != NULL) {
            volk_free(D);
        }
        D = (unsigned char*)volk_malloc(D_needed, volk_get_alignment());
        if (D == NULL) {
            D_capacity = 0;
            return;
        }
        D_capacity = D_needed;
    }

    // unbias the old_metrics
    memset(X, 31, d_numstates);

    // initialize decisions
    memset(D, 0, D_needed);

    volk_8u_x4_conv_k7_r2_8u_neonspiral(
        Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest metric. */
    unsigned int min = X[0];
    int state = 0;
    for (int i = 0; i < d_numstates; ++i) {
        if (X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);
}
258
259#endif /*LV_HAVE_NEON*/
260
261
262#if LV_HAVE_AVX2
263
264#include <immintrin.h>
265#include <stdio.h>
266
/*
 * Puppet wrapper that drives volk_8u_x4_conv_k7_r2_8u_avx2 with the
 * three-argument signature: it owns the metric, branch and decision
 * buffers, runs the kernel, then chains back into dec.
 *
 * dec       output; receives framebits / 2 - 6 bytes, each 0 or 1
 * syms      input symbols handed unchanged to the kernel
 *           (required length is defined by the kernel, not visible here)
 * framebits frame size; frames shorter than 12 are ignored
 *
 * The working buffers are function-local statics that persist between
 * calls, so concurrent calls share them. The decision buffer is regrown
 * whenever a call needs more room than a previous call allocated. If an
 * allocation fails the function returns without touching dec.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_avx2(unsigned char* dec,
                                                    unsigned char* syms,
                                                    unsigned int framebits)
{
    if (framebits < 12) {
        return;
    }

    const int d_numstates = (1 << 6);
    const int rate = 2;
    const unsigned int excess = 6;
    const int d_polys[2] = { 79, 109 };

    static int tables_ready = 0;
    static unsigned char* X;         /* old metrics; Y follows directly */
    static unsigned char* Y;         /* new metrics */
    static unsigned char* Branchtab; /* rate * numstates / 2 entries */
    static unsigned char Partab[256];
    static unsigned char* D;         /* decision records */
    static size_t D_capacity = 0;    /* bytes currently allocated for D */

    if (!tables_ready) {
        X = (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
        if (X == NULL) {
            return;
        }
        Y = X + d_numstates;

        Branchtab =
            (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
        if (Branchtab == NULL) {
            volk_free(X);
            X = NULL;
            return;
        }

        /* Initialize parity lookup table */
        for (int byte = 0; byte < 256; byte++) {
            int ones = 0;
            for (int rest = byte; rest; rest >>= 1) {
                ones += rest & 1;
            }
            Partab[byte] = ones & 1;
        }

        /* Initialize the branch table */
        for (int s = 0; s < d_numstates / 2; s++) {
            for (int p = 0; p < rate; p++) {
                Branchtab[p * d_numstates / 2 + s] =
                    parity((2 * s) & d_polys[p], Partab) ? 255 : 0;
            }
        }

        tables_ready = 1;
    }

    /* Size the decision buffer for THIS frame, growing it if an earlier
     * call allocated less. */
    const size_t D_needed = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);
    if (D_needed > D_capacity) {
        if (D != NULL) {
            volk_free(D);
        }
        D = (unsigned char*)volk_malloc(D_needed, volk_get_alignment());
        if (D == NULL) {
            D_capacity = 0;
            return;
        }
        D_capacity = D_needed;
    }

    // unbias the old_metrics
    memset(X, 31, d_numstates);

    // initialize decisions
    memset(D, 0, D_needed);

    volk_8u_x4_conv_k7_r2_8u_avx2(
        Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest metric. */
    unsigned int min = X[0];
    int state = 0;
    for (int i = 0; i < d_numstates; ++i) {
        if (X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);
}
343
344#endif /*LV_HAVE_AVX2*/
345
346
347#if LV_HAVE_GENERIC
348
349
/*
 * Puppet wrapper that drives volk_8u_x4_conv_k7_r2_8u_generic with the
 * three-argument signature: it owns the metric, branch and decision
 * buffers, runs the kernel, then chains back into dec.
 *
 * dec       output; receives framebits / 2 - 6 bytes, each 0 or 1
 * syms      input symbols handed unchanged to the kernel
 *           (required length is defined by the kernel, not visible here)
 * framebits frame size; frames shorter than 12 are ignored
 *
 * The working buffers are function-local statics that persist between
 * calls, so concurrent calls share them. The decision buffer is regrown
 * whenever a call needs more room than a previous call allocated. If an
 * allocation fails the function returns without touching dec.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_generic(unsigned char* dec,
                                                       unsigned char* syms,
                                                       unsigned int framebits)
{
    if (framebits < 12) {
        return;
    }

    const int d_numstates = (1 << 6);
    const int rate = 2;
    const unsigned int excess = 6;
    const int d_polys[2] = { 79, 109 };

    static int tables_ready = 0;
    static unsigned char* X;         /* old metrics; Y follows directly */
    static unsigned char* Y;         /* new metrics */
    static unsigned char* Branchtab; /* rate * numstates / 2 entries */
    static unsigned char Partab[256];
    static unsigned char* D;         /* decision records */
    static size_t D_capacity = 0;    /* bytes currently allocated for D */

    if (!tables_ready) {
        X = (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
        if (X == NULL) {
            return;
        }
        Y = X + d_numstates;

        Branchtab =
            (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
        if (Branchtab == NULL) {
            volk_free(X);
            X = NULL;
            return;
        }

        /* Initialize parity lookup table */
        for (int byte = 0; byte < 256; byte++) {
            int ones = 0;
            for (int rest = byte; rest; rest >>= 1) {
                ones += rest & 1;
            }
            Partab[byte] = ones & 1;
        }

        /* Initialize the branch table */
        for (int s = 0; s < d_numstates / 2; s++) {
            for (int p = 0; p < rate; p++) {
                Branchtab[p * d_numstates / 2 + s] =
                    parity((2 * s) & d_polys[p], Partab) ? 255 : 0;
            }
        }

        tables_ready = 1;
    }

    /* Size the decision buffer for THIS frame, growing it if an earlier
     * call allocated less. */
    const size_t D_needed = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);
    if (D_needed > D_capacity) {
        if (D != NULL) {
            volk_free(D);
        }
        D = (unsigned char*)volk_malloc(D_needed, volk_get_alignment());
        if (D == NULL) {
            D_capacity = 0;
            return;
        }
        D_capacity = D_needed;
    }

    // unbias the old_metrics
    memset(X, 31, d_numstates);

    // initialize decisions
    memset(D, 0, D_needed);

    volk_8u_x4_conv_k7_r2_8u_generic(
        Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest metric. */
    unsigned int min = X[0];
    int state = 0;
    for (int i = 0; i < d_numstates; ++i) {
        if (X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);
}
427
428#endif /* LV_HAVE_GENERIC */
429
430#endif /*INCLUDED_volk_8u_conv_k7_r2puppet_8u_H*/
Definition volk_8u_conv_k7_r2puppet_8u.h:17
unsigned int * w
Definition volk_8u_conv_k7_r2puppet_8u.h:20
unsigned char * t
Definition volk_8u_conv_k7_r2puppet_8u.h:19
size_t volk_get_alignment(void)
Get the machine alignment in bytes.
Definition volk.tmpl.c:90
static void volk_8u_conv_k7_r2puppet_8u_neonspiral(unsigned char *dec, unsigned char *syms, unsigned int framebits)
Definition volk_8u_conv_k7_r2puppet_8u.h:182
static int chainback_viterbi(unsigned char *data, unsigned int nbits, unsigned int endstate, unsigned int tailsize, unsigned char *decisions)
Definition volk_8u_conv_k7_r2puppet_8u.h:30
static int parity(int x, unsigned char *Partab)
Definition volk_8u_conv_k7_r2puppet_8u.h:23
static void volk_8u_conv_k7_r2puppet_8u_spiral(unsigned char *dec, unsigned char *syms, unsigned int framebits)
Definition volk_8u_conv_k7_r2puppet_8u.h:100
static void volk_8u_conv_k7_r2puppet_8u_generic(unsigned char *dec, unsigned char *syms, unsigned int framebits)
Definition volk_8u_conv_k7_r2puppet_8u.h:350
static void volk_8u_x4_conv_k7_r2_8u_spiral(unsigned char *Y, unsigned char *X, unsigned char *syms, unsigned char *dec, unsigned int framebits, unsigned int excess, unsigned char *Branchtab)
Definition volk_8u_x4_conv_k7_r2_8u.h:208
static void volk_8u_x4_conv_k7_r2_8u_neonspiral(unsigned char *Y, unsigned char *X, unsigned char *syms, unsigned char *dec, unsigned int framebits, unsigned int excess, unsigned char *Branchtab)
Definition volk_8u_x4_conv_k7_r2_8u.h:306
static void volk_8u_x4_conv_k7_r2_8u_generic(unsigned char *Y, unsigned char *X, unsigned char *syms, unsigned char *dec, unsigned int framebits, unsigned int excess, unsigned char *Branchtab)
Definition volk_8u_x4_conv_k7_r2_8u.h:439
for i
Definition volk_config_fixed.tmpl.h:13
__VOLK_DECL_BEGIN VOLK_API void * volk_malloc(size_t size, size_t alignment)
Allocate size bytes of data aligned to alignment.
Definition volk_malloc.c:38