diff options
| author | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-01-21 22:52:54 +0100 |
|---|---|---|
| committer | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-01-21 22:52:54 +0100 |
| commit | dcacc00e3750300617ba6e16eb346713f91a783a (patch) | |
| tree | 38e2d4fb5ed9d119711d4295c6eda4b014af73fd /examples/redis-unstable/src/crcspeed.c | |
| parent | 58dac10aeb8f5a041c46bddbeaf4c7966a99b998 (diff) | |
| download | crep-dcacc00e3750300617ba6e16eb346713f91a783a.tar.gz | |
Remove testing data
Diffstat (limited to 'examples/redis-unstable/src/crcspeed.c')
| -rw-r--r-- | examples/redis-unstable/src/crcspeed.c | 410 |
1 files changed, 0 insertions, 410 deletions
diff --git a/examples/redis-unstable/src/crcspeed.c b/examples/redis-unstable/src/crcspeed.c deleted file mode 100644 index c7073cb..0000000 --- a/examples/redis-unstable/src/crcspeed.c +++ /dev/null @@ -1,410 +0,0 @@ -/* - * Copyright (C) 2013 Mark Adler - * Copyright (C) 2019-2024 Josiah Carlson - * Originally by: crc64.c Version 1.4 16 Dec 2013 Mark Adler - * Modifications by Matt Stancliff <matt@genges.com>: - * - removed CRC64-specific behavior - * - added generation of lookup tables by parameters - * - removed inversion of CRC input/result - * - removed automatic initialization in favor of explicit initialization - * Modifications by Josiah Carlson <josiah.carlson@gmail.com> - * - Added case/vector/AVX/+ versions of crc combine function; see crccombine.c - * - added optional static cache - * - Modified to use 1 thread to: - * - Partition large crc blobs into 2-3 segments - * - Process the 2-3 segments in parallel - * - Merge the resulting crcs - * -> Resulting in 10-90% performance boost for data > 1 meg - * - macro-ized to reduce copy/pasta - - This software is provided 'as-is', without any express or implied - warranty. In no event will the author be held liable for any damages - arising from the use of this software. - - Permission is granted to anyone to use this software for any purpose, - including commercial applications, and to alter it and redistribute it - freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. - 2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. - 3. This notice may not be removed or altered from any source distribution. - - Mark Adler - madler@alumni.caltech.edu - */ - -#include "crcspeed.h" -#include "crccombine.h" - -#define CRC64_LEN_MASK UINT64_C(0x7ffffffffffffff8) -#define CRC64_REVERSED_POLY UINT64_C(0x95ac9329ac4bc9b5) - -/* Fill in a CRC constants table. */ -void crcspeed64little_init(crcfn64 crcfn, uint64_t table[8][256]) { - uint64_t crc; - - /* generate CRCs for all single byte sequences */ - for (int n = 0; n < 256; n++) { - unsigned char v = n; - table[0][n] = crcfn(0, &v, 1); - } - - /* generate nested CRC table for future slice-by-8/16/24+ lookup */ - for (int n = 0; n < 256; n++) { - crc = table[0][n]; - for (int k = 1; k < 8; k++) { - crc = table[0][crc & 0xff] ^ (crc >> 8); - table[k][n] = crc; - } - } -#if USE_STATIC_COMBINE_CACHE - /* initialize combine cache for CRC stapling for slice-by 16/24+ */ - init_combine_cache(CRC64_REVERSED_POLY, 64); -#endif -} - -void crcspeed16little_init(crcfn16 crcfn, uint16_t table[8][256]) { - uint16_t crc; - - /* generate CRCs for all single byte sequences */ - for (int n = 0; n < 256; n++) { - table[0][n] = crcfn(0, &n, 1); - } - - /* generate nested CRC table for future slice-by-8 lookup */ - for (int n = 0; n < 256; n++) { - crc = table[0][n]; - for (int k = 1; k < 8; k++) { - crc = table[0][(crc >> 8) & 0xff] ^ (crc << 8); - table[k][n] = crc; - } - } -} - -/* Reverse the bytes in a 64-bit word. */ -static inline uint64_t rev8(uint64_t a) { -#if defined(__GNUC__) || defined(__clang__) - return __builtin_bswap64(a); -#else - uint64_t m; - - m = UINT64_C(0xff00ff00ff00ff); - a = ((a >> 8) & m) | (a & m) << 8; - m = UINT64_C(0xffff0000ffff); - a = ((a >> 16) & m) | (a & m) << 16; - return a >> 32 | a << 32; -#endif -} - -/* This function is called once to initialize the CRC table for use on a - big-endian architecture. */ -void crcspeed64big_init(crcfn64 fn, uint64_t big_table[8][256]) { - /* Create the little endian table then reverse all the entries. */ - crcspeed64little_init(fn, big_table); - for (int k = 0; k < 8; k++) { - for (int n = 0; n < 256; n++) { - big_table[k][n] = rev8(big_table[k][n]); - } - } -} - -void crcspeed16big_init(crcfn16 fn, uint16_t big_table[8][256]) { - /* Create the little endian table then reverse all the entries. */ - crcspeed16little_init(fn, big_table); - for (int k = 0; k < 8; k++) { - for (int n = 0; n < 256; n++) { - big_table[k][n] = rev8(big_table[k][n]); - } - } -} - -/* Note: doing all of our crc/next modifications *before* the crc table - * references is an absolute speedup on all CPUs tested. So... keep these - * macros separate. - */ - -#define DO_8_1(crc, next) \ - crc ^= *(uint64_t *)next; \ - next += 8 - -#define DO_8_2(crc) \ - crc = little_table[7][(uint8_t)crc] ^ \ - little_table[6][(uint8_t)(crc >> 8)] ^ \ - little_table[5][(uint8_t)(crc >> 16)] ^ \ - little_table[4][(uint8_t)(crc >> 24)] ^ \ - little_table[3][(uint8_t)(crc >> 32)] ^ \ - little_table[2][(uint8_t)(crc >> 40)] ^ \ - little_table[1][(uint8_t)(crc >> 48)] ^ \ - little_table[0][crc >> 56] - -#define CRC64_SPLIT(div) \ - olen = len; \ - next2 = next1 + ((len / div) & CRC64_LEN_MASK); \ - len = (next2 - next1) - -#define MERGE_CRC(crcn) \ - crc1 = crc64_combine(crc1, crcn, next2 - next1, CRC64_REVERSED_POLY, 64) - -#define MERGE_END(last, DIV) \ - len = olen - ((next2 - next1) * DIV); \ - next1 = last - -/* Variables so we can change for benchmarking; these seem to be fairly - * reasonable for Intel CPUs made since 2010. Please adjust as necessary if - * or when your CPU has more load / execute units. We've written benchmark code - * to help you tune your platform, see crc64Test. */ -#if defined(__i386__) || defined(__X86_64__) -static size_t CRC64_TRI_CUTOFF = (2*1024); -static size_t CRC64_DUAL_CUTOFF = (128); -#else -static size_t CRC64_TRI_CUTOFF = (16*1024); -static size_t CRC64_DUAL_CUTOFF = (1024); -#endif - - -void set_crc64_cutoffs(size_t dual_cutoff, size_t tri_cutoff) { - CRC64_DUAL_CUTOFF = dual_cutoff; - CRC64_TRI_CUTOFF = tri_cutoff; -} - -/* Calculate a non-inverted CRC multiple bytes at a time on a little-endian - * architecture. If you need inverted CRC, invert *before* calling and invert - * *after* calling. - * 64 bit crc = process 8/16/24 bytes at once; - */ -uint64_t crcspeed64little(uint64_t little_table[8][256], uint64_t crc1, - void *buf, size_t len) { - unsigned char *next1 = buf; - - if (CRC64_DUAL_CUTOFF < 1) { - goto final; - } - - /* process individual bytes until we reach an 8-byte aligned pointer */ - while (len && ((uintptr_t)next1 & 7) != 0) { - crc1 = little_table[0][(crc1 ^ *next1++) & 0xff] ^ (crc1 >> 8); - len--; - } - - if (len > CRC64_TRI_CUTOFF) { - /* 24 bytes per loop, doing 3 parallel 8 byte chunks at a time */ - unsigned char *next2, *next3; - uint64_t olen, crc2=0, crc3=0; - CRC64_SPLIT(3); - /* len is now the length of the first segment, the 3rd segment possibly - * having extra bytes to clean up at the end - */ - next3 = next2 + len; - while (len >= 8) { - len -= 8; - DO_8_1(crc1, next1); - DO_8_1(crc2, next2); - DO_8_1(crc3, next3); - DO_8_2(crc1); - DO_8_2(crc2); - DO_8_2(crc3); - } - - /* merge the 3 crcs */ - MERGE_CRC(crc2); - MERGE_CRC(crc3); - MERGE_END(next3, 3); - } else if (len > CRC64_DUAL_CUTOFF) { - /* 16 bytes per loop, doing 2 parallel 8 byte chunks at a time */ - unsigned char *next2; - uint64_t olen, crc2=0; - CRC64_SPLIT(2); - /* len is now the length of the first segment, the 2nd segment possibly - * having extra bytes to clean up at the end - */ - while (len >= 8) { - len -= 8; - DO_8_1(crc1, next1); - DO_8_1(crc2, next2); - DO_8_2(crc1); - DO_8_2(crc2); - } - - /* merge the 2 crcs */ - MERGE_CRC(crc2); - MERGE_END(next2, 2); - } - /* We fall through here to handle our <CRC64_DUAL_CUTOFF inputs, and for any trailing - * bytes that wasn't evenly divisble by 16 or 24 above. */ - - /* fast processing, 8 bytes (aligned!) per loop */ - while (len >= 8) { - len -= 8; - DO_8_1(crc1, next1); - DO_8_2(crc1); - } -final: - /* process remaining bytes (can't be larger than 8) */ - while (len) { - crc1 = little_table[0][(crc1 ^ *next1++) & 0xff] ^ (crc1 >> 8); - len--; - } - - return crc1; -} - -/* clean up our namespace */ -#undef DO_8_1 -#undef DO_8_2 -#undef CRC64_SPLIT -#undef MERGE_CRC -#undef MERGE_END -#undef CRC64_REVERSED_POLY -#undef CRC64_LEN_MASK - - -/* note: similar perf advantages can be had for long strings in crc16 using all - * of the same optimizations as above; though this is unnecessary. crc16 is - * normally used to shard keys; not hash / verify data, so is used on shorter - * data that doesn't warrant such changes. */ - -uint16_t crcspeed16little(uint16_t little_table[8][256], uint16_t crc, - void *buf, size_t len) { - unsigned char *next = buf; - - /* process individual bytes until we reach an 8-byte aligned pointer */ - while (len && ((uintptr_t)next & 7) != 0) { - crc = little_table[0][((crc >> 8) ^ *next++) & 0xff] ^ (crc << 8); - len--; - } - - /* fast middle processing, 8 bytes (aligned!) per loop */ - while (len >= 8) { - uint64_t n = *(uint64_t *)next; - crc = little_table[7][(n & 0xff) ^ ((crc >> 8) & 0xff)] ^ - little_table[6][((n >> 8) & 0xff) ^ (crc & 0xff)] ^ - little_table[5][(n >> 16) & 0xff] ^ - little_table[4][(n >> 24) & 0xff] ^ - little_table[3][(n >> 32) & 0xff] ^ - little_table[2][(n >> 40) & 0xff] ^ - little_table[1][(n >> 48) & 0xff] ^ - little_table[0][n >> 56]; - next += 8; - len -= 8; - } - - /* process remaining bytes (can't be larger than 8) */ - while (len) { - crc = little_table[0][((crc >> 8) ^ *next++) & 0xff] ^ (crc << 8); - len--; - } - - return crc; -} - -/* Calculate a non-inverted CRC eight bytes at a time on a big-endian - * architecture. - */ -uint64_t crcspeed64big(uint64_t big_table[8][256], uint64_t crc, void *buf, - size_t len) { - unsigned char *next = buf; - - crc = rev8(crc); - while (len && ((uintptr_t)next & 7) != 0) { - crc = big_table[0][(crc >> 56) ^ *next++] ^ (crc << 8); - len--; - } - - /* note: alignment + 2/3-way processing can probably be handled here nearly - the same as above, using our updated DO_8_2 macro. Not included in these - changes, as other authors, I don't have big-endian to test with. */ - - while (len >= 8) { - crc ^= *(uint64_t *)next; - crc = big_table[0][crc & 0xff] ^ - big_table[1][(crc >> 8) & 0xff] ^ - big_table[2][(crc >> 16) & 0xff] ^ - big_table[3][(crc >> 24) & 0xff] ^ - big_table[4][(crc >> 32) & 0xff] ^ - big_table[5][(crc >> 40) & 0xff] ^ - big_table[6][(crc >> 48) & 0xff] ^ - big_table[7][crc >> 56]; - next += 8; - len -= 8; - } - - while (len) { - crc = big_table[0][(crc >> 56) ^ *next++] ^ (crc << 8); - len--; - } - - return rev8(crc); -} - -/* WARNING: Completely untested on big endian architecture. Possibly broken. */ -uint16_t crcspeed16big(uint16_t big_table[8][256], uint16_t crc_in, void *buf, - size_t len) { - unsigned char *next = buf; - uint64_t crc = crc_in; - - crc = rev8(crc); - while (len && ((uintptr_t)next & 7) != 0) { - crc = big_table[0][((crc >> (56 - 8)) ^ *next++) & 0xff] ^ (crc >> 8); - len--; - } - - while (len >= 8) { - uint64_t n = *(uint64_t *)next; - crc = big_table[0][(n & 0xff) ^ ((crc >> (56 - 8)) & 0xff)] ^ - big_table[1][((n >> 8) & 0xff) ^ (crc & 0xff)] ^ - big_table[2][(n >> 16) & 0xff] ^ - big_table[3][(n >> 24) & 0xff] ^ - big_table[4][(n >> 32) & 0xff] ^ - big_table[5][(n >> 40) & 0xff] ^ - big_table[6][(n >> 48) & 0xff] ^ - big_table[7][n >> 56]; - next += 8; - len -= 8; - } - - while (len) { - crc = big_table[0][((crc >> (56 - 8)) ^ *next++) & 0xff] ^ (crc >> 8); - len--; - } - - return rev8(crc); -} - -/* Return the CRC of buf[0..len-1] with initial crc, processing eight bytes - at a time using passed-in lookup table. - This selects one of two routines depending on the endianness of - the architecture. */ -uint64_t crcspeed64native(uint64_t table[8][256], uint64_t crc, void *buf, - size_t len) { - uint64_t n = 1; - - return *(char *)&n ? crcspeed64little(table, crc, buf, len) - : crcspeed64big(table, crc, buf, len); -} - -uint16_t crcspeed16native(uint16_t table[8][256], uint16_t crc, void *buf, - size_t len) { - uint64_t n = 1; - - return *(char *)&n ? crcspeed16little(table, crc, buf, len) - : crcspeed16big(table, crc, buf, len); -} - -/* Initialize CRC lookup table in architecture-dependent manner. */ -void crcspeed64native_init(crcfn64 fn, uint64_t table[8][256]) { - uint64_t n = 1; - - *(char *)&n ? crcspeed64little_init(fn, table) - : crcspeed64big_init(fn, table); -} - -void crcspeed16native_init(crcfn16 fn, uint16_t table[8][256]) { - uint64_t n = 1; - - *(char *)&n ? crcspeed16little_init(fn, table) - : crcspeed16big_init(fn, table); -} |
