diff options
| author | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-01-21 22:40:55 +0100 |
|---|---|---|
| committer | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-01-21 22:40:55 +0100 |
| commit | 5d8dfe892a2ea89f706ee140c3bdcfd89fe03fda (patch) | |
| tree | 1acdfa5220cd13b7be43a2a01368e80d306473ca /examples/redis-unstable/src/crccombine.c | |
| parent | c7ab12bba64d9c20ccd79b132dac475f7bc3923e (diff) | |
| download | crep-5d8dfe892a2ea89f706ee140c3bdcfd89fe03fda.tar.gz | |
Add Redis source code for testing
Diffstat (limited to 'examples/redis-unstable/src/crccombine.c')
| -rw-r--r-- | examples/redis-unstable/src/crccombine.c | 252 |
1 file changed, 252 insertions, 0 deletions
diff --git a/examples/redis-unstable/src/crccombine.c b/examples/redis-unstable/src/crccombine.c new file mode 100644 index 0000000..20add37 --- /dev/null +++ b/examples/redis-unstable/src/crccombine.c @@ -0,0 +1,252 @@ +#include <stdint.h> +#include <stdio.h> +#include <strings.h> +#if defined(__i386__) || defined(__X86_64__) +#include <immintrin.h> +#endif +#include "crccombine.h" + +/* Copyright (C) 2013 Mark Adler + * Copyright (C) 2019-2024 Josiah Carlson + * Portions originally from: crc64.c Version 1.4 16 Dec 2013 Mark Adler + * Modifications by Josiah Carlson <josiah.carlson@gmail.com> + * - Added implementation variations with sample timings for gf_matrix_times*() + * - Most folks would be best using gf2_matrix_times_vec or + * gf2_matrix_times_vec2, unless some processor does AVX2 fast. + * - This is the implementation of the MERGE_CRC macro defined in + * crcspeed.c (which calls crc_combine()), and is a specialization of the + * generic crc_combine() (and related from the 2013 edition of Mark Adler's + * crc64.c)) for the sake of clarity and performance. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. 
+ + Mark Adler + madler@alumni.caltech.edu +*/ + +#define STATIC_ASSERT(VVV) do {int test = 1 / (VVV);test++;} while (0) + +#if !((defined(__i386__) || defined(__X86_64__))) + +/* This cuts 40% of the time vs bit-by-bit. */ + +uint64_t gf2_matrix_times_switch(uint64_t *mat, uint64_t vec) { + /* + * Without using any vector math, this handles 4 bits at a time, + * and saves 40+% of the time compared to the bit-by-bit version. Use if you + * have no vector compile option available to you. With cache, we see: + * E5-2670 ~1-2us to extend ~1 meg 64 bit hash + */ + uint64_t sum; + + sum = 0; + while (vec) { + /* reversing the case order is ~10% slower on Xeon E5-2670 */ + switch (vec & 15) { + case 15: + sum ^= *mat ^ *(mat+1) ^ *(mat+2) ^ *(mat+3); + break; + case 14: + sum ^= *(mat+1) ^ *(mat+2) ^ *(mat+3); + break; + case 13: + sum ^= *mat ^ *(mat+2) ^ *(mat+3); + break; + case 12: + sum ^= *(mat+2) ^ *(mat+3); + break; + case 11: + sum ^= *mat ^ *(mat+1) ^ *(mat+3); + break; + case 10: + sum ^= *(mat+1) ^ *(mat+3); + break; + case 9: + sum ^= *mat ^ *(mat+3); + break; + case 8: + sum ^= *(mat+3); + break; + case 7: + sum ^= *mat ^ *(mat+1) ^ *(mat+2); + break; + case 6: + sum ^= *(mat+1) ^ *(mat+2); + break; + case 5: + sum ^= *mat ^ *(mat+2); + break; + case 4: + sum ^= *(mat+2); + break; + case 3: + sum ^= *mat ^ *(mat+1); + break; + case 2: + sum ^= *(mat+1); + break; + case 1: + sum ^= *mat; + break; + default: + break; + } + vec >>= 4; + mat += 4; + } + return sum; +} + +#define CRC_MULTIPLY gf2_matrix_times_switch + +#else + +/* + Warning: here there be dragons involving vector math, and macros to save us + from repeating the same information over and over. +*/ + +uint64_t gf2_matrix_times_vec2(uint64_t *mat, uint64_t vec) { + /* + * Uses xmm registers on x86, works basically everywhere fast, doing + * cycles of movqda, mov, shr, pand, and, pxor, at least on gcc 8. + * Is 9-11x faster than original. 
+ * E5-2670 ~29us to extend ~1 meg 64 bit hash + * i3-8130U ~22us to extend ~1 meg 64 bit hash + */ + v2uq sum = {0, 0}, + *mv2 = (v2uq*)mat; + /* this table allows us to eliminate conditions during gf2_matrix_times_vec2() */ + static v2uq masks2[4] = { + {0,0}, + {-1,0}, + {0,-1}, + {-1,-1}, + }; + + /* Almost as beautiful as gf2_matrix_times_vec, but only half as many + * bits per step, so we need 2 per chunk4 operation. Faster in my tests. */ + +#define DO_CHUNK4() \ + sum ^= (*mv2++) & masks2[vec & 3]; \ + vec >>= 2; \ + sum ^= (*mv2++) & masks2[vec & 3]; \ + vec >>= 2 + +#define DO_CHUNK16() \ + DO_CHUNK4(); \ + DO_CHUNK4(); \ + DO_CHUNK4(); \ + DO_CHUNK4() + + DO_CHUNK16(); + DO_CHUNK16(); + DO_CHUNK16(); + DO_CHUNK16(); + + STATIC_ASSERT(sizeof(uint64_t) == 8); + STATIC_ASSERT(sizeof(long long unsigned int) == 8); + return sum[0] ^ sum[1]; +} + +#undef DO_CHUNK16 +#undef DO_CHUNK4 + +#define CRC_MULTIPLY gf2_matrix_times_vec2 +#endif + +static void gf2_matrix_square(uint64_t *square, uint64_t *mat, uint8_t dim) { + unsigned n; + + for (n = 0; n < dim; n++) + square[n] = CRC_MULTIPLY(mat, mat[n]); +} + +/* Turns out our Redis / Jones CRC cycles at this point, so we can support + * more than 64 bits of extension if we want. Trivially. */ +static uint64_t combine_cache[64][64]; + +/* Mark Adler has some amazing updates to crc.c in his crcany repository. I + * like static caches, and not worrying about finding cycles generally. We are + * okay to spend the 32k of memory here, leaving the algorithm unchanged from + * as it was a decade ago, and be happy that it costs <200 microseconds to + * init, and that subsequent calls to the combine function take under 100 + * nanoseconds. We also note that the crcany/crc.c code applies to any CRC, and + * we are currently targeting one: Jones CRC64. 
+ */ + +void init_combine_cache(uint64_t poly, uint8_t dim) { + unsigned n, cache_num = 0; + combine_cache[1][0] = poly; + int prev = 1; + uint64_t row = 1; + for (n = 1; n < dim; n++) + { + combine_cache[1][n] = row; + row <<= 1; + } + + gf2_matrix_square(combine_cache[0], combine_cache[1], dim); + gf2_matrix_square(combine_cache[1], combine_cache[0], dim); + + /* do/while to overwrite the first two layers, they are not used, but are + * re-generated in the last two layers for the Redis polynomial */ + do { + gf2_matrix_square(combine_cache[cache_num], combine_cache[cache_num + prev], dim); + prev = -1; + } while (++cache_num < 64); +} + +/* Return the CRC-64 of two sequential blocks, where crc1 is the CRC-64 of the + * first block, crc2 is the CRC-64 of the second block, and len2 is the length + * of the second block. + * + * If you want reflections on your CRCs; do them outside before / after. + * WARNING: if you enable USE_STATIC_COMBINE_CACHE to make this fast, you MUST + * ALWAYS USE THE SAME POLYNOMIAL, otherwise you will get the wrong results. + * You MAY bzero() the even/odd static arrays, which will induce a re-cache on + * next call as a work-around, but ... maybe just parameterize the cached + * models at that point like Mark Adler does in modern crcany/crc.c . 
+ */ +uint64_t crc64_combine(uint64_t crc1, uint64_t crc2, uintmax_t len2, uint64_t poly, uint8_t dim) { + /* degenerate case */ + if (len2 == 0) + return crc1; + + unsigned cache_num = 0; + if (combine_cache[0][0] == 0) { + init_combine_cache(poly, dim); + } + + /* apply len2 zeros to crc1 (first square will put the operator for one + zero byte, eight zero bits, in even) */ + do + { + /* apply zeros operator for this bit of len2 */ + if (len2 & 1) + crc1 = CRC_MULTIPLY(combine_cache[cache_num], crc1); + len2 >>= 1; + cache_num = (cache_num + 1) & 63; + /* if no more bits set, then done */ + } while (len2 != 0); + + /* return combined crc */ + crc1 ^= crc2; + return crc1; +} + +#undef CRC_MULTIPLY |
