Remove testing data

author: Mitja Felicijan <mitja.felicijan@gmail.com> 2026-01-21 22:52:54 +0100
committer: Mitja Felicijan <mitja.felicijan@gmail.com> 2026-01-21 22:52:54 +0100
commit: dcacc00e3750300617ba6e16eb346713f91a783a (patch)
tree: 38e2d4fb5ed9d119711d4295c6eda4b014af73fd /examples/redis-unstable/modules/vector-sets/tests/reduce.py
parent: 58dac10aeb8f5a041c46bddbeaf4c7966a99b998 (diff)
download: crep-dcacc00e3750300617ba6e16eb346713f91a783a.tar.gz
1 files changed, 0 insertions, 71 deletions
diff --git a/examples/redis-unstable/modules/vector-sets/tests/reduce.py b/examples/redis-unstable/modules/vector-sets/tests/reduce.py
deleted file mode 100644
index e39164f..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/reduce.py
+++ /dev/null
@@ -1,71 +0,0 @@
-from test import TestCase, fill_redis_with_vectors, generate_random_vector
-class Reduce(TestCase):
-    """Check that a dimension-reduced vector set keeps similarity structure.
-
-    Vectors with ``original_dim`` components are loaded through
-    ``fill_redis_with_vectors`` with a target of ``reduced_dim``; the top-k
-    reply of ``VSIM`` is then compared with a linear scan over the reference
-    data that helper returns.
-    """
-    def getname(self):
-        """Return the display name of this test."""
-        return "Dimension Reduction"
-    def estimated_runtime(self):
-        """Return this test's runtime estimate (unit not shown here; presumably seconds)."""
-        return 0.2
-    def test(self):
-        """Compare reduced-dimension VSIM results against a linear scan.
-
-        Raises:
-            AssertionError: if VDIM does not report ``reduced_dim``, if the
-                top-k overlap ratio is below 0.1, or if a name present in
-                both result sets differs in score by 0.2 or more.
-        """
-        original_dim = 100
-        reduced_dim = 80
-        count = 1000
-        k = 50  # Number of nearest neighbors to check
-        # Fill Redis with vectors using REDUCE and get reference data
-        data = fill_redis_with_vectors(self.redis, self.test_key, count, original_dim, reduced_dim)
-        # Verify dimension is reduced
-        dim = self.redis.execute_command('VDIM', self.test_key)
-        assert dim == reduced_dim, f"Expected dimension {reduced_dim}, got {dim}"
-        # Generate query vector and get nearest neighbors using Redis
-        query_vec = generate_random_vector(original_dim)
-        redis_raw = self.redis.execute_command('VSIM', self.test_key, 'VALUES',
-                                             original_dim, *[str(x) for x in query_vec],
-                                             'COUNT', k, 'WITHSCORES')
-        # The reply is consumed as a flat list alternating name, score
-        redis_results = {
-            redis_raw[i].decode(): float(redis_raw[i+1])
-            for i in range(0, len(redis_raw), 2)
-        }
-        # Get results from linear scan with original vectors
-        linear_results = data.find_k_nearest(query_vec, k)
-        linear_items = dict(linear_results)
-        # Compare overlap between reduced and non-reduced results
-        common_items = set(redis_results) & set(linear_items)
-        overlap_ratio = len(common_items) / k
-        # With random projection, we expect some loss of accuracy but should
-        # maintain at least some similarity structure.
-        # Note that gaussian distribution is the worst case for this test, so
-        # in real world practice, things will be better.
-        min_expected_overlap = 0.1  # At least 10% overlap in top-k
-        # Print the comparison BEFORE asserting: once the assertion below
-        # raises, nothing after it runs, so the details must come first.
-        if overlap_ratio < min_expected_overlap:
-            print("\nLow overlap in results. Details:")
-            print("\nTop results from linear scan (original vectors):")
-            for name, score in linear_results:
-                print(f"{name}: {score:.3f}")
-            print("\nTop results from Redis (reduced vectors):")
-            for item, score in sorted(redis_results.items(), key=lambda x: x[1], reverse=True):
-                print(f"{item}: {score:.3f}")
-        assert overlap_ratio >= min_expected_overlap, \
-            f"Dimension reduction lost too much structure. Only {overlap_ratio*100:.1f}% overlap in top {k}"
-        # For items that appear in both results, scores should be reasonably correlated
-        for item in common_items:
-            redis_score = redis_results[item]
-            linear_score = linear_items[item]
-            # Allow for some deviation due to dimensionality reduction
-            assert abs(redis_score - linear_score) < 0.2, \
-                f"Score mismatch too high for {item}: Redis={redis_score:.3f} Linear={linear_score:.3f}"
author	Mitja Felicijan <mitja.felicijan@gmail.com>	2026-01-21 22:52:54 +0100
committer	Mitja Felicijan <mitja.felicijan@gmail.com>	2026-01-21 22:52:54 +0100
commit	dcacc00e3750300617ba6e16eb346713f91a783a (patch)
tree	38e2d4fb5ed9d119711d4295c6eda4b014af73fd /examples/redis-unstable/modules/vector-sets/tests/reduce.py
parent	58dac10aeb8f5a041c46bddbeaf4c7966a99b998 (diff)
download	crep-dcacc00e3750300617ba6e16eb346713f91a783a.tar.gz

diff --git a/examples/redis-unstable/modules/vector-sets/tests/reduce.py b/examples/redis-unstable/modules/vector-sets/tests/reduce.py
deleted file mode 100644
index e39164f..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/reduce.py
+++ /dev/null
@@ -1,71 +0,0 @@
1	from test import TestCase, fill_redis_with_vectors, generate_random_vector
2
class Reduce(TestCase):
    """Check that a dimension-reduced vector set keeps similarity structure.

    Vectors with ``original_dim`` components are loaded through
    ``fill_redis_with_vectors`` with a target of ``reduced_dim``; the top-k
    reply of ``VSIM`` is then compared with a linear scan over the reference
    data that helper returns.
    """

    def getname(self):
        """Return the display name of this test."""
        return "Dimension Reduction"

    def estimated_runtime(self):
        """Return this test's runtime estimate (unit not shown here; presumably seconds)."""
        return 0.2

    def test(self):
        """Compare reduced-dimension VSIM results against a linear scan.

        Raises:
            AssertionError: if VDIM does not report ``reduced_dim``, if the
                top-k overlap ratio is below 0.1, or if a name present in
                both result sets differs in score by 0.2 or more.
        """
        original_dim = 100
        reduced_dim = 80
        count = 1000
        k = 50  # Number of nearest neighbors to check

        # Fill Redis with vectors using REDUCE and get reference data
        data = fill_redis_with_vectors(self.redis, self.test_key, count, original_dim, reduced_dim)

        # Verify dimension is reduced
        dim = self.redis.execute_command('VDIM', self.test_key)
        assert dim == reduced_dim, f"Expected dimension {reduced_dim}, got {dim}"

        # Generate query vector and get nearest neighbors using Redis
        query_vec = generate_random_vector(original_dim)
        redis_raw = self.redis.execute_command('VSIM', self.test_key, 'VALUES',
                                               original_dim, *[str(x) for x in query_vec],
                                               'COUNT', k, 'WITHSCORES')

        # The reply is consumed as a flat list alternating name, score
        redis_results = {
            redis_raw[i].decode(): float(redis_raw[i+1])
            for i in range(0, len(redis_raw), 2)
        }

        # Get results from linear scan with original vectors
        linear_results = data.find_k_nearest(query_vec, k)
        linear_items = dict(linear_results)

        # Compare overlap between reduced and non-reduced results
        common_items = set(redis_results) & set(linear_items)
        overlap_ratio = len(common_items) / k

        # With random projection, we expect some loss of accuracy but should
        # maintain at least some similarity structure.
        # Note that gaussian distribution is the worst case for this test, so
        # in real world practice, things will be better.
        min_expected_overlap = 0.1  # At least 10% overlap in top-k

        # Print the comparison BEFORE asserting: once the assertion below
        # raises, nothing after it runs, so the details must come first.
        if overlap_ratio < min_expected_overlap:
            print("\nLow overlap in results. Details:")
            print("\nTop results from linear scan (original vectors):")
            for name, score in linear_results:
                print(f"{name}: {score:.3f}")
            print("\nTop results from Redis (reduced vectors):")
            for item, score in sorted(redis_results.items(), key=lambda x: x[1], reverse=True):
                print(f"{item}: {score:.3f}")

        assert overlap_ratio >= min_expected_overlap, \
            f"Dimension reduction lost too much structure. Only {overlap_ratio*100:.1f}% overlap in top {k}"

        # For items that appear in both results, scores should be reasonably correlated
        for item in common_items:
            redis_score = redis_results[item]
            linear_score = linear_items[item]
            # Allow for some deviation due to dimensionality reduction
            assert abs(redis_score - linear_score) < 0.2, \
                f"Score mismatch too high for {item}: Redis={redis_score:.3f} Linear={linear_score:.3f}"