summaryrefslogtreecommitdiff
path: root/examples/redis-unstable/modules/vector-sets/tests/large_scale.py
diff options
context:
space:
mode:
authorMitja Felicijan <mitja.felicijan@gmail.com>2026-01-21 22:52:54 +0100
committerMitja Felicijan <mitja.felicijan@gmail.com>2026-01-21 22:52:54 +0100
commitdcacc00e3750300617ba6e16eb346713f91a783a (patch)
tree38e2d4fb5ed9d119711d4295c6eda4b014af73fd /examples/redis-unstable/modules/vector-sets/tests/large_scale.py
parent58dac10aeb8f5a041c46bddbeaf4c7966a99b998 (diff)
downloadcrep-dcacc00e3750300617ba6e16eb346713f91a783a.tar.gz
Remove testing data
Diffstat (limited to 'examples/redis-unstable/modules/vector-sets/tests/large_scale.py')
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/large_scale.py56
1 files changed, 0 insertions, 56 deletions
diff --git a/examples/redis-unstable/modules/vector-sets/tests/large_scale.py b/examples/redis-unstable/modules/vector-sets/tests/large_scale.py
deleted file mode 100644
index eac5dca..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/large_scale.py
+++ /dev/null
@@ -1,56 +0,0 @@
-from test import TestCase, fill_redis_with_vectors, generate_random_vector
-import random
-
-class LargeScale(TestCase):
- def getname(self):
- return "Large Scale Comparison"
-
- def estimated_runtime(self):
- return 10
-
- def test(self):
- dim = 300
- count = 20000
- k = 50
-
- # Fill Redis and get reference data for comparison
- random.seed(42) # Make test deterministic
- data = fill_redis_with_vectors(self.redis, self.test_key, count, dim)
-
- # Generate query vector
- query_vec = generate_random_vector(dim)
-
- # Get results from Redis with good exploration factor
- redis_raw = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
- *[str(x) for x in query_vec],
- 'COUNT', k, 'WITHSCORES', 'EF', 500)
-
- # Convert Redis results to dict
- redis_results = {}
- for i in range(0, len(redis_raw), 2):
- key = redis_raw[i].decode()
- score = float(redis_raw[i+1])
- redis_results[key] = score
-
- # Get results from linear scan
- linear_results = data.find_k_nearest(query_vec, k)
- linear_items = {name: score for name, score in linear_results}
-
- # Compare overlap
- redis_set = set(redis_results.keys())
- linear_set = set(linear_items.keys())
- overlap = len(redis_set & linear_set)
-
- # If test fails, print comparison for debugging
- if overlap < k * 0.7:
- data.print_comparison({'items': redis_results, 'query_vector': query_vec}, k)
-
- assert overlap >= k * 0.7, \
- f"Expected at least 70% overlap in top {k} results, got {overlap/k*100:.1f}%"
-
- # Verify scores for common items
- for item in redis_set & linear_set:
- redis_score = redis_results[item]
- linear_score = linear_items[item]
- assert abs(redis_score - linear_score) < 0.01, \
- f"Score mismatch for {item}: Redis={redis_score:.3f} Linear={linear_score:.3f}"