diff options
Diffstat (limited to 'examples/redis-unstable/modules/vector-sets/tests/large_scale.py')
| -rw-r--r-- | examples/redis-unstable/modules/vector-sets/tests/large_scale.py | 56 |
1 files changed, 56 insertions, 0 deletions
diff --git a/examples/redis-unstable/modules/vector-sets/tests/large_scale.py b/examples/redis-unstable/modules/vector-sets/tests/large_scale.py new file mode 100644 index 0000000..eac5dca --- /dev/null +++ b/examples/redis-unstable/modules/vector-sets/tests/large_scale.py @@ -0,0 +1,56 @@ +from test import TestCase, fill_redis_with_vectors, generate_random_vector +import random + +class LargeScale(TestCase): + def getname(self): + return "Large Scale Comparison" + + def estimated_runtime(self): + return 10 + + def test(self): + dim = 300 + count = 20000 + k = 50 + + # Fill Redis and get reference data for comparison + random.seed(42) # Make test deterministic + data = fill_redis_with_vectors(self.redis, self.test_key, count, dim) + + # Generate query vector + query_vec = generate_random_vector(dim) + + # Get results from Redis with good exploration factor + redis_raw = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim, + *[str(x) for x in query_vec], + 'COUNT', k, 'WITHSCORES', 'EF', 500) + + # Convert Redis results to dict + redis_results = {} + for i in range(0, len(redis_raw), 2): + key = redis_raw[i].decode() + score = float(redis_raw[i+1]) + redis_results[key] = score + + # Get results from linear scan + linear_results = data.find_k_nearest(query_vec, k) + linear_items = {name: score for name, score in linear_results} + + # Compare overlap + redis_set = set(redis_results.keys()) + linear_set = set(linear_items.keys()) + overlap = len(redis_set & linear_set) + + # If test fails, print comparison for debugging + if overlap < k * 0.7: + data.print_comparison({'items': redis_results, 'query_vector': query_vec}, k) + + assert overlap >= k * 0.7, \ + f"Expected at least 70% overlap in top {k} results, got {overlap/k*100:.1f}%" + + # Verify scores for common items + for item in redis_set & linear_set: + redis_score = redis_results[item] + linear_score = linear_items[item] + assert abs(redis_score - linear_score) < 0.01, \ + f"Score mismatch for {item}: Redis={redis_score:.3f} Linear={linear_score:.3f}" |
