diff options
Diffstat (limited to 'examples/redis-unstable/modules/vector-sets/tests/large_scale.py')
| -rw-r--r-- | examples/redis-unstable/modules/vector-sets/tests/large_scale.py | 56 |
1 files changed, 56 insertions, 0 deletions
diff --git a/examples/redis-unstable/modules/vector-sets/tests/large_scale.py b/examples/redis-unstable/modules/vector-sets/tests/large_scale.py new file mode 100644 index 0000000..eac5dca --- /dev/null +++ b/examples/redis-unstable/modules/vector-sets/tests/large_scale.py | |||
| @@ -0,0 +1,56 @@ | |||
| 1 | from test import TestCase, fill_redis_with_vectors, generate_random_vector | ||
| 2 | import random | ||
| 3 | |||
class LargeScale(TestCase):
    """Check VSIM top-k results against a linear scan on a large data set."""

    def getname(self):
        """Return the display name of this test."""
        return "Large Scale Comparison"

    def estimated_runtime(self):
        """Return the runtime estimate for this test.

        NOTE(review): the unit is defined by the TestCase base class,
        presumably seconds -- confirm.
        """
        return 10

    def test(self):
        """Compare one VSIM query with the reference nearest-neighbour search.

        Loads ``count`` vectors of dimension ``dim`` under ``self.test_key``,
        asks Redis for the ``k`` most similar items to a random query vector,
        and requires that:

        * at least 70% of the returned names also appear in the reference
          top ``k`` computed by ``data.find_k_nearest``;
        * for every name present in both result sets, the two scores differ
          by less than 0.01.

        Raises:
            AssertionError: if either requirement is not met.
        """
        dim = 300
        count = 20000
        k = 50

        # Seed the random module first so the run is meant to be repeatable.
        random.seed(42)
        data = fill_redis_with_vectors(self.redis, self.test_key, count, dim)
        query_vec = generate_random_vector(dim)

        # Query Redis with a generous exploration factor (EF 500).
        vsim_args = [
            'VSIM', self.test_key, 'VALUES', dim,
            *map(str, query_vec),
            'COUNT', k, 'WITHSCORES', 'EF', 500,
        ]
        redis_raw = self.redis.execute_command(*vsim_args)

        # The reply is flat: [name, score, name, score, ...].
        redis_results = {
            redis_raw[i].decode(): float(redis_raw[i + 1])
            for i in range(0, len(redis_raw), 2)
        }

        # Reference answer, as (name, score) pairs.
        linear_items = dict(data.find_k_nearest(query_vec, k))

        # Names found by both approaches.
        common = set(redis_results) & set(linear_items)
        overlap = len(common)
        enough_overlap = overlap >= k * 0.7

        # Dump a side-by-side comparison before failing, to aid debugging.
        if not enough_overlap:
            data.print_comparison({'items': redis_results, 'query_vector': query_vec}, k)

        assert enough_overlap, \
            f"Expected at least 70% overlap in top {k} results, got {overlap/k*100:.1f}%"

        # Scores must agree for every item both sides returned.
        for item in common:
            redis_score = redis_results[item]
            linear_score = linear_items[item]
            assert abs(redis_score - linear_score) < 0.01, \
                f"Score mismatch for {item}: Redis={redis_score:.3f} Linear={linear_score:.3f}"
