diff options
| author | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-01-21 22:52:54 +0100 |
|---|---|---|
| committer | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-01-21 22:52:54 +0100 |
| commit | dcacc00e3750300617ba6e16eb346713f91a783a (patch) | |
| tree | 38e2d4fb5ed9d119711d4295c6eda4b014af73fd /examples/redis-unstable/modules/vector-sets/tests/reduce.py | |
| parent | 58dac10aeb8f5a041c46bddbeaf4c7966a99b998 (diff) | |
| download | crep-dcacc00e3750300617ba6e16eb346713f91a783a.tar.gz | |
Remove testing data
Diffstat (limited to 'examples/redis-unstable/modules/vector-sets/tests/reduce.py')
| -rw-r--r-- | examples/redis-unstable/modules/vector-sets/tests/reduce.py | 71 |
1 files changed, 0 insertions, 71 deletions
diff --git a/examples/redis-unstable/modules/vector-sets/tests/reduce.py b/examples/redis-unstable/modules/vector-sets/tests/reduce.py deleted file mode 100644 index e39164f..0000000 --- a/examples/redis-unstable/modules/vector-sets/tests/reduce.py +++ /dev/null | |||
| @@ -1,71 +0,0 @@ | |||
| 1 | from test import TestCase, fill_redis_with_vectors, generate_random_vector | ||
| 2 | |||
class Reduce(TestCase):
    """Test that a vector set populated with dimension reduction stays usable.

    Vectors of ``original_dim`` components are inserted with a reduced target
    dimension, then a VSIM query on the reduced set is compared against a
    linear scan over the original vectors.
    """

    def getname(self):
        """Return the human-readable name of this test."""
        return "Dimension Reduction"

    def estimated_runtime(self):
        """Return the runtime estimate for this test.

        NOTE(review): the unit is presumably seconds -- confirm against the
        test runner.
        """
        return 0.2

    def test(self):
        """Run the dimension-reduction check.

        Raises:
            AssertionError: if the reported dimension differs from the
                requested reduced dimension, if the top-k overlap with the
                linear scan is below the minimum, or if the score of an item
                found by both searches differs by 0.2 or more.
        """
        original_dim = 100
        reduced_dim = 80
        count = 1000
        k = 50  # Number of nearest neighbors to check

        # Fill Redis with vectors using REDUCE and get reference data
        data = fill_redis_with_vectors(
            self.redis, self.test_key, count, original_dim, reduced_dim
        )

        # Verify dimension is reduced
        dim = self.redis.execute_command('VDIM', self.test_key)
        assert dim == reduced_dim, f"Expected dimension {reduced_dim}, got {dim}"

        # Generate query vector and get nearest neighbors using Redis
        query_vec = generate_random_vector(original_dim)
        redis_raw = self.redis.execute_command(
            'VSIM', self.test_key, 'VALUES',
            original_dim, *[str(x) for x in query_vec],
            'COUNT', k, 'WITHSCORES')

        # The reply is consumed as a flat [name, score, name, score, ...]
        # sequence; convert it to a {name: score} dict.
        redis_results = {
            name.decode(): float(score)
            for name, score in zip(redis_raw[0::2], redis_raw[1::2])
        }

        # Get results from linear scan with original vectors
        linear_results = data.find_k_nearest(query_vec, k)
        linear_items = {name: score for name, score in linear_results}

        # Compare overlap between reduced and non-reduced results
        common_items = set(redis_results) & set(linear_items)
        overlap_ratio = len(common_items) / k

        # With random projection, we expect some loss of accuracy but should
        # maintain at least some similarity structure.
        # Note that a gaussian distribution is the worst case for this test,
        # so in real world practice, things will be better.
        min_expected_overlap = 0.1  # At least 10% overlap in top-k

        # Print the comparison BEFORE asserting: the assertion below raises on
        # low overlap, so diagnostics placed after it would never be shown in
        # the failing case.
        if overlap_ratio < min_expected_overlap:
            print("\nLow overlap in results. Details:")
            print("\nTop results from linear scan (original vectors):")
            for name, score in linear_results:
                print(f"{name}: {score:.3f}")
            print("\nTop results from Redis (reduced vectors):")
            for item, score in sorted(redis_results.items(),
                                      key=lambda x: x[1], reverse=True):
                print(f"{item}: {score:.3f}")

        assert overlap_ratio >= min_expected_overlap, \
            f"Dimension reduction lost too much structure. Only {overlap_ratio*100:.1f}% overlap in top {k}"

        # For items that appear in both results, scores should be reasonably
        # correlated
        for item in common_items:
            redis_score = redis_results[item]
            linear_score = linear_items[item]
            # Allow for some deviation due to dimensionality reduction
            assert abs(redis_score - linear_score) < 0.2, \
                f"Score mismatch too high for {item}: Redis={redis_score:.3f} Linear={linear_score:.3f}"
