summaryrefslogtreecommitdiff
path: root/examples/redis-unstable/modules/vector-sets/tests/reduce.py
diff options
context:
space:
mode:
Diffstat (limited to 'examples/redis-unstable/modules/vector-sets/tests/reduce.py')
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/reduce.py71
1 files changed, 0 insertions, 71 deletions
diff --git a/examples/redis-unstable/modules/vector-sets/tests/reduce.py b/examples/redis-unstable/modules/vector-sets/tests/reduce.py
deleted file mode 100644
index e39164f..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/reduce.py
+++ /dev/null
@@ -1,71 +0,0 @@
-from test import TestCase, fill_redis_with_vectors, generate_random_vector
-
class Reduce(TestCase):
    """Check that a vector set populated with dimension reduction still
    returns neighbours that resemble an exact scan of the original vectors.

    The test fills the key with vectors (passing both the original and the
    reduced dimension to ``fill_redis_with_vectors``), confirms ``VDIM``
    reports the reduced dimension, and then compares a ``VSIM`` query with
    the reference ``find_k_nearest`` result computed on the original vectors.
    """

    def getname(self):
        """Return the display name of this test."""
        return "Dimension Reduction"

    def estimated_runtime(self):
        """Return the runtime estimate for this test.

        NOTE(review): presumably expressed in seconds -- confirm against the
        test harness that consumes this value.
        """
        return 0.2

    def test(self):
        """Run the dimension-reduction check.

        Raises:
            AssertionError: if the reported dimension is wrong, if the top-k
                overlap with the linear scan falls below the minimum ratio,
                or if a shared item's scores differ by 0.2 or more.
        """
        original_dim = 100
        reduced_dim = 80
        count = 1000
        k = 50  # Number of nearest neighbors to check

        # Fill Redis with vectors using REDUCE and get reference data
        data = fill_redis_with_vectors(
            self.redis, self.test_key, count, original_dim, reduced_dim
        )

        # Verify dimension is reduced
        dim = self.redis.execute_command('VDIM', self.test_key)
        assert dim == reduced_dim, f"Expected dimension {reduced_dim}, got {dim}"

        # Generate query vector and get nearest neighbors using Redis
        query_vec = generate_random_vector(original_dim)
        redis_raw = self.redis.execute_command(
            'VSIM', self.test_key, 'VALUES',
            original_dim, *[str(x) for x in query_vec],
            'COUNT', k, 'WITHSCORES',
        )

        # The reply is read as a flat sequence alternating element name and
        # score, so pair the even-indexed entries with the odd-indexed ones.
        redis_results = {
            name.decode(): float(score)
            for name, score in zip(redis_raw[::2], redis_raw[1::2])
        }

        # Get results from linear scan with original vectors
        linear_results = data.find_k_nearest(query_vec, k)
        linear_items = dict(linear_results)

        # Compare overlap between reduced and non-reduced results
        redis_set = set(redis_results)
        linear_set = set(linear_items)
        common_items = redis_set & linear_set
        overlap_ratio = len(common_items) / k

        # With random projection, we expect some loss of accuracy but should
        # maintain at least some similarity structure.
        # Note that a gaussian distribution is the worst case for this test,
        # so in real world practice, things will be better.
        min_expected_overlap = 0.1  # At least 10% overlap in top-k

        # Print the comparison BEFORE asserting: the assertion below raises
        # on failure, so diagnostics placed after it would never be shown.
        if overlap_ratio < min_expected_overlap:
            print("\nLow overlap in results. Details:")
            print("\nTop results from linear scan (original vectors):")
            for name, score in linear_results:
                print(f"{name}: {score:.3f}")
            print("\nTop results from Redis (reduced vectors):")
            for item, score in sorted(redis_results.items(),
                                      key=lambda x: x[1], reverse=True):
                print(f"{item}: {score:.3f}")

        assert overlap_ratio >= min_expected_overlap, \
            f"Dimension reduction lost too much structure. Only {overlap_ratio*100:.1f}% overlap in top {k}"

        # For items that appear in both results, scores should be reasonably
        # correlated
        for item in common_items:
            redis_score = redis_results[item]
            linear_score = linear_items[item]
            # Allow for some deviation due to dimensionality reduction
            assert abs(redis_score - linear_score) < 0.2, \
                f"Score mismatch too high for {item}: Redis={redis_score:.3f} Linear={linear_score:.3f}"