diff options
| author | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-01-21 22:40:55 +0100 |
|---|---|---|
| committer | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-01-21 22:40:55 +0100 |
| commit | 5d8dfe892a2ea89f706ee140c3bdcfd89fe03fda (patch) | |
| tree | 1acdfa5220cd13b7be43a2a01368e80d306473ca /examples/redis-unstable/modules/vector-sets/tests/node_update.py | |
| parent | c7ab12bba64d9c20ccd79b132dac475f7bc3923e (diff) | |
| download | crep-5d8dfe892a2ea89f706ee140c3bdcfd89fe03fda.tar.gz | |
Add Redis source code for testing
Diffstat (limited to 'examples/redis-unstable/modules/vector-sets/tests/node_update.py')
| -rw-r--r-- | examples/redis-unstable/modules/vector-sets/tests/node_update.py | 85 |
1 files changed, 85 insertions, 0 deletions
diff --git a/examples/redis-unstable/modules/vector-sets/tests/node_update.py b/examples/redis-unstable/modules/vector-sets/tests/node_update.py new file mode 100644 index 0000000..53aa2dd --- /dev/null +++ b/examples/redis-unstable/modules/vector-sets/tests/node_update.py @@ -0,0 +1,85 @@ +from test import TestCase, generate_random_vector +import struct +import math +import random + +class VectorUpdateAndClusters(TestCase): + def getname(self): + return "VADD vector update with cluster relocation" + + def estimated_runtime(self): + return 2.0 # Should take around 2 seconds + + def generate_cluster_vector(self, base_vec, noise=0.1): + """Generate a vector that's similar to base_vec with some noise.""" + vec = [x + random.gauss(0, noise) for x in base_vec] + # Normalize + norm = math.sqrt(sum(x*x for x in vec)) + return [x/norm for x in vec] + + def test(self): + dim = 128 + vectors_per_cluster = 5000 + + # Create two very different base vectors for our clusters + cluster1_base = generate_random_vector(dim) + cluster2_base = [-x for x in cluster1_base] # Opposite direction + + # Add vectors from first cluster + for i in range(vectors_per_cluster): + vec = self.generate_cluster_vector(cluster1_base) + vec_bytes = struct.pack(f'{dim}f', *vec) + self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, + f'{self.test_key}:cluster1:{i}') + + # Add vectors from second cluster + for i in range(vectors_per_cluster): + vec = self.generate_cluster_vector(cluster2_base) + vec_bytes = struct.pack(f'{dim}f', *vec) + self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, + f'{self.test_key}:cluster2:{i}') + + # Pick a test vector from cluster1 + test_key = f'{self.test_key}:cluster1:0' + + # Verify it's in cluster1 using VSIM + initial_vec = self.generate_cluster_vector(cluster1_base) + results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim, + *[str(x) for x in initial_vec], + 'COUNT', 100, 'WITHSCORES') + + # Count how many cluster1 items are in top results + cluster1_count = sum(1 for i in range(0, len(results), 2) + if b'cluster1' in results[i]) + assert cluster1_count > 80, "Initial clustering check failed" + + # Now update the test vector to be in cluster2 + new_vec = self.generate_cluster_vector(cluster2_base, noise=0.05) + vec_bytes = struct.pack(f'{dim}f', *new_vec) + self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, test_key) + + # Verify the embedding was actually updated using VEMB + emb_result = self.redis.execute_command('VEMB', self.test_key, test_key) + updated_vec = [float(x) for x in emb_result] + + # Verify updated vector matches what we inserted + dot_product = sum(a*b for a,b in zip(updated_vec, new_vec)) + similarity = dot_product / (math.sqrt(sum(x*x for x in updated_vec)) * + math.sqrt(sum(x*x for x in new_vec))) + assert similarity > 0.9, "Vector was not properly updated" + + # Verify it's now in cluster2 using VSIM + results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim, + *[str(x) for x in cluster2_base], + 'COUNT', 100, 'WITHSCORES') + + # Verify our updated vector is among top results + found = False + for i in range(0, len(results), 2): + if results[i].decode() == test_key: + found = True + similarity = float(results[i+1]) + assert similarity > 0.80, f"Updated vector has low similarity: {similarity}" + break + + assert found, "Updated vector not found in cluster2 proximity" |
