summaryrefslogtreecommitdiff
path: root/examples/redis-unstable/modules/vector-sets/tests/deletion.py
diff options
context:
space:
mode:
Diffstat (limited to 'examples/redis-unstable/modules/vector-sets/tests/deletion.py')
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/deletion.py173
1 files changed, 0 insertions, 173 deletions
diff --git a/examples/redis-unstable/modules/vector-sets/tests/deletion.py b/examples/redis-unstable/modules/vector-sets/tests/deletion.py
deleted file mode 100644
index cb91959..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/deletion.py
+++ /dev/null
@@ -1,173 +0,0 @@
-from test import TestCase, fill_redis_with_vectors, generate_random_vector
-import random
-
-"""
-A note about this test:
-It was experimentally tried to modify hnsw.c in order to
-avoid calling hnsw_reconnect_nodes(). In this case, the test
-fails very often with EF set to 250, while it hardly
-fails at all with the same parameters if hnsw_reconnect_nodes()
-is called.
-
-Note that for the nature of the test (it is very strict) it can
-still fail from time to time, without this signaling any
-actual bug.
-"""
-
-class VREM(TestCase):
- def getname(self):
- return "Deletion and graph state after deletion"
-
- def estimated_runtime(self):
- return 2.0
-
- def format_neighbors_with_scores(self, links_result, old_links=None, items_to_remove=None):
- """Format neighbors with their similarity scores and status indicators"""
- if not links_result:
- return "No neighbors"
-
- output = []
- for level, neighbors in enumerate(links_result):
- level_num = len(links_result) - level - 1
- output.append(f"Level {level_num}:")
-
- # Get neighbors and scores
- neighbors_with_scores = []
- for i in range(0, len(neighbors), 2):
- neighbor = neighbors[i].decode() if isinstance(neighbors[i], bytes) else neighbors[i]
- score = float(neighbors[i+1]) if i+1 < len(neighbors) else None
- status = ""
-
- # For old links, mark deleted ones
- if items_to_remove and neighbor in items_to_remove:
- status = " [lost]"
- # For new links, mark newly added ones
- elif old_links is not None:
- # Check if this neighbor was in the old links at this level
- was_present = False
- if old_links and level < len(old_links):
- old_neighbors = [n.decode() if isinstance(n, bytes) else n
- for n in old_links[level]]
- was_present = neighbor in old_neighbors
- if not was_present:
- status = " [gained]"
-
- if score is not None:
- neighbors_with_scores.append(f"{len(neighbors_with_scores)+1}. {neighbor} ({score:.6f}){status}")
- else:
- neighbors_with_scores.append(f"{len(neighbors_with_scores)+1}. {neighbor}{status}")
-
- output.extend([" " + n for n in neighbors_with_scores])
- return "\n".join(output)
-
- def test(self):
- # 1. Fill server with random elements
- dim = 128
- count = 5000
- data = fill_redis_with_vectors(self.redis, self.test_key, count, dim)
-
- # 2. Do VSIM to get 200 items
- query_vec = generate_random_vector(dim)
- results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
- *[str(x) for x in query_vec],
- 'COUNT', 200, 'WITHSCORES')
-
- # Convert results to list of (item, score) pairs, sorted by score
- items = []
- for i in range(0, len(results), 2):
- item = results[i].decode()
- score = float(results[i+1])
- items.append((item, score))
- items.sort(key=lambda x: x[1], reverse=True) # Sort by similarity
-
- # Store the graph structure for all items before deletion
- neighbors_before = {}
- for item, _ in items:
- links = self.redis.execute_command('VLINKS', self.test_key, item, 'WITHSCORES')
- if links: # Some items might not have links
- neighbors_before[item] = links
-
- # 3. Remove 100 random items
- items_to_remove = set(item for item, _ in random.sample(items, 100))
- # Keep track of top 10 non-removed items
- top_remaining = []
- for item, score in items:
- if item not in items_to_remove:
- top_remaining.append((item, score))
- if len(top_remaining) == 10:
- break
-
- # Remove the items
- for item in items_to_remove:
- result = self.redis.execute_command('VREM', self.test_key, item)
- assert result == 1, f"VREM failed to remove {item}"
-
- # 4. Do VSIM again with same vector
- new_results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
- *[str(x) for x in query_vec],
- 'COUNT', 200, 'WITHSCORES',
- 'EF', 500)
-
- # Convert new results to dict of item -> score
- new_scores = {}
- for i in range(0, len(new_results), 2):
- item = new_results[i].decode()
- score = float(new_results[i+1])
- new_scores[item] = score
-
- failure = False
- failed_item = None
- failed_reason = None
- # 5. Verify all top 10 non-removed items are still found with similar scores
- for item, old_score in top_remaining:
- if item not in new_scores:
- failure = True
- failed_item = item
- failed_reason = "missing"
- break
- new_score = new_scores[item]
- if abs(new_score - old_score) >= 0.01:
- failure = True
- failed_item = item
- failed_reason = f"score changed: {old_score:.6f} -> {new_score:.6f}"
- break
-
- if failure:
- print("\nTest failed!")
- print(f"Problem with item: {failed_item} ({failed_reason})")
-
- print("\nOriginal neighbors (with similarity scores):")
- if failed_item in neighbors_before:
- print(self.format_neighbors_with_scores(
- neighbors_before[failed_item],
- items_to_remove=items_to_remove))
- else:
- print("No neighbors found in original graph")
-
- print("\nCurrent neighbors (with similarity scores):")
- current_links = self.redis.execute_command('VLINKS', self.test_key,
- failed_item, 'WITHSCORES')
- if current_links:
- print(self.format_neighbors_with_scores(
- current_links,
- old_links=neighbors_before.get(failed_item)))
- else:
- print("No neighbors in current graph")
-
- print("\nOriginal results (top 20):")
- for item, score in items[:20]:
- deleted = "[deleted]" if item in items_to_remove else ""
- print(f"{item}: {score:.6f} {deleted}")
-
- print("\nNew results after removal (top 20):")
- new_items = []
- for i in range(0, len(new_results), 2):
- item = new_results[i].decode()
- score = float(new_results[i+1])
- new_items.append((item, score))
- new_items.sort(key=lambda x: x[1], reverse=True)
- for item, score in new_items[:20]:
- print(f"{item}: {score:.6f}")
-
- raise AssertionError(f"Test failed: Problem with item {failed_item} ({failed_reason}). *** IMPORTANT *** This test may fail from time to time without indicating that there is a bug. However normally it should pass. The fact is that it's a quite extreme test where we destroy 50% of nodes of top results and still expect perfect recall, with vectors that are very hostile because of the distribution used.")
-