diff options
Diffstat (limited to 'examples/redis-unstable/modules/vector-sets/tests')
26 files changed, 0 insertions, 2863 deletions
diff --git a/examples/redis-unstable/modules/vector-sets/tests/basic_commands.py b/examples/redis-unstable/modules/vector-sets/tests/basic_commands.py deleted file mode 100644 index 8481a36..0000000 --- a/examples/redis-unstable/modules/vector-sets/tests/basic_commands.py +++ /dev/null @@ -1,21 +0,0 @@ -from test import TestCase, generate_random_vector -import struct - -class BasicCommands(TestCase): - def getname(self): - return "VADD, VDIM, VCARD basic usage" - - def test(self): - # Test VADD - vec = generate_random_vector(4) - vec_bytes = struct.pack('4f', *vec) - result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:1') - assert result == 1, "VADD should return 1 for first item" - - # Test VDIM - dim = self.redis.execute_command('VDIM', self.test_key) - assert dim == 4, f"VDIM should return 4, got {dim}" - - # Test VCARD - card = self.redis.execute_command('VCARD', self.test_key) - assert card == 1, f"VCARD should return 1, got {card}" diff --git a/examples/redis-unstable/modules/vector-sets/tests/basic_similarity.py b/examples/redis-unstable/modules/vector-sets/tests/basic_similarity.py deleted file mode 100644 index 11c3c9b..0000000 --- a/examples/redis-unstable/modules/vector-sets/tests/basic_similarity.py +++ /dev/null @@ -1,35 +0,0 @@ -from test import TestCase - -class BasicSimilarity(TestCase): - def getname(self): - return "VSIM reported distance makes sense with 4D vectors" - - def test(self): - # Add two very similar vectors, one different - vec1 = [1, 0, 0, 0] - vec2 = [0.99, 0.01, 0, 0] - vec3 = [0.1, 1, -1, 0.5] - - # Add vectors using VALUES format - self.redis.execute_command('VADD', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], f'{self.test_key}:item:1') - self.redis.execute_command('VADD', self.test_key, 'VALUES', 4, - *[str(x) for x in vec2], f'{self.test_key}:item:2') - self.redis.execute_command('VADD', self.test_key, 'VALUES', 4, - *[str(x) for x in vec3], f'{self.test_key}:item:3') - 
- # Query similarity with vec1 - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], 'WITHSCORES') - - # Convert results to dictionary - results_dict = {} - for i in range(0, len(result), 2): - key = result[i].decode() - score = float(result[i+1]) - results_dict[key] = score - - # Verify results - assert results_dict[f'{self.test_key}:item:1'] > 0.99, "Self-similarity should be very high" - assert results_dict[f'{self.test_key}:item:2'] > 0.99, "Similar vector should have high similarity" - assert results_dict[f'{self.test_key}:item:3'] < 0.8, "Not very similar vector should have low similarity" diff --git a/examples/redis-unstable/modules/vector-sets/tests/concurrent_vadd_cas_del_vsim.py b/examples/redis-unstable/modules/vector-sets/tests/concurrent_vadd_cas_del_vsim.py deleted file mode 100644 index f4b3a12..0000000 --- a/examples/redis-unstable/modules/vector-sets/tests/concurrent_vadd_cas_del_vsim.py +++ /dev/null @@ -1,156 +0,0 @@ -from test import TestCase, generate_random_vector -import threading -import time -import struct - -class ThreadingStressTest(TestCase): - def getname(self): - return "Concurrent VADD/DEL/VSIM operations stress test" - - def estimated_runtime(self): - return 10 # Test runs for 10 seconds - - def test(self): - # Constants - easy to modify if needed - NUM_VADD_THREADS = 10 - NUM_VSIM_THREADS = 1 - NUM_DEL_THREADS = 1 - TEST_DURATION = 10 # seconds - VECTOR_DIM = 100 - DEL_INTERVAL = 1 # seconds - - # Shared flags and state - stop_event = threading.Event() - error_list = [] - error_lock = threading.Lock() - - def log_error(thread_name, error): - with error_lock: - error_list.append(f"{thread_name}: {error}") - - def vadd_worker(thread_id): - """Thread function to perform VADD operations""" - thread_name = f"VADD-{thread_id}" - try: - vector_count = 0 - while not stop_event.is_set(): - try: - # Generate random vector - vec = generate_random_vector(VECTOR_DIM) - vec_bytes = 
struct.pack(f'{VECTOR_DIM}f', *vec) - - # Add vector with CAS option - self.redis.execute_command( - 'VADD', - self.test_key, - 'FP32', - vec_bytes, - f'{self.test_key}:item:{thread_id}:{vector_count}', - 'CAS' - ) - - vector_count += 1 - - # Small sleep to reduce CPU pressure - if vector_count % 10 == 0: - time.sleep(0.001) - except Exception as e: - log_error(thread_name, f"Error: {str(e)}") - time.sleep(0.1) # Slight backoff on error - except Exception as e: - log_error(thread_name, f"Thread error: {str(e)}") - - def del_worker(): - """Thread function that deletes the key periodically""" - thread_name = "DEL" - try: - del_count = 0 - while not stop_event.is_set(): - try: - # Sleep first, then delete - time.sleep(DEL_INTERVAL) - if stop_event.is_set(): - break - - self.redis.delete(self.test_key) - del_count += 1 - except Exception as e: - log_error(thread_name, f"Error: {str(e)}") - except Exception as e: - log_error(thread_name, f"Thread error: {str(e)}") - - def vsim_worker(thread_id): - """Thread function to perform VSIM operations""" - thread_name = f"VSIM-{thread_id}" - try: - search_count = 0 - while not stop_event.is_set(): - try: - # Generate query vector - query_vec = generate_random_vector(VECTOR_DIM) - query_str = [str(x) for x in query_vec] - - # Perform similarity search - args = ['VSIM', self.test_key, 'VALUES', VECTOR_DIM] - args.extend(query_str) - args.extend(['COUNT', 10]) - self.redis.execute_command(*args) - - search_count += 1 - - # Small sleep to reduce CPU pressure - if search_count % 10 == 0: - time.sleep(0.005) - except Exception as e: - # Don't log empty array errors, as they're expected when key doesn't exist - if "empty array" not in str(e).lower(): - log_error(thread_name, f"Error: {str(e)}") - time.sleep(0.1) # Slight backoff on error - except Exception as e: - log_error(thread_name, f"Thread error: {str(e)}") - - # Start all threads - threads = [] - - # VADD threads - for i in range(NUM_VADD_THREADS): - thread = 
threading.Thread(target=vadd_worker, args=(i,)) - thread.start() - threads.append(thread) - - # DEL threads - for _ in range(NUM_DEL_THREADS): - thread = threading.Thread(target=del_worker) - thread.start() - threads.append(thread) - - # VSIM threads - for i in range(NUM_VSIM_THREADS): - thread = threading.Thread(target=vsim_worker, args=(i,)) - thread.start() - threads.append(thread) - - # Let the test run for the specified duration - time.sleep(TEST_DURATION) - - # Signal all threads to stop - stop_event.set() - - # Wait for threads to finish - for thread in threads: - thread.join(timeout=2.0) - - # Check if Redis is still responsive - try: - ping_result = self.redis.ping() - assert ping_result, "Redis did not respond to PING after stress test" - except Exception as e: - assert False, f"Redis connection failed after stress test: {str(e)}" - - # Report any errors for diagnosis, but don't fail the test unless PING fails - if error_list: - error_count = len(error_list) - print(f"\nEncountered {error_count} errors during stress test.") - print("First 5 errors:") - for error in error_list[:5]: - print(f"- {error}") diff --git a/examples/redis-unstable/modules/vector-sets/tests/concurrent_vsim_and_del.py b/examples/redis-unstable/modules/vector-sets/tests/concurrent_vsim_and_del.py deleted file mode 100644 index 9bbf011..0000000 --- a/examples/redis-unstable/modules/vector-sets/tests/concurrent_vsim_and_del.py +++ /dev/null @@ -1,48 +0,0 @@ -from test import TestCase, fill_redis_with_vectors, generate_random_vector -import threading, time - -class ConcurrentVSIMAndDEL(TestCase): - def getname(self): - return "Concurrent VSIM and DEL operations" - - def estimated_runtime(self): - return 2 - - def test(self): - # Fill the key with 5000 random vectors - dim = 128 - count = 5000 - fill_redis_with_vectors(self.redis, self.test_key, count, dim) - - # List to store results from threads - thread_results = [] - - def vsim_thread(): - """Thread function to perform VSIM 
operations until the key is deleted""" - while True: - query_vec = generate_random_vector(dim) - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim, - *[str(x) for x in query_vec], 'COUNT', 10) - if not result: - # Empty array detected, key is deleted - thread_results.append(True) - break - - # Start multiple threads to perform VSIM operations - threads = [] - for _ in range(4): # Start 4 threads - t = threading.Thread(target=vsim_thread) - t.start() - threads.append(t) - - # Delete the key while threads are still running - time.sleep(1) - self.redis.delete(self.test_key) - - # Wait for all threads to finish (they will exit once they detect the key is deleted) - for t in threads: - t.join() - - # Verify that all threads detected an empty array or error - assert len(thread_results) == len(threads), "Not all threads detected the key deletion" - assert all(thread_results), "Some threads did not detect an empty array or error after DEL" diff --git a/examples/redis-unstable/modules/vector-sets/tests/debug_digest.py b/examples/redis-unstable/modules/vector-sets/tests/debug_digest.py deleted file mode 100644 index 78f06d8..0000000 --- a/examples/redis-unstable/modules/vector-sets/tests/debug_digest.py +++ /dev/null @@ -1,39 +0,0 @@ -from test import TestCase, generate_random_vector -import struct - -class DebugDigestTest(TestCase): - def getname(self): - return "[regression] DEBUG DIGEST-VALUE with attributes" - - def test(self): - # Generate random vectors - vec1 = generate_random_vector(4) - vec2 = generate_random_vector(4) - vec_bytes1 = struct.pack('4f', *vec1) - vec_bytes2 = struct.pack('4f', *vec2) - - # Add vectors to the key, one with attribute, one without - self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes1, f'{self.test_key}:item:1') - self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes2, f'{self.test_key}:item:2', 'SETATTR', '{"color":"red"}') - - # Call DEBUG DIGEST-VALUE on the key - try: - digest1 = 
self.redis.execute_command('DEBUG', 'DIGEST-VALUE', self.test_key) - assert digest1 is not None, "DEBUG DIGEST-VALUE should return a value" - - # Change attribute and verify digest changes - self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:2', '{"color":"blue"}') - - digest2 = self.redis.execute_command('DEBUG', 'DIGEST-VALUE', self.test_key) - assert digest2 is not None, "DEBUG DIGEST-VALUE should return a value after attribute change" - assert digest1 != digest2, "Digest should change when an attribute is modified" - - # Remove attribute and verify digest changes again - self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:2', '') - - digest3 = self.redis.execute_command('DEBUG', 'DIGEST-VALUE', self.test_key) - assert digest3 is not None, "DEBUG DIGEST-VALUE should return a value after attribute removal" - assert digest2 != digest3, "Digest should change when an attribute is removed" - - except Exception as e: - raise AssertionError(f"DEBUG DIGEST-VALUE command failed: {str(e)}") diff --git a/examples/redis-unstable/modules/vector-sets/tests/deletion.py b/examples/redis-unstable/modules/vector-sets/tests/deletion.py deleted file mode 100644 index cb91959..0000000 --- a/examples/redis-unstable/modules/vector-sets/tests/deletion.py +++ /dev/null @@ -1,173 +0,0 @@ -from test import TestCase, fill_redis_with_vectors, generate_random_vector -import random - -""" -A note about this test: -It was experimentally tried to modify hnsw.c in order to -avoid calling hnsw_reconnect_nodes(). In this case, the test -fails very often with EF set to 250, while it hardly -fails at all with the same parameters if hnsw_reconnect_nodes() -is called. - -Note that for the nature of the test (it is very strict) it can -still fail from time to time, without this signaling any -actual bug. 
-""" - -class VREM(TestCase): - def getname(self): - return "Deletion and graph state after deletion" - - def estimated_runtime(self): - return 2.0 - - def format_neighbors_with_scores(self, links_result, old_links=None, items_to_remove=None): - """Format neighbors with their similarity scores and status indicators""" - if not links_result: - return "No neighbors" - - output = [] - for level, neighbors in enumerate(links_result): - level_num = len(links_result) - level - 1 - output.append(f"Level {level_num}:") - - # Get neighbors and scores - neighbors_with_scores = [] - for i in range(0, len(neighbors), 2): - neighbor = neighbors[i].decode() if isinstance(neighbors[i], bytes) else neighbors[i] - score = float(neighbors[i+1]) if i+1 < len(neighbors) else None - status = "" - - # For old links, mark deleted ones - if items_to_remove and neighbor in items_to_remove: - status = " [lost]" - # For new links, mark newly added ones - elif old_links is not None: - # Check if this neighbor was in the old links at this level - was_present = False - if old_links and level < len(old_links): - old_neighbors = [n.decode() if isinstance(n, bytes) else n - for n in old_links[level]] - was_present = neighbor in old_neighbors - if not was_present: - status = " [gained]" - - if score is not None: - neighbors_with_scores.append(f"{len(neighbors_with_scores)+1}. {neighbor} ({score:.6f}){status}") - else: - neighbors_with_scores.append(f"{len(neighbors_with_scores)+1}. {neighbor}{status}") - - output.extend([" " + n for n in neighbors_with_scores]) - return "\n".join(output) - - def test(self): - # 1. Fill server with random elements - dim = 128 - count = 5000 - data = fill_redis_with_vectors(self.redis, self.test_key, count, dim) - - # 2. 
Do VSIM to get 200 items - query_vec = generate_random_vector(dim) - results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim, - *[str(x) for x in query_vec], - 'COUNT', 200, 'WITHSCORES') - - # Convert results to list of (item, score) pairs, sorted by score - items = [] - for i in range(0, len(results), 2): - item = results[i].decode() - score = float(results[i+1]) - items.append((item, score)) - items.sort(key=lambda x: x[1], reverse=True) # Sort by similarity - - # Store the graph structure for all items before deletion - neighbors_before = {} - for item, _ in items: - links = self.redis.execute_command('VLINKS', self.test_key, item, 'WITHSCORES') - if links: # Some items might not have links - neighbors_before[item] = links - - # 3. Remove 100 random items - items_to_remove = set(item for item, _ in random.sample(items, 100)) - # Keep track of top 10 non-removed items - top_remaining = [] - for item, score in items: - if item not in items_to_remove: - top_remaining.append((item, score)) - if len(top_remaining) == 10: - break - - # Remove the items - for item in items_to_remove: - result = self.redis.execute_command('VREM', self.test_key, item) - assert result == 1, f"VREM failed to remove {item}" - - # 4. Do VSIM again with same vector - new_results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim, - *[str(x) for x in query_vec], - 'COUNT', 200, 'WITHSCORES', - 'EF', 500) - - # Convert new results to dict of item -> score - new_scores = {} - for i in range(0, len(new_results), 2): - item = new_results[i].decode() - score = float(new_results[i+1]) - new_scores[item] = score - - failure = False - failed_item = None - failed_reason = None - # 5. 
Verify all top 10 non-removed items are still found with similar scores - for item, old_score in top_remaining: - if item not in new_scores: - failure = True - failed_item = item - failed_reason = "missing" - break - new_score = new_scores[item] - if abs(new_score - old_score) >= 0.01: - failure = True - failed_item = item - failed_reason = f"score changed: {old_score:.6f} -> {new_score:.6f}" - break - - if failure: - print("\nTest failed!") - print(f"Problem with item: {failed_item} ({failed_reason})") - - print("\nOriginal neighbors (with similarity scores):") - if failed_item in neighbors_before: - print(self.format_neighbors_with_scores( - neighbors_before[failed_item], - items_to_remove=items_to_remove)) - else: - print("No neighbors found in original graph") - - print("\nCurrent neighbors (with similarity scores):") - current_links = self.redis.execute_command('VLINKS', self.test_key, - failed_item, 'WITHSCORES') - if current_links: - print(self.format_neighbors_with_scores( - current_links, - old_links=neighbors_before.get(failed_item))) - else: - print("No neighbors in current graph") - - print("\nOriginal results (top 20):") - for item, score in items[:20]: - deleted = "[deleted]" if item in items_to_remove else "" - print(f"{item}: {score:.6f} {deleted}") - - print("\nNew results after removal (top 20):") - new_items = [] - for i in range(0, len(new_results), 2): - item = new_results[i].decode() - score = float(new_results[i+1]) - new_items.append((item, score)) - new_items.sort(key=lambda x: x[1], reverse=True) - for item, score in new_items[:20]: - print(f"{item}: {score:.6f}") - - raise AssertionError(f"Test failed: Problem with item {failed_item} ({failed_reason}). *** IMPORTANT *** This test may fail from time to time without indicating that there is a bug. However normally it should pass. 
The fact is that it's a quite extreme test where we destroy 50% of nodes of top results and still expect perfect recall, with vectors that are very hostile because of the distribution used.") - diff --git a/examples/redis-unstable/modules/vector-sets/tests/dimension_validation.py b/examples/redis-unstable/modules/vector-sets/tests/dimension_validation.py deleted file mode 100644 index f081152..0000000 --- a/examples/redis-unstable/modules/vector-sets/tests/dimension_validation.py +++ /dev/null @@ -1,67 +0,0 @@ -from test import TestCase, generate_random_vector -import struct -import redis.exceptions - -class DimensionValidation(TestCase): - def getname(self): - return "[regression] Dimension Validation with Projection" - - def estimated_runtime(self): - return 0.5 - - def test(self): - # Test scenario 1: Create a set with projection - original_dim = 100 - reduced_dim = 50 - - # Create the initial vector and set with projection - vec1 = generate_random_vector(original_dim) - vec1_bytes = struct.pack(f'{original_dim}f', *vec1) - - # Add first vector with projection - result = self.redis.execute_command('VADD', self.test_key, - 'REDUCE', reduced_dim, - 'FP32', vec1_bytes, f'{self.test_key}:item:1') - assert result == 1, "First VADD with REDUCE should return 1" - - # Check VINFO returns the correct projection information - info = self.redis.execute_command('VINFO', self.test_key) - info_map = {k.decode('utf-8'): v for k, v in zip(info[::2], info[1::2])} - assert 'vector-dim' in info_map, "VINFO should contain vector-dim" - assert info_map['vector-dim'] == reduced_dim, f"Expected reduced dimension {reduced_dim}, got {info['vector-dim']}" - assert 'projection-input-dim' in info_map, "VINFO should contain projection-input-dim" - assert info_map['projection-input-dim'] == original_dim, f"Expected original dimension {original_dim}, got {info['projection-input-dim']}" - - # Test scenario 2: Try adding a mismatched vector - should fail - wrong_dim = 80 - wrong_vec = 
generate_random_vector(wrong_dim) - wrong_vec_bytes = struct.pack(f'{wrong_dim}f', *wrong_vec) - - # This should fail with dimension mismatch error - try: - self.redis.execute_command('VADD', self.test_key, - 'REDUCE', reduced_dim, - 'FP32', wrong_vec_bytes, f'{self.test_key}:item:2') - assert False, "VADD with wrong dimension should fail" - except redis.exceptions.ResponseError as e: - assert "Input dimension mismatch for projection" in str(e), f"Expected dimension mismatch error, got: {e}" - - # Test scenario 3: Add a correctly-sized vector - vec2 = generate_random_vector(original_dim) - vec2_bytes = struct.pack(f'{original_dim}f', *vec2) - - # This should succeed - result = self.redis.execute_command('VADD', self.test_key, - 'REDUCE', reduced_dim, - 'FP32', vec2_bytes, f'{self.test_key}:item:3') - assert result == 1, "VADD with correct dimensions should succeed" - - # Check VSIM also validates input dimensions - wrong_query = generate_random_vector(wrong_dim) - try: - self.redis.execute_command('VSIM', self.test_key, - 'VALUES', wrong_dim, *[str(x) for x in wrong_query], - 'COUNT', 10) - assert False, "VSIM with wrong dimension should fail" - except redis.exceptions.ResponseError as e: - assert "Input dimension mismatch for projection" in str(e), f"Expected dimension mismatch error in VSIM, got: {e}" diff --git a/examples/redis-unstable/modules/vector-sets/tests/epsilon.py b/examples/redis-unstable/modules/vector-sets/tests/epsilon.py deleted file mode 100644 index 97e11c0..0000000 --- a/examples/redis-unstable/modules/vector-sets/tests/epsilon.py +++ /dev/null @@ -1,77 +0,0 @@ -from test import TestCase - -class EpsilonOption(TestCase): - def getname(self): - return "VSIM EPSILON option filtering" - - def estimated_runtime(self): - return 0.1 - - def test(self): - # Add vectors as shown in the example - # Vector 'a' at (1, 1) - normalized to (0.707, 0.707) - result = self.redis.execute_command('VADD', self.test_key, 'VALUES', '2', '1', '1', 'a') - assert result 
== 1, "VADD should return 1 for item 'a'" - - # Vector 'b' at (0, 1) - normalized to (0, 1) - result = self.redis.execute_command('VADD', self.test_key, 'VALUES', '2', '0', '1', 'b') - assert result == 1, "VADD should return 1 for item 'b'" - - # Vector 'c' at (0, 0) - this will be a zero vector, might be handled specially - result = self.redis.execute_command('VADD', self.test_key, 'VALUES', '2', '0', '0', 'c') - assert result == 1, "VADD should return 1 for item 'c'" - - # Vector 'd' at (0, -1) - normalized to (0, -1) - result = self.redis.execute_command('VADD', self.test_key, 'VALUES', '2', '0', '-1', 'd') - assert result == 1, "VADD should return 1 for item 'd'" - - # Vector 'e' at (-1, -1) - normalized to (-0.707, -0.707) - result = self.redis.execute_command('VADD', self.test_key, 'VALUES', '2', '-1', '-1', 'e') - assert result == 1, "VADD should return 1 for item 'e'" - - # Test without EPSILON - should return all items - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', '2', '1', '1', 'WITHSCORES') - # Result is a flat list: [elem1, score1, elem2, score2, ...] 
- elements_all = [result[i].decode() for i in range(0, len(result), 2)] - scores_all = [float(result[i]) for i in range(1, len(result), 2)] - - assert len(elements_all) == 5, f"Should return 5 elements without EPSILON, got {len(elements_all)}" - assert elements_all[0] == 'a', "First element should be 'a' (most similar)" - assert scores_all[0] == 1.0, "Score for 'a' should be 1.0 (identical)" - - # Test with EPSILON 0.5 - should return only elements with similarity >= 0.5 (distance < 0.5) - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', '2', '1', '1', 'WITHSCORES', 'EPSILON', '0.5') - elements_epsilon_0_5 = [result[i].decode() for i in range(0, len(result), 2)] - scores_epsilon_0_5 = [float(result[i]) for i in range(1, len(result), 2)] - - assert len(elements_epsilon_0_5) == 3, f"With EPSILON 0.5, should return 3 elements, got {len(elements_epsilon_0_5)}" - assert set(elements_epsilon_0_5) == {'a', 'b', 'c'}, f"With EPSILON 0.5, should get a, b, c, got {elements_epsilon_0_5}" - - # Verify all returned scores are >= 0.5 - for i, score in enumerate(scores_epsilon_0_5): - assert score >= 0.5, f"Element {elements_epsilon_0_5[i]} has score {score} which is < 0.5" - - # Test with EPSILON 0.2 - should return only elements with similarity >= 0.8 (distance < 0.2) - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', '2', '1', '1', 'WITHSCORES', 'EPSILON', '0.2') - elements_epsilon_0_2 = [result[i].decode() for i in range(0, len(result), 2)] - scores_epsilon_0_2 = [float(result[i]) for i in range(1, len(result), 2)] - - assert len(elements_epsilon_0_2) == 2, f"With EPSILON 0.2, should return 2 elements, got {len(elements_epsilon_0_2)}" - assert set(elements_epsilon_0_2) == {'a', 'b'}, f"With EPSILON 0.2, should get a, b, got {elements_epsilon_0_2}" - - # Verify all returned scores are >= 0.8 (since distance < 0.2 means similarity > 0.8) - for i, score in enumerate(scores_epsilon_0_2): - assert score >= 0.8, f"Element 
{elements_epsilon_0_2[i]} has score {score} which is < 0.8" - - # Test with very small EPSILON - should return only the exact match - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', '2', '1', '1', 'WITHSCORES', 'EPSILON', '0.001') - elements_epsilon_small = [result[i].decode() for i in range(0, len(result), 2)] - - assert len(elements_epsilon_small) == 1, f"With EPSILON 0.001, should return only 1 element, got {len(elements_epsilon_small)}" - assert elements_epsilon_small[0] == 'a', "With very small EPSILON, should only get 'a'" - - # Test with EPSILON 1.0 - should return all elements (since all similarities are between 0 and 1) - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', '2', '1', '1', 'WITHSCORES', 'EPSILON', '1.0') - elements_epsilon_1 = [result[i].decode() for i in range(0, len(result), 2)] - - assert len(elements_epsilon_1) == 5, f"With EPSILON 1.0, should return all 5 elements, got {len(elements_epsilon_1)}" diff --git a/examples/redis-unstable/modules/vector-sets/tests/evict_empty.py b/examples/redis-unstable/modules/vector-sets/tests/evict_empty.py deleted file mode 100644 index 6c78c82..0000000 --- a/examples/redis-unstable/modules/vector-sets/tests/evict_empty.py +++ /dev/null @@ -1,27 +0,0 @@ -from test import TestCase, generate_random_vector -import struct - -class VREM_LastItemDeletesKey(TestCase): - def getname(self): - return "VREM last item deletes key" - - def test(self): - # Generate a random vector - vec = generate_random_vector(4) - vec_bytes = struct.pack('4f', *vec) - - # Add the vector to the key - result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:1') - assert result == 1, "VADD should return 1 for first item" - - # Verify the key exists - exists = self.redis.exists(self.test_key) - assert exists == 1, "Key should exist after VADD" - - # Remove the item - result = self.redis.execute_command('VREM', self.test_key, f'{self.test_key}:item:1') - 
assert result == 1, "VREM should return 1 for successful removal" - - # Verify the key no longer exists - exists = self.redis.exists(self.test_key) - assert exists == 0, "Key should no longer exist after VREM of last item" diff --git a/examples/redis-unstable/modules/vector-sets/tests/filter_expr.py b/examples/redis-unstable/modules/vector-sets/tests/filter_expr.py deleted file mode 100644 index 364915d..0000000 --- a/examples/redis-unstable/modules/vector-sets/tests/filter_expr.py +++ /dev/null @@ -1,242 +0,0 @@ -from test import TestCase - -class VSIMFilterExpressions(TestCase): - def getname(self): - return "VSIM FILTER expressions basic functionality" - - def test(self): - # Create a small set of vectors with different attributes - - # Basic vectors for testing - all orthogonal for clear results - vec1 = [1, 0, 0, 0] - vec2 = [0, 1, 0, 0] - vec3 = [0, 0, 1, 0] - vec4 = [0, 0, 0, 1] - vec5 = [0.5, 0.5, 0, 0] - - # Add vectors with various attributes - self.redis.execute_command('VADD', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], f'{self.test_key}:item:1') - self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:1', - '{"age": 25, "name": "Alice", "active": true, "scores": [85, 90, 95], "city": "New York"}') - - self.redis.execute_command('VADD', self.test_key, 'VALUES', 4, - *[str(x) for x in vec2], f'{self.test_key}:item:2') - self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:2', - '{"age": 30, "name": "Bob", "active": false, "scores": [70, 75, 80], "city": "Boston"}') - - self.redis.execute_command('VADD', self.test_key, 'VALUES', 4, - *[str(x) for x in vec3], f'{self.test_key}:item:3') - self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:3', - '{"age": 35, "name": "Charlie", "scores": [60, 65, 70], "city": "Seattle"}') - - self.redis.execute_command('VADD', self.test_key, 'VALUES', 4, - *[str(x) for x in vec4], f'{self.test_key}:item:4') - # Item 4 has no attribute at all - 
- self.redis.execute_command('VADD', self.test_key, 'VALUES', 4, - *[str(x) for x in vec5], f'{self.test_key}:item:5') - self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:5', - 'invalid json') # Intentionally malformed JSON - - # Basic equality with numbers - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '.age == 25') - assert len(result) == 1, "Expected 1 result for age == 25" - assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 for age == 25" - - # Greater than - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '.age > 25') - assert len(result) == 2, "Expected 2 results for age > 25" - - # Less than or equal - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '.age <= 30') - assert len(result) == 2, "Expected 2 results for age <= 30" - - # String equality - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '.name == "Alice"') - assert len(result) == 1, "Expected 1 result for name == Alice" - assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 for name == Alice" - - # String inequality - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '.name != "Alice"') - assert len(result) == 2, "Expected 2 results for name != Alice" - - # Boolean value - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '.active') - assert len(result) == 1, "Expected 1 result for .active being true" - - # Logical AND - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '.age > 20 and .age < 30') - assert len(result) == 1, "Expected 1 result for 20 < age < 30" - assert result[0].decode() == 
f'{self.test_key}:item:1', "Expected item:1 for 20 < age < 30" - - # Logical OR - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '.age < 30 or .age > 35') - assert len(result) == 1, "Expected 1 result for age < 30 or age > 35" - - # Logical NOT - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '!(.age == 25)') - assert len(result) == 2, "Expected 2 results for NOT(age == 25)" - - # The "in" operator with array - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '.age in [25, 35]') - assert len(result) == 2, "Expected 2 results for age in [25, 35]" - - # The "in" operator with strings in array - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '.name in ["Alice", "David"]') - assert len(result) == 1, "Expected 1 result for name in [Alice, David]" - - # The "in" operator for substring matching - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '"lic" in .name') - assert len(result) == 1, "Expected 1 result for 'lic' in name" - assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 (Alice)" - - # The "in" operator with city substring - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '"ork" in .city') - assert len(result) == 1, "Expected 1 result for 'ork' in city" - assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 (New York)" - - # The "in" operator with no matches - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '"xyz" in .name') - assert len(result) == 0, "Expected 0 results for 'xyz' in name" - - # Off-by-one tests - substring at the beginning - result = self.redis.execute_command('VSIM', 
self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '"Ali" in .name') - assert len(result) == 1, "Expected 1 result for 'Ali' at beginning of 'Alice'" - assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1" - - # Off-by-one tests - substring at the end - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '"ice" in .name') - assert len(result) == 1, "Expected 1 result for 'ice' at end of 'Alice'" - assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1" - - # Off-by-one tests - exact match (entire string) - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '"Alice" in .name') - assert len(result) == 1, "Expected 1 result for exact match 'Alice' in 'Alice'" - assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1" - - # Off-by-one tests - single character - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '"A" in .name') - assert len(result) == 1, "Expected 1 result for single char 'A' in 'Alice'" - - # Off-by-one tests - empty string (should match all strings) - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '"" in .name') - assert len(result) == 3, "Expected 3 results for empty string (matches all strings)" - - # Off-by-one tests - non-empty strings are never substrings of "" - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '.name in ""') - assert len(result) == 0, "Expected 0 results for empty string on the right of IN operator" - - # Off-by-one tests - empty string match empty string. 
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '"" in .name && "" in ""') - assert len(result) == 3, "Expected empty string matching empty string" - - # Arithmetic operations - addition - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '.age + 10 > 40') - assert len(result) == 1, "Expected 1 result for age + 10 > 40" - - # Arithmetic operations - multiplication - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '.age * 2 > 60') - assert len(result) == 1, "Expected 1 result for age * 2 > 60" - - # Arithmetic operations - division - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '.age / 5 == 5') - assert len(result) == 1, "Expected 1 result for age / 5 == 5" - - # Arithmetic operations - modulo - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '.age % 2 == 0') - assert len(result) == 1, "Expected 1 result for age % 2 == 0" - - # Power operator - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '.age ** 2 > 900') - assert len(result) == 1, "Expected 1 result for age^2 > 900" - - # Missing attribute (should exclude items missing that attribute) - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '.missing_field == "value"') - assert len(result) == 0, "Expected 0 results for missing_field == value" - - # No attribute set at all - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '.any_field') - assert f'{self.test_key}:item:4' not in [item.decode() for item in result], "Item with no attribute should be excluded" - - # Malformed JSON - result = self.redis.execute_command('VSIM', 
self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '.any_field') - assert f'{self.test_key}:item:5' not in [item.decode() for item in result], "Item with malformed JSON should be excluded" - - # Complex expression combining multiple operators - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '(.age > 20 and .age < 40) and (.city == "Boston" or .city == "New York")') - assert len(result) == 2, "Expected 2 results for the complex expression" - expected_items = [f'{self.test_key}:item:1', f'{self.test_key}:item:2'] - assert set([item.decode() for item in result]) == set(expected_items), "Expected item:1 and item:2 for the complex expression" - - # Parentheses to control operator precedence - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '.age > (20 + 10)') - assert len(result) == 1, "Expected 1 result for age > (20 + 10)" - - # Array access (arrays evaluate to true) - result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, - *[str(x) for x in vec1], - 'FILTER', '.scores') - assert len(result) == 3, "Expected 3 results for .scores (arrays evaluate to true)" diff --git a/examples/redis-unstable/modules/vector-sets/tests/filter_int.py b/examples/redis-unstable/modules/vector-sets/tests/filter_int.py deleted file mode 100644 index 0fd1dc1..0000000 --- a/examples/redis-unstable/modules/vector-sets/tests/filter_int.py +++ /dev/null @@ -1,668 +0,0 @@ -from test import TestCase, generate_random_vector -import struct -import random -import math -import json -import time - -class VSIMFilterAdvanced(TestCase): - def getname(self): - return "VSIM FILTER comprehensive functionality testing" - - def estimated_runtime(self): - return 15 # This test might take up to 15 seconds for the large dataset - - def setup(self): - super().setup() - self.dim = 32 # Vector dimension - self.count = 5000 # Number of vectors for large tests 
- self.small_count = 50 # Number of vectors for small/quick tests - - # Categories for attributes - self.categories = ["electronics", "furniture", "clothing", "books", "food"] - self.cities = ["New York", "London", "Tokyo", "Paris", "Berlin", "Sydney", "Toronto", "Singapore"] - self.price_ranges = [(10, 50), (50, 200), (200, 1000), (1000, 5000)] - self.years = list(range(2000, 2025)) - - def create_attributes(self, index): - """Create realistic attributes for a vector""" - category = random.choice(self.categories) - city = random.choice(self.cities) - min_price, max_price = random.choice(self.price_ranges) - price = round(random.uniform(min_price, max_price), 2) - year = random.choice(self.years) - in_stock = random.random() > 0.3 # 70% chance of being in stock - rating = round(random.uniform(1, 5), 1) - views = int(random.expovariate(1/1000)) # Exponential distribution for page views - tags = random.sample(["popular", "sale", "new", "limited", "exclusive", "clearance"], - k=random.randint(0, 3)) - - # Add some specific patterns for testing - # Every 10th item has a specific property combination for testing - is_premium = (index % 10 == 0) - - # Create attributes dictionary - attrs = { - "id": index, - "category": category, - "location": city, - "price": price, - "year": year, - "in_stock": in_stock, - "rating": rating, - "views": views, - "tags": tags - } - - if is_premium: - attrs["is_premium"] = True - attrs["special_features"] = ["premium", "warranty", "support"] - - # Add sub-categories for more complex filters - if category == "electronics": - attrs["subcategory"] = random.choice(["phones", "computers", "cameras", "audio"]) - elif category == "furniture": - attrs["subcategory"] = random.choice(["chairs", "tables", "sofas", "beds"]) - elif category == "clothing": - attrs["subcategory"] = random.choice(["shirts", "pants", "dresses", "shoes"]) - - # Add some intentionally missing fields for testing - if random.random() > 0.9: # 10% chance of missing price - del 
attrs["price"] - - # Some items have promotion field - if random.random() > 0.7: # 30% chance of having a promotion - attrs["promotion"] = random.choice(["discount", "bundle", "gift"]) - - # Create invalid JSON for a small percentage of vectors - if random.random() > 0.98: # 2% chance of having invalid JSON - return "{{invalid json}}" - - return json.dumps(attrs) - - def create_vectors_with_attributes(self, key, count): - """Create vectors and add attributes to them""" - vectors = [] - names = [] - attribute_map = {} # To store attributes for verification - - # Create vectors - for i in range(count): - vec = generate_random_vector(self.dim) - vectors.append(vec) - name = f"{key}:item:{i}" - names.append(name) - - # Add to Redis - vec_bytes = struct.pack(f'{self.dim}f', *vec) - self.redis.execute_command('VADD', key, 'FP32', vec_bytes, name) - - # Create and add attributes - attrs = self.create_attributes(i) - self.redis.execute_command('VSETATTR', key, name, attrs) - - # Store attributes for later verification - try: - attribute_map[name] = json.loads(attrs) if '{' in attrs else None - except json.JSONDecodeError: - attribute_map[name] = None - - return vectors, names, attribute_map - - def filter_linear_search(self, vectors, names, query_vector, filter_expr, attribute_map, k=10): - """Perform a linear search with filtering for verification""" - similarities = [] - query_norm = math.sqrt(sum(x*x for x in query_vector)) - - if query_norm == 0: - return [] - - for i, vec in enumerate(vectors): - name = names[i] - attributes = attribute_map.get(name) - - # Skip if doesn't match filter - if not self.matches_filter(attributes, filter_expr): - continue - - vec_norm = math.sqrt(sum(x*x for x in vec)) - if vec_norm == 0: - continue - - dot_product = sum(a*b for a,b in zip(query_vector, vec)) - cosine_sim = dot_product / (query_norm * vec_norm) - distance = 1.0 - cosine_sim - redis_similarity = 1.0 - (distance/2.0) - similarities.append((name, redis_similarity)) - - 
similarities.sort(key=lambda x: x[1], reverse=True) - return similarities[:k] - - def matches_filter(self, attributes, filter_expr): - """Filter matching for verification - uses Python eval to handle complex expressions""" - if attributes is None: - return False # No attributes or invalid JSON - - # Replace JSON path selectors with Python dictionary access - py_expr = filter_expr - - # Handle `.field` notation (replace with attributes['field']) - i = 0 - while i < len(py_expr): - if py_expr[i] == '.' and (i == 0 or not py_expr[i-1].isalnum()): - # Find the end of the selector (stops at operators or whitespace) - j = i + 1 - while j < len(py_expr) and (py_expr[j].isalnum() or py_expr[j] == '_'): - j += 1 - - if j > i + 1: # Found a valid selector - field = py_expr[i+1:j] - # Use a safe access pattern that returns a default value based on context - py_expr = py_expr[:i] + f"attributes.get('{field}')" + py_expr[j:] - i = i + len(f"attributes.get('{field}')") - else: - i += 1 - else: - i += 1 - - # Convert not operator if needed - py_expr = py_expr.replace('!', ' not ') - - try: - # Custom evaluation that handles exceptions for missing fields - # by returning False for the entire expression - - # Split the expression on logical operators - parts = [] - for op in [' and ', ' or ']: - if op in py_expr: - parts = py_expr.split(op) - break - - if not parts: # No logical operators found - parts = [py_expr] - - # Try to evaluate each part - if any part fails, - # the whole expression should fail - try: - result = eval(py_expr, {"attributes": attributes}) - return bool(result) - except (TypeError, AttributeError): - # This typically happens when trying to compare None with - # numbers or other types, or when an attribute doesn't exist - return False - except Exception as e: - print(f"Error evaluating filter expression '{filter_expr}' as '{py_expr}': {e}") - return False - - except Exception as e: - print(f"Error evaluating filter expression '{filter_expr}' as '{py_expr}': 
{e}") - return False - - def safe_decode(self,item): - return item.decode() if isinstance(item, bytes) else item - - def calculate_recall(self, redis_results, linear_results, k=10): - """Calculate recall (percentage of correct results retrieved)""" - redis_set = set(self.safe_decode(item) for item in redis_results) - linear_set = set(item[0] for item in linear_results[:k]) - - if not linear_set: - return 1.0 # If no linear results, consider it perfect recall - - intersection = redis_set.intersection(linear_set) - return len(intersection) / len(linear_set) - - def test_recall_with_filter(self, filter_expr, ef=500, filter_ef=None): - """Test recall for a given filter expression""" - # Create query vector - query_vec = generate_random_vector(self.dim) - - # First, get ground truth using linear scan - linear_results = self.filter_linear_search( - self.vectors, self.names, query_vec, filter_expr, self.attribute_map, k=50) - - # Calculate true selectivity from ground truth - true_selectivity = len(linear_results) / len(self.names) if self.names else 0 - - # Perform Redis search with filter - cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim] - cmd_args.extend([str(x) for x in query_vec]) - cmd_args.extend(['COUNT', 50, 'WITHSCORES', 'EF', ef, 'FILTER', filter_expr]) - if filter_ef: - cmd_args.extend(['FILTER-EF', filter_ef]) - - start_time = time.time() - redis_results = self.redis.execute_command(*cmd_args) - query_time = time.time() - start_time - - # Convert Redis results to dict - redis_items = {} - for i in range(0, len(redis_results), 2): - key = redis_results[i].decode() if isinstance(redis_results[i], bytes) else redis_results[i] - score = float(redis_results[i+1]) - redis_items[key] = score - - # Calculate metrics - recall = self.calculate_recall(redis_items.keys(), linear_results) - selectivity = len(redis_items) / len(self.names) if redis_items else 0 - - # Compare against the true selectivity from linear scan - assert abs(selectivity - true_selectivity) < 
0.1, \ - f"Redis selectivity {selectivity:.3f} differs significantly from ground truth {true_selectivity:.3f}" - - # We expect high recall for standard parameters - if ef >= 500 and (filter_ef is None or filter_ef >= 1000): - try: - assert recall >= 0.7, \ - f"Low recall {recall:.2f} for filter '{filter_expr}'" - except AssertionError as e: - # Get items found in each set - redis_items_set = set(redis_items.keys()) - linear_items_set = set(item[0] for item in linear_results) - - # Find items in each set - only_in_redis = redis_items_set - linear_items_set - only_in_linear = linear_items_set - redis_items_set - in_both = redis_items_set & linear_items_set - - # Build comprehensive debug message - debug = f"\nGround Truth: {len(linear_results)} matching items (total vectors: {len(self.vectors)})" - debug += f"\nRedis Found: {len(redis_items)} items with FILTER-EF: {filter_ef or 'default'}" - debug += f"\nItems in both sets: {len(in_both)} (recall: {recall:.4f})" - debug += f"\nItems only in Redis: {len(only_in_redis)}" - debug += f"\nItems only in Ground Truth: {len(only_in_linear)}" - - # Show some example items from each set with their scores - if only_in_redis: - debug += "\n\nTOP 5 ITEMS ONLY IN REDIS:" - sorted_redis = sorted([(k, v) for k, v in redis_items.items()], key=lambda x: x[1], reverse=True) - for i, (item, score) in enumerate(sorted_redis[:5]): - if item in only_in_redis: - debug += f"\n {i+1}. {item} (Score: {score:.4f})" - - # Show attribute that should match filter - attr = self.attribute_map.get(item) - if attr: - debug += f" - Attrs: {attr.get('category', 'N/A')}, Price: {attr.get('price', 'N/A')}" - - if only_in_linear: - debug += "\n\nTOP 5 ITEMS ONLY IN GROUND TRUTH:" - for i, (item, score) in enumerate(linear_results[:5]): - if item in only_in_linear: - debug += f"\n {i+1}. 
{item} (Score: {score:.4f})" - - # Show attribute that should match filter - attr = self.attribute_map.get(item) - if attr: - debug += f" - Attrs: {attr.get('category', 'N/A')}, Price: {attr.get('price', 'N/A')}" - - # Help identify parsing issues - debug += "\n\nPARSING CHECK:" - debug += f"\nRedis command: VSIM {self.test_key} VALUES {self.dim} [...] FILTER '{filter_expr}'" - - # Check for WITHSCORES handling issues - if len(redis_results) > 0 and len(redis_results) % 2 == 0: - debug += f"\nRedis returned {len(redis_results)} items (looks like item,score pairs)" - debug += f"\nFirst few results: {redis_results[:4]}" - - # Check the filter implementation - debug += "\n\nFILTER IMPLEMENTATION CHECK:" - debug += f"\nFilter expression: '{filter_expr}'" - debug += "\nSample attribute matches from attribute_map:" - count_matching = 0 - for i, (name, attrs) in enumerate(self.attribute_map.items()): - if attrs and self.matches_filter(attrs, filter_expr): - count_matching += 1 - if i < 3: # Show first 3 matches - debug += f"\n - {name}: {attrs}" - debug += f"\nTotal items matching filter in attribute_map: {count_matching}" - - # Check if results array handling could be wrong - debug += "\n\nRESULT ARRAYS CHECK:" - if len(linear_results) >= 1: - debug += f"\nlinear_results[0]: {linear_results[0]}" - if isinstance(linear_results[0], tuple) and len(linear_results[0]) == 2: - debug += " (correct tuple format: (name, score))" - else: - debug += " (UNEXPECTED FORMAT!)" - - # Debug sort order - debug += "\n\nSORTING CHECK:" - if len(linear_results) >= 2: - debug += f"\nGround truth first item score: {linear_results[0][1]}" - debug += f"\nGround truth second item score: {linear_results[1][1]}" - debug += f"\nCorrectly sorted by similarity? 
{linear_results[0][1] >= linear_results[1][1]}" - - # Re-raise with detailed information - raise AssertionError(str(e) + debug) - - return recall, selectivity, query_time, len(redis_items) - - def test(self): - print(f"\nRunning comprehensive VSIM FILTER tests...") - - # Create a larger dataset for testing - print(f"Creating dataset with {self.count} vectors and attributes...") - self.vectors, self.names, self.attribute_map = self.create_vectors_with_attributes( - self.test_key, self.count) - - # ==== 1. Recall and Precision Testing ==== - print("Testing recall for various filters...") - - # Test basic filters with different selectivity - results = {} - results["category"] = self.test_recall_with_filter('.category == "electronics"') - results["price_high"] = self.test_recall_with_filter('.price > 1000') - results["in_stock"] = self.test_recall_with_filter('.in_stock') - results["rating"] = self.test_recall_with_filter('.rating >= 4') - results["complex1"] = self.test_recall_with_filter('.category == "electronics" and .price < 500') - - print("Filter | Recall | Selectivity | Time (ms) | Results") - print("----------------------------------------------------") - for name, (recall, selectivity, time_ms, count) in results.items(): - print(f"{name:7} | {recall:.3f} | {selectivity:.3f} | {time_ms*1000:.1f} | {count}") - - # ==== 2. 
Filter Selectivity Performance ==== - print("\nTesting filter selectivity performance...") - - # High selectivity (very few matches) - high_sel_recall, _, high_sel_time, _ = self.test_recall_with_filter('.is_premium') - - # Medium selectivity - med_sel_recall, _, med_sel_time, _ = self.test_recall_with_filter('.price > 100 and .price < 1000') - - # Low selectivity (many matches) - low_sel_recall, _, low_sel_time, _ = self.test_recall_with_filter('.year > 2000') - - print(f"High selectivity recall: {high_sel_recall:.3f}, time: {high_sel_time*1000:.1f}ms") - print(f"Med selectivity recall: {med_sel_recall:.3f}, time: {med_sel_time*1000:.1f}ms") - print(f"Low selectivity recall: {low_sel_recall:.3f}, time: {low_sel_time*1000:.1f}ms") - - # ==== 3. FILTER-EF Parameter Testing ==== - print("\nTesting FILTER-EF parameter...") - - # Test with different FILTER-EF values - filter_expr = '.category == "electronics" and .price > 200' - ef_values = [100, 500, 2000, 5000] - - print("FILTER-EF | Recall | Time (ms)") - print("-----------------------------") - for filter_ef in ef_values: - recall, _, query_time, _ = self.test_recall_with_filter( - filter_expr, ef=500, filter_ef=filter_ef) - print(f"{filter_ef:9} | {recall:.3f} | {query_time*1000:.1f}") - - # Assert that higher FILTER-EF generally gives better recall - low_ef_recall, _, _, _ = self.test_recall_with_filter(filter_expr, filter_ef=100) - high_ef_recall, _, _, _ = self.test_recall_with_filter(filter_expr, filter_ef=5000) - - # This might not always be true due to randomness, but generally holds - # We use a softer assertion to avoid flaky tests - assert high_ef_recall >= low_ef_recall * 0.8, \ - f"Higher FILTER-EF should generally give better recall: {high_ef_recall:.3f} vs {low_ef_recall:.3f}" - - # ==== 4. 
Complex Filter Expressions ==== - print("\nTesting complex filter expressions...") - - # Test a variety of complex expressions - complex_filters = [ - '.price > 100 and (.category == "electronics" or .category == "furniture")', - '(.rating > 4 and .in_stock) or (.price < 50 and .views > 1000)', - '.category in ["electronics", "clothing"] and .price > 200 and .rating >= 3', - '(.category == "electronics" and .subcategory == "phones") or (.category == "furniture" and .price > 1000)', - '.year > 2010 and !(.price < 100) and .in_stock' - ] - - print("Expression | Results | Time (ms)") - print("-----------------------------") - for i, expr in enumerate(complex_filters): - try: - _, _, query_time, result_count = self.test_recall_with_filter(expr) - print(f"Complex {i+1} | {result_count:7} | {query_time*1000:.1f}") - except Exception as e: - print(f"Complex {i+1} | Error: {str(e)}") - - # ==== 5. Attribute Type Testing ==== - print("\nTesting different attribute types...") - - type_filters = [ - ('.price > 500', "Numeric"), - ('.category == "books"', "String equality"), - ('.in_stock', "Boolean"), - ('.tags in ["sale", "new"]', "Array membership"), - ('.rating * 2 > 8', "Arithmetic") - ] - - for expr, type_name in type_filters: - try: - _, _, query_time, result_count = self.test_recall_with_filter(expr) - print(f"{type_name:16} | {expr:30} | {result_count:5} results | {query_time*1000:.1f}ms") - except Exception as e: - print(f"{type_name:16} | {expr:30} | Error: {str(e)}") - - # ==== 6. 
Filter + Count Interaction ==== - print("\nTesting COUNT parameter with filters...") - - filter_expr = '.category == "electronics"' - counts = [5, 20, 100] - - for count in counts: - query_vec = generate_random_vector(self.dim) - cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim] - cmd_args.extend([str(x) for x in query_vec]) - cmd_args.extend(['COUNT', count, 'WITHSCORES', 'FILTER', filter_expr]) - - results = self.redis.execute_command(*cmd_args) - result_count = len(results) // 2 # Divide by 2 because WITHSCORES returns pairs - - # We expect result count to be at most the requested count - assert result_count <= count, f"Got {result_count} results with COUNT {count}" - print(f"COUNT {count:3} | Got {result_count:3} results") - - # ==== 7. Edge Cases ==== - print("\nTesting edge cases...") - - # Test with no matching items - no_match_expr = '.category == "nonexistent_category"' - results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', self.dim, - *[str(x) for x in generate_random_vector(self.dim)], - 'FILTER', no_match_expr) - assert len(results) == 0, f"Expected 0 results for non-matching filter, got {len(results)}" - print(f"No matching items: {len(results)} results (expected 0)") - - # Test with invalid filter syntax - try: - self.redis.execute_command('VSIM', self.test_key, 'VALUES', self.dim, - *[str(x) for x in generate_random_vector(self.dim)], - 'FILTER', '.category === "books"') # Triple equals is invalid - assert False, "Expected error for invalid filter syntax" - except: - print("Invalid filter syntax correctly raised an error") - - # Test with extremely long complex expression - long_expr = ' and '.join([f'.rating > {i/10}' for i in range(10)]) - try: - results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', self.dim, - *[str(x) for x in generate_random_vector(self.dim)], - 'FILTER', long_expr) - print(f"Long expression: {len(results)} results") - except Exception as e: - print(f"Long expression error: {str(e)}") - - 
print("\nComprehensive VSIM FILTER tests completed successfully") - - -class VSIMFilterSelectivityTest(TestCase): - def getname(self): - return "VSIM FILTER selectivity performance benchmark" - - def estimated_runtime(self): - return 8 # This test might take up to 8 seconds - - def setup(self): - super().setup() - self.dim = 32 - self.count = 10000 - self.test_key = f"{self.test_key}:selectivity" # Use a different key - - def create_vector_with_age_attribute(self, name, age): - """Create a vector with a specific age attribute""" - vec = generate_random_vector(self.dim) - vec_bytes = struct.pack(f'{self.dim}f', *vec) - self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, name) - self.redis.execute_command('VSETATTR', self.test_key, name, json.dumps({"age": age})) - - def test(self): - print("\nRunning VSIM FILTER selectivity benchmark...") - - # Create a dataset where we control the exact selectivity - print(f"Creating controlled dataset with {self.count} vectors...") - - # Create vectors with age attributes from 1 to 100 - for i in range(self.count): - age = (i % 100) + 1 # Ages from 1 to 100 - name = f"{self.test_key}:item:{i}" - self.create_vector_with_age_attribute(name, age) - - # Create a query vector - query_vec = generate_random_vector(self.dim) - - # Test filters with different selectivities - selectivities = [0.01, 0.05, 0.10, 0.25, 0.50, 0.75, 0.99] - results = [] - - print("\nSelectivity | Filter | Results | Time (ms)") - print("--------------------------------------------------") - - for target_selectivity in selectivities: - # Calculate age threshold for desired selectivity - # For example, age <= 10 gives 10% selectivity - age_threshold = int(target_selectivity * 100) - filter_expr = f'.age <= {age_threshold}' - - # Run query and measure time - start_time = time.time() - cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim] - cmd_args.extend([str(x) for x in query_vec]) - cmd_args.extend(['COUNT', 100, 'FILTER', filter_expr]) - - 
results = self.redis.execute_command(*cmd_args) - query_time = time.time() - start_time - - actual_selectivity = len(results) / min(100, int(target_selectivity * self.count)) - print(f"{target_selectivity:.2f} | {filter_expr:15} | {len(results):7} | {query_time*1000:.1f}") - - # Add assertion to ensure reasonable performance for different selectivities - # For very selective queries (1%), we might need more exploration - if target_selectivity <= 0.05: - # For very selective queries, ensure we can find some results - assert len(results) > 0, f"No results found for {filter_expr}" - else: - # For less selective queries, performance should be reasonable - assert query_time < 1.0, f"Query too slow: {query_time:.3f}s for {filter_expr}" - - print("\nSelectivity benchmark completed successfully") - - -class VSIMFilterComparisonTest(TestCase): - def getname(self): - return "VSIM FILTER EF parameter comparison" - - def estimated_runtime(self): - return 8 # This test might take up to 8 seconds - - def setup(self): - super().setup() - self.dim = 32 - self.count = 5000 - self.test_key = f"{self.test_key}:efparams" # Use a different key - - def create_dataset(self): - """Create a dataset with specific attribute patterns for testing FILTER-EF""" - vectors = [] - names = [] - - # Create vectors with category and quality score attributes - for i in range(self.count): - vec = generate_random_vector(self.dim) - name = f"{self.test_key}:item:{i}" - - # Add vector to Redis - vec_bytes = struct.pack(f'{self.dim}f', *vec) - self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, name) - - # Create attributes - we want a very selective filter - # Only 2% of items have category=premium AND quality>90 - category = "premium" if random.random() < 0.1 else random.choice(["standard", "economy", "basic"]) - quality = random.randint(1, 100) - - attrs = { - "id": i, - "category": category, - "quality": quality - } - - self.redis.execute_command('VSETATTR', self.test_key, name, 
json.dumps(attrs)) - vectors.append(vec) - names.append(name) - - return vectors, names - - def test(self): - print("\nRunning VSIM FILTER-EF parameter comparison...") - - # Create dataset - vectors, names = self.create_dataset() - - # Create a selective filter that matches ~2% of items - filter_expr = '.category == "premium" and .quality > 90' - - # Create query vector - query_vec = generate_random_vector(self.dim) - - # Test different FILTER-EF values - ef_values = [50, 100, 500, 1000, 5000] - results = [] - - print("\nFILTER-EF | Results | Time (ms) | Notes") - print("---------------------------------------") - - baseline_count = None - - for ef in ef_values: - # Run query and measure time - start_time = time.time() - cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim] - cmd_args.extend([str(x) for x in query_vec]) - cmd_args.extend(['COUNT', 100, 'FILTER', filter_expr, 'FILTER-EF', ef]) - - query_results = self.redis.execute_command(*cmd_args) - query_time = time.time() - start_time - - # Set baseline for comparison - if baseline_count is None: - baseline_count = len(query_results) - - recall_rate = len(query_results) / max(1, baseline_count) if baseline_count > 0 else 1.0 - - notes = "" - if ef == 5000: - notes = "Baseline" - elif recall_rate < 0.5: - notes = "Low recall!" 
- - print(f"{ef:9} | {len(query_results):7} | {query_time*1000:.1f} | {notes}") - results.append((ef, len(query_results), query_time)) - - # If we have enough results at highest EF, check that recall improves with higher EF - if results[-1][1] >= 5: # At least 5 results for highest EF - # Extract result counts - result_counts = [r[1] for r in results] - - # The last result (highest EF) should typically find more results than the first (lowest EF) - # but we use a soft assertion to avoid flaky tests - assert result_counts[-1] >= result_counts[0], \ - f"Higher FILTER-EF should find at least as many results: {result_counts[-1]} vs {result_counts[0]}" - - print("\nFILTER-EF parameter comparison completed successfully") diff --git a/examples/redis-unstable/modules/vector-sets/tests/large_scale.py b/examples/redis-unstable/modules/vector-sets/tests/large_scale.py deleted file mode 100644 index eac5dca..0000000 --- a/examples/redis-unstable/modules/vector-sets/tests/large_scale.py +++ /dev/null @@ -1,56 +0,0 @@ -from test import TestCase, fill_redis_with_vectors, generate_random_vector -import random - -class LargeScale(TestCase): - def getname(self): - return "Large Scale Comparison" - - def estimated_runtime(self): - return 10 - - def test(self): - dim = 300 - count = 20000 - k = 50 - - # Fill Redis and get reference data for comparison - random.seed(42) # Make test deterministic - data = fill_redis_with_vectors(self.redis, self.test_key, count, dim) - - # Generate query vector - query_vec = generate_random_vector(dim) - - # Get results from Redis with good exploration factor - redis_raw = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim, - *[str(x) for x in query_vec], - 'COUNT', k, 'WITHSCORES', 'EF', 500) - - # Convert Redis results to dict - redis_results = {} - for i in range(0, len(redis_raw), 2): - key = redis_raw[i].decode() - score = float(redis_raw[i+1]) - redis_results[key] = score - - # Get results from linear scan - linear_results = 
data.find_k_nearest(query_vec, k) - linear_items = {name: score for name, score in linear_results} - - # Compare overlap - redis_set = set(redis_results.keys()) - linear_set = set(linear_items.keys()) - overlap = len(redis_set & linear_set) - - # If test fails, print comparison for debugging - if overlap < k * 0.7: - data.print_comparison({'items': redis_results, 'query_vector': query_vec}, k) - - assert overlap >= k * 0.7, \ - f"Expected at least 70% overlap in top {k} results, got {overlap/k*100:.1f}%" - - # Verify scores for common items - for item in redis_set & linear_set: - redis_score = redis_results[item] - linear_score = linear_items[item] - assert abs(redis_score - linear_score) < 0.01, \ - f"Score mismatch for {item}: Redis={redis_score:.3f} Linear={linear_score:.3f}" diff --git a/examples/redis-unstable/modules/vector-sets/tests/memory_usage.py b/examples/redis-unstable/modules/vector-sets/tests/memory_usage.py deleted file mode 100644 index d0f3f09..0000000 --- a/examples/redis-unstable/modules/vector-sets/tests/memory_usage.py +++ /dev/null @@ -1,36 +0,0 @@ -from test import TestCase, generate_random_vector -import struct - -class MemoryUsageTest(TestCase): - def getname(self): - return "[regression] MEMORY USAGE with attributes" - - def test(self): - # Generate random vectors - vec1 = generate_random_vector(4) - vec2 = generate_random_vector(4) - vec_bytes1 = struct.pack('4f', *vec1) - vec_bytes2 = struct.pack('4f', *vec2) - - # Add vectors to the key, one with attribute, one without - self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes1, f'{self.test_key}:item:1') - self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes2, f'{self.test_key}:item:2', 'SETATTR', '{"color":"red"}') - - # Get memory usage for the key - try: - memory_usage = self.redis.execute_command('MEMORY', 'USAGE', self.test_key) - # If we got here without exception, the command worked - assert memory_usage > 0, "MEMORY USAGE should return a positive 
value" - - # Add more attributes to increase complexity - self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:1', '{"color":"blue","size":10}') - - # Check memory usage again - new_memory_usage = self.redis.execute_command('MEMORY', 'USAGE', self.test_key) - assert new_memory_usage > 0, "MEMORY USAGE should still return a positive value after setting attributes" - - # Memory usage should be higher after adding attributes - assert new_memory_usage > memory_usage, "Memory usage increase after adding attributes" - - except Exception as e: - raise AssertionError(f"MEMORY USAGE command failed: {str(e)}") diff --git a/examples/redis-unstable/modules/vector-sets/tests/node_update.py b/examples/redis-unstable/modules/vector-sets/tests/node_update.py deleted file mode 100644 index 53aa2dd..0000000 --- a/examples/redis-unstable/modules/vector-sets/tests/node_update.py +++ /dev/null @@ -1,85 +0,0 @@ -from test import TestCase, generate_random_vector -import struct -import math -import random - -class VectorUpdateAndClusters(TestCase): - def getname(self): - return "VADD vector update with cluster relocation" - - def estimated_runtime(self): - return 2.0 # Should take around 2 seconds - - def generate_cluster_vector(self, base_vec, noise=0.1): - """Generate a vector that's similar to base_vec with some noise.""" - vec = [x + random.gauss(0, noise) for x in base_vec] - # Normalize - norm = math.sqrt(sum(x*x for x in vec)) - return [x/norm for x in vec] - - def test(self): - dim = 128 - vectors_per_cluster = 5000 - - # Create two very different base vectors for our clusters - cluster1_base = generate_random_vector(dim) - cluster2_base = [-x for x in cluster1_base] # Opposite direction - - # Add vectors from first cluster - for i in range(vectors_per_cluster): - vec = self.generate_cluster_vector(cluster1_base) - vec_bytes = struct.pack(f'{dim}f', *vec) - self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, - 
f'{self.test_key}:cluster1:{i}') - - # Add vectors from second cluster - for i in range(vectors_per_cluster): - vec = self.generate_cluster_vector(cluster2_base) - vec_bytes = struct.pack(f'{dim}f', *vec) - self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, - f'{self.test_key}:cluster2:{i}') - - # Pick a test vector from cluster1 - test_key = f'{self.test_key}:cluster1:0' - - # Verify it's in cluster1 using VSIM - initial_vec = self.generate_cluster_vector(cluster1_base) - results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim, - *[str(x) for x in initial_vec], - 'COUNT', 100, 'WITHSCORES') - - # Count how many cluster1 items are in top results - cluster1_count = sum(1 for i in range(0, len(results), 2) - if b'cluster1' in results[i]) - assert cluster1_count > 80, "Initial clustering check failed" - - # Now update the test vector to be in cluster2 - new_vec = self.generate_cluster_vector(cluster2_base, noise=0.05) - vec_bytes = struct.pack(f'{dim}f', *new_vec) - self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, test_key) - - # Verify the embedding was actually updated using VEMB - emb_result = self.redis.execute_command('VEMB', self.test_key, test_key) - updated_vec = [float(x) for x in emb_result] - - # Verify updated vector matches what we inserted - dot_product = sum(a*b for a,b in zip(updated_vec, new_vec)) - similarity = dot_product / (math.sqrt(sum(x*x for x in updated_vec)) * - math.sqrt(sum(x*x for x in new_vec))) - assert similarity > 0.9, "Vector was not properly updated" - - # Verify it's now in cluster2 using VSIM - results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim, - *[str(x) for x in cluster2_base], - 'COUNT', 100, 'WITHSCORES') - - # Verify our updated vector is among top results - found = False - for i in range(0, len(results), 2): - if results[i].decode() == test_key: - found = True - similarity = float(results[i+1]) - assert similarity > 0.80, f"Updated vector has low 
similarity: {similarity}" - break - - assert found, "Updated vector not found in cluster2 proximity" diff --git a/examples/redis-unstable/modules/vector-sets/tests/persistence.py b/examples/redis-unstable/modules/vector-sets/tests/persistence.py deleted file mode 100644 index 79730f4..0000000 --- a/examples/redis-unstable/modules/vector-sets/tests/persistence.py +++ /dev/null @@ -1,86 +0,0 @@ -from test import TestCase, fill_redis_with_vectors, generate_random_vector -import random - -class HNSWPersistence(TestCase): - def getname(self): - return "HNSW Persistence" - - def estimated_runtime(self): - return 30 - - def _verify_results(self, key, dim, query_vec, reduced_dim=None): - """Run a query and return results dict""" - k = 10 - args = ['VSIM', key] - - if reduced_dim: - args.extend(['VALUES', dim]) - args.extend([str(x) for x in query_vec]) - else: - args.extend(['VALUES', dim]) - args.extend([str(x) for x in query_vec]) - - args.extend(['COUNT', k, 'WITHSCORES']) - results = self.redis.execute_command(*args) - - results_dict = {} - for i in range(0, len(results), 2): - key = results[i].decode() - score = float(results[i+1]) - results_dict[key] = score - return results_dict - - def test(self): - # Setup dimensions - dim = 128 - reduced_dim = 32 - count = 5000 - random.seed(42) - - # Create two datasets - one normal and one with dimension reduction - normal_data = fill_redis_with_vectors(self.redis, f"{self.test_key}:normal", count, dim) - projected_data = fill_redis_with_vectors(self.redis, f"{self.test_key}:projected", - count, dim, reduced_dim) - - # Generate query vectors we'll use before and after reload - query_vec_normal = generate_random_vector(dim) - query_vec_projected = generate_random_vector(dim) - - # Get initial results for both sets - initial_normal = self._verify_results(f"{self.test_key}:normal", - dim, query_vec_normal) - initial_projected = self._verify_results(f"{self.test_key}:projected", - dim, query_vec_projected, reduced_dim) - - # Force 
Redis to save and reload the dataset - self.redis.execute_command('DEBUG', 'RELOAD') - - # Verify results after reload - reloaded_normal = self._verify_results(f"{self.test_key}:normal", - dim, query_vec_normal) - reloaded_projected = self._verify_results(f"{self.test_key}:projected", - dim, query_vec_projected, reduced_dim) - - # Verify normal vectors results - assert len(initial_normal) == len(reloaded_normal), \ - "Normal vectors: Result count mismatch before/after reload" - - for key in initial_normal: - assert key in reloaded_normal, f"Normal vectors: Missing item after reload: {key}" - assert abs(initial_normal[key] - reloaded_normal[key]) < 0.0001, \ - f"Normal vectors: Score mismatch for {key}: " + \ - f"before={initial_normal[key]:.6f}, after={reloaded_normal[key]:.6f}" - - # Verify projected vectors results - assert len(initial_projected) == len(reloaded_projected), \ - "Projected vectors: Result count mismatch before/after reload" - - for key in initial_projected: - assert key in reloaded_projected, \ - f"Projected vectors: Missing item after reload: {key}" - assert abs(initial_projected[key] - reloaded_projected[key]) < 0.0001, \ - f"Projected vectors: Score mismatch for {key}: " + \ - f"before={initial_projected[key]:.6f}, after={reloaded_projected[key]:.6f}" - - self.redis.delete(f"{self.test_key}:normal") - self.redis.delete(f"{self.test_key}:projected") diff --git a/examples/redis-unstable/modules/vector-sets/tests/reduce.py b/examples/redis-unstable/modules/vector-sets/tests/reduce.py deleted file mode 100644 index e39164f..0000000 --- a/examples/redis-unstable/modules/vector-sets/tests/reduce.py +++ /dev/null @@ -1,71 +0,0 @@ -from test import TestCase, fill_redis_with_vectors, generate_random_vector - -class Reduce(TestCase): - def getname(self): - return "Dimension Reduction" - - def estimated_runtime(self): - return 0.2 - - def test(self): - original_dim = 100 - reduced_dim = 80 - count = 1000 - k = 50 # Number of nearest neighbors to check - - 
# Fill Redis with vectors using REDUCE and get reference data - data = fill_redis_with_vectors(self.redis, self.test_key, count, original_dim, reduced_dim) - - # Verify dimension is reduced - dim = self.redis.execute_command('VDIM', self.test_key) - assert dim == reduced_dim, f"Expected dimension {reduced_dim}, got {dim}" - - # Generate query vector and get nearest neighbors using Redis - query_vec = generate_random_vector(original_dim) - redis_raw = self.redis.execute_command('VSIM', self.test_key, 'VALUES', - original_dim, *[str(x) for x in query_vec], - 'COUNT', k, 'WITHSCORES') - - # Convert Redis results to dict - redis_results = {} - for i in range(0, len(redis_raw), 2): - key = redis_raw[i].decode() - score = float(redis_raw[i+1]) - redis_results[key] = score - - # Get results from linear scan with original vectors - linear_results = data.find_k_nearest(query_vec, k) - linear_items = {name: score for name, score in linear_results} - - # Compare overlap between reduced and non-reduced results - redis_set = set(redis_results.keys()) - linear_set = set(linear_items.keys()) - overlap = len(redis_set & linear_set) - overlap_ratio = overlap / k - - # With random projection, we expect some loss of accuracy but should - # maintain at least some similarity structure. - # Note that gaussian distribution is the worse with this test, so - # in real world practice, things will be better. - min_expected_overlap = 0.1 # At least 10% overlap in top-k - assert overlap_ratio >= min_expected_overlap, \ - f"Dimension reduction lost too much structure. 
Only {overlap_ratio*100:.1f}% overlap in top {k}" - - # For items that appear in both results, scores should be reasonably correlated - common_items = redis_set & linear_set - for item in common_items: - redis_score = redis_results[item] - linear_score = linear_items[item] - # Allow for some deviation due to dimensionality reduction - assert abs(redis_score - linear_score) < 0.2, \ - f"Score mismatch too high for {item}: Redis={redis_score:.3f} Linear={linear_score:.3f}" - - # If test fails, print comparison for debugging - if overlap_ratio < min_expected_overlap: - print("\nLow overlap in results. Details:") - print("\nTop results from linear scan (original vectors):") - for name, score in linear_results: - print(f"{name}: {score:.3f}") - print("\nTop results from Redis (reduced vectors):") - for item, score in sorted(redis_results.items(), key=lambda x: x[1], reverse=True): - print(f"{item}: {score:.3f}") diff --git a/examples/redis-unstable/modules/vector-sets/tests/replication.py b/examples/redis-unstable/modules/vector-sets/tests/replication.py deleted file mode 100644 index 91dfdf7..0000000 --- a/examples/redis-unstable/modules/vector-sets/tests/replication.py +++ /dev/null @@ -1,92 +0,0 @@ -from test import TestCase, generate_random_vector -import struct -import random -import time - -class ComprehensiveReplicationTest(TestCase): - def getname(self): - return "Comprehensive Replication Test with mixed operations" - - def estimated_runtime(self): - # This test will take longer than the default 100ms - return 20.0 # 20 seconds estimate - - def test(self): - # Setup replication between primary and replica - assert self.setup_replication(), "Failed to setup replication" - - # Test parameters - num_vectors = 5000 - vector_dim = 8 - delete_probability = 0.1 - cas_probability = 0.3 - - # Keep track of added items for potential deletion - added_items = [] - - # Add vectors and occasionally delete - for i in range(num_vectors): - # Generate a random vector - vec = 
generate_random_vector(vector_dim) - vec_bytes = struct.pack(f'{vector_dim}f', *vec) - item_name = f"{self.test_key}:item:{i}" - - # Decide whether to use CAS or not - use_cas = random.random() < cas_probability - - if use_cas and added_items: - # Get an existing item for CAS reference (if available) - cas_item = random.choice(added_items) - try: - # Add with CAS - result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, - item_name, 'CAS') - # Only add to our list if actually added (CAS might fail) - if result == 1: - added_items.append(item_name) - except Exception as e: - print(f" CAS VADD failed: {e}") - else: - try: - # Add without CAS - result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, item_name) - # Only add to our list if actually added - if result == 1: - added_items.append(item_name) - except Exception as e: - print(f" VADD failed: {e}") - - # Randomly delete items (with 10% probability) - if random.random() < delete_probability and added_items: - try: - # Select a random item to delete - item_to_delete = random.choice(added_items) - # Delete the item using VREM (not VDEL) - self.redis.execute_command('VREM', self.test_key, item_to_delete) - # Remove from our list - added_items.remove(item_to_delete) - except Exception as e: - print(f" VREM failed: {e}") - - # Allow time for replication to complete - time.sleep(2.0) - - # Verify final VCARD matches - primary_card = self.redis.execute_command('VCARD', self.test_key) - replica_card = self.replica.execute_command('VCARD', self.test_key) - assert primary_card == replica_card, f"Final VCARD mismatch: primary={primary_card}, replica={replica_card}" - - # Verify VDIM matches - primary_dim = self.redis.execute_command('VDIM', self.test_key) - replica_dim = self.replica.execute_command('VDIM', self.test_key) - assert primary_dim == replica_dim, f"VDIM mismatch: primary={primary_dim}, replica={replica_dim}" - - # Verify digests match using DEBUG DIGEST - 
primary_digest = self.redis.execute_command('DEBUG', 'DIGEST-VALUE', self.test_key) - replica_digest = self.replica.execute_command('DEBUG', 'DIGEST-VALUE', self.test_key) - assert primary_digest == replica_digest, f"Digest mismatch: primary={primary_digest}, replica={replica_digest}" - - # Print summary - print(f"\n Added and maintained {len(added_items)} vectors with dimension {vector_dim}") - print(f" Final vector count: {primary_card}") - print(f" Final digest: {primary_digest[0].decode()}") diff --git a/examples/redis-unstable/modules/vector-sets/tests/threading_config.py b/examples/redis-unstable/modules/vector-sets/tests/threading_config.py deleted file mode 100644 index dfc931a..0000000 --- a/examples/redis-unstable/modules/vector-sets/tests/threading_config.py +++ /dev/null @@ -1,249 +0,0 @@ -from test import TestCase, generate_random_vector -import struct - - -class ThreadingConfigTest(TestCase): - """ - Test suite for vset-force-single-threaded-execution configuration. - - This test validates the behavior of VADD and VSIM commands under different - threading configurations. 
The new configuration is MUTABLE and BINARY: - - false (0): Multi-threaded execution enabled (default) - - true (1): Force single-threaded execution - - Key behaviors tested: - - VADD with and without CAS option under both threading modes - - VSIM with and without NOTHREAD option under both threading modes - - Configuration reading, validation, and runtime modification - - Thread behavior switching (multi-threaded vs forced single-threaded) - """ - - def getname(self): - return "vset-force-single-threaded-execution configuration testing" - - def estimated_runtime(self): - return 0.5 # Updated for mutable config testing with mode switching - - def get_config_value(self): - """Get current vset-force-single-threaded-execution config value""" - try: - result = self.redis.execute_command('CONFIG', 'GET', 'vset-force-single-threaded-execution') - if len(result) >= 2: - # Redis returns 'yes'/'no' for boolean configs - return result[1].decode() if isinstance(result[1], bytes) else result[1] - return None - except Exception: - return None - - def set_config_value(self, value): - """Set vset-force-single-threaded-execution config value""" - try: - # Convert boolean to yes/no string - str_value = 'yes' if value else 'no' - result = self.redis.execute_command('CONFIG', 'SET', 'vset-force-single-threaded-execution', str_value) - return result == b'OK' or result == 'OK' - except Exception as e: - print(f"Failed to set config: {e}") - return False - - def test_config_access_and_mutability(self): - """Test 1: Configuration access and mutability""" - # Get initial value - initial_value = self.get_config_value() - assert initial_value is not None, "Should be able to read vset-force-single-threaded-execution config" - assert initial_value in ['yes', 'no'], f"Config value should be yes/no, got {initial_value}" - - # Test mutability by toggling the value - new_value = 'no' if initial_value == 'yes' else 'yes' - assert self.set_config_value(new_value == 'yes'), "Should be able to change 
config value" - - # Verify the change - current_value = self.get_config_value() - assert current_value == new_value, f"Config should be {new_value}, got {current_value}" - - # Restore original value - assert self.set_config_value(initial_value == 'yes'), "Should be able to restore original value" - - return initial_value == 'yes' - - def test_vadd_without_cas(self, force_single_threaded=False): - """Test 2: VADD command without CAS option""" - # Set threading mode - self.set_config_value(force_single_threaded) - - # Clear test data to avoid dimension conflicts - self.redis.delete(self.test_key) - - dim = 64 - vec = generate_random_vector(dim) - vec_bytes = struct.pack(f'{dim}f', *vec) - - result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:1') - assert result == 1, f"VADD should return 1 for new item, got {result}" - - # Verify the vector was added - card = self.redis.execute_command('VCARD', self.test_key) - assert card == 1, f"VCARD should return 1, got {card}" - - def test_vadd_with_cas(self, force_single_threaded=False): - """Test 3: VADD command with CAS option""" - # Set threading mode - self.set_config_value(force_single_threaded) - - # Clear test data to avoid dimension conflicts - self.redis.delete(self.test_key) - - dim = 64 - vec = generate_random_vector(dim) - vec_bytes = struct.pack(f'{dim}f', *vec) - - # First insertion with CAS should succeed - result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:cas', 'CAS') - assert result == 1, f"First VADD with CAS should return 1, got {result}" - - # Second insertion of same item with CAS should return 0 - result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:cas', 'CAS') - assert result == 0, f"Duplicate VADD with CAS should return 0, got {result}" - - def test_vsim_without_nothread(self, force_single_threaded=False): - """Test 4: VSIM command without NOTHREAD""" - # 
Set threading mode - self.set_config_value(force_single_threaded) - - # Clear test data to avoid dimension conflicts - self.redis.delete(self.test_key) - - dim = 64 - - # Add test vectors - for i in range(5): - vec = generate_random_vector(dim) - vec_bytes = struct.pack(f'{dim}f', *vec) - self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:{i}') - - # Test VSIM without NOTHREAD - query_vec = generate_random_vector(dim) - args = ['VSIM', self.test_key, 'VALUES', dim] + [str(x) for x in query_vec] + ['COUNT', 3] - result = self.redis.execute_command(*args) - - assert isinstance(result, list), f"VSIM should return a list, got {type(result)}" - assert len(result) <= 3, f"VSIM should return at most 3 results, got {len(result)}" - - def test_vsim_with_nothread(self, force_single_threaded=False): - """Test 5: VSIM command with NOTHREAD""" - # Set threading mode - self.set_config_value(force_single_threaded) - - dim = 64 - - # Ensure we have vectors to search (use existing vectors from previous test) - card = self.redis.execute_command('VCARD', self.test_key) - if card == 0: - # Add test vectors if none exist - for i in range(5): - vec = generate_random_vector(dim) - vec_bytes = struct.pack(f'{dim}f', *vec) - self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:{i}') - - # Test VSIM with NOTHREAD - query_vec = generate_random_vector(dim) - args = ['VSIM', self.test_key, 'VALUES', dim] + [str(x) for x in query_vec] + ['COUNT', 3, 'NOTHREAD'] - result = self.redis.execute_command(*args) - - assert isinstance(result, list), f"VSIM with NOTHREAD should return a list, got {type(result)}" - assert len(result) <= 3, f"VSIM with NOTHREAD should return at most 3 results, got {len(result)}" - - def test_threading_mode_comparison(self): - """Test 6: Compare behavior between threading modes""" - dim = 64 - - # Clear test data - self.redis.delete(self.test_key) - - # Test multi-threaded mode (default) - 
self.set_config_value(False) # Multi-threaded - self.test_vadd_without_cas(False) - self.test_vadd_with_cas(False) - multi_threaded_card = self.redis.execute_command('VCARD', self.test_key) - - # Clear and test single-threaded mode - self.redis.delete(self.test_key) - self.set_config_value(True) # Single-threaded - self.test_vadd_without_cas(True) - self.test_vadd_with_cas(True) - single_threaded_card = self.redis.execute_command('VCARD', self.test_key) - - # Both modes should produce same results - assert multi_threaded_card == single_threaded_card, \ - f"Both modes should produce same results: multi={multi_threaded_card}, single={single_threaded_card}" - - def test_nothread_override_behavior(self): - """Test 7: NOTHREAD option should work regardless of config""" - dim = 64 - - # Test with both config modes - for force_single in [False, True]: - self.set_config_value(force_single) - self.redis.delete(self.test_key) - - # Add test vectors - for i in range(3): - vec = generate_random_vector(dim) - vec_bytes = struct.pack(f'{dim}f', *vec) - self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:{i}') - - # NOTHREAD should work regardless of config - query_vec = generate_random_vector(dim) - args = ['VSIM', self.test_key, 'VALUES', dim] + [str(x) for x in query_vec] + ['COUNT', 2, 'NOTHREAD'] - result = self.redis.execute_command(*args) - - assert isinstance(result, list), f"NOTHREAD should work with force_single={force_single}" - assert len(result) <= 2, f"NOTHREAD should return ≤2 results with force_single={force_single}" - - def test(self): - """Main test method - runs all threading configuration tests""" - # Get initial configuration - initial_force_single = self.test_config_access_and_mutability() - print(f"Initial vset-force-single-threaded-execution: {'yes' if initial_force_single else 'no'}") - - # Clear test data - self.redis.delete(self.test_key) - - # Test both threading modes - print("Testing multi-threaded mode...") - 
self.set_config_value(False) - self.test_vadd_without_cas(False) - self.test_vadd_with_cas(False) - self.test_vsim_without_nothread(False) - self.test_vsim_with_nothread(False) - - print("Testing single-threaded mode...") - self.set_config_value(True) - self.test_vadd_without_cas(True) - self.test_vadd_with_cas(True) - self.test_vsim_without_nothread(True) - self.test_vsim_with_nothread(True) - - # Test mode comparison and NOTHREAD override - self.test_threading_mode_comparison() - self.test_nothread_override_behavior() - - # Restore initial configuration - self.set_config_value(initial_force_single) - - # Print summary - self._print_test_summary(initial_force_single) - - def _print_test_summary(self, initial_force_single): - """Print a summary of what was tested""" - print(f"\nThreading Configuration Test Summary:") - print(f" Configuration: vset-force-single-threaded-execution") - print(f" Type: Boolean, Mutable") - print(f" Initial value: {'yes' if initial_force_single else 'no'}") - print(f" Tested modes: Both multi-threaded (no) and single-threaded (yes)") - print(f" VADD: Works correctly in both modes") - print(f" VADD with CAS: Works correctly in both modes") - print(f" VSIM: Works correctly in both modes") - print(f" NOTHREAD option: Overrides config in both modes") - print(f" Configuration mutability: ✅ Successfully changed at runtime") - print(f" All tests passed successfully!") diff --git a/examples/redis-unstable/modules/vector-sets/tests/vadd_cas.py b/examples/redis-unstable/modules/vector-sets/tests/vadd_cas.py deleted file mode 100644 index 3cb3508..0000000 --- a/examples/redis-unstable/modules/vector-sets/tests/vadd_cas.py +++ /dev/null @@ -1,98 +0,0 @@ -from test import TestCase, generate_random_vector -import threading -import struct -import math -import time -import random -from typing import List, Dict - -class ConcurrentCASTest(TestCase): - def getname(self): - return "Concurrent VADD with CAS" - - def estimated_runtime(self): - return 1.5 - - 
def worker(self, vectors: List[List[float]], start_idx: int, end_idx: int, - dim: int, results: Dict[str, bool]): - """Worker thread that adds a subset of vectors using VADD CAS""" - for i in range(start_idx, end_idx): - vec = vectors[i] - name = f"{self.test_key}:item:{i}" - vec_bytes = struct.pack(f'{dim}f', *vec) - - # Try to add the vector with CAS - try: - result = self.redis.execute_command('VADD', self.test_key, 'FP32', - vec_bytes, name, 'CAS') - results[name] = (result == 1) # Store if it was actually added - except Exception as e: - results[name] = False - print(f"Error adding {name}: {e}") - - def verify_vector_similarity(self, vec1: List[float], vec2: List[float]) -> float: - """Calculate cosine similarity between two vectors""" - dot_product = sum(a*b for a,b in zip(vec1, vec2)) - norm1 = math.sqrt(sum(x*x for x in vec1)) - norm2 = math.sqrt(sum(x*x for x in vec2)) - return dot_product / (norm1 * norm2) if norm1 > 0 and norm2 > 0 else 0 - - def test(self): - # Test parameters - dim = 128 - total_vectors = 5000 - num_threads = 8 - vectors_per_thread = total_vectors // num_threads - - # Generate all vectors upfront - random.seed(42) # For reproducibility - vectors = [generate_random_vector(dim) for _ in range(total_vectors)] - - # Prepare threads and results dictionary - threads = [] - results = {} # Will store success/failure for each vector - - # Launch threads - for i in range(num_threads): - start_idx = i * vectors_per_thread - end_idx = start_idx + vectors_per_thread if i < num_threads-1 else total_vectors - thread = threading.Thread(target=self.worker, - args=(vectors, start_idx, end_idx, dim, results)) - threads.append(thread) - thread.start() - - # Wait for all threads to complete - for thread in threads: - thread.join() - - # Verify cardinality - card = self.redis.execute_command('VCARD', self.test_key) - assert card == total_vectors, \ - f"Expected {total_vectors} elements, but found {card}" - - # Verify each vector - num_verified = 0 - for i 
in range(total_vectors): - name = f"{self.test_key}:item:{i}" - - # Verify the item was successfully added - assert results[name], f"Vector {name} was not successfully added" - - # Get the stored vector - stored_vec_raw = self.redis.execute_command('VEMB', self.test_key, name) - stored_vec = [float(x) for x in stored_vec_raw] - - # Verify vector dimensions - assert len(stored_vec) == dim, \ - f"Stored vector dimension mismatch for {name}: {len(stored_vec)} != {dim}" - - # Calculate similarity with original vector - similarity = self.verify_vector_similarity(vectors[i], stored_vec) - assert similarity > 0.99, \ - f"Low similarity ({similarity}) for {name}" - - num_verified += 1 - - # Final verification - assert num_verified == total_vectors, \ - f"Only verified {num_verified} out of {total_vectors} vectors" diff --git a/examples/redis-unstable/modules/vector-sets/tests/vemb.py b/examples/redis-unstable/modules/vector-sets/tests/vemb.py deleted file mode 100644 index 0f4cf77..0000000 --- a/examples/redis-unstable/modules/vector-sets/tests/vemb.py +++ /dev/null @@ -1,41 +0,0 @@ -from test import TestCase -import struct -import math - -class VEMB(TestCase): - def getname(self): - return "VEMB Command" - - def test(self): - dim = 4 - - # Add same vector in both formats - vec = [1, 0, 0, 0] - norm = math.sqrt(sum(x*x for x in vec)) - vec = [x/norm for x in vec] # Normalize the vector - - # Add using FP32 - vec_bytes = struct.pack(f'{dim}f', *vec) - self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:1') - - # Add using VALUES - self.redis.execute_command('VADD', self.test_key, 'VALUES', dim, - *[str(x) for x in vec], f'{self.test_key}:item:2') - - # Get both back with VEMB - result1 = self.redis.execute_command('VEMB', self.test_key, f'{self.test_key}:item:1') - result2 = self.redis.execute_command('VEMB', self.test_key, f'{self.test_key}:item:2') - - retrieved_vec1 = [float(x) for x in result1] - retrieved_vec2 = [float(x) for x 
# ---- vismember.py ----
from test import TestCase, generate_random_vector
import struct


class BasicVISMEMBER(TestCase):
    """Exercise VISMEMBER for stored, never-stored and removed elements."""

    def getname(self):
        """Return the label under which this test is reported."""
        return "VISMEMBER basic functionality"

    def test(self):
        """Store two FP32 vectors, then probe membership before and after a VREM."""
        run = self.redis.execute_command
        key = self.test_key

        first_item = f'{key}:item:1'
        second_item = f'{key}:item:2'
        missing_item = f'{key}:item:nonexistent'

        # Two random 4-component vectors, each sent as four packed C floats.
        for member in (first_item, second_item):
            payload = struct.pack('4f', *generate_random_vector(4))
            run('VADD', key, 'FP32', payload, member)

        # Stored elements are reported as members.
        reply = run('VISMEMBER', key, first_item)
        assert reply == 1, f"VISMEMBER should return 1 for existing item, got {reply}"

        reply = run('VISMEMBER', key, second_item)
        assert reply == 1, f"VISMEMBER should return 1 for existing item, got {reply}"

        # An element that was never added is not a member.
        reply = run('VISMEMBER', key, missing_item)
        assert reply == 0, f"VISMEMBER should return 0 for non-existent item, got {reply}"

        # Asking about a key that was never created also yields 0.
        reply = run('VISMEMBER', f'{key}_nonexistent', first_item)
        assert reply == 0, f"VISMEMBER should return 0 for non-existent key, got {reply}"

        # Once removed, the first element must no longer be a member...
        run('VREM', key, first_item)
        reply = run('VISMEMBER', key, first_item)
        assert reply == 0, f"VISMEMBER should return 0 after element removal, got {reply}"

        # ...while the untouched second element still is.
        reply = run('VISMEMBER', key, second_item)
        assert reply == 1, f"VISMEMBER should still return 1 for remaining item, got {reply}"


# ---- vrand-ping-pong.py ----
from test import TestCase, generate_random_vector
import struct


class VRANDMEMBERPingPongRegressionTest(TestCase):
    """Regression guard for VRANDMEMBER on a set holding exactly two elements."""

    def getname(self):
        """Return the label under which this test is reported."""
        return "[regression] VRANDMEMBER ping-pong"

    def test(self):
        """
        With only two vectors stored, repeated VRANDMEMBER calls must return
        both of them at some point rather than staying stuck on a single one
        (the "ping-pong" issue).
        """
        key = self.test_key
        self.redis.delete(key)  # make sure the key starts out empty
        dim = 4

        # Exactly two members, each with its own random vector.
        for name in ("vec1", "vec2"):
            components = generate_random_vector(dim)
            self.redis.execute_command('VADD', key, 'VALUES', dim, *components, name)

        # Draw many times and keep only the distinct names that came back.
        draws = 100
        distinct = {
            self.redis.execute_command('VRANDMEMBER', key).decode()
            for _ in range(draws)
        }

        assert len(distinct) == 2, f"Ping-pong test failed: should have returned 2 unique members, but got {len(distinct)}."


# ---- vrandmember.py ----
from test import TestCase, generate_random_vector, fill_redis_with_vectors
import struct


class VRANDMEMBERTest(TestCase):
    """Cover VRANDMEMBER without a count and with positive, negative and zero counts."""

    def getname(self):
        """Return the label under which this test is reported."""
        return "VRANDMEMBER basic functionality"

    def test(self):
        """Check VRANDMEMBER replies on a missing key and on a populated set."""
        run = self.redis.execute_command
        key = self.test_key

        # Missing key: nil without a count, empty array with one.
        reply = run('VRANDMEMBER', key)
        assert reply is None, "VRANDMEMBER on non-existent key should return NULL"

        reply = run('VRANDMEMBER', key, 5)
        assert isinstance(reply, list) and len(reply) == 0, "VRANDMEMBER with count on non-existent key should return empty array"

        # Populate the set; the helper's return value exposes the stored names.
        dim = 4
        count = 100
        data = fill_redis_with_vectors(self.redis, key, count, dim)

        # No count: a single member of the set.
        reply = run('VRANDMEMBER', key)
        assert reply is not None, "VRANDMEMBER should return a random member"
        assert reply.decode() in data.names, "Random member should be in the set"

        # Positive count: that many distinct members.
        positive_count = 10
        reply = run('VRANDMEMBER', key, positive_count)
        assert isinstance(reply, list), "VRANDMEMBER with positive count should return an array"
        assert len(reply) == positive_count, f"Should return {positive_count} members"

        names = [raw.decode() for raw in reply]
        assert len(names) == len(set(names)), "Results should be unique with positive count"
        assert all(name in data.names for name in names), "All returned items should be in the set"

        # Positive count above the cardinality: capped at the set size.
        reply = run('VRANDMEMBER', key, count + 10)
        assert len(reply) == count, "Should return only the available members when asking for more than exist"

        # Negative count: exactly abs(count) members, repetitions allowed.
        negative_count = -20
        reply = run('VRANDMEMBER', key, negative_count)
        assert isinstance(reply, list), "VRANDMEMBER with negative count should return an array"
        assert len(reply) == abs(negative_count), f"Should return {abs(negative_count)} members"

        names = [raw.decode() for raw in reply]
        assert all(name in data.names for name in names), "All returned items should be in the set"

        # Zero count: empty array.
        reply = run('VRANDMEMBER', key, 0)
        assert isinstance(reply, list) and len(reply) == 0, "VRANDMEMBER with count=0 should return empty array"
# ---- vrange.py ----
from test import TestCase, generate_random_vector
import struct


class BasicVRANGE(TestCase):
    """Exercise VRANGE boundaries, special endpoints and cursor-style iteration."""

    def getname(self):
        """Return the label under which this test is reported."""
        return "VRANGE basic functionality and iteration"

    def test(self):
        """Store 20 named elements and check VRANGE over a variety of ranges."""
        run = self.redis.execute_command

        def vrange(start, end, count, key=self.test_key):
            """Run VRANGE and return the reply with every element decoded to str."""
            return [r.decode() for r in run('VRANGE', key, start, end, count)]

        # Names chosen so that their lexicographical order is easy to reason about.
        elements = [
            "apple", "apricot", "banana", "cherry", "date",
            "elderberry", "fig", "grape", "honeydew", "kiwi",
            "lemon", "mango", "nectarine", "orange", "papaya",
            "quince", "raspberry", "strawberry", "tangerine", "watermelon"
        ]

        # The vectors themselves are irrelevant here; only the names matter.
        for elem in elements:
            vec = generate_random_vector(4)
            vec_bytes = struct.pack('4f', *vec)
            run('VADD', self.test_key, 'FP32', vec_bytes, elem)

        # Test 1: Basic range with inclusive boundaries (count limits the reply)
        result = vrange('[apple', '[grape', '5')
        assert result == ['apple', 'apricot', 'banana', 'cherry', 'date'], f"Expected first 5 elements from apple, got {result}"

        # Test 2: Exclusive start boundary
        result = vrange('(apple', '[cherry', '10')
        assert result == ['apricot', 'banana', 'cherry'], f"Expected elements after apple up to cherry inclusive, got {result}"

        # Test 3: Exclusive end boundary
        result = vrange('[banana', '(cherry', '10')
        assert result == ['banana'], f"Expected only banana (cherry excluded), got {result}"

        # Test 4: Using '-' for minimum element
        result = vrange('-', '[banana', '10')
        assert result[0] == 'apple', "Should start from the first element"
        assert result[-1] == 'banana', "Should end at banana"

        # Test 5: Using '+' for maximum element
        result = vrange('[raspberry', '+', '10')
        assert 'raspberry' in result and 'strawberry' in result and 'tangerine' in result and 'watermelon' in result, "Should include all elements from raspberry onwards"

        # Test 6: Full range with '-' and '+'
        result = vrange('-', '+', '100')
        assert len(result) == len(elements), f"Should return all {len(elements)} elements"
        assert result == sorted(elements), "Elements should be in lexicographical order"

        # Test 7: Iterator pattern - verify each element appears exactly once.
        # Each batch resumes with an exclusive start at the last name seen.
        seen = set()
        batch_size = 3
        start = '-'

        while True:
            result = vrange(start, '+', str(batch_size))
            if not result:
                break

            for elem in result:
                assert elem not in seen, f"Element {elem} appeared more than once"
                seen.add(elem)

            # A short batch means the end of the set was reached.
            if len(result) < batch_size:
                break

            start = f'({result[-1]}'

        assert seen == set(elements), f"Iterator should visit all elements exactly once. Missing: {set(elements) - seen}, Extra: {seen - set(elements)}"

        # Test 8: Count of 0 returns empty array
        result = vrange('-', '+', '0')
        assert result == [], f"Count of 0 should return empty array, got {result}"

        # Test 9: Range with no matching elements
        result = vrange('[zebra', '+', '10')
        assert result == [], f"Range beyond all elements should return empty array, got {result}"

        # Test 10: Non-existent key. The name is derived from this test's own
        # key so that an unrelated key on the same server cannot collide with it.
        result = vrange('-', '+', '10', key=f'{self.test_key}_nonexistent')
        assert result == [], f"Non-existent key should return empty array, got {result}"

        # Test 11: Partial word boundaries
        result = vrange('[app', '[apr', '10')
        assert 'apple' in result, "Should include 'apple' which starts with 'app'"
        assert 'apricot' not in result, "Should not include 'apricot' as it's >= 'apr'"

        # Test 12: Single element range
        result = vrange('[cherry', '[cherry', '10')
        assert result == ['cherry'], f"Inclusive single element range should return that element, got {result}"

        # Test 13: Empty range (start > end)
        result = vrange('[grape', '[apple', '10')
        assert result == [], f"Range where start > end should return empty array, got {result}"


# ---- vsim_limit_efsearch.py ----
from test import TestCase, generate_random_vector
import struct


class VSIMLimitEFSearch(TestCase):
    """Check the upper bound enforced on the EF option of VSIM."""

    def getname(self):
        """Return the label under which this test is reported."""
        return "VSIM Limit EF Search"

    def estimated_runtime(self):
        """Return the runtime estimate for this test (presumably seconds; confirm in test.py)."""
        return 0.2

    def test(self):
        """EF=1000000 must be accepted and EF=1000001 rejected with an "invalid EF" error."""
        run = self.redis.execute_command
        dim = 32
        vec = generate_random_vector(dim)
        vec_bytes = struct.pack(f'{dim}f', *vec)

        # A single stored vector is enough: only option parsing is under test.
        run('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:1')

        query = [str(x) for x in generate_random_vector(dim)]

        # Test EF upper bound (should accept 1000000)
        result = run('VSIM', self.test_key, 'VALUES', dim, *query, 'EF', 1000000)
        assert isinstance(result, list), "EF=1000000 should be accepted"

        # Test EF over limit (should reject > 1000000).
        # The "was not rejected" failure lives in the else branch so that the
        # except clause cannot catch it and re-report it as a wrong error
        # message; it is raised explicitly so it also survives `python -O`.
        try:
            run('VSIM', self.test_key, 'VALUES', dim, *query, 'EF', 1000001)
        except Exception as e:
            assert "invalid EF" in str(e), f"Expected EF validation error, got: {e}"
        else:
            raise AssertionError("EF=1000001 should be rejected")


# ---- with.py ----
from test import TestCase, generate_random_vector
import struct
import json
import random


class VSIMWithAttribs(TestCase):
    """Check the reply shape of VSIM with WITHSCORES / WITHATTRIBS.

    Every query is sent through both ``self.redis`` and ``self.redis3``
    (presumably the RESP2 and RESP3 connections supplied by TestCase; confirm
    in test.py) and the two reply layouts are validated and compared.
    """

    def getname(self):
        """Return the label under which this test is reported."""
        return "VSIM WITHATTRIBS/WITHSCORES functionality testing"

    def setup(self):
        """Store ``self.count`` random vectors, attaching JSON attributes to most of them."""
        super().setup()
        self.dim = 8
        self.count = 20

        for i in range(self.count):
            vec = generate_random_vector(self.dim)
            vec_bytes = struct.pack(f'{self.dim}f', *vec)
            name = f"{self.test_key}:item:{i}"

            self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, name)

            if i % 5 == 0:
                # Every 5th item has no attribute (for testing NULL responses)
                continue

            category = random.choice(["electronics", "furniture", "clothing"])
            price = random.randint(50, 1000)
            attrs = {"category": category, "price": price, "id": i}

            self.redis.execute_command('VSETATTR', self.test_key, name, json.dumps(attrs))

    def is_numeric(self, value):
        """Return True if *value* is an int/float or a bytes/str that parses as a float."""
        try:
            if isinstance(value, (int, float)):
                return True
            if isinstance(value, bytes):
                float(value.decode('utf-8'))
                return True
            if isinstance(value, str):
                float(value)
                return True
            return False
        except (ValueError, TypeError):
            return False

    @staticmethod
    def _to_float(value):
        """Convert a textual score (bytes or str) to float; return numbers unchanged."""
        return float(value) if isinstance(value, (bytes, str)) else value

    def _vsim_cmd(self, query_vec, count, *options):
        """Build the VSIM ... VALUES ... COUNT argument list, followed by *options*."""
        return ['VSIM', self.test_key, 'VALUES', self.dim,
                *[str(x) for x in query_vec], 'COUNT', count, *options]

    def test(self):
        """Validate VSIM replies for each combination of WITHSCORES and WITHATTRIBS."""
        query_vec = generate_random_vector(self.dim)

        # Test 1: VSIM with no additional options (should be same for RESP2 and RESP3)
        cmd_args = self._vsim_cmd(query_vec, 5)
        results_resp2 = self.redis.execute_command(*cmd_args)
        results_resp3 = self.redis3.execute_command(*cmd_args)

        # Both should return simple arrays of item names
        assert len(results_resp2) == 5, f"RESP2: Expected 5 results, got {len(results_resp2)}"
        assert len(results_resp3) == 5, f"RESP3: Expected 5 results, got {len(results_resp3)}"
        assert all(isinstance(item, bytes) for item in results_resp2), "RESP2: Results should be byte strings"
        assert all(isinstance(item, bytes) for item in results_resp3), "RESP3: Results should be byte strings"

        # Test 2: VSIM with WITHSCORES only
        cmd_args = self._vsim_cmd(query_vec, 5, 'WITHSCORES')
        results_resp2 = self.redis.execute_command(*cmd_args)
        results_resp3 = self.redis3.execute_command(*cmd_args)

        # RESP2: Should be a flat array alternating item, score
        assert len(results_resp2) == 10, f"RESP2: Expected 10 elements (5 items × 2), got {len(results_resp2)}"
        for i in range(0, len(results_resp2), 2):
            assert isinstance(results_resp2[i], bytes), f"RESP2: Item at {i} should be bytes"
            assert self.is_numeric(results_resp2[i+1]), f"RESP2: Score at {i+1} should be numeric"
            score = self._to_float(results_resp2[i+1])
            assert 0 <= score <= 1, f"RESP2: Score {score} should be between 0 and 1"

        # RESP3: Should be a dict/map with items as keys and scores as DIRECT values (not arrays)
        assert isinstance(results_resp3, dict), f"RESP3: Expected dict, got {type(results_resp3)}"
        assert len(results_resp3) == 5, f"RESP3: Expected 5 entries, got {len(results_resp3)}"
        for item, score in results_resp3.items():
            assert isinstance(item, bytes), "RESP3: Key should be bytes"
            # Score should be a direct value, NOT an array
            assert not isinstance(score, list), "RESP3: With single WITH option, value should not be array"
            assert self.is_numeric(score), f"RESP3: Score should be numeric, got {type(score)}"
            score_val = self._to_float(score)
            assert 0 <= score_val <= 1, f"RESP3: Score {score_val} should be between 0 and 1"

        # Test 3: VSIM with WITHATTRIBS only
        cmd_args = self._vsim_cmd(query_vec, 5, 'WITHATTRIBS')
        results_resp2 = self.redis.execute_command(*cmd_args)
        results_resp3 = self.redis3.execute_command(*cmd_args)

        # RESP2: Should be a flat array alternating item, attribute
        assert len(results_resp2) == 10, f"RESP2: Expected 10 elements (5 items × 2), got {len(results_resp2)}"
        for i in range(0, len(results_resp2), 2):
            assert isinstance(results_resp2[i], bytes), f"RESP2: Item at {i} should be bytes"
            attr = results_resp2[i+1]
            assert attr is None or isinstance(attr, bytes), f"RESP2: Attribute at {i+1} should be None or bytes"
            if attr is not None:
                # Verify it's valid JSON
                json.loads(attr)

        # RESP3: Should be a dict/map with items as keys and attributes as DIRECT values (not arrays)
        assert isinstance(results_resp3, dict), f"RESP3: Expected dict, got {type(results_resp3)}"
        assert len(results_resp3) == 5, f"RESP3: Expected 5 entries, got {len(results_resp3)}"
        for item, attr in results_resp3.items():
            assert isinstance(item, bytes), "RESP3: Key should be bytes"
            # Attribute should be a direct value, NOT an array
            assert not isinstance(attr, list), "RESP3: With single WITH option, value should not be array"
            assert attr is None or isinstance(attr, bytes), "RESP3: Attribute should be None or bytes"
            if attr is not None:
                # Verify it's valid JSON
                json.loads(attr)

        # Test 4: VSIM with both WITHSCORES and WITHATTRIBS
        cmd_args = self._vsim_cmd(query_vec, 5, 'WITHSCORES', 'WITHATTRIBS')
        results_resp2 = self.redis.execute_command(*cmd_args)
        results_resp3 = self.redis3.execute_command(*cmd_args)

        # RESP2: Should be a flat array with pattern: item, score, attribute
        assert len(results_resp2) == 15, f"RESP2: Expected 15 elements (5 items × 3), got {len(results_resp2)}"
        for i in range(0, len(results_resp2), 3):
            assert isinstance(results_resp2[i], bytes), f"RESP2: Item at {i} should be bytes"
            assert self.is_numeric(results_resp2[i+1]), f"RESP2: Score at {i+1} should be numeric"
            score = self._to_float(results_resp2[i+1])
            assert 0 <= score <= 1, f"RESP2: Score {score} should be between 0 and 1"
            attr = results_resp2[i+2]
            assert attr is None or isinstance(attr, bytes), f"RESP2: Attribute at {i+2} should be None or bytes"

        # RESP3: Should be a dict where each value is a 2-element array [score, attribute]
        assert isinstance(results_resp3, dict), f"RESP3: Expected dict, got {type(results_resp3)}"
        assert len(results_resp3) == 5, f"RESP3: Expected 5 entries, got {len(results_resp3)}"
        for item, value in results_resp3.items():
            assert isinstance(item, bytes), "RESP3: Key should be bytes"
            # With BOTH options, value MUST be an array
            assert isinstance(value, list), f"RESP3: With both WITH options, value should be a list, got {type(value)}"
            assert len(value) == 2, f"RESP3: Value should have 2 elements [score, attr], got {len(value)}"

            score, attr = value
            assert self.is_numeric(score), "RESP3: Score should be numeric"
            score_val = self._to_float(score)
            assert 0 <= score_val <= 1, f"RESP3: Score {score_val} should be between 0 and 1"
            assert attr is None or isinstance(attr, bytes), "RESP3: Attribute should be None or bytes"

        # Test 5: Verify consistency - both protocols return the same items with
        # the same score and attribute (compared per item, not by position)
        cmd_args = self._vsim_cmd(query_vec, 5, 'WITHSCORES', 'WITHATTRIBS')
        results_resp2 = self.redis.execute_command(*cmd_args)
        results_resp3 = self.redis3.execute_command(*cmd_args)

        # RESP2 is flat (item, score, attribute, ...); RESP3 is keyed by item.
        data_resp2 = {
            results_resp2[i]: (self._to_float(results_resp2[i+1]), results_resp2[i+2])
            for i in range(0, len(results_resp2), 3)
        }
        data_resp3 = {
            item: (self._to_float(value[0]), value[1])
            for item, value in results_resp3.items()
        }

        # Verify same items returned
        assert set(data_resp2) == set(data_resp3), "RESP2 and RESP3 should return the same items"

        # Verify scores and attributes match for each item
        for item, (score_resp2, attr_resp2) in data_resp2.items():
            score_resp3, attr_resp3 = data_resp3[item]

            assert abs(score_resp2 - score_resp3) < 0.0001, \
                f"Scores for {item} don't match: RESP2={score_resp2}, RESP3={score_resp3}"
            assert attr_resp2 == attr_resp3, \
                f"Attributes for {item} don't match: RESP2={attr_resp2}, RESP3={attr_resp3}"

        # Test 6: Test ordering of WITHSCORES and WITHATTRIBS doesn't matter
        cmd_args1 = self._vsim_cmd(query_vec, 3, 'WITHSCORES', 'WITHATTRIBS')
        cmd_args2 = self._vsim_cmd(query_vec, 3, 'WITHATTRIBS', 'WITHSCORES')  # Reversed order

        results1_resp3 = self.redis3.execute_command(*cmd_args1)
        results2_resp3 = self.redis3.execute_command(*cmd_args2)

        # Both should return the same structure
        assert results1_resp3 == results2_resp3, "Order of WITH options shouldn't matter"
