diff options
| author | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-01-21 22:40:55 +0100 |
|---|---|---|
| committer | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-01-21 22:40:55 +0100 |
| commit | 5d8dfe892a2ea89f706ee140c3bdcfd89fe03fda (patch) | |
| tree | 1acdfa5220cd13b7be43a2a01368e80d306473ca /examples/redis-unstable/modules/vector-sets/tests | |
| parent | c7ab12bba64d9c20ccd79b132dac475f7bc3923e (diff) | |
| download | crep-5d8dfe892a2ea89f706ee140c3bdcfd89fe03fda.tar.gz | |
Add Redis source code for testing
Diffstat (limited to 'examples/redis-unstable/modules/vector-sets/tests')
26 files changed, 2863 insertions, 0 deletions
diff --git a/examples/redis-unstable/modules/vector-sets/tests/basic_commands.py b/examples/redis-unstable/modules/vector-sets/tests/basic_commands.py new file mode 100644 index 0000000..8481a36 --- /dev/null +++ b/examples/redis-unstable/modules/vector-sets/tests/basic_commands.py @@ -0,0 +1,21 @@ +from test import TestCase, generate_random_vector +import struct + +class BasicCommands(TestCase): + def getname(self): + return "VADD, VDIM, VCARD basic usage" + + def test(self): + # Test VADD + vec = generate_random_vector(4) + vec_bytes = struct.pack('4f', *vec) + result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:1') + assert result == 1, "VADD should return 1 for first item" + + # Test VDIM + dim = self.redis.execute_command('VDIM', self.test_key) + assert dim == 4, f"VDIM should return 4, got {dim}" + + # Test VCARD + card = self.redis.execute_command('VCARD', self.test_key) + assert card == 1, f"VCARD should return 1, got {card}" diff --git a/examples/redis-unstable/modules/vector-sets/tests/basic_similarity.py b/examples/redis-unstable/modules/vector-sets/tests/basic_similarity.py new file mode 100644 index 0000000..11c3c9b --- /dev/null +++ b/examples/redis-unstable/modules/vector-sets/tests/basic_similarity.py @@ -0,0 +1,35 @@ +from test import TestCase + +class BasicSimilarity(TestCase): + def getname(self): + return "VSIM reported distance makes sense with 4D vectors" + + def test(self): + # Add two very similar vectors, one different + vec1 = [1, 0, 0, 0] + vec2 = [0.99, 0.01, 0, 0] + vec3 = [0.1, 1, -1, 0.5] + + # Add vectors using VALUES format + self.redis.execute_command('VADD', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], f'{self.test_key}:item:1') + self.redis.execute_command('VADD', self.test_key, 'VALUES', 4, + *[str(x) for x in vec2], f'{self.test_key}:item:2') + self.redis.execute_command('VADD', self.test_key, 'VALUES', 4, + *[str(x) for x in vec3], f'{self.test_key}:item:3') + + # 
Query similarity with vec1 + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], 'WITHSCORES') + + # Convert results to dictionary + results_dict = {} + for i in range(0, len(result), 2): + key = result[i].decode() + score = float(result[i+1]) + results_dict[key] = score + + # Verify results + assert results_dict[f'{self.test_key}:item:1'] > 0.99, "Self-similarity should be very high" + assert results_dict[f'{self.test_key}:item:2'] > 0.99, "Similar vector should have high similarity" + assert results_dict[f'{self.test_key}:item:3'] < 0.8, "Not very similar vector should have low similarity" diff --git a/examples/redis-unstable/modules/vector-sets/tests/concurrent_vadd_cas_del_vsim.py b/examples/redis-unstable/modules/vector-sets/tests/concurrent_vadd_cas_del_vsim.py new file mode 100644 index 0000000..f4b3a12 --- /dev/null +++ b/examples/redis-unstable/modules/vector-sets/tests/concurrent_vadd_cas_del_vsim.py @@ -0,0 +1,156 @@ +from test import TestCase, generate_random_vector +import threading +import time +import struct + +class ThreadingStressTest(TestCase): + def getname(self): + return "Concurrent VADD/DEL/VSIM operations stress test" + + def estimated_runtime(self): + return 10 # Test runs for 10 seconds + + def test(self): + # Constants - easy to modify if needed + NUM_VADD_THREADS = 10 + NUM_VSIM_THREADS = 1 + NUM_DEL_THREADS = 1 + TEST_DURATION = 10 # seconds + VECTOR_DIM = 100 + DEL_INTERVAL = 1 # seconds + + # Shared flags and state + stop_event = threading.Event() + error_list = [] + error_lock = threading.Lock() + + def log_error(thread_name, error): + with error_lock: + error_list.append(f"{thread_name}: {error}") + + def vadd_worker(thread_id): + """Thread function to perform VADD operations""" + thread_name = f"VADD-{thread_id}" + try: + vector_count = 0 + while not stop_event.is_set(): + try: + # Generate random vector + vec = generate_random_vector(VECTOR_DIM) + vec_bytes = 
struct.pack(f'{VECTOR_DIM}f', *vec) + + # Add vector with CAS option + self.redis.execute_command( + 'VADD', + self.test_key, + 'FP32', + vec_bytes, + f'{self.test_key}:item:{thread_id}:{vector_count}', + 'CAS' + ) + + vector_count += 1 + + # Small sleep to reduce CPU pressure + if vector_count % 10 == 0: + time.sleep(0.001) + except Exception as e: + log_error(thread_name, f"Error: {str(e)}") + time.sleep(0.1) # Slight backoff on error + except Exception as e: + log_error(thread_name, f"Thread error: {str(e)}") + + def del_worker(): + """Thread function that deletes the key periodically""" + thread_name = "DEL" + try: + del_count = 0 + while not stop_event.is_set(): + try: + # Sleep first, then delete + time.sleep(DEL_INTERVAL) + if stop_event.is_set(): + break + + self.redis.delete(self.test_key) + del_count += 1 + except Exception as e: + log_error(thread_name, f"Error: {str(e)}") + except Exception as e: + log_error(thread_name, f"Thread error: {str(e)}") + + def vsim_worker(thread_id): + """Thread function to perform VSIM operations""" + thread_name = f"VSIM-{thread_id}" + try: + search_count = 0 + while not stop_event.is_set(): + try: + # Generate query vector + query_vec = generate_random_vector(VECTOR_DIM) + query_str = [str(x) for x in query_vec] + + # Perform similarity search + args = ['VSIM', self.test_key, 'VALUES', VECTOR_DIM] + args.extend(query_str) + args.extend(['COUNT', 10]) + self.redis.execute_command(*args) + + search_count += 1 + + # Small sleep to reduce CPU pressure + if search_count % 10 == 0: + time.sleep(0.005) + except Exception as e: + # Don't log empty array errors, as they're expected when key doesn't exist + if "empty array" not in str(e).lower(): + log_error(thread_name, f"Error: {str(e)}") + time.sleep(0.1) # Slight backoff on error + except Exception as e: + log_error(thread_name, f"Thread error: {str(e)}") + + # Start all threads + threads = [] + + # VADD threads + for i in range(NUM_VADD_THREADS): + thread = 
threading.Thread(target=vadd_worker, args=(i,)) + thread.start() + threads.append(thread) + + # DEL threads + for _ in range(NUM_DEL_THREADS): + thread = threading.Thread(target=del_worker) + thread.start() + threads.append(thread) + + # VSIM threads + for i in range(NUM_VSIM_THREADS): + thread = threading.Thread(target=vsim_worker, args=(i,)) + thread.start() + threads.append(thread) + + # Let the test run for the specified duration + time.sleep(TEST_DURATION) + + # Signal all threads to stop + stop_event.set() + + # Wait for threads to finish + for thread in threads: + thread.join(timeout=2.0) + + # Check if Redis is still responsive + try: + ping_result = self.redis.ping() + assert ping_result, "Redis did not respond to PING after stress test" + except Exception as e: + assert False, f"Redis connection failed after stress test: {str(e)}" + + # Report any errors for diagnosis, but don't fail the test unless PING fails + if error_list: + error_count = len(error_list) + print(f"\nEncountered {error_count} errors during stress test.") + print("First 5 errors:") + for error in error_list[:5]: + print(f"- {error}") diff --git a/examples/redis-unstable/modules/vector-sets/tests/concurrent_vsim_and_del.py b/examples/redis-unstable/modules/vector-sets/tests/concurrent_vsim_and_del.py new file mode 100644 index 0000000..9bbf011 --- /dev/null +++ b/examples/redis-unstable/modules/vector-sets/tests/concurrent_vsim_and_del.py @@ -0,0 +1,48 @@ +from test import TestCase, fill_redis_with_vectors, generate_random_vector +import threading, time + +class ConcurrentVSIMAndDEL(TestCase): + def getname(self): + return "Concurrent VSIM and DEL operations" + + def estimated_runtime(self): + return 2 + + def test(self): + # Fill the key with 5000 random vectors + dim = 128 + count = 5000 + fill_redis_with_vectors(self.redis, self.test_key, count, dim) + + # List to store results from threads + thread_results = [] + + def vsim_thread(): + """Thread function to perform VSIM operations 
until the key is deleted""" + while True: + query_vec = generate_random_vector(dim) + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim, + *[str(x) for x in query_vec], 'COUNT', 10) + if not result: + # Empty array detected, key is deleted + thread_results.append(True) + break + + # Start multiple threads to perform VSIM operations + threads = [] + for _ in range(4): # Start 4 threads + t = threading.Thread(target=vsim_thread) + t.start() + threads.append(t) + + # Delete the key while threads are still running + time.sleep(1) + self.redis.delete(self.test_key) + + # Wait for all threads to finish (they will exit once they detect the key is deleted) + for t in threads: + t.join() + + # Verify that all threads detected an empty array or error + assert len(thread_results) == len(threads), "Not all threads detected the key deletion" + assert all(thread_results), "Some threads did not detect an empty array or error after DEL" diff --git a/examples/redis-unstable/modules/vector-sets/tests/debug_digest.py b/examples/redis-unstable/modules/vector-sets/tests/debug_digest.py new file mode 100644 index 0000000..78f06d8 --- /dev/null +++ b/examples/redis-unstable/modules/vector-sets/tests/debug_digest.py @@ -0,0 +1,39 @@ +from test import TestCase, generate_random_vector +import struct + +class DebugDigestTest(TestCase): + def getname(self): + return "[regression] DEBUG DIGEST-VALUE with attributes" + + def test(self): + # Generate random vectors + vec1 = generate_random_vector(4) + vec2 = generate_random_vector(4) + vec_bytes1 = struct.pack('4f', *vec1) + vec_bytes2 = struct.pack('4f', *vec2) + + # Add vectors to the key, one with attribute, one without + self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes1, f'{self.test_key}:item:1') + self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes2, f'{self.test_key}:item:2', 'SETATTR', '{"color":"red"}') + + # Call DEBUG DIGEST-VALUE on the key + try: + digest1 = 
self.redis.execute_command('DEBUG', 'DIGEST-VALUE', self.test_key) + assert digest1 is not None, "DEBUG DIGEST-VALUE should return a value" + + # Change attribute and verify digest changes + self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:2', '{"color":"blue"}') + + digest2 = self.redis.execute_command('DEBUG', 'DIGEST-VALUE', self.test_key) + assert digest2 is not None, "DEBUG DIGEST-VALUE should return a value after attribute change" + assert digest1 != digest2, "Digest should change when an attribute is modified" + + # Remove attribute and verify digest changes again + self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:2', '') + + digest3 = self.redis.execute_command('DEBUG', 'DIGEST-VALUE', self.test_key) + assert digest3 is not None, "DEBUG DIGEST-VALUE should return a value after attribute removal" + assert digest2 != digest3, "Digest should change when an attribute is removed" + + except Exception as e: + raise AssertionError(f"DEBUG DIGEST-VALUE command failed: {str(e)}") diff --git a/examples/redis-unstable/modules/vector-sets/tests/deletion.py b/examples/redis-unstable/modules/vector-sets/tests/deletion.py new file mode 100644 index 0000000..cb91959 --- /dev/null +++ b/examples/redis-unstable/modules/vector-sets/tests/deletion.py @@ -0,0 +1,173 @@ +from test import TestCase, fill_redis_with_vectors, generate_random_vector +import random + +""" +A note about this test: +It was experimentally tried to modify hnsw.c in order to +avoid calling hnsw_reconnect_nodes(). In this case, the test +fails very often with EF set to 250, while it hardly +fails at all with the same parameters if hnsw_reconnect_nodes() +is called. + +Note that for the nature of the test (it is very strict) it can +still fail from time to time, without this signaling any +actual bug. 
+""" + +class VREM(TestCase): + def getname(self): + return "Deletion and graph state after deletion" + + def estimated_runtime(self): + return 2.0 + + def format_neighbors_with_scores(self, links_result, old_links=None, items_to_remove=None): + """Format neighbors with their similarity scores and status indicators""" + if not links_result: + return "No neighbors" + + output = [] + for level, neighbors in enumerate(links_result): + level_num = len(links_result) - level - 1 + output.append(f"Level {level_num}:") + + # Get neighbors and scores + neighbors_with_scores = [] + for i in range(0, len(neighbors), 2): + neighbor = neighbors[i].decode() if isinstance(neighbors[i], bytes) else neighbors[i] + score = float(neighbors[i+1]) if i+1 < len(neighbors) else None + status = "" + + # For old links, mark deleted ones + if items_to_remove and neighbor in items_to_remove: + status = " [lost]" + # For new links, mark newly added ones + elif old_links is not None: + # Check if this neighbor was in the old links at this level + was_present = False + if old_links and level < len(old_links): + old_neighbors = [n.decode() if isinstance(n, bytes) else n + for n in old_links[level]] + was_present = neighbor in old_neighbors + if not was_present: + status = " [gained]" + + if score is not None: + neighbors_with_scores.append(f"{len(neighbors_with_scores)+1}. {neighbor} ({score:.6f}){status}") + else: + neighbors_with_scores.append(f"{len(neighbors_with_scores)+1}. {neighbor}{status}") + + output.extend([" " + n for n in neighbors_with_scores]) + return "\n".join(output) + + def test(self): + # 1. Fill server with random elements + dim = 128 + count = 5000 + data = fill_redis_with_vectors(self.redis, self.test_key, count, dim) + + # 2. 
Do VSIM to get 200 items + query_vec = generate_random_vector(dim) + results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim, + *[str(x) for x in query_vec], + 'COUNT', 200, 'WITHSCORES') + + # Convert results to list of (item, score) pairs, sorted by score + items = [] + for i in range(0, len(results), 2): + item = results[i].decode() + score = float(results[i+1]) + items.append((item, score)) + items.sort(key=lambda x: x[1], reverse=True) # Sort by similarity + + # Store the graph structure for all items before deletion + neighbors_before = {} + for item, _ in items: + links = self.redis.execute_command('VLINKS', self.test_key, item, 'WITHSCORES') + if links: # Some items might not have links + neighbors_before[item] = links + + # 3. Remove 100 random items + items_to_remove = set(item for item, _ in random.sample(items, 100)) + # Keep track of top 10 non-removed items + top_remaining = [] + for item, score in items: + if item not in items_to_remove: + top_remaining.append((item, score)) + if len(top_remaining) == 10: + break + + # Remove the items + for item in items_to_remove: + result = self.redis.execute_command('VREM', self.test_key, item) + assert result == 1, f"VREM failed to remove {item}" + + # 4. Do VSIM again with same vector + new_results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim, + *[str(x) for x in query_vec], + 'COUNT', 200, 'WITHSCORES', + 'EF', 500) + + # Convert new results to dict of item -> score + new_scores = {} + for i in range(0, len(new_results), 2): + item = new_results[i].decode() + score = float(new_results[i+1]) + new_scores[item] = score + + failure = False + failed_item = None + failed_reason = None + # 5. 
Verify all top 10 non-removed items are still found with similar scores + for item, old_score in top_remaining: + if item not in new_scores: + failure = True + failed_item = item + failed_reason = "missing" + break + new_score = new_scores[item] + if abs(new_score - old_score) >= 0.01: + failure = True + failed_item = item + failed_reason = f"score changed: {old_score:.6f} -> {new_score:.6f}" + break + + if failure: + print("\nTest failed!") + print(f"Problem with item: {failed_item} ({failed_reason})") + + print("\nOriginal neighbors (with similarity scores):") + if failed_item in neighbors_before: + print(self.format_neighbors_with_scores( + neighbors_before[failed_item], + items_to_remove=items_to_remove)) + else: + print("No neighbors found in original graph") + + print("\nCurrent neighbors (with similarity scores):") + current_links = self.redis.execute_command('VLINKS', self.test_key, + failed_item, 'WITHSCORES') + if current_links: + print(self.format_neighbors_with_scores( + current_links, + old_links=neighbors_before.get(failed_item))) + else: + print("No neighbors in current graph") + + print("\nOriginal results (top 20):") + for item, score in items[:20]: + deleted = "[deleted]" if item in items_to_remove else "" + print(f"{item}: {score:.6f} {deleted}") + + print("\nNew results after removal (top 20):") + new_items = [] + for i in range(0, len(new_results), 2): + item = new_results[i].decode() + score = float(new_results[i+1]) + new_items.append((item, score)) + new_items.sort(key=lambda x: x[1], reverse=True) + for item, score in new_items[:20]: + print(f"{item}: {score:.6f}") + + raise AssertionError(f"Test failed: Problem with item {failed_item} ({failed_reason}). *** IMPORTANT *** This test may fail from time to time without indicating that there is a bug. However normally it should pass. 
The fact is that it's a quite extreme test where we destroy 50% of nodes of top results and still expect perfect recall, with vectors that are very hostile because of the distribution used.") + diff --git a/examples/redis-unstable/modules/vector-sets/tests/dimension_validation.py b/examples/redis-unstable/modules/vector-sets/tests/dimension_validation.py new file mode 100644 index 0000000..f081152 --- /dev/null +++ b/examples/redis-unstable/modules/vector-sets/tests/dimension_validation.py @@ -0,0 +1,67 @@ +from test import TestCase, generate_random_vector +import struct +import redis.exceptions + +class DimensionValidation(TestCase): + def getname(self): + return "[regression] Dimension Validation with Projection" + + def estimated_runtime(self): + return 0.5 + + def test(self): + # Test scenario 1: Create a set with projection + original_dim = 100 + reduced_dim = 50 + + # Create the initial vector and set with projection + vec1 = generate_random_vector(original_dim) + vec1_bytes = struct.pack(f'{original_dim}f', *vec1) + + # Add first vector with projection + result = self.redis.execute_command('VADD', self.test_key, + 'REDUCE', reduced_dim, + 'FP32', vec1_bytes, f'{self.test_key}:item:1') + assert result == 1, "First VADD with REDUCE should return 1" + + # Check VINFO returns the correct projection information + info = self.redis.execute_command('VINFO', self.test_key) + info_map = {k.decode('utf-8'): v for k, v in zip(info[::2], info[1::2])} + assert 'vector-dim' in info_map, "VINFO should contain vector-dim" + assert info_map['vector-dim'] == reduced_dim, f"Expected reduced dimension {reduced_dim}, got {info['vector-dim']}" + assert 'projection-input-dim' in info_map, "VINFO should contain projection-input-dim" + assert info_map['projection-input-dim'] == original_dim, f"Expected original dimension {original_dim}, got {info['projection-input-dim']}" + + # Test scenario 2: Try adding a mismatched vector - should fail + wrong_dim = 80 + wrong_vec = 
generate_random_vector(wrong_dim) + wrong_vec_bytes = struct.pack(f'{wrong_dim}f', *wrong_vec) + + # This should fail with dimension mismatch error + try: + self.redis.execute_command('VADD', self.test_key, + 'REDUCE', reduced_dim, + 'FP32', wrong_vec_bytes, f'{self.test_key}:item:2') + assert False, "VADD with wrong dimension should fail" + except redis.exceptions.ResponseError as e: + assert "Input dimension mismatch for projection" in str(e), f"Expected dimension mismatch error, got: {e}" + + # Test scenario 3: Add a correctly-sized vector + vec2 = generate_random_vector(original_dim) + vec2_bytes = struct.pack(f'{original_dim}f', *vec2) + + # This should succeed + result = self.redis.execute_command('VADD', self.test_key, + 'REDUCE', reduced_dim, + 'FP32', vec2_bytes, f'{self.test_key}:item:3') + assert result == 1, "VADD with correct dimensions should succeed" + + # Check VSIM also validates input dimensions + wrong_query = generate_random_vector(wrong_dim) + try: + self.redis.execute_command('VSIM', self.test_key, + 'VALUES', wrong_dim, *[str(x) for x in wrong_query], + 'COUNT', 10) + assert False, "VSIM with wrong dimension should fail" + except redis.exceptions.ResponseError as e: + assert "Input dimension mismatch for projection" in str(e), f"Expected dimension mismatch error in VSIM, got: {e}" diff --git a/examples/redis-unstable/modules/vector-sets/tests/epsilon.py b/examples/redis-unstable/modules/vector-sets/tests/epsilon.py new file mode 100644 index 0000000..97e11c0 --- /dev/null +++ b/examples/redis-unstable/modules/vector-sets/tests/epsilon.py @@ -0,0 +1,77 @@ +from test import TestCase + +class EpsilonOption(TestCase): + def getname(self): + return "VSIM EPSILON option filtering" + + def estimated_runtime(self): + return 0.1 + + def test(self): + # Add vectors as shown in the example + # Vector 'a' at (1, 1) - normalized to (0.707, 0.707) + result = self.redis.execute_command('VADD', self.test_key, 'VALUES', '2', '1', '1', 'a') + assert result == 
1, "VADD should return 1 for item 'a'" + + # Vector 'b' at (0, 1) - normalized to (0, 1) + result = self.redis.execute_command('VADD', self.test_key, 'VALUES', '2', '0', '1', 'b') + assert result == 1, "VADD should return 1 for item 'b'" + + # Vector 'c' at (0, 0) - this will be a zero vector, might be handled specially + result = self.redis.execute_command('VADD', self.test_key, 'VALUES', '2', '0', '0', 'c') + assert result == 1, "VADD should return 1 for item 'c'" + + # Vector 'd' at (0, -1) - normalized to (0, -1) + result = self.redis.execute_command('VADD', self.test_key, 'VALUES', '2', '0', '-1', 'd') + assert result == 1, "VADD should return 1 for item 'd'" + + # Vector 'e' at (-1, -1) - normalized to (-0.707, -0.707) + result = self.redis.execute_command('VADD', self.test_key, 'VALUES', '2', '-1', '-1', 'e') + assert result == 1, "VADD should return 1 for item 'e'" + + # Test without EPSILON - should return all items + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', '2', '1', '1', 'WITHSCORES') + # Result is a flat list: [elem1, score1, elem2, score2, ...] 
+ elements_all = [result[i].decode() for i in range(0, len(result), 2)] + scores_all = [float(result[i]) for i in range(1, len(result), 2)] + + assert len(elements_all) == 5, f"Should return 5 elements without EPSILON, got {len(elements_all)}" + assert elements_all[0] == 'a', "First element should be 'a' (most similar)" + assert scores_all[0] == 1.0, "Score for 'a' should be 1.0 (identical)" + + # Test with EPSILON 0.5 - should return only elements with similarity >= 0.5 (distance < 0.5) + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', '2', '1', '1', 'WITHSCORES', 'EPSILON', '0.5') + elements_epsilon_0_5 = [result[i].decode() for i in range(0, len(result), 2)] + scores_epsilon_0_5 = [float(result[i]) for i in range(1, len(result), 2)] + + assert len(elements_epsilon_0_5) == 3, f"With EPSILON 0.5, should return 3 elements, got {len(elements_epsilon_0_5)}" + assert set(elements_epsilon_0_5) == {'a', 'b', 'c'}, f"With EPSILON 0.5, should get a, b, c, got {elements_epsilon_0_5}" + + # Verify all returned scores are >= 0.5 + for i, score in enumerate(scores_epsilon_0_5): + assert score >= 0.5, f"Element {elements_epsilon_0_5[i]} has score {score} which is < 0.5" + + # Test with EPSILON 0.2 - should return only elements with similarity >= 0.8 (distance < 0.2) + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', '2', '1', '1', 'WITHSCORES', 'EPSILON', '0.2') + elements_epsilon_0_2 = [result[i].decode() for i in range(0, len(result), 2)] + scores_epsilon_0_2 = [float(result[i]) for i in range(1, len(result), 2)] + + assert len(elements_epsilon_0_2) == 2, f"With EPSILON 0.2, should return 2 elements, got {len(elements_epsilon_0_2)}" + assert set(elements_epsilon_0_2) == {'a', 'b'}, f"With EPSILON 0.2, should get a, b, got {elements_epsilon_0_2}" + + # Verify all returned scores are >= 0.8 (since distance < 0.2 means similarity > 0.8) + for i, score in enumerate(scores_epsilon_0_2): + assert score >= 0.8, f"Element 
{elements_epsilon_0_2[i]} has score {score} which is < 0.8" + + # Test with very small EPSILON - should return only the exact match + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', '2', '1', '1', 'WITHSCORES', 'EPSILON', '0.001') + elements_epsilon_small = [result[i].decode() for i in range(0, len(result), 2)] + + assert len(elements_epsilon_small) == 1, f"With EPSILON 0.001, should return only 1 element, got {len(elements_epsilon_small)}" + assert elements_epsilon_small[0] == 'a', "With very small EPSILON, should only get 'a'" + + # Test with EPSILON 1.0 - should return all elements (since all similarities are between 0 and 1) + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', '2', '1', '1', 'WITHSCORES', 'EPSILON', '1.0') + elements_epsilon_1 = [result[i].decode() for i in range(0, len(result), 2)] + + assert len(elements_epsilon_1) == 5, f"With EPSILON 1.0, should return all 5 elements, got {len(elements_epsilon_1)}" diff --git a/examples/redis-unstable/modules/vector-sets/tests/evict_empty.py b/examples/redis-unstable/modules/vector-sets/tests/evict_empty.py new file mode 100644 index 0000000..6c78c82 --- /dev/null +++ b/examples/redis-unstable/modules/vector-sets/tests/evict_empty.py @@ -0,0 +1,27 @@ +from test import TestCase, generate_random_vector +import struct + +class VREM_LastItemDeletesKey(TestCase): + def getname(self): + return "VREM last item deletes key" + + def test(self): + # Generate a random vector + vec = generate_random_vector(4) + vec_bytes = struct.pack('4f', *vec) + + # Add the vector to the key + result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:1') + assert result == 1, "VADD should return 1 for first item" + + # Verify the key exists + exists = self.redis.exists(self.test_key) + assert exists == 1, "Key should exist after VADD" + + # Remove the item + result = self.redis.execute_command('VREM', self.test_key, f'{self.test_key}:item:1') + assert 
result == 1, "VREM should return 1 for successful removal" + + # Verify the key no longer exists + exists = self.redis.exists(self.test_key) + assert exists == 0, "Key should no longer exist after VREM of last item" diff --git a/examples/redis-unstable/modules/vector-sets/tests/filter_expr.py b/examples/redis-unstable/modules/vector-sets/tests/filter_expr.py new file mode 100644 index 0000000..364915d --- /dev/null +++ b/examples/redis-unstable/modules/vector-sets/tests/filter_expr.py @@ -0,0 +1,242 @@ +from test import TestCase + +class VSIMFilterExpressions(TestCase): + def getname(self): + return "VSIM FILTER expressions basic functionality" + + def test(self): + # Create a small set of vectors with different attributes + + # Basic vectors for testing - all orthogonal for clear results + vec1 = [1, 0, 0, 0] + vec2 = [0, 1, 0, 0] + vec3 = [0, 0, 1, 0] + vec4 = [0, 0, 0, 1] + vec5 = [0.5, 0.5, 0, 0] + + # Add vectors with various attributes + self.redis.execute_command('VADD', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], f'{self.test_key}:item:1') + self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:1', + '{"age": 25, "name": "Alice", "active": true, "scores": [85, 90, 95], "city": "New York"}') + + self.redis.execute_command('VADD', self.test_key, 'VALUES', 4, + *[str(x) for x in vec2], f'{self.test_key}:item:2') + self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:2', + '{"age": 30, "name": "Bob", "active": false, "scores": [70, 75, 80], "city": "Boston"}') + + self.redis.execute_command('VADD', self.test_key, 'VALUES', 4, + *[str(x) for x in vec3], f'{self.test_key}:item:3') + self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:3', + '{"age": 35, "name": "Charlie", "scores": [60, 65, 70], "city": "Seattle"}') + + self.redis.execute_command('VADD', self.test_key, 'VALUES', 4, + *[str(x) for x in vec4], f'{self.test_key}:item:4') + # Item 4 has no attribute at all + + 
self.redis.execute_command('VADD', self.test_key, 'VALUES', 4, + *[str(x) for x in vec5], f'{self.test_key}:item:5') + self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:5', + 'invalid json') # Intentionally malformed JSON + + # Basic equality with numbers + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '.age == 25') + assert len(result) == 1, "Expected 1 result for age == 25" + assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 for age == 25" + + # Greater than + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '.age > 25') + assert len(result) == 2, "Expected 2 results for age > 25" + + # Less than or equal + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '.age <= 30') + assert len(result) == 2, "Expected 2 results for age <= 30" + + # String equality + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '.name == "Alice"') + assert len(result) == 1, "Expected 1 result for name == Alice" + assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 for name == Alice" + + # String inequality + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '.name != "Alice"') + assert len(result) == 2, "Expected 2 results for name != Alice" + + # Boolean value + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '.active') + assert len(result) == 1, "Expected 1 result for .active being true" + + # Logical AND + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '.age > 20 and .age < 30') + assert len(result) == 1, "Expected 1 result for 20 < age < 30" + assert result[0].decode() == 
f'{self.test_key}:item:1', "Expected item:1 for 20 < age < 30" + + # Logical OR + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '.age < 30 or .age > 35') + assert len(result) == 1, "Expected 1 result for age < 30 or age > 35" + + # Logical NOT + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '!(.age == 25)') + assert len(result) == 2, "Expected 2 results for NOT(age == 25)" + + # The "in" operator with array + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '.age in [25, 35]') + assert len(result) == 2, "Expected 2 results for age in [25, 35]" + + # The "in" operator with strings in array + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '.name in ["Alice", "David"]') + assert len(result) == 1, "Expected 1 result for name in [Alice, David]" + + # The "in" operator for substring matching + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '"lic" in .name') + assert len(result) == 1, "Expected 1 result for 'lic' in name" + assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 (Alice)" + + # The "in" operator with city substring + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '"ork" in .city') + assert len(result) == 1, "Expected 1 result for 'ork' in city" + assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 (New York)" + + # The "in" operator with no matches + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '"xyz" in .name') + assert len(result) == 0, "Expected 0 results for 'xyz' in name" + + # Off-by-one tests - substring at the beginning + result = self.redis.execute_command('VSIM', 
self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '"Ali" in .name') + assert len(result) == 1, "Expected 1 result for 'Ali' at beginning of 'Alice'" + assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1" + + # Off-by-one tests - substring at the end + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '"ice" in .name') + assert len(result) == 1, "Expected 1 result for 'ice' at end of 'Alice'" + assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1" + + # Off-by-one tests - exact match (entire string) + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '"Alice" in .name') + assert len(result) == 1, "Expected 1 result for exact match 'Alice' in 'Alice'" + assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1" + + # Off-by-one tests - single character + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '"A" in .name') + assert len(result) == 1, "Expected 1 result for single char 'A' in 'Alice'" + + # Off-by-one tests - empty string (should match all strings) + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '"" in .name') + assert len(result) == 3, "Expected 3 results for empty string (matches all strings)" + + # Off-by-one tests - non-empty strings are never substrings of "" + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '.name in ""') + assert len(result) == 0, "Expected 0 results for empty string on the right of IN operator" + + # Off-by-one tests - empty string match empty string. 
+ result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '"" in .name && "" in ""') + assert len(result) == 3, "Expected empty string matching empty string" + + # Arithmetic operations - addition + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '.age + 10 > 40') + assert len(result) == 1, "Expected 1 result for age + 10 > 40" + + # Arithmetic operations - multiplication + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '.age * 2 > 60') + assert len(result) == 1, "Expected 1 result for age * 2 > 60" + + # Arithmetic operations - division + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '.age / 5 == 5') + assert len(result) == 1, "Expected 1 result for age / 5 == 5" + + # Arithmetic operations - modulo + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '.age % 2 == 0') + assert len(result) == 1, "Expected 1 result for age % 2 == 0" + + # Power operator + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '.age ** 2 > 900') + assert len(result) == 1, "Expected 1 result for age^2 > 900" + + # Missing attribute (should exclude items missing that attribute) + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '.missing_field == "value"') + assert len(result) == 0, "Expected 0 results for missing_field == value" + + # No attribute set at all + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '.any_field') + assert f'{self.test_key}:item:4' not in [item.decode() for item in result], "Item with no attribute should be excluded" + + # Malformed JSON + result = self.redis.execute_command('VSIM', 
from test import TestCase, generate_random_vector
import struct
import random
import math
import json
import time

class VSIMFilterAdvanced(TestCase):
    """Comprehensive VSIM FILTER test: builds a large attributed dataset,
    computes ground truth with a Python linear scan, and checks recall,
    selectivity, FILTER-EF behavior, complex expressions and edge cases."""

    def getname(self):
        return "VSIM FILTER comprehensive functionality testing"

    def estimated_runtime(self):
        return 15  # This test might take up to 15 seconds for the large dataset

    def setup(self):
        super().setup()
        self.dim = 32           # Vector dimension
        self.count = 5000       # Number of vectors for large tests
        self.small_count = 50   # Number of vectors for small/quick tests

        # Categories for attributes
        self.categories = ["electronics", "furniture", "clothing", "books", "food"]
        self.cities = ["New York", "London", "Tokyo", "Paris", "Berlin", "Sydney", "Toronto", "Singapore"]
        self.price_ranges = [(10, 50), (50, 200), (200, 1000), (1000, 5000)]
        self.years = list(range(2000, 2025))

    def create_attributes(self, index):
        """Create realistic attributes for a vector.

        Returns a JSON string; ~2% of the time returns intentionally
        invalid JSON so that filters must skip unparseable attributes."""
        category = random.choice(self.categories)
        city = random.choice(self.cities)
        min_price, max_price = random.choice(self.price_ranges)
        price = round(random.uniform(min_price, max_price), 2)
        year = random.choice(self.years)
        in_stock = random.random() > 0.3  # 70% chance of being in stock
        rating = round(random.uniform(1, 5), 1)
        views = int(random.expovariate(1/1000))  # Exponential distribution for page views
        tags = random.sample(["popular", "sale", "new", "limited", "exclusive", "clearance"],
                             k=random.randint(0, 3))

        # Every 10th item has a specific property combination for testing
        is_premium = (index % 10 == 0)

        attrs = {
            "id": index,
            "category": category,
            "location": city,
            "price": price,
            "year": year,
            "in_stock": in_stock,
            "rating": rating,
            "views": views,
            "tags": tags
        }

        if is_premium:
            attrs["is_premium"] = True
            attrs["special_features"] = ["premium", "warranty", "support"]

        # Add sub-categories for more complex filters
        if category == "electronics":
            attrs["subcategory"] = random.choice(["phones", "computers", "cameras", "audio"])
        elif category == "furniture":
            attrs["subcategory"] = random.choice(["chairs", "tables", "sofas", "beds"])
        elif category == "clothing":
            attrs["subcategory"] = random.choice(["shirts", "pants", "dresses", "shoes"])

        # Add some intentionally missing fields for testing
        if random.random() > 0.9:  # 10% chance of missing price
            del attrs["price"]

        # Some items have promotion field
        if random.random() > 0.7:  # 30% chance of having a promotion
            attrs["promotion"] = random.choice(["discount", "bundle", "gift"])

        # Create invalid JSON for a small percentage of vectors
        if random.random() > 0.98:  # 2% chance of having invalid JSON
            return "{{invalid json}}"

        return json.dumps(attrs)

    def create_vectors_with_attributes(self, key, count):
        """Create `count` random vectors under `key`, attach attributes, and
        return (vectors, names, attribute_map) for later verification.
        attribute_map values are parsed dicts, or None for invalid JSON."""
        vectors = []
        names = []
        attribute_map = {}  # To store attributes for verification

        for i in range(count):
            vec = generate_random_vector(self.dim)
            vectors.append(vec)
            name = f"{key}:item:{i}"
            names.append(name)

            # Add to Redis
            vec_bytes = struct.pack(f'{self.dim}f', *vec)
            self.redis.execute_command('VADD', key, 'FP32', vec_bytes, name)

            # Create and add attributes
            attrs = self.create_attributes(i)
            self.redis.execute_command('VSETATTR', key, name, attrs)

            # Store attributes for later verification
            try:
                attribute_map[name] = json.loads(attrs) if '{' in attrs else None
            except json.JSONDecodeError:
                attribute_map[name] = None

        return vectors, names, attribute_map

    def filter_linear_search(self, vectors, names, query_vector, filter_expr, attribute_map, k=10):
        """Ground truth: brute-force cosine search restricted to items that
        pass `filter_expr`. Returns up to k (name, similarity) pairs sorted
        by similarity descending, using the same 1 - distance/2 mapping the
        module reports."""
        similarities = []
        query_norm = math.sqrt(sum(x*x for x in query_vector))

        if query_norm == 0:
            return []

        for i, vec in enumerate(vectors):
            name = names[i]
            attributes = attribute_map.get(name)

            # Skip if doesn't match filter
            if not self.matches_filter(attributes, filter_expr):
                continue

            vec_norm = math.sqrt(sum(x*x for x in vec))
            if vec_norm == 0:
                continue

            dot_product = sum(a*b for a, b in zip(query_vector, vec))
            cosine_sim = dot_product / (query_norm * vec_norm)
            distance = 1.0 - cosine_sim
            redis_similarity = 1.0 - (distance/2.0)
            similarities.append((name, redis_similarity))

        similarities.sort(key=lambda x: x[1], reverse=True)
        return similarities[:k]

    def matches_filter(self, attributes, filter_expr):
        """Filter matching for verification - rewrites the filter expression
        into Python and evaluates it against the attribute dict.

        Returns False for items with missing/invalid attributes, and for
        expressions that fail to evaluate (e.g. comparing a missing field)."""
        if attributes is None:
            return False  # No attributes or invalid JSON

        # Replace JSON path selectors (.field) with attributes.get('field')
        py_expr = filter_expr
        i = 0
        while i < len(py_expr):
            if py_expr[i] == '.' and (i == 0 or not py_expr[i-1].isalnum()):
                # Find the end of the selector (stops at operators or whitespace)
                j = i + 1
                while j < len(py_expr) and (py_expr[j].isalnum() or py_expr[j] == '_'):
                    j += 1

                if j > i + 1:  # Found a valid selector
                    field = py_expr[i+1:j]
                    replacement = f"attributes.get('{field}')"
                    py_expr = py_expr[:i] + replacement + py_expr[j:]
                    i = i + len(replacement)
                else:
                    i += 1
            else:
                i += 1

        # Convert logical NOT ('!') to Python's 'not', but protect '!=':
        # a plain replace('!', ' not ') would corrupt '.a != 1' into
        # '.a  not = 1'. Shield '!=' with a sentinel first.
        py_expr = py_expr.replace('!=', '\x00NE\x00')
        py_expr = py_expr.replace('!', ' not ')
        py_expr = py_expr.replace('\x00NE\x00', '!=')

        try:
            result = eval(py_expr, {"attributes": attributes})
            return bool(result)
        except (TypeError, AttributeError):
            # This typically happens when trying to compare None with
            # numbers or other types, or when an attribute doesn't exist.
            return False
        except Exception as e:
            print(f"Error evaluating filter expression '{filter_expr}' as '{py_expr}': {e}")
            return False

    def safe_decode(self, item):
        # Redis replies may be bytes or str depending on client config.
        return item.decode() if isinstance(item, bytes) else item

    def calculate_recall(self, redis_results, linear_results, k=10):
        """Calculate recall (fraction of ground-truth top-k retrieved)."""
        redis_set = set(self.safe_decode(item) for item in redis_results)
        linear_set = set(item[0] for item in linear_results[:k])

        if not linear_set:
            return 1.0  # If no linear results, consider it perfect recall

        intersection = redis_set.intersection(linear_set)
        return len(intersection) / len(linear_set)

    def test_recall_with_filter(self, filter_expr, ef=500, filter_ef=None):
        """Run VSIM with `filter_expr`, compare against the linear-scan
        ground truth, and return (recall, selectivity, query_time, count).

        Asserts selectivity agreement; for generous exploration parameters
        it also asserts recall >= 0.7, attaching a detailed debug report on
        failure."""
        query_vec = generate_random_vector(self.dim)

        # First, get ground truth using linear scan
        linear_results = self.filter_linear_search(
            self.vectors, self.names, query_vec, filter_expr, self.attribute_map, k=50)

        # Calculate true selectivity from ground truth
        true_selectivity = len(linear_results) / len(self.names) if self.names else 0

        # Perform Redis search with filter
        cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
        cmd_args.extend([str(x) for x in query_vec])
        cmd_args.extend(['COUNT', 50, 'WITHSCORES', 'EF', ef, 'FILTER', filter_expr])
        if filter_ef:
            cmd_args.extend(['FILTER-EF', filter_ef])

        start_time = time.time()
        redis_results = self.redis.execute_command(*cmd_args)
        query_time = time.time() - start_time

        # Convert Redis results (item, score pairs) to dict
        redis_items = {}
        for i in range(0, len(redis_results), 2):
            key = redis_results[i].decode() if isinstance(redis_results[i], bytes) else redis_results[i]
            score = float(redis_results[i+1])
            redis_items[key] = score

        # Calculate metrics
        recall = self.calculate_recall(redis_items.keys(), linear_results)
        selectivity = len(redis_items) / len(self.names) if redis_items else 0

        # Compare against the true selectivity from linear scan
        assert abs(selectivity - true_selectivity) < 0.1, \
            f"Redis selectivity {selectivity:.3f} differs significantly from ground truth {true_selectivity:.3f}"

        # We expect high recall for standard parameters
        if ef >= 500 and (filter_ef is None or filter_ef >= 1000):
            try:
                assert recall >= 0.7, \
                    f"Low recall {recall:.2f} for filter '{filter_expr}'"
            except AssertionError as e:
                # Get items found in each set
                redis_items_set = set(redis_items.keys())
                linear_items_set = set(item[0] for item in linear_results)

                # Find items in each set
                only_in_redis = redis_items_set - linear_items_set
                only_in_linear = linear_items_set - redis_items_set
                in_both = redis_items_set & linear_items_set

                # Build comprehensive debug message
                debug = f"\nGround Truth: {len(linear_results)} matching items (total vectors: {len(self.vectors)})"
                debug += f"\nRedis Found: {len(redis_items)} items with FILTER-EF: {filter_ef or 'default'}"
                debug += f"\nItems in both sets: {len(in_both)} (recall: {recall:.4f})"
                debug += f"\nItems only in Redis: {len(only_in_redis)}"
                debug += f"\nItems only in Ground Truth: {len(only_in_linear)}"

                # Show some example items from each set with their scores
                if only_in_redis:
                    debug += "\n\nTOP 5 ITEMS ONLY IN REDIS:"
                    sorted_redis = sorted([(k, v) for k, v in redis_items.items()], key=lambda x: x[1], reverse=True)
                    for i, (item, score) in enumerate(sorted_redis[:5]):
                        if item in only_in_redis:
                            debug += f"\n  {i+1}. {item} (Score: {score:.4f})"
                            # Show attribute that should match filter
                            attr = self.attribute_map.get(item)
                            if attr:
                                debug += f" - Attrs: {attr.get('category', 'N/A')}, Price: {attr.get('price', 'N/A')}"

                if only_in_linear:
                    debug += "\n\nTOP 5 ITEMS ONLY IN GROUND TRUTH:"
                    for i, (item, score) in enumerate(linear_results[:5]):
                        if item in only_in_linear:
                            debug += f"\n  {i+1}. {item} (Score: {score:.4f})"
                            # Show attribute that should match filter
                            attr = self.attribute_map.get(item)
                            if attr:
                                debug += f" - Attrs: {attr.get('category', 'N/A')}, Price: {attr.get('price', 'N/A')}"

                # Help identify parsing issues
                debug += "\n\nPARSING CHECK:"
                debug += f"\nRedis command: VSIM {self.test_key} VALUES {self.dim} [...] FILTER '{filter_expr}'"

                # Check for WITHSCORES handling issues
                if len(redis_results) > 0 and len(redis_results) % 2 == 0:
                    debug += f"\nRedis returned {len(redis_results)} items (looks like item,score pairs)"
                    debug += f"\nFirst few results: {redis_results[:4]}"

                # Check the filter implementation
                debug += "\n\nFILTER IMPLEMENTATION CHECK:"
                debug += f"\nFilter expression: '{filter_expr}'"
                debug += "\nSample attribute matches from attribute_map:"
                count_matching = 0
                for i, (name, attrs) in enumerate(self.attribute_map.items()):
                    if attrs and self.matches_filter(attrs, filter_expr):
                        count_matching += 1
                        if i < 3:  # Show first 3 matches
                            debug += f"\n  - {name}: {attrs}"
                debug += f"\nTotal items matching filter in attribute_map: {count_matching}"

                # Check if results array handling could be wrong
                debug += "\n\nRESULT ARRAYS CHECK:"
                if len(linear_results) >= 1:
                    debug += f"\nlinear_results[0]: {linear_results[0]}"
                    if isinstance(linear_results[0], tuple) and len(linear_results[0]) == 2:
                        debug += " (correct tuple format: (name, score))"
                    else:
                        debug += " (UNEXPECTED FORMAT!)"

                # Debug sort order
                debug += "\n\nSORTING CHECK:"
                if len(linear_results) >= 2:
                    debug += f"\nGround truth first item score: {linear_results[0][1]}"
                    debug += f"\nGround truth second item score: {linear_results[1][1]}"
                    debug += f"\nCorrectly sorted by similarity? {linear_results[0][1] >= linear_results[1][1]}"

                # Re-raise with detailed information
                raise AssertionError(str(e) + debug)

        return recall, selectivity, query_time, len(redis_items)

    def test(self):
        print("\nRunning comprehensive VSIM FILTER tests...")

        # Create a larger dataset for testing
        print(f"Creating dataset with {self.count} vectors and attributes...")
        self.vectors, self.names, self.attribute_map = self.create_vectors_with_attributes(
            self.test_key, self.count)

        # ==== 1. Recall and Precision Testing ====
        print("Testing recall for various filters...")

        # Test basic filters with different selectivity
        results = {}
        results["category"] = self.test_recall_with_filter('.category == "electronics"')
        results["price_high"] = self.test_recall_with_filter('.price > 1000')
        results["in_stock"] = self.test_recall_with_filter('.in_stock')
        results["rating"] = self.test_recall_with_filter('.rating >= 4')
        results["complex1"] = self.test_recall_with_filter('.category == "electronics" and .price < 500')

        print("Filter | Recall | Selectivity | Time (ms) | Results")
        print("----------------------------------------------------")
        for name, (recall, selectivity, time_ms, count) in results.items():
            print(f"{name:7} | {recall:.3f} | {selectivity:.3f} | {time_ms*1000:.1f} | {count}")

        # ==== 2. Filter Selectivity Performance ====
        print("\nTesting filter selectivity performance...")

        # High selectivity (very few matches)
        high_sel_recall, _, high_sel_time, _ = self.test_recall_with_filter('.is_premium')

        # Medium selectivity
        med_sel_recall, _, med_sel_time, _ = self.test_recall_with_filter('.price > 100 and .price < 1000')

        # Low selectivity (many matches)
        low_sel_recall, _, low_sel_time, _ = self.test_recall_with_filter('.year > 2000')

        print(f"High selectivity recall: {high_sel_recall:.3f}, time: {high_sel_time*1000:.1f}ms")
        print(f"Med selectivity recall: {med_sel_recall:.3f}, time: {med_sel_time*1000:.1f}ms")
        print(f"Low selectivity recall: {low_sel_recall:.3f}, time: {low_sel_time*1000:.1f}ms")

        # ==== 3. FILTER-EF Parameter Testing ====
        print("\nTesting FILTER-EF parameter...")

        filter_expr = '.category == "electronics" and .price > 200'
        ef_values = [100, 500, 2000, 5000]

        print("FILTER-EF | Recall | Time (ms)")
        print("-----------------------------")
        for filter_ef in ef_values:
            recall, _, query_time, _ = self.test_recall_with_filter(
                filter_expr, ef=500, filter_ef=filter_ef)
            print(f"{filter_ef:9} | {recall:.3f} | {query_time*1000:.1f}")

        # Assert that higher FILTER-EF generally gives better recall.
        # This might not always be true due to randomness, so we use a
        # softer assertion to avoid flaky tests.
        low_ef_recall, _, _, _ = self.test_recall_with_filter(filter_expr, filter_ef=100)
        high_ef_recall, _, _, _ = self.test_recall_with_filter(filter_expr, filter_ef=5000)
        assert high_ef_recall >= low_ef_recall * 0.8, \
            f"Higher FILTER-EF should generally give better recall: {high_ef_recall:.3f} vs {low_ef_recall:.3f}"

        # ==== 4. Complex Filter Expressions ====
        print("\nTesting complex filter expressions...")

        complex_filters = [
            '.price > 100 and (.category == "electronics" or .category == "furniture")',
            '(.rating > 4 and .in_stock) or (.price < 50 and .views > 1000)',
            '.category in ["electronics", "clothing"] and .price > 200 and .rating >= 3',
            '(.category == "electronics" and .subcategory == "phones") or (.category == "furniture" and .price > 1000)',
            '.year > 2010 and !(.price < 100) and .in_stock'
        ]

        print("Expression | Results | Time (ms)")
        print("-----------------------------")
        for i, expr in enumerate(complex_filters):
            try:
                _, _, query_time, result_count = self.test_recall_with_filter(expr)
                print(f"Complex {i+1} | {result_count:7} | {query_time*1000:.1f}")
            except Exception as e:
                print(f"Complex {i+1} | Error: {str(e)}")

        # ==== 5. Attribute Type Testing ====
        print("\nTesting different attribute types...")

        type_filters = [
            ('.price > 500', "Numeric"),
            ('.category == "books"', "String equality"),
            ('.in_stock', "Boolean"),
            ('.tags in ["sale", "new"]', "Array membership"),
            ('.rating * 2 > 8', "Arithmetic")
        ]

        for expr, type_name in type_filters:
            try:
                _, _, query_time, result_count = self.test_recall_with_filter(expr)
                print(f"{type_name:16} | {expr:30} | {result_count:5} results | {query_time*1000:.1f}ms")
            except Exception as e:
                print(f"{type_name:16} | {expr:30} | Error: {str(e)}")

        # ==== 6. Filter + Count Interaction ====
        print("\nTesting COUNT parameter with filters...")

        filter_expr = '.category == "electronics"'
        counts = [5, 20, 100]

        for count in counts:
            query_vec = generate_random_vector(self.dim)
            cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
            cmd_args.extend([str(x) for x in query_vec])
            cmd_args.extend(['COUNT', count, 'WITHSCORES', 'FILTER', filter_expr])

            results = self.redis.execute_command(*cmd_args)
            result_count = len(results) // 2  # Divide by 2 because WITHSCORES returns pairs

            # We expect result count to be at most the requested count
            assert result_count <= count, f"Got {result_count} results with COUNT {count}"
            print(f"COUNT {count:3} | Got {result_count:3} results")

        # ==== 7. Edge Cases ====
        print("\nTesting edge cases...")

        # Test with no matching items
        no_match_expr = '.category == "nonexistent_category"'
        results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', self.dim,
                                             *[str(x) for x in generate_random_vector(self.dim)],
                                             'FILTER', no_match_expr)
        assert len(results) == 0, f"Expected 0 results for non-matching filter, got {len(results)}"
        print(f"No matching items: {len(results)} results (expected 0)")

        # Test with invalid filter syntax. NOTE: the failure assertion lives
        # in the `else:` clause so it cannot be swallowed by the `except`
        # (a bare try/except around `assert False` would always "pass").
        try:
            self.redis.execute_command('VSIM', self.test_key, 'VALUES', self.dim,
                                       *[str(x) for x in generate_random_vector(self.dim)],
                                       'FILTER', '.category === "books"')  # Triple equals is invalid
        except Exception:
            print("Invalid filter syntax correctly raised an error")
        else:
            assert False, "Expected error for invalid filter syntax"

        # Test with extremely long complex expression
        long_expr = ' and '.join([f'.rating > {i/10}' for i in range(10)])
        try:
            results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', self.dim,
                                                 *[str(x) for x in generate_random_vector(self.dim)],
                                                 'FILTER', long_expr)
            print(f"Long expression: {len(results)} results")
        except Exception as e:
            print(f"Long expression error: {str(e)}")

        print("\nComprehensive VSIM FILTER tests completed successfully")
print("\nComprehensive VSIM FILTER tests completed successfully") + + +class VSIMFilterSelectivityTest(TestCase): + def getname(self): + return "VSIM FILTER selectivity performance benchmark" + + def estimated_runtime(self): + return 8 # This test might take up to 8 seconds + + def setup(self): + super().setup() + self.dim = 32 + self.count = 10000 + self.test_key = f"{self.test_key}:selectivity" # Use a different key + + def create_vector_with_age_attribute(self, name, age): + """Create a vector with a specific age attribute""" + vec = generate_random_vector(self.dim) + vec_bytes = struct.pack(f'{self.dim}f', *vec) + self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, name) + self.redis.execute_command('VSETATTR', self.test_key, name, json.dumps({"age": age})) + + def test(self): + print("\nRunning VSIM FILTER selectivity benchmark...") + + # Create a dataset where we control the exact selectivity + print(f"Creating controlled dataset with {self.count} vectors...") + + # Create vectors with age attributes from 1 to 100 + for i in range(self.count): + age = (i % 100) + 1 # Ages from 1 to 100 + name = f"{self.test_key}:item:{i}" + self.create_vector_with_age_attribute(name, age) + + # Create a query vector + query_vec = generate_random_vector(self.dim) + + # Test filters with different selectivities + selectivities = [0.01, 0.05, 0.10, 0.25, 0.50, 0.75, 0.99] + results = [] + + print("\nSelectivity | Filter | Results | Time (ms)") + print("--------------------------------------------------") + + for target_selectivity in selectivities: + # Calculate age threshold for desired selectivity + # For example, age <= 10 gives 10% selectivity + age_threshold = int(target_selectivity * 100) + filter_expr = f'.age <= {age_threshold}' + + # Run query and measure time + start_time = time.time() + cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim] + cmd_args.extend([str(x) for x in query_vec]) + cmd_args.extend(['COUNT', 100, 'FILTER', filter_expr]) + + 
results = self.redis.execute_command(*cmd_args) + query_time = time.time() - start_time + + actual_selectivity = len(results) / min(100, int(target_selectivity * self.count)) + print(f"{target_selectivity:.2f} | {filter_expr:15} | {len(results):7} | {query_time*1000:.1f}") + + # Add assertion to ensure reasonable performance for different selectivities + # For very selective queries (1%), we might need more exploration + if target_selectivity <= 0.05: + # For very selective queries, ensure we can find some results + assert len(results) > 0, f"No results found for {filter_expr}" + else: + # For less selective queries, performance should be reasonable + assert query_time < 1.0, f"Query too slow: {query_time:.3f}s for {filter_expr}" + + print("\nSelectivity benchmark completed successfully") + + +class VSIMFilterComparisonTest(TestCase): + def getname(self): + return "VSIM FILTER EF parameter comparison" + + def estimated_runtime(self): + return 8 # This test might take up to 8 seconds + + def setup(self): + super().setup() + self.dim = 32 + self.count = 5000 + self.test_key = f"{self.test_key}:efparams" # Use a different key + + def create_dataset(self): + """Create a dataset with specific attribute patterns for testing FILTER-EF""" + vectors = [] + names = [] + + # Create vectors with category and quality score attributes + for i in range(self.count): + vec = generate_random_vector(self.dim) + name = f"{self.test_key}:item:{i}" + + # Add vector to Redis + vec_bytes = struct.pack(f'{self.dim}f', *vec) + self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, name) + + # Create attributes - we want a very selective filter + # Only 2% of items have category=premium AND quality>90 + category = "premium" if random.random() < 0.1 else random.choice(["standard", "economy", "basic"]) + quality = random.randint(1, 100) + + attrs = { + "id": i, + "category": category, + "quality": quality + } + + self.redis.execute_command('VSETATTR', self.test_key, name, 
class VSIMFilterComparisonTest(TestCase):
    """Compare VSIM result counts across FILTER-EF values on a dataset
    where the filter is very selective."""

    def getname(self):
        return "VSIM FILTER EF parameter comparison"

    def estimated_runtime(self):
        return 8  # This test might take up to 8 seconds

    def setup(self):
        super().setup()
        self.dim = 32
        self.count = 5000
        self.test_key = f"{self.test_key}:efparams"  # Use a different key

    def create_dataset(self):
        """Create a dataset with specific attribute patterns for testing FILTER-EF.

        ~10% of items are "premium" and quality is uniform in 1..100, so
        roughly 1% of items satisfy category == "premium" AND quality > 90 —
        a very selective filter."""
        vectors = []
        names = []

        for i in range(self.count):
            vec = generate_random_vector(self.dim)
            name = f"{self.test_key}:item:{i}"

            # Add vector to Redis
            vec_bytes = struct.pack(f'{self.dim}f', *vec)
            self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, name)

            category = "premium" if random.random() < 0.1 else random.choice(["standard", "economy", "basic"])
            quality = random.randint(1, 100)

            attrs = {
                "id": i,
                "category": category,
                "quality": quality
            }

            self.redis.execute_command('VSETATTR', self.test_key, name, json.dumps(attrs))
            vectors.append(vec)
            names.append(name)

        return vectors, names

    def test(self):
        print("\nRunning VSIM FILTER-EF parameter comparison...")

        # Create dataset
        vectors, names = self.create_dataset()

        # A selective filter that matches ~1% of items
        filter_expr = '.category == "premium" and .quality > 90'

        # Create query vector
        query_vec = generate_random_vector(self.dim)

        # Test different FILTER-EF values
        ef_values = [50, 100, 500, 1000, 5000]
        results = []

        print("\nFILTER-EF | Results | Time (ms) | Notes")
        print("---------------------------------------")

        baseline_count = None

        for ef in ef_values:
            # Run query and measure time
            start_time = time.time()
            cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
            cmd_args.extend([str(x) for x in query_vec])
            cmd_args.extend(['COUNT', 100, 'FILTER', filter_expr, 'FILTER-EF', ef])

            query_results = self.redis.execute_command(*cmd_args)
            query_time = time.time() - start_time

            # NOTE: the reference count is taken from the FIRST (lowest EF)
            # run, even though the printed label marks the ef=5000 row as
            # "Baseline". Kept as-is to preserve the report's output.
            if baseline_count is None:
                baseline_count = len(query_results)

            recall_rate = len(query_results) / max(1, baseline_count) if baseline_count > 0 else 1.0

            notes = ""
            if ef == 5000:
                notes = "Baseline"
            elif recall_rate < 0.5:
                notes = "Low recall!"

            print(f"{ef:9} | {len(query_results):7} | {query_time*1000:.1f} | {notes}")
            results.append((ef, len(query_results), query_time))

        # If we have enough results at highest EF, check that recall improves
        # with higher EF (soft assertion to avoid flaky tests).
        if results[-1][1] >= 5:  # At least 5 results for highest EF
            result_counts = [r[1] for r in results]
            assert result_counts[-1] >= result_counts[0], \
                f"Higher FILTER-EF should find at least as many results: {result_counts[-1]} vs {result_counts[0]}"

        print("\nFILTER-EF parameter comparison completed successfully")
from test import TestCase, fill_redis_with_vectors, generate_random_vector
import random

class LargeScale(TestCase):
    """Compare HNSW search results against a brute-force linear scan on a
    large dataset, requiring 70% top-k overlap and matching scores."""

    def getname(self):
        return "Large Scale Comparison"

    def estimated_runtime(self):
        return 10

    def test(self):
        dim, count, k = 300, 20000, 50

        # Deterministic dataset, then keep the reference copy for checking.
        random.seed(42)  # Make test deterministic
        data = fill_redis_with_vectors(self.redis, self.test_key, count, dim)

        query_vec = generate_random_vector(dim)

        # Query Redis with a generous exploration factor.
        raw = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
                                         *[str(x) for x in query_vec],
                                         'COUNT', k, 'WITHSCORES', 'EF', 500)

        # Reply is a flat [name, score, name, score, ...] list.
        redis_results = {name.decode(): float(score)
                         for name, score in zip(raw[::2], raw[1::2])}

        # Ground truth from the linear scan.
        linear_items = dict(data.find_k_nearest(query_vec, k))

        common = set(redis_results) & set(linear_items)
        overlap = len(common)

        # Dump a side-by-side comparison before failing, to aid debugging.
        if overlap < k * 0.7:
            data.print_comparison({'items': redis_results, 'query_vector': query_vec}, k)

        assert overlap >= k * 0.7, \
            f"Expected at least 70% overlap in top {k} results, got {overlap/k*100:.1f}%"

        # Scores reported by Redis must agree with the reference math.
        for item in common:
            redis_score = redis_results[item]
            linear_score = linear_items[item]
            assert abs(redis_score - linear_score) < 0.01, \
                f"Score mismatch for {item}: Redis={redis_score:.3f} Linear={linear_score:.3f}"
+            # Add more attributes to increase complexity
+            self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:1', '{"color":"blue","size":10}')
+
+            # Check memory usage again
+            new_memory_usage = self.redis.execute_command('MEMORY', 'USAGE', self.test_key)
+            assert new_memory_usage > 0, "MEMORY USAGE should still return a positive value after setting attributes"
+
+            # Memory usage should be higher after adding attributes
+            assert new_memory_usage > memory_usage, "Memory usage should increase after adding attributes"
+
+        except Exception as e:  # NOTE(review): this also re-wraps the asserts above
+            # Chain with "from e" so the original traceback is not masked.
+            raise AssertionError(f"MEMORY USAGE command failed: {str(e)}") from e
diff --git a/examples/redis-unstable/modules/vector-sets/tests/node_update.py b/examples/redis-unstable/modules/vector-sets/tests/node_update.py
new file mode 100644
index 0000000..53aa2dd
--- /dev/null
+++ b/examples/redis-unstable/modules/vector-sets/tests/node_update.py
@@ -0,0 +1,85 @@
+from test import TestCase, generate_random_vector
+import struct
+import math
+import random
+
+class VectorUpdateAndClusters(TestCase):
+    def getname(self):
+        return "VADD vector update with cluster relocation"
+
+    def estimated_runtime(self):
+        return 2.0 # Should take around 2 seconds
+
+    def generate_cluster_vector(self, base_vec, noise=0.1):
+        """Generate a vector that's similar to base_vec with some noise."""
+        vec = [x + random.gauss(0, noise) for x in base_vec]
+        # Normalize
+        norm = math.sqrt(sum(x*x for x in vec))
+        return [x/norm for x in vec]
+
+    def test(self):
+        dim = 128
+        vectors_per_cluster = 5000
+
+        # Create two very different base vectors for our clusters
+        cluster1_base = generate_random_vector(dim)
+        cluster2_base = [-x for x in cluster1_base] # Opposite direction
+
+        # Add vectors from first cluster
+        for i in range(vectors_per_cluster):
+            vec = self.generate_cluster_vector(cluster1_base)
+            vec_bytes = struct.pack(f'{dim}f', *vec)
+            self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes,
+                                       f'{self.test_key}:cluster1:{i}')
+
+        # Add vectors from second cluster
+        for i in range(vectors_per_cluster):
+            vec = self.generate_cluster_vector(cluster2_base)
+            vec_bytes = struct.pack(f'{dim}f', *vec)
+            self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes,
+                                       f'{self.test_key}:cluster2:{i}')
+
+        # Pick a test vector from cluster1
+        test_key = f'{self.test_key}:cluster1:0'  # local item name; distinct from self.test_key
+
+        # Verify it's in cluster1 using VSIM
+        initial_vec = self.generate_cluster_vector(cluster1_base)
+        results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
+                                             *[str(x) for x in initial_vec],
+                                             'COUNT', 100, 'WITHSCORES')
+
+        # Count how many cluster1 items are in top results
+        cluster1_count = sum(1 for i in range(0, len(results), 2)
+                             if b'cluster1' in results[i])
+        assert cluster1_count > 80, "Initial clustering check failed"
+
+        # Now update the test vector to be in cluster2
+        new_vec = self.generate_cluster_vector(cluster2_base, noise=0.05)
+        vec_bytes = struct.pack(f'{dim}f', *new_vec)
+        self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, test_key)
+
+        # Verify the embedding was actually updated using VEMB
+        emb_result = self.redis.execute_command('VEMB', self.test_key, test_key)
+        updated_vec = [float(x) for x in emb_result]
+
+        # Verify updated vector matches what we inserted
+        dot_product = sum(a*b for a,b in zip(updated_vec, new_vec))
+        similarity = dot_product / (math.sqrt(sum(x*x for x in updated_vec)) *
+                                    math.sqrt(sum(x*x for x in new_vec)))
+        assert similarity > 0.9, "Vector was not properly updated"
+
+        # Verify it's now in cluster2 using VSIM
+        results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
+                                             *[str(x) for x in cluster2_base],
+                                             'COUNT', 100, 'WITHSCORES')
+
+        # Verify our updated vector is among top results
+        found = False
+        for i in range(0, len(results), 2):
+            if results[i].decode() == test_key:
+                found = True
+                similarity = float(results[i+1])
+                assert similarity > 0.80, f"Updated vector has low similarity: {similarity}"
+                break
+
assert found, "Updated vector not found in cluster2 proximity" diff --git a/examples/redis-unstable/modules/vector-sets/tests/persistence.py b/examples/redis-unstable/modules/vector-sets/tests/persistence.py new file mode 100644 index 0000000..79730f4 --- /dev/null +++ b/examples/redis-unstable/modules/vector-sets/tests/persistence.py @@ -0,0 +1,86 @@ +from test import TestCase, fill_redis_with_vectors, generate_random_vector +import random + +class HNSWPersistence(TestCase): + def getname(self): + return "HNSW Persistence" + + def estimated_runtime(self): + return 30 + + def _verify_results(self, key, dim, query_vec, reduced_dim=None): + """Run a query and return results dict""" + k = 10 + args = ['VSIM', key] + + if reduced_dim: + args.extend(['VALUES', dim]) + args.extend([str(x) for x in query_vec]) + else: + args.extend(['VALUES', dim]) + args.extend([str(x) for x in query_vec]) + + args.extend(['COUNT', k, 'WITHSCORES']) + results = self.redis.execute_command(*args) + + results_dict = {} + for i in range(0, len(results), 2): + key = results[i].decode() + score = float(results[i+1]) + results_dict[key] = score + return results_dict + + def test(self): + # Setup dimensions + dim = 128 + reduced_dim = 32 + count = 5000 + random.seed(42) + + # Create two datasets - one normal and one with dimension reduction + normal_data = fill_redis_with_vectors(self.redis, f"{self.test_key}:normal", count, dim) + projected_data = fill_redis_with_vectors(self.redis, f"{self.test_key}:projected", + count, dim, reduced_dim) + + # Generate query vectors we'll use before and after reload + query_vec_normal = generate_random_vector(dim) + query_vec_projected = generate_random_vector(dim) + + # Get initial results for both sets + initial_normal = self._verify_results(f"{self.test_key}:normal", + dim, query_vec_normal) + initial_projected = self._verify_results(f"{self.test_key}:projected", + dim, query_vec_projected, reduced_dim) + + # Force Redis to save and reload the dataset + 
self.redis.execute_command('DEBUG', 'RELOAD') + + # Verify results after reload + reloaded_normal = self._verify_results(f"{self.test_key}:normal", + dim, query_vec_normal) + reloaded_projected = self._verify_results(f"{self.test_key}:projected", + dim, query_vec_projected, reduced_dim) + + # Verify normal vectors results + assert len(initial_normal) == len(reloaded_normal), \ + "Normal vectors: Result count mismatch before/after reload" + + for key in initial_normal: + assert key in reloaded_normal, f"Normal vectors: Missing item after reload: {key}" + assert abs(initial_normal[key] - reloaded_normal[key]) < 0.0001, \ + f"Normal vectors: Score mismatch for {key}: " + \ + f"before={initial_normal[key]:.6f}, after={reloaded_normal[key]:.6f}" + + # Verify projected vectors results + assert len(initial_projected) == len(reloaded_projected), \ + "Projected vectors: Result count mismatch before/after reload" + + for key in initial_projected: + assert key in reloaded_projected, \ + f"Projected vectors: Missing item after reload: {key}" + assert abs(initial_projected[key] - reloaded_projected[key]) < 0.0001, \ + f"Projected vectors: Score mismatch for {key}: " + \ + f"before={initial_projected[key]:.6f}, after={reloaded_projected[key]:.6f}" + + self.redis.delete(f"{self.test_key}:normal") + self.redis.delete(f"{self.test_key}:projected") diff --git a/examples/redis-unstable/modules/vector-sets/tests/reduce.py b/examples/redis-unstable/modules/vector-sets/tests/reduce.py new file mode 100644 index 0000000..e39164f --- /dev/null +++ b/examples/redis-unstable/modules/vector-sets/tests/reduce.py @@ -0,0 +1,71 @@ +from test import TestCase, fill_redis_with_vectors, generate_random_vector + +class Reduce(TestCase): + def getname(self): + return "Dimension Reduction" + + def estimated_runtime(self): + return 0.2 + + def test(self): + original_dim = 100 + reduced_dim = 80 + count = 1000 + k = 50 # Number of nearest neighbors to check + + # Fill Redis with vectors using REDUCE and 
get reference data + data = fill_redis_with_vectors(self.redis, self.test_key, count, original_dim, reduced_dim) + + # Verify dimension is reduced + dim = self.redis.execute_command('VDIM', self.test_key) + assert dim == reduced_dim, f"Expected dimension {reduced_dim}, got {dim}" + + # Generate query vector and get nearest neighbors using Redis + query_vec = generate_random_vector(original_dim) + redis_raw = self.redis.execute_command('VSIM', self.test_key, 'VALUES', + original_dim, *[str(x) for x in query_vec], + 'COUNT', k, 'WITHSCORES') + + # Convert Redis results to dict + redis_results = {} + for i in range(0, len(redis_raw), 2): + key = redis_raw[i].decode() + score = float(redis_raw[i+1]) + redis_results[key] = score + + # Get results from linear scan with original vectors + linear_results = data.find_k_nearest(query_vec, k) + linear_items = {name: score for name, score in linear_results} + + # Compare overlap between reduced and non-reduced results + redis_set = set(redis_results.keys()) + linear_set = set(linear_items.keys()) + overlap = len(redis_set & linear_set) + overlap_ratio = overlap / k + + # With random projection, we expect some loss of accuracy but should + # maintain at least some similarity structure. + # Note that gaussian distribution is the worse with this test, so + # in real world practice, things will be better. + min_expected_overlap = 0.1 # At least 10% overlap in top-k + assert overlap_ratio >= min_expected_overlap, \ + f"Dimension reduction lost too much structure. 
Only {overlap_ratio*100:.1f}% overlap in top {k}" + + # For items that appear in both results, scores should be reasonably correlated + common_items = redis_set & linear_set + for item in common_items: + redis_score = redis_results[item] + linear_score = linear_items[item] + # Allow for some deviation due to dimensionality reduction + assert abs(redis_score - linear_score) < 0.2, \ + f"Score mismatch too high for {item}: Redis={redis_score:.3f} Linear={linear_score:.3f}" + + # If test fails, print comparison for debugging + if overlap_ratio < min_expected_overlap: + print("\nLow overlap in results. Details:") + print("\nTop results from linear scan (original vectors):") + for name, score in linear_results: + print(f"{name}: {score:.3f}") + print("\nTop results from Redis (reduced vectors):") + for item, score in sorted(redis_results.items(), key=lambda x: x[1], reverse=True): + print(f"{item}: {score:.3f}") diff --git a/examples/redis-unstable/modules/vector-sets/tests/replication.py b/examples/redis-unstable/modules/vector-sets/tests/replication.py new file mode 100644 index 0000000..91dfdf7 --- /dev/null +++ b/examples/redis-unstable/modules/vector-sets/tests/replication.py @@ -0,0 +1,92 @@ +from test import TestCase, generate_random_vector +import struct +import random +import time + +class ComprehensiveReplicationTest(TestCase): + def getname(self): + return "Comprehensive Replication Test with mixed operations" + + def estimated_runtime(self): + # This test will take longer than the default 100ms + return 20.0 # 20 seconds estimate + + def test(self): + # Setup replication between primary and replica + assert self.setup_replication(), "Failed to setup replication" + + # Test parameters + num_vectors = 5000 + vector_dim = 8 + delete_probability = 0.1 + cas_probability = 0.3 + + # Keep track of added items for potential deletion + added_items = [] + + # Add vectors and occasionally delete + for i in range(num_vectors): + # Generate a random vector + vec = 
generate_random_vector(vector_dim) + vec_bytes = struct.pack(f'{vector_dim}f', *vec) + item_name = f"{self.test_key}:item:{i}" + + # Decide whether to use CAS or not + use_cas = random.random() < cas_probability + + if use_cas and added_items: + # Get an existing item for CAS reference (if available) + cas_item = random.choice(added_items) + try: + # Add with CAS + result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, + item_name, 'CAS') + # Only add to our list if actually added (CAS might fail) + if result == 1: + added_items.append(item_name) + except Exception as e: + print(f" CAS VADD failed: {e}") + else: + try: + # Add without CAS + result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, item_name) + # Only add to our list if actually added + if result == 1: + added_items.append(item_name) + except Exception as e: + print(f" VADD failed: {e}") + + # Randomly delete items (with 10% probability) + if random.random() < delete_probability and added_items: + try: + # Select a random item to delete + item_to_delete = random.choice(added_items) + # Delete the item using VREM (not VDEL) + self.redis.execute_command('VREM', self.test_key, item_to_delete) + # Remove from our list + added_items.remove(item_to_delete) + except Exception as e: + print(f" VREM failed: {e}") + + # Allow time for replication to complete + time.sleep(2.0) + + # Verify final VCARD matches + primary_card = self.redis.execute_command('VCARD', self.test_key) + replica_card = self.replica.execute_command('VCARD', self.test_key) + assert primary_card == replica_card, f"Final VCARD mismatch: primary={primary_card}, replica={replica_card}" + + # Verify VDIM matches + primary_dim = self.redis.execute_command('VDIM', self.test_key) + replica_dim = self.replica.execute_command('VDIM', self.test_key) + assert primary_dim == replica_dim, f"VDIM mismatch: primary={primary_dim}, replica={replica_dim}" + + # Verify digests match using DEBUG DIGEST + 
primary_digest = self.redis.execute_command('DEBUG', 'DIGEST-VALUE', self.test_key) + replica_digest = self.replica.execute_command('DEBUG', 'DIGEST-VALUE', self.test_key) + assert primary_digest == replica_digest, f"Digest mismatch: primary={primary_digest}, replica={replica_digest}" + + # Print summary + print(f"\n Added and maintained {len(added_items)} vectors with dimension {vector_dim}") + print(f" Final vector count: {primary_card}") + print(f" Final digest: {primary_digest[0].decode()}") diff --git a/examples/redis-unstable/modules/vector-sets/tests/threading_config.py b/examples/redis-unstable/modules/vector-sets/tests/threading_config.py new file mode 100644 index 0000000..dfc931a --- /dev/null +++ b/examples/redis-unstable/modules/vector-sets/tests/threading_config.py @@ -0,0 +1,249 @@ +from test import TestCase, generate_random_vector +import struct + + +class ThreadingConfigTest(TestCase): + """ + Test suite for vset-force-single-threaded-execution configuration. + + This test validates the behavior of VADD and VSIM commands under different + threading configurations. 
The new configuration is MUTABLE and BINARY: + - false (0): Multi-threaded execution enabled (default) + - true (1): Force single-threaded execution + + Key behaviors tested: + - VADD with and without CAS option under both threading modes + - VSIM with and without NOTHREAD option under both threading modes + - Configuration reading, validation, and runtime modification + - Thread behavior switching (multi-threaded vs forced single-threaded) + """ + + def getname(self): + return "vset-force-single-threaded-execution configuration testing" + + def estimated_runtime(self): + return 0.5 # Updated for mutable config testing with mode switching + + def get_config_value(self): + """Get current vset-force-single-threaded-execution config value""" + try: + result = self.redis.execute_command('CONFIG', 'GET', 'vset-force-single-threaded-execution') + if len(result) >= 2: + # Redis returns 'yes'/'no' for boolean configs + return result[1].decode() if isinstance(result[1], bytes) else result[1] + return None + except Exception: + return None + + def set_config_value(self, value): + """Set vset-force-single-threaded-execution config value""" + try: + # Convert boolean to yes/no string + str_value = 'yes' if value else 'no' + result = self.redis.execute_command('CONFIG', 'SET', 'vset-force-single-threaded-execution', str_value) + return result == b'OK' or result == 'OK' + except Exception as e: + print(f"Failed to set config: {e}") + return False + + def test_config_access_and_mutability(self): + """Test 1: Configuration access and mutability""" + # Get initial value + initial_value = self.get_config_value() + assert initial_value is not None, "Should be able to read vset-force-single-threaded-execution config" + assert initial_value in ['yes', 'no'], f"Config value should be yes/no, got {initial_value}" + + # Test mutability by toggling the value + new_value = 'no' if initial_value == 'yes' else 'yes' + assert self.set_config_value(new_value == 'yes'), "Should be able to change 
config value" + + # Verify the change + current_value = self.get_config_value() + assert current_value == new_value, f"Config should be {new_value}, got {current_value}" + + # Restore original value + assert self.set_config_value(initial_value == 'yes'), "Should be able to restore original value" + + return initial_value == 'yes' + + def test_vadd_without_cas(self, force_single_threaded=False): + """Test 2: VADD command without CAS option""" + # Set threading mode + self.set_config_value(force_single_threaded) + + # Clear test data to avoid dimension conflicts + self.redis.delete(self.test_key) + + dim = 64 + vec = generate_random_vector(dim) + vec_bytes = struct.pack(f'{dim}f', *vec) + + result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:1') + assert result == 1, f"VADD should return 1 for new item, got {result}" + + # Verify the vector was added + card = self.redis.execute_command('VCARD', self.test_key) + assert card == 1, f"VCARD should return 1, got {card}" + + def test_vadd_with_cas(self, force_single_threaded=False): + """Test 3: VADD command with CAS option""" + # Set threading mode + self.set_config_value(force_single_threaded) + + # Clear test data to avoid dimension conflicts + self.redis.delete(self.test_key) + + dim = 64 + vec = generate_random_vector(dim) + vec_bytes = struct.pack(f'{dim}f', *vec) + + # First insertion with CAS should succeed + result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:cas', 'CAS') + assert result == 1, f"First VADD with CAS should return 1, got {result}" + + # Second insertion of same item with CAS should return 0 + result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:cas', 'CAS') + assert result == 0, f"Duplicate VADD with CAS should return 0, got {result}" + + def test_vsim_without_nothread(self, force_single_threaded=False): + """Test 4: VSIM command without NOTHREAD""" + # 
Set threading mode + self.set_config_value(force_single_threaded) + + # Clear test data to avoid dimension conflicts + self.redis.delete(self.test_key) + + dim = 64 + + # Add test vectors + for i in range(5): + vec = generate_random_vector(dim) + vec_bytes = struct.pack(f'{dim}f', *vec) + self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:{i}') + + # Test VSIM without NOTHREAD + query_vec = generate_random_vector(dim) + args = ['VSIM', self.test_key, 'VALUES', dim] + [str(x) for x in query_vec] + ['COUNT', 3] + result = self.redis.execute_command(*args) + + assert isinstance(result, list), f"VSIM should return a list, got {type(result)}" + assert len(result) <= 3, f"VSIM should return at most 3 results, got {len(result)}" + + def test_vsim_with_nothread(self, force_single_threaded=False): + """Test 5: VSIM command with NOTHREAD""" + # Set threading mode + self.set_config_value(force_single_threaded) + + dim = 64 + + # Ensure we have vectors to search (use existing vectors from previous test) + card = self.redis.execute_command('VCARD', self.test_key) + if card == 0: + # Add test vectors if none exist + for i in range(5): + vec = generate_random_vector(dim) + vec_bytes = struct.pack(f'{dim}f', *vec) + self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:{i}') + + # Test VSIM with NOTHREAD + query_vec = generate_random_vector(dim) + args = ['VSIM', self.test_key, 'VALUES', dim] + [str(x) for x in query_vec] + ['COUNT', 3, 'NOTHREAD'] + result = self.redis.execute_command(*args) + + assert isinstance(result, list), f"VSIM with NOTHREAD should return a list, got {type(result)}" + assert len(result) <= 3, f"VSIM with NOTHREAD should return at most 3 results, got {len(result)}" + + def test_threading_mode_comparison(self): + """Test 6: Compare behavior between threading modes""" + dim = 64 + + # Clear test data + self.redis.delete(self.test_key) + + # Test multi-threaded mode (default) + 
self.set_config_value(False) # Multi-threaded + self.test_vadd_without_cas(False) + self.test_vadd_with_cas(False) + multi_threaded_card = self.redis.execute_command('VCARD', self.test_key) + + # Clear and test single-threaded mode + self.redis.delete(self.test_key) + self.set_config_value(True) # Single-threaded + self.test_vadd_without_cas(True) + self.test_vadd_with_cas(True) + single_threaded_card = self.redis.execute_command('VCARD', self.test_key) + + # Both modes should produce same results + assert multi_threaded_card == single_threaded_card, \ + f"Both modes should produce same results: multi={multi_threaded_card}, single={single_threaded_card}" + + def test_nothread_override_behavior(self): + """Test 7: NOTHREAD option should work regardless of config""" + dim = 64 + + # Test with both config modes + for force_single in [False, True]: + self.set_config_value(force_single) + self.redis.delete(self.test_key) + + # Add test vectors + for i in range(3): + vec = generate_random_vector(dim) + vec_bytes = struct.pack(f'{dim}f', *vec) + self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:{i}') + + # NOTHREAD should work regardless of config + query_vec = generate_random_vector(dim) + args = ['VSIM', self.test_key, 'VALUES', dim] + [str(x) for x in query_vec] + ['COUNT', 2, 'NOTHREAD'] + result = self.redis.execute_command(*args) + + assert isinstance(result, list), f"NOTHREAD should work with force_single={force_single}" + assert len(result) <= 2, f"NOTHREAD should return ≤2 results with force_single={force_single}" + + def test(self): + """Main test method - runs all threading configuration tests""" + # Get initial configuration + initial_force_single = self.test_config_access_and_mutability() + print(f"Initial vset-force-single-threaded-execution: {'yes' if initial_force_single else 'no'}") + + # Clear test data + self.redis.delete(self.test_key) + + # Test both threading modes + print("Testing multi-threaded mode...") + 
self.set_config_value(False) + self.test_vadd_without_cas(False) + self.test_vadd_with_cas(False) + self.test_vsim_without_nothread(False) + self.test_vsim_with_nothread(False) + + print("Testing single-threaded mode...") + self.set_config_value(True) + self.test_vadd_without_cas(True) + self.test_vadd_with_cas(True) + self.test_vsim_without_nothread(True) + self.test_vsim_with_nothread(True) + + # Test mode comparison and NOTHREAD override + self.test_threading_mode_comparison() + self.test_nothread_override_behavior() + + # Restore initial configuration + self.set_config_value(initial_force_single) + + # Print summary + self._print_test_summary(initial_force_single) + + def _print_test_summary(self, initial_force_single): + """Print a summary of what was tested""" + print(f"\nThreading Configuration Test Summary:") + print(f" Configuration: vset-force-single-threaded-execution") + print(f" Type: Boolean, Mutable") + print(f" Initial value: {'yes' if initial_force_single else 'no'}") + print(f" Tested modes: Both multi-threaded (no) and single-threaded (yes)") + print(f" VADD: Works correctly in both modes") + print(f" VADD with CAS: Works correctly in both modes") + print(f" VSIM: Works correctly in both modes") + print(f" NOTHREAD option: Overrides config in both modes") + print(f" Configuration mutability: ✅ Successfully changed at runtime") + print(f" All tests passed successfully!") diff --git a/examples/redis-unstable/modules/vector-sets/tests/vadd_cas.py b/examples/redis-unstable/modules/vector-sets/tests/vadd_cas.py new file mode 100644 index 0000000..3cb3508 --- /dev/null +++ b/examples/redis-unstable/modules/vector-sets/tests/vadd_cas.py @@ -0,0 +1,98 @@ +from test import TestCase, generate_random_vector +import threading +import struct +import math +import time +import random +from typing import List, Dict + +class ConcurrentCASTest(TestCase): + def getname(self): + return "Concurrent VADD with CAS" + + def estimated_runtime(self): + return 1.5 + + def 
worker(self, vectors: List[List[float]], start_idx: int, end_idx: int, + dim: int, results: Dict[str, bool]): + """Worker thread that adds a subset of vectors using VADD CAS""" + for i in range(start_idx, end_idx): + vec = vectors[i] + name = f"{self.test_key}:item:{i}" + vec_bytes = struct.pack(f'{dim}f', *vec) + + # Try to add the vector with CAS + try: + result = self.redis.execute_command('VADD', self.test_key, 'FP32', + vec_bytes, name, 'CAS') + results[name] = (result == 1) # Store if it was actually added + except Exception as e: + results[name] = False + print(f"Error adding {name}: {e}") + + def verify_vector_similarity(self, vec1: List[float], vec2: List[float]) -> float: + """Calculate cosine similarity between two vectors""" + dot_product = sum(a*b for a,b in zip(vec1, vec2)) + norm1 = math.sqrt(sum(x*x for x in vec1)) + norm2 = math.sqrt(sum(x*x for x in vec2)) + return dot_product / (norm1 * norm2) if norm1 > 0 and norm2 > 0 else 0 + + def test(self): + # Test parameters + dim = 128 + total_vectors = 5000 + num_threads = 8 + vectors_per_thread = total_vectors // num_threads + + # Generate all vectors upfront + random.seed(42) # For reproducibility + vectors = [generate_random_vector(dim) for _ in range(total_vectors)] + + # Prepare threads and results dictionary + threads = [] + results = {} # Will store success/failure for each vector + + # Launch threads + for i in range(num_threads): + start_idx = i * vectors_per_thread + end_idx = start_idx + vectors_per_thread if i < num_threads-1 else total_vectors + thread = threading.Thread(target=self.worker, + args=(vectors, start_idx, end_idx, dim, results)) + threads.append(thread) + thread.start() + + # Wait for all threads to complete + for thread in threads: + thread.join() + + # Verify cardinality + card = self.redis.execute_command('VCARD', self.test_key) + assert card == total_vectors, \ + f"Expected {total_vectors} elements, but found {card}" + + # Verify each vector + num_verified = 0 + for i in 
range(total_vectors): + name = f"{self.test_key}:item:{i}" + + # Verify the item was successfully added + assert results[name], f"Vector {name} was not successfully added" + + # Get the stored vector + stored_vec_raw = self.redis.execute_command('VEMB', self.test_key, name) + stored_vec = [float(x) for x in stored_vec_raw] + + # Verify vector dimensions + assert len(stored_vec) == dim, \ + f"Stored vector dimension mismatch for {name}: {len(stored_vec)} != {dim}" + + # Calculate similarity with original vector + similarity = self.verify_vector_similarity(vectors[i], stored_vec) + assert similarity > 0.99, \ + f"Low similarity ({similarity}) for {name}" + + num_verified += 1 + + # Final verification + assert num_verified == total_vectors, \ + f"Only verified {num_verified} out of {total_vectors} vectors" diff --git a/examples/redis-unstable/modules/vector-sets/tests/vemb.py b/examples/redis-unstable/modules/vector-sets/tests/vemb.py new file mode 100644 index 0000000..0f4cf77 --- /dev/null +++ b/examples/redis-unstable/modules/vector-sets/tests/vemb.py @@ -0,0 +1,41 @@ +from test import TestCase +import struct +import math + +class VEMB(TestCase): + def getname(self): + return "VEMB Command" + + def test(self): + dim = 4 + + # Add same vector in both formats + vec = [1, 0, 0, 0] + norm = math.sqrt(sum(x*x for x in vec)) + vec = [x/norm for x in vec] # Normalize the vector + + # Add using FP32 + vec_bytes = struct.pack(f'{dim}f', *vec) + self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:1') + + # Add using VALUES + self.redis.execute_command('VADD', self.test_key, 'VALUES', dim, + *[str(x) for x in vec], f'{self.test_key}:item:2') + + # Get both back with VEMB + result1 = self.redis.execute_command('VEMB', self.test_key, f'{self.test_key}:item:1') + result2 = self.redis.execute_command('VEMB', self.test_key, f'{self.test_key}:item:2') + + retrieved_vec1 = [float(x) for x in result1] + retrieved_vec2 = [float(x) for x in 
result2] + + # Compare both vectors with original (allow for small quantization errors) + for i in range(dim): + assert abs(vec[i] - retrieved_vec1[i]) < 0.01, \ + f"FP32 vector component {i} mismatch: expected {vec[i]}, got {retrieved_vec1[i]}" + assert abs(vec[i] - retrieved_vec2[i]) < 0.01, \ + f"VALUES vector component {i} mismatch: expected {vec[i]}, got {retrieved_vec2[i]}" + + # Test non-existent item + result = self.redis.execute_command('VEMB', self.test_key, 'nonexistent') + assert result is None, "Non-existent item should return nil" diff --git a/examples/redis-unstable/modules/vector-sets/tests/vismember.py b/examples/redis-unstable/modules/vector-sets/tests/vismember.py new file mode 100644 index 0000000..eabebca --- /dev/null +++ b/examples/redis-unstable/modules/vector-sets/tests/vismember.py @@ -0,0 +1,47 @@ +from test import TestCase, generate_random_vector +import struct + +class BasicVISMEMBER(TestCase): + def getname(self): + return "VISMEMBER basic functionality" + + def test(self): + # Add multiple vectors to the vector set + vec1 = generate_random_vector(4) + vec2 = generate_random_vector(4) + vec_bytes1 = struct.pack('4f', *vec1) + vec_bytes2 = struct.pack('4f', *vec2) + + # Create item keys + item1 = f'{self.test_key}:item:1' + item2 = f'{self.test_key}:item:2' + nonexistent_item = f'{self.test_key}:item:nonexistent' + + # Add the vectors + self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes1, item1) + self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes2, item2) + + # Test VISMEMBER with existing elements + result1 = self.redis.execute_command('VISMEMBER', self.test_key, item1) + assert result1 == 1, f"VISMEMBER should return 1 for existing item, got {result1}" + + result2 = self.redis.execute_command('VISMEMBER', self.test_key, item2) + assert result2 == 1, f"VISMEMBER should return 1 for existing item, got {result2}" + + # Test VISMEMBER with non-existent element + result3 = 
class VRANDMEMBERPingPongRegressionTest(TestCase):
    def getname(self):
        return "[regression] VRANDMEMBER ping-pong"

    def test(self):
        """
        Regression check for the "ping-pong" bug: with exactly two
        elements in the set, repeated single-member VRANDMEMBER calls
        must eventually return both of them instead of getting stuck
        on one.
        """
        self.redis.delete(self.test_key)  # Clean up before test
        dim = 4

        # Populate the set with exactly two random vectors.
        for name in ("vec1", "vec2"):
            payload = generate_random_vector(dim)
            self.redis.execute_command('VADD', self.test_key, 'VALUES', dim,
                                       *payload, name)

        # Sample many single random members.
        iterations = 100
        results = [
            self.redis.execute_command('VRANDMEMBER', self.test_key).decode()
            for _ in range(iterations)
        ]

        # Both members must show up, proving the sampler is not stuck.
        unique_results = set(results)
        assert len(unique_results) == 2, f"Ping-pong test failed: should have returned 2 unique members, but got {len(unique_results)}."
class VRANDMEMBERTest(TestCase):
    def getname(self):
        return "VRANDMEMBER basic functionality"

    def test(self):
        """Cover VRANDMEMBER on a missing key and with no/positive/negative/zero count."""
        key = self.test_key

        # Missing key: no-count form yields NULL, count form an empty array.
        result = self.redis.execute_command('VRANDMEMBER', key)
        assert result is None, "VRANDMEMBER on non-existent key should return NULL"

        result = self.redis.execute_command('VRANDMEMBER', key, 5)
        assert isinstance(result, list) and len(result) == 0, "VRANDMEMBER with count on non-existent key should return empty array"

        # Populate the set with known element names.
        dim, count = 4, 100
        data = fill_redis_with_vectors(self.redis, key, count, dim)

        # No count: a single member belonging to the set.
        result = self.redis.execute_command('VRANDMEMBER', key)
        assert result is not None, "VRANDMEMBER should return a random member"
        assert result.decode() in data.names, "Random member should be in the set"

        # Positive count: distinct members, all from the set.
        positive_count = 10
        result = self.redis.execute_command('VRANDMEMBER', key, positive_count)
        assert isinstance(result, list), "VRANDMEMBER with positive count should return an array"
        assert len(result) == positive_count, f"Should return {positive_count} members"

        decoded_results = [r.decode() for r in result]
        assert len(decoded_results) == len(set(decoded_results)), "Results should be unique with positive count"
        assert all(item in data.names for item in decoded_results), "All returned items should be in the set"

        # Positive count larger than the cardinality caps at the cardinality.
        result = self.redis.execute_command('VRANDMEMBER', key, count + 10)
        assert len(result) == count, "Should return only the available members when asking for more than exist"

        # Negative count: duplicates allowed, exactly |count| members returned.
        negative_count = -20
        result = self.redis.execute_command('VRANDMEMBER', key, negative_count)
        assert isinstance(result, list), "VRANDMEMBER with negative count should return an array"
        assert len(result) == abs(negative_count), f"Should return {abs(negative_count)} members"

        decoded_results = [r.decode() for r in result]
        assert all(item in data.names for item in decoded_results), "All returned items should be in the set"

        # count == 0 edge case: empty array.
        result = self.redis.execute_command('VRANDMEMBER', key, 0)
        assert isinstance(result, list) and len(result) == 0, "VRANDMEMBER with count=0 should return empty array"
class BasicVRANGE(TestCase):
    def getname(self):
        return "VRANGE basic functionality and iteration"

    def _vrange(self, start, end, count, key=None):
        """Run VRANGE on `key` (defaults to the test key) and return the
        reply decoded to a list of strings."""
        reply = self.redis.execute_command('VRANGE', key or self.test_key,
                                           start, end, str(count))
        return [r.decode() for r in reply]

    def _check_iteration(self, elements, batch_size):
        """Walk the whole set with repeated VRANGE calls, using an
        exclusive-start cursor, and verify every element is visited
        exactly once."""
        seen = set()
        current = '-'
        while True:
            if current == '-':
                # First iteration: start from the minimum.
                batch = self._vrange('-', '+', batch_size)
            else:
                # Subsequent iterations: exclusive start from last element.
                batch = self._vrange(f'({current}', '+', batch_size)

            if not batch:
                break

            for elem in batch:
                assert elem not in seen, f"Element {elem} appeared more than once"
                seen.add(elem)

            current = batch[-1]

            # A short batch means we hit the end of the set.
            if len(batch) < batch_size:
                break

        assert seen == set(elements), f"Iterator should visit all elements exactly once. Missing: {set(elements) - seen}, Extra: {seen - set(elements)}"

    def test(self):
        """Exercise VRANGE boundaries, '-'/'+' sentinels, counts and iteration."""
        # Element *names* drive the lexicographic order; the vector
        # contents are irrelevant for VRANGE.
        elements = [
            "apple", "apricot", "banana", "cherry", "date",
            "elderberry", "fig", "grape", "honeydew", "kiwi",
            "lemon", "mango", "nectarine", "orange", "papaya",
            "quince", "raspberry", "strawberry", "tangerine", "watermelon"
        ]
        for elem in elements:
            vec_bytes = struct.pack('4f', *generate_random_vector(4))
            self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, elem)

        # Test 1: inclusive boundaries with a count limit.
        result = self._vrange('[apple', '[grape', 5)
        assert result == ['apple', 'apricot', 'banana', 'cherry', 'date'], f"Expected first 5 elements from apple, got {result}"

        # Test 2: exclusive start boundary.
        result = self._vrange('(apple', '[cherry', 10)
        assert result == ['apricot', 'banana', 'cherry'], f"Expected elements after apple up to cherry inclusive, got {result}"

        # Test 3: exclusive end boundary.
        result = self._vrange('[banana', '(cherry', 10)
        assert result == ['banana'], f"Expected only banana (cherry excluded), got {result}"

        # Test 4: '-' is the minimum element.
        result = self._vrange('-', '[banana', 10)
        assert result[0] == 'apple', "Should start from the first element"
        assert result[-1] == 'banana', "Should end at banana"

        # Test 5: '+' is the maximum element.
        result = self._vrange('[raspberry', '+', 10)
        assert 'raspberry' in result and 'strawberry' in result and 'tangerine' in result and 'watermelon' in result, "Should include all elements from raspberry onwards"

        # Test 6: full range with '-' and '+'.
        result = self._vrange('-', '+', 100)
        assert len(result) == len(elements), f"Should return all {len(elements)} elements"
        assert result == sorted(elements), "Elements should be in lexicographical order"

        # Test 7: iterator pattern visits each element exactly once.
        self._check_iteration(elements, batch_size=3)

        # Test 8: count of 0 returns an empty array.
        result = self._vrange('-', '+', 0)
        assert result == [], f"Count of 0 should return empty array, got {result}"

        # Test 9: range beyond the last element.
        result = self._vrange('[zebra', '+', 10)
        assert result == [], f"Range beyond all elements should return empty array, got {result}"

        # Test 10: non-existent key.
        result = self._vrange('-', '+', 10, key='nonexistent_key')
        assert result == [], f"Non-existent key should return empty array, got {result}"

        # Test 11: partial-word boundaries.
        result = self._vrange('[app', '[apr', 10)
        assert 'apple' in result, "Should include 'apple' which starts with 'app'"
        assert 'apricot' not in result, "Should not include 'apricot' as it's >= 'apr'"

        # Test 12: single-element range.
        result = self._vrange('[cherry', '[cherry', 10)
        assert result == ['cherry'], f"Inclusive single element range should return that element, got {result}"

        # Test 13: inverted range (start > end) is empty.
        result = self._vrange('[grape', '[apple', 10)
        assert result == [], f"Range where start > end should return empty array, got {result}"
class VSIMLimitEFSearch(TestCase):
    def getname(self):
        return "VSIM Limit EF Search"

    def estimated_runtime(self):
        return 0.2

    def test(self):
        """Verify the EF runtime-parameter bound: EF=1000000 is accepted,
        EF=1000001 must be rejected with an 'invalid EF' error."""
        dim = 32
        vec = generate_random_vector(dim)
        vec_bytes = struct.pack(f'{dim}f', *vec)

        # Add a single vector so VSIM has something to search.
        self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:1')

        query_vec = generate_random_vector(dim)

        # Test EF upper bound (should accept 1000000).
        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
                                            *[str(x) for x in query_vec], 'EF', 1000000)
        assert isinstance(result, list), "EF=1000000 should be accepted"

        # Test EF over limit (should reject > 1000000).
        # BUGFIX: the rejection assert must live *outside* the try block.
        # With `assert False` inside it, the AssertionError itself was
        # swallowed by `except Exception` and then mis-reported as a bogus
        # "Expected EF validation error" failure.
        error = None
        try:
            self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
                                       *[str(x) for x in query_vec], 'EF', 1000001)
        except Exception as e:
            error = e
        assert error is not None, "EF=1000001 should be rejected"
        assert "invalid EF" in str(error), f"Expected EF validation error, got: {error}"
class VSIMWithAttribs(TestCase):
    def getname(self):
        return "VSIM WITHATTRIBS/WITHSCORES functionality testing"

    def setup(self):
        """Populate the set with random vectors; every 5th item (i % 5 == 0)
        deliberately gets no attribute so NULL replies can be tested."""
        super().setup()
        self.dim = 8
        self.count = 20

        for i in range(self.count):
            blob = struct.pack(f'{self.dim}f', *generate_random_vector(self.dim))
            name = f"{self.test_key}:item:{i}"
            self.redis.execute_command('VADD', self.test_key, 'FP32', blob, name)

            if i % 5 == 0:
                continue  # leave this item attribute-less

            attrs = {
                "category": random.choice(["electronics", "furniture", "clothing"]),
                "price": random.randint(50, 1000),
                "id": i,
            }
            self.redis.execute_command('VSETATTR', self.test_key, name, json.dumps(attrs))

    def is_numeric(self, value):
        """Check if a value can be converted to float"""
        if isinstance(value, (int, float)):
            return True
        try:
            if isinstance(value, bytes):
                float(value.decode('utf-8'))
                return True
            if isinstance(value, str):
                float(value)
                return True
        except (ValueError, TypeError):
            return False
        return False

    def _vsim(self, query_vec, *extra):
        """Run one VSIM query on both the RESP2 and RESP3 connections and
        return the pair of replies (resp2, resp3)."""
        args = ['VSIM', self.test_key, 'VALUES', self.dim]
        args.extend(str(x) for x in query_vec)
        args.extend(extra)
        return (self.redis.execute_command(*args),
                self.redis3.execute_command(*args))

    def test(self):
        """Check VSIM reply shapes for WITHSCORES/WITHATTRIBS under RESP2
        (flat arrays) and RESP3 (maps), and that both protocols agree."""
        query_vec = generate_random_vector(self.dim)

        # Test 1: plain VSIM looks the same under RESP2 and RESP3.
        results_resp2, results_resp3 = self._vsim(query_vec, 'COUNT', 5)
        assert len(results_resp2) == 5, f"RESP2: Expected 5 results, got {len(results_resp2)}"
        assert len(results_resp3) == 5, f"RESP3: Expected 5 results, got {len(results_resp3)}"
        assert all(isinstance(item, bytes) for item in results_resp2), "RESP2: Results should be byte strings"
        assert all(isinstance(item, bytes) for item in results_resp3), "RESP3: Results should be byte strings"

        # Test 2: WITHSCORES only.
        results_resp2, results_resp3 = self._vsim(query_vec, 'COUNT', 5, 'WITHSCORES')

        # RESP2 flattens to [item, score, item, score, ...].
        assert len(results_resp2) == 10, f"RESP2: Expected 10 elements (5 items × 2), got {len(results_resp2)}"
        for i in range(0, len(results_resp2), 2):
            assert isinstance(results_resp2[i], bytes), f"RESP2: Item at {i} should be bytes"
            assert self.is_numeric(results_resp2[i+1]), f"RESP2: Score at {i+1} should be numeric"
            score = float(results_resp2[i+1]) if isinstance(results_resp2[i+1], bytes) else results_resp2[i+1]
            assert 0 <= score <= 1, f"RESP2: Score {score} should be between 0 and 1"

        # RESP3 is a map item -> score, with the score as a direct value.
        assert isinstance(results_resp3, dict), f"RESP3: Expected dict, got {type(results_resp3)}"
        assert len(results_resp3) == 5, f"RESP3: Expected 5 entries, got {len(results_resp3)}"
        for item, score in results_resp3.items():
            assert isinstance(item, bytes), f"RESP3: Key should be bytes"
            assert not isinstance(score, list), f"RESP3: With single WITH option, value should not be array"
            assert self.is_numeric(score), f"RESP3: Score should be numeric, got {type(score)}"
            score_val = float(score) if isinstance(score, bytes) else score
            assert 0 <= score_val <= 1, f"RESP3: Score {score_val} should be between 0 and 1"

        # Test 3: WITHATTRIBS only.
        results_resp2, results_resp3 = self._vsim(query_vec, 'COUNT', 5, 'WITHATTRIBS')

        # RESP2 flattens to [item, attribute, item, attribute, ...].
        assert len(results_resp2) == 10, f"RESP2: Expected 10 elements (5 items × 2), got {len(results_resp2)}"
        for i in range(0, len(results_resp2), 2):
            assert isinstance(results_resp2[i], bytes), f"RESP2: Item at {i} should be bytes"
            attr = results_resp2[i+1]
            assert attr is None or isinstance(attr, bytes), f"RESP2: Attribute at {i+1} should be None or bytes"
            if attr is not None:
                json.loads(attr)  # must parse as JSON

        # RESP3 is a map item -> attribute, with the attribute as a direct value.
        assert isinstance(results_resp3, dict), f"RESP3: Expected dict, got {type(results_resp3)}"
        assert len(results_resp3) == 5, f"RESP3: Expected 5 entries, got {len(results_resp3)}"
        for item, attr in results_resp3.items():
            assert isinstance(item, bytes), f"RESP3: Key should be bytes"
            assert not isinstance(attr, list), f"RESP3: With single WITH option, value should not be array"
            assert attr is None or isinstance(attr, bytes), f"RESP3: Attribute should be None or bytes"
            if attr is not None:
                json.loads(attr)  # must parse as JSON

        # Test 4: WITHSCORES and WITHATTRIBS together.
        results_resp2, results_resp3 = self._vsim(query_vec, 'COUNT', 5, 'WITHSCORES', 'WITHATTRIBS')

        # RESP2 flattens to triplets [item, score, attribute, ...].
        assert len(results_resp2) == 15, f"RESP2: Expected 15 elements (5 items × 3), got {len(results_resp2)}"
        for i in range(0, len(results_resp2), 3):
            assert isinstance(results_resp2[i], bytes), f"RESP2: Item at {i} should be bytes"
            assert self.is_numeric(results_resp2[i+1]), f"RESP2: Score at {i+1} should be numeric"
            score = float(results_resp2[i+1]) if isinstance(results_resp2[i+1], bytes) else results_resp2[i+1]
            assert 0 <= score <= 1, f"RESP2: Score {score} should be between 0 and 1"
            attr = results_resp2[i+2]
            assert attr is None or isinstance(attr, bytes), f"RESP2: Attribute at {i+2} should be None or bytes"

        # RESP3 maps each item to a 2-element array [score, attribute].
        assert isinstance(results_resp3, dict), f"RESP3: Expected dict, got {type(results_resp3)}"
        assert len(results_resp3) == 5, f"RESP3: Expected 5 entries, got {len(results_resp3)}"
        for item, value in results_resp3.items():
            assert isinstance(item, bytes), f"RESP3: Key should be bytes"
            assert isinstance(value, list), f"RESP3: With both WITH options, value should be a list, got {type(value)}"
            assert len(value) == 2, f"RESP3: Value should have 2 elements [score, attr], got {len(value)}"
            score, attr = value
            assert self.is_numeric(score), f"RESP3: Score should be numeric"
            score_val = float(score) if isinstance(score, bytes) else score
            assert 0 <= score_val <= 1, f"RESP3: Score {score_val} should be between 0 and 1"
            assert attr is None or isinstance(attr, bytes), f"RESP3: Attribute should be None or bytes"

        # Test 5: both protocols must agree on items, scores and attributes.
        results_resp2, results_resp3 = self._vsim(query_vec, 'COUNT', 5, 'WITHSCORES', 'WITHATTRIBS')

        items_resp2 = [results_resp2[i] for i in range(0, len(results_resp2), 3)]
        items_resp3 = list(results_resp3.keys())
        assert set(items_resp2) == set(items_resp3), "RESP2 and RESP3 should return the same items"

        # Normalize both replies into item -> (score, attribute) mappings.
        data_resp2 = {}
        for i in range(0, len(results_resp2), 3):
            item = results_resp2[i]
            score = float(results_resp2[i+1]) if isinstance(results_resp2[i+1], bytes) else results_resp2[i+1]
            data_resp2[item] = (score, results_resp2[i+2])

        data_resp3 = {}
        for item, value in results_resp3.items():
            score = float(value[0]) if isinstance(value[0], bytes) else value[0]
            data_resp3[item] = (score, value[1])

        for item in data_resp2:
            score_resp2, attr_resp2 = data_resp2[item]
            score_resp3, attr_resp3 = data_resp3[item]
            assert abs(score_resp2 - score_resp3) < 0.0001, \
                f"Scores for {item} don't match: RESP2={score_resp2}, RESP3={score_resp3}"
            assert attr_resp2 == attr_resp3, \
                f"Attributes for {item} don't match: RESP2={attr_resp2}, RESP3={attr_resp3}"

        # Test 6: the order of the WITH options is irrelevant (RESP3 only).
        results1_resp3 = self.redis3.execute_command(
            'VSIM', self.test_key, 'VALUES', self.dim,
            *[str(x) for x in query_vec], 'COUNT', 3, 'WITHSCORES', 'WITHATTRIBS')
        results2_resp3 = self.redis3.execute_command(
            'VSIM', self.test_key, 'VALUES', self.dim,
            *[str(x) for x in query_vec], 'COUNT', 3, 'WITHATTRIBS', 'WITHSCORES')
        assert results1_resp3 == results2_resp3, "Order of WITH options shouldn't matter"
