summaryrefslogtreecommitdiff
path: root/examples/redis-unstable/modules/vector-sets/tests
diff options
context:
space:
mode:
authorMitja Felicijan <mitja.felicijan@gmail.com>2026-01-21 22:40:55 +0100
committerMitja Felicijan <mitja.felicijan@gmail.com>2026-01-21 22:40:55 +0100
commit5d8dfe892a2ea89f706ee140c3bdcfd89fe03fda (patch)
tree1acdfa5220cd13b7be43a2a01368e80d306473ca /examples/redis-unstable/modules/vector-sets/tests
parentc7ab12bba64d9c20ccd79b132dac475f7bc3923e (diff)
downloadcrep-5d8dfe892a2ea89f706ee140c3bdcfd89fe03fda.tar.gz
Add Redis source code for testing
Diffstat (limited to 'examples/redis-unstable/modules/vector-sets/tests')
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/basic_commands.py21
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/basic_similarity.py35
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/concurrent_vadd_cas_del_vsim.py156
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/concurrent_vsim_and_del.py48
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/debug_digest.py39
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/deletion.py173
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/dimension_validation.py67
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/epsilon.py77
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/evict_empty.py27
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/filter_expr.py242
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/filter_int.py668
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/large_scale.py56
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/memory_usage.py36
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/node_update.py85
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/persistence.py86
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/reduce.py71
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/replication.py92
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/threading_config.py249
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/vadd_cas.py98
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/vemb.py41
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/vismember.py47
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/vrand-ping-pong.py35
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/vrandmember.py55
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/vrange.py113
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/vsim_limit_efsearch.py32
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/with.py214
26 files changed, 2863 insertions, 0 deletions
diff --git a/examples/redis-unstable/modules/vector-sets/tests/basic_commands.py b/examples/redis-unstable/modules/vector-sets/tests/basic_commands.py
new file mode 100644
index 0000000..8481a36
--- /dev/null
+++ b/examples/redis-unstable/modules/vector-sets/tests/basic_commands.py
@@ -0,0 +1,21 @@
+from test import TestCase, generate_random_vector
+import struct
+
+class BasicCommands(TestCase):
+ def getname(self):
+ return "VADD, VDIM, VCARD basic usage"
+
+ def test(self):
+ # Test VADD
+ vec = generate_random_vector(4)
+ vec_bytes = struct.pack('4f', *vec)
+ result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:1')
+ assert result == 1, "VADD should return 1 for first item"
+
+ # Test VDIM
+ dim = self.redis.execute_command('VDIM', self.test_key)
+ assert dim == 4, f"VDIM should return 4, got {dim}"
+
+ # Test VCARD
+ card = self.redis.execute_command('VCARD', self.test_key)
+ assert card == 1, f"VCARD should return 1, got {card}"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/basic_similarity.py b/examples/redis-unstable/modules/vector-sets/tests/basic_similarity.py
new file mode 100644
index 0000000..11c3c9b
--- /dev/null
+++ b/examples/redis-unstable/modules/vector-sets/tests/basic_similarity.py
@@ -0,0 +1,35 @@
+from test import TestCase
+
+class BasicSimilarity(TestCase):
+ def getname(self):
+ return "VSIM reported distance makes sense with 4D vectors"
+
+ def test(self):
+ # Add two very similar vectors, one different
+ vec1 = [1, 0, 0, 0]
+ vec2 = [0.99, 0.01, 0, 0]
+ vec3 = [0.1, 1, -1, 0.5]
+
+ # Add vectors using VALUES format
+ self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
+ *[str(x) for x in vec1], f'{self.test_key}:item:1')
+ self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
+ *[str(x) for x in vec2], f'{self.test_key}:item:2')
+ self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
+ *[str(x) for x in vec3], f'{self.test_key}:item:3')
+
+ # Query similarity with vec1
+ result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+ *[str(x) for x in vec1], 'WITHSCORES')
+
+ # Convert results to dictionary
+ results_dict = {}
+ for i in range(0, len(result), 2):
+ key = result[i].decode()
+ score = float(result[i+1])
+ results_dict[key] = score
+
+ # Verify results
+ assert results_dict[f'{self.test_key}:item:1'] > 0.99, "Self-similarity should be very high"
+ assert results_dict[f'{self.test_key}:item:2'] > 0.99, "Similar vector should have high similarity"
+ assert results_dict[f'{self.test_key}:item:3'] < 0.8, "Not very similar vector should have low similarity"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/concurrent_vadd_cas_del_vsim.py b/examples/redis-unstable/modules/vector-sets/tests/concurrent_vadd_cas_del_vsim.py
new file mode 100644
index 0000000..f4b3a12
--- /dev/null
+++ b/examples/redis-unstable/modules/vector-sets/tests/concurrent_vadd_cas_del_vsim.py
@@ -0,0 +1,156 @@
+from test import TestCase, generate_random_vector
+import threading
+import time
+import struct
+
+class ThreadingStressTest(TestCase):
+ def getname(self):
+ return "Concurrent VADD/DEL/VSIM operations stress test"
+
+ def estimated_runtime(self):
+ return 10 # Test runs for 10 seconds
+
+ def test(self):
+ # Constants - easy to modify if needed
+ NUM_VADD_THREADS = 10
+ NUM_VSIM_THREADS = 1
+ NUM_DEL_THREADS = 1
+ TEST_DURATION = 10 # seconds
+ VECTOR_DIM = 100
+ DEL_INTERVAL = 1 # seconds
+
+ # Shared flags and state
+ stop_event = threading.Event()
+ error_list = []
+ error_lock = threading.Lock()
+
+ def log_error(thread_name, error):
+ with error_lock:
+ error_list.append(f"{thread_name}: {error}")
+
+ def vadd_worker(thread_id):
+ """Thread function to perform VADD operations"""
+ thread_name = f"VADD-{thread_id}"
+ try:
+ vector_count = 0
+ while not stop_event.is_set():
+ try:
+ # Generate random vector
+ vec = generate_random_vector(VECTOR_DIM)
+ vec_bytes = struct.pack(f'{VECTOR_DIM}f', *vec)
+
+ # Add vector with CAS option
+ self.redis.execute_command(
+ 'VADD',
+ self.test_key,
+ 'FP32',
+ vec_bytes,
+ f'{self.test_key}:item:{thread_id}:{vector_count}',
+ 'CAS'
+ )
+
+ vector_count += 1
+
+ # Small sleep to reduce CPU pressure
+ if vector_count % 10 == 0:
+ time.sleep(0.001)
+ except Exception as e:
+ log_error(thread_name, f"Error: {str(e)}")
+ time.sleep(0.1) # Slight backoff on error
+ except Exception as e:
+ log_error(thread_name, f"Thread error: {str(e)}")
+
+ def del_worker():
+ """Thread function that deletes the key periodically"""
+ thread_name = "DEL"
+ try:
+ del_count = 0
+ while not stop_event.is_set():
+ try:
+ # Sleep first, then delete
+ time.sleep(DEL_INTERVAL)
+ if stop_event.is_set():
+ break
+
+ self.redis.delete(self.test_key)
+ del_count += 1
+ except Exception as e:
+ log_error(thread_name, f"Error: {str(e)}")
+ except Exception as e:
+ log_error(thread_name, f"Thread error: {str(e)}")
+
+ def vsim_worker(thread_id):
+ """Thread function to perform VSIM operations"""
+ thread_name = f"VSIM-{thread_id}"
+ try:
+ search_count = 0
+ while not stop_event.is_set():
+ try:
+ # Generate query vector
+ query_vec = generate_random_vector(VECTOR_DIM)
+ query_str = [str(x) for x in query_vec]
+
+ # Perform similarity search
+ args = ['VSIM', self.test_key, 'VALUES', VECTOR_DIM]
+ args.extend(query_str)
+ args.extend(['COUNT', 10])
+ self.redis.execute_command(*args)
+
+ search_count += 1
+
+ # Small sleep to reduce CPU pressure
+ if search_count % 10 == 0:
+ time.sleep(0.005)
+ except Exception as e:
+ # Don't log empty array errors, as they're expected when key doesn't exist
+ if "empty array" not in str(e).lower():
+ log_error(thread_name, f"Error: {str(e)}")
+ time.sleep(0.1) # Slight backoff on error
+ except Exception as e:
+ log_error(thread_name, f"Thread error: {str(e)}")
+
+ # Start all threads
+ threads = []
+
+ # VADD threads
+ for i in range(NUM_VADD_THREADS):
+ thread = threading.Thread(target=vadd_worker, args=(i,))
+ thread.start()
+ threads.append(thread)
+
+ # DEL threads
+ for _ in range(NUM_DEL_THREADS):
+ thread = threading.Thread(target=del_worker)
+ thread.start()
+ threads.append(thread)
+
+ # VSIM threads
+ for i in range(NUM_VSIM_THREADS):
+ thread = threading.Thread(target=vsim_worker, args=(i,))
+ thread.start()
+ threads.append(thread)
+
+ # Let the test run for the specified duration
+ time.sleep(TEST_DURATION)
+
+ # Signal all threads to stop
+ stop_event.set()
+
+ # Wait for threads to finish
+ for thread in threads:
+ thread.join(timeout=2.0)
+
+ # Check if Redis is still responsive
+ try:
+ ping_result = self.redis.ping()
+ assert ping_result, "Redis did not respond to PING after stress test"
+ except Exception as e:
+ assert False, f"Redis connection failed after stress test: {str(e)}"
+
+ # Report any errors for diagnosis, but don't fail the test unless PING fails
+ if error_list:
+ error_count = len(error_list)
+ print(f"\nEncountered {error_count} errors during stress test.")
+ print("First 5 errors:")
+ for error in error_list[:5]:
+ print(f"- {error}")
diff --git a/examples/redis-unstable/modules/vector-sets/tests/concurrent_vsim_and_del.py b/examples/redis-unstable/modules/vector-sets/tests/concurrent_vsim_and_del.py
new file mode 100644
index 0000000..9bbf011
--- /dev/null
+++ b/examples/redis-unstable/modules/vector-sets/tests/concurrent_vsim_and_del.py
@@ -0,0 +1,48 @@
+from test import TestCase, fill_redis_with_vectors, generate_random_vector
+import threading, time
+
+class ConcurrentVSIMAndDEL(TestCase):
+ def getname(self):
+ return "Concurrent VSIM and DEL operations"
+
+ def estimated_runtime(self):
+ return 2
+
+ def test(self):
+ # Fill the key with 5000 random vectors
+ dim = 128
+ count = 5000
+ fill_redis_with_vectors(self.redis, self.test_key, count, dim)
+
+ # List to store results from threads
+ thread_results = []
+
+ def vsim_thread():
+ """Thread function to perform VSIM operations until the key is deleted"""
+ while True:
+ query_vec = generate_random_vector(dim)
+ result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
+ *[str(x) for x in query_vec], 'COUNT', 10)
+ if not result:
+ # Empty array detected, key is deleted
+ thread_results.append(True)
+ break
+
+ # Start multiple threads to perform VSIM operations
+ threads = []
+ for _ in range(4): # Start 4 threads
+ t = threading.Thread(target=vsim_thread)
+ t.start()
+ threads.append(t)
+
+ # Delete the key while threads are still running
+ time.sleep(1)
+ self.redis.delete(self.test_key)
+
+ # Wait for all threads to finish (they will exit once they detect the key is deleted)
+ for t in threads:
+ t.join()
+
+ # Verify that all threads detected an empty array or error
+ assert len(thread_results) == len(threads), "Not all threads detected the key deletion"
+ assert all(thread_results), "Some threads did not detect an empty array or error after DEL"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/debug_digest.py b/examples/redis-unstable/modules/vector-sets/tests/debug_digest.py
new file mode 100644
index 0000000..78f06d8
--- /dev/null
+++ b/examples/redis-unstable/modules/vector-sets/tests/debug_digest.py
@@ -0,0 +1,39 @@
+from test import TestCase, generate_random_vector
+import struct
+
+class DebugDigestTest(TestCase):
+ def getname(self):
+ return "[regression] DEBUG DIGEST-VALUE with attributes"
+
+ def test(self):
+ # Generate random vectors
+ vec1 = generate_random_vector(4)
+ vec2 = generate_random_vector(4)
+ vec_bytes1 = struct.pack('4f', *vec1)
+ vec_bytes2 = struct.pack('4f', *vec2)
+
+ # Add vectors to the key, one with attribute, one without
+ self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes1, f'{self.test_key}:item:1')
+ self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes2, f'{self.test_key}:item:2', 'SETATTR', '{"color":"red"}')
+
+ # Call DEBUG DIGEST-VALUE on the key
+ try:
+ digest1 = self.redis.execute_command('DEBUG', 'DIGEST-VALUE', self.test_key)
+ assert digest1 is not None, "DEBUG DIGEST-VALUE should return a value"
+
+ # Change attribute and verify digest changes
+ self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:2', '{"color":"blue"}')
+
+ digest2 = self.redis.execute_command('DEBUG', 'DIGEST-VALUE', self.test_key)
+ assert digest2 is not None, "DEBUG DIGEST-VALUE should return a value after attribute change"
+ assert digest1 != digest2, "Digest should change when an attribute is modified"
+
+ # Remove attribute and verify digest changes again
+ self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:2', '')
+
+ digest3 = self.redis.execute_command('DEBUG', 'DIGEST-VALUE', self.test_key)
+ assert digest3 is not None, "DEBUG DIGEST-VALUE should return a value after attribute removal"
+ assert digest2 != digest3, "Digest should change when an attribute is removed"
+
+ except Exception as e:
+ raise AssertionError(f"DEBUG DIGEST-VALUE command failed: {str(e)}")
diff --git a/examples/redis-unstable/modules/vector-sets/tests/deletion.py b/examples/redis-unstable/modules/vector-sets/tests/deletion.py
new file mode 100644
index 0000000..cb91959
--- /dev/null
+++ b/examples/redis-unstable/modules/vector-sets/tests/deletion.py
@@ -0,0 +1,173 @@
+from test import TestCase, fill_redis_with_vectors, generate_random_vector
+import random
+
+"""
+A note about this test:
+It was experimentally tried to modify hnsw.c in order to
+avoid calling hnsw_reconnect_nodes(). In this case, the test
+fails very often with EF set to 250, while it hardly
+fails at all with the same parameters if hnsw_reconnect_nodes()
+is called.
+
+Note that for the nature of the test (it is very strict) it can
+still fail from time to time, without this signaling any
+actual bug.
+"""
+
+class VREM(TestCase):
+ def getname(self):
+ return "Deletion and graph state after deletion"
+
+ def estimated_runtime(self):
+ return 2.0
+
+ def format_neighbors_with_scores(self, links_result, old_links=None, items_to_remove=None):
+ """Format neighbors with their similarity scores and status indicators"""
+ if not links_result:
+ return "No neighbors"
+
+ output = []
+ for level, neighbors in enumerate(links_result):
+ level_num = len(links_result) - level - 1
+ output.append(f"Level {level_num}:")
+
+ # Get neighbors and scores
+ neighbors_with_scores = []
+ for i in range(0, len(neighbors), 2):
+ neighbor = neighbors[i].decode() if isinstance(neighbors[i], bytes) else neighbors[i]
+ score = float(neighbors[i+1]) if i+1 < len(neighbors) else None
+ status = ""
+
+ # For old links, mark deleted ones
+ if items_to_remove and neighbor in items_to_remove:
+ status = " [lost]"
+ # For new links, mark newly added ones
+ elif old_links is not None:
+ # Check if this neighbor was in the old links at this level
+ was_present = False
+ if old_links and level < len(old_links):
+ old_neighbors = [n.decode() if isinstance(n, bytes) else n
+ for n in old_links[level]]
+ was_present = neighbor in old_neighbors
+ if not was_present:
+ status = " [gained]"
+
+ if score is not None:
+ neighbors_with_scores.append(f"{len(neighbors_with_scores)+1}. {neighbor} ({score:.6f}){status}")
+ else:
+ neighbors_with_scores.append(f"{len(neighbors_with_scores)+1}. {neighbor}{status}")
+
+ output.extend([" " + n for n in neighbors_with_scores])
+ return "\n".join(output)
+
+ def test(self):
+ # 1. Fill server with random elements
+ dim = 128
+ count = 5000
+ data = fill_redis_with_vectors(self.redis, self.test_key, count, dim)
+
+ # 2. Do VSIM to get 200 items
+ query_vec = generate_random_vector(dim)
+ results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
+ *[str(x) for x in query_vec],
+ 'COUNT', 200, 'WITHSCORES')
+
+ # Convert results to list of (item, score) pairs, sorted by score
+ items = []
+ for i in range(0, len(results), 2):
+ item = results[i].decode()
+ score = float(results[i+1])
+ items.append((item, score))
+ items.sort(key=lambda x: x[1], reverse=True) # Sort by similarity
+
+ # Store the graph structure for all items before deletion
+ neighbors_before = {}
+ for item, _ in items:
+ links = self.redis.execute_command('VLINKS', self.test_key, item, 'WITHSCORES')
+ if links: # Some items might not have links
+ neighbors_before[item] = links
+
+ # 3. Remove 100 random items
+ items_to_remove = set(item for item, _ in random.sample(items, 100))
+ # Keep track of top 10 non-removed items
+ top_remaining = []
+ for item, score in items:
+ if item not in items_to_remove:
+ top_remaining.append((item, score))
+ if len(top_remaining) == 10:
+ break
+
+ # Remove the items
+ for item in items_to_remove:
+ result = self.redis.execute_command('VREM', self.test_key, item)
+ assert result == 1, f"VREM failed to remove {item}"
+
+ # 4. Do VSIM again with same vector
+ new_results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
+ *[str(x) for x in query_vec],
+ 'COUNT', 200, 'WITHSCORES',
+ 'EF', 500)
+
+ # Convert new results to dict of item -> score
+ new_scores = {}
+ for i in range(0, len(new_results), 2):
+ item = new_results[i].decode()
+ score = float(new_results[i+1])
+ new_scores[item] = score
+
+ failure = False
+ failed_item = None
+ failed_reason = None
+ # 5. Verify all top 10 non-removed items are still found with similar scores
+ for item, old_score in top_remaining:
+ if item not in new_scores:
+ failure = True
+ failed_item = item
+ failed_reason = "missing"
+ break
+ new_score = new_scores[item]
+ if abs(new_score - old_score) >= 0.01:
+ failure = True
+ failed_item = item
+ failed_reason = f"score changed: {old_score:.6f} -> {new_score:.6f}"
+ break
+
+ if failure:
+ print("\nTest failed!")
+ print(f"Problem with item: {failed_item} ({failed_reason})")
+
+ print("\nOriginal neighbors (with similarity scores):")
+ if failed_item in neighbors_before:
+ print(self.format_neighbors_with_scores(
+ neighbors_before[failed_item],
+ items_to_remove=items_to_remove))
+ else:
+ print("No neighbors found in original graph")
+
+ print("\nCurrent neighbors (with similarity scores):")
+ current_links = self.redis.execute_command('VLINKS', self.test_key,
+ failed_item, 'WITHSCORES')
+ if current_links:
+ print(self.format_neighbors_with_scores(
+ current_links,
+ old_links=neighbors_before.get(failed_item)))
+ else:
+ print("No neighbors in current graph")
+
+ print("\nOriginal results (top 20):")
+ for item, score in items[:20]:
+ deleted = "[deleted]" if item in items_to_remove else ""
+ print(f"{item}: {score:.6f} {deleted}")
+
+ print("\nNew results after removal (top 20):")
+ new_items = []
+ for i in range(0, len(new_results), 2):
+ item = new_results[i].decode()
+ score = float(new_results[i+1])
+ new_items.append((item, score))
+ new_items.sort(key=lambda x: x[1], reverse=True)
+ for item, score in new_items[:20]:
+ print(f"{item}: {score:.6f}")
+
+ raise AssertionError(f"Test failed: Problem with item {failed_item} ({failed_reason}). *** IMPORTANT *** This test may fail from time to time without indicating that there is a bug. However normally it should pass. The fact is that it's a quite extreme test where we destroy 50% of nodes of top results and still expect perfect recall, with vectors that are very hostile because of the distribution used.")
+
diff --git a/examples/redis-unstable/modules/vector-sets/tests/dimension_validation.py b/examples/redis-unstable/modules/vector-sets/tests/dimension_validation.py
new file mode 100644
index 0000000..f081152
--- /dev/null
+++ b/examples/redis-unstable/modules/vector-sets/tests/dimension_validation.py
@@ -0,0 +1,67 @@
+from test import TestCase, generate_random_vector
+import struct
+import redis.exceptions
+
+class DimensionValidation(TestCase):
+ def getname(self):
+ return "[regression] Dimension Validation with Projection"
+
+ def estimated_runtime(self):
+ return 0.5
+
+ def test(self):
+ # Test scenario 1: Create a set with projection
+ original_dim = 100
+ reduced_dim = 50
+
+ # Create the initial vector and set with projection
+ vec1 = generate_random_vector(original_dim)
+ vec1_bytes = struct.pack(f'{original_dim}f', *vec1)
+
+ # Add first vector with projection
+ result = self.redis.execute_command('VADD', self.test_key,
+ 'REDUCE', reduced_dim,
+ 'FP32', vec1_bytes, f'{self.test_key}:item:1')
+ assert result == 1, "First VADD with REDUCE should return 1"
+
+ # Check VINFO returns the correct projection information
+ info = self.redis.execute_command('VINFO', self.test_key)
+ info_map = {k.decode('utf-8'): v for k, v in zip(info[::2], info[1::2])}
+ assert 'vector-dim' in info_map, "VINFO should contain vector-dim"
+        assert info_map['vector-dim'] == reduced_dim, f"Expected reduced dimension {reduced_dim}, got {info_map['vector-dim']}"
+ assert 'projection-input-dim' in info_map, "VINFO should contain projection-input-dim"
+        assert info_map['projection-input-dim'] == original_dim, f"Expected original dimension {original_dim}, got {info_map['projection-input-dim']}"
+
+ # Test scenario 2: Try adding a mismatched vector - should fail
+ wrong_dim = 80
+ wrong_vec = generate_random_vector(wrong_dim)
+ wrong_vec_bytes = struct.pack(f'{wrong_dim}f', *wrong_vec)
+
+ # This should fail with dimension mismatch error
+ try:
+ self.redis.execute_command('VADD', self.test_key,
+ 'REDUCE', reduced_dim,
+ 'FP32', wrong_vec_bytes, f'{self.test_key}:item:2')
+ assert False, "VADD with wrong dimension should fail"
+ except redis.exceptions.ResponseError as e:
+ assert "Input dimension mismatch for projection" in str(e), f"Expected dimension mismatch error, got: {e}"
+
+ # Test scenario 3: Add a correctly-sized vector
+ vec2 = generate_random_vector(original_dim)
+ vec2_bytes = struct.pack(f'{original_dim}f', *vec2)
+
+ # This should succeed
+ result = self.redis.execute_command('VADD', self.test_key,
+ 'REDUCE', reduced_dim,
+ 'FP32', vec2_bytes, f'{self.test_key}:item:3')
+ assert result == 1, "VADD with correct dimensions should succeed"
+
+ # Check VSIM also validates input dimensions
+ wrong_query = generate_random_vector(wrong_dim)
+ try:
+ self.redis.execute_command('VSIM', self.test_key,
+ 'VALUES', wrong_dim, *[str(x) for x in wrong_query],
+ 'COUNT', 10)
+ assert False, "VSIM with wrong dimension should fail"
+ except redis.exceptions.ResponseError as e:
+ assert "Input dimension mismatch for projection" in str(e), f"Expected dimension mismatch error in VSIM, got: {e}"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/epsilon.py b/examples/redis-unstable/modules/vector-sets/tests/epsilon.py
new file mode 100644
index 0000000..97e11c0
--- /dev/null
+++ b/examples/redis-unstable/modules/vector-sets/tests/epsilon.py
@@ -0,0 +1,77 @@
+from test import TestCase
+
+class EpsilonOption(TestCase):
+ def getname(self):
+ return "VSIM EPSILON option filtering"
+
+ def estimated_runtime(self):
+ return 0.1
+
+ def test(self):
+ # Add vectors as shown in the example
+ # Vector 'a' at (1, 1) - normalized to (0.707, 0.707)
+ result = self.redis.execute_command('VADD', self.test_key, 'VALUES', '2', '1', '1', 'a')
+ assert result == 1, "VADD should return 1 for item 'a'"
+
+ # Vector 'b' at (0, 1) - normalized to (0, 1)
+ result = self.redis.execute_command('VADD', self.test_key, 'VALUES', '2', '0', '1', 'b')
+ assert result == 1, "VADD should return 1 for item 'b'"
+
+ # Vector 'c' at (0, 0) - this will be a zero vector, might be handled specially
+ result = self.redis.execute_command('VADD', self.test_key, 'VALUES', '2', '0', '0', 'c')
+ assert result == 1, "VADD should return 1 for item 'c'"
+
+ # Vector 'd' at (0, -1) - normalized to (0, -1)
+ result = self.redis.execute_command('VADD', self.test_key, 'VALUES', '2', '0', '-1', 'd')
+ assert result == 1, "VADD should return 1 for item 'd'"
+
+ # Vector 'e' at (-1, -1) - normalized to (-0.707, -0.707)
+ result = self.redis.execute_command('VADD', self.test_key, 'VALUES', '2', '-1', '-1', 'e')
+ assert result == 1, "VADD should return 1 for item 'e'"
+
+ # Test without EPSILON - should return all items
+ result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', '2', '1', '1', 'WITHSCORES')
+ # Result is a flat list: [elem1, score1, elem2, score2, ...]
+ elements_all = [result[i].decode() for i in range(0, len(result), 2)]
+ scores_all = [float(result[i]) for i in range(1, len(result), 2)]
+
+ assert len(elements_all) == 5, f"Should return 5 elements without EPSILON, got {len(elements_all)}"
+ assert elements_all[0] == 'a', "First element should be 'a' (most similar)"
+ assert scores_all[0] == 1.0, "Score for 'a' should be 1.0 (identical)"
+
+ # Test with EPSILON 0.5 - should return only elements with similarity >= 0.5 (distance < 0.5)
+ result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', '2', '1', '1', 'WITHSCORES', 'EPSILON', '0.5')
+ elements_epsilon_0_5 = [result[i].decode() for i in range(0, len(result), 2)]
+ scores_epsilon_0_5 = [float(result[i]) for i in range(1, len(result), 2)]
+
+ assert len(elements_epsilon_0_5) == 3, f"With EPSILON 0.5, should return 3 elements, got {len(elements_epsilon_0_5)}"
+ assert set(elements_epsilon_0_5) == {'a', 'b', 'c'}, f"With EPSILON 0.5, should get a, b, c, got {elements_epsilon_0_5}"
+
+ # Verify all returned scores are >= 0.5
+ for i, score in enumerate(scores_epsilon_0_5):
+ assert score >= 0.5, f"Element {elements_epsilon_0_5[i]} has score {score} which is < 0.5"
+
+ # Test with EPSILON 0.2 - should return only elements with similarity >= 0.8 (distance < 0.2)
+ result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', '2', '1', '1', 'WITHSCORES', 'EPSILON', '0.2')
+ elements_epsilon_0_2 = [result[i].decode() for i in range(0, len(result), 2)]
+ scores_epsilon_0_2 = [float(result[i]) for i in range(1, len(result), 2)]
+
+ assert len(elements_epsilon_0_2) == 2, f"With EPSILON 0.2, should return 2 elements, got {len(elements_epsilon_0_2)}"
+ assert set(elements_epsilon_0_2) == {'a', 'b'}, f"With EPSILON 0.2, should get a, b, got {elements_epsilon_0_2}"
+
+ # Verify all returned scores are >= 0.8 (since distance < 0.2 means similarity > 0.8)
+ for i, score in enumerate(scores_epsilon_0_2):
+ assert score >= 0.8, f"Element {elements_epsilon_0_2[i]} has score {score} which is < 0.8"
+
+ # Test with very small EPSILON - should return only the exact match
+ result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', '2', '1', '1', 'WITHSCORES', 'EPSILON', '0.001')
+ elements_epsilon_small = [result[i].decode() for i in range(0, len(result), 2)]
+
+ assert len(elements_epsilon_small) == 1, f"With EPSILON 0.001, should return only 1 element, got {len(elements_epsilon_small)}"
+ assert elements_epsilon_small[0] == 'a', "With very small EPSILON, should only get 'a'"
+
+ # Test with EPSILON 1.0 - should return all elements (since all similarities are between 0 and 1)
+ result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', '2', '1', '1', 'WITHSCORES', 'EPSILON', '1.0')
+ elements_epsilon_1 = [result[i].decode() for i in range(0, len(result), 2)]
+
+ assert len(elements_epsilon_1) == 5, f"With EPSILON 1.0, should return all 5 elements, got {len(elements_epsilon_1)}"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/evict_empty.py b/examples/redis-unstable/modules/vector-sets/tests/evict_empty.py
new file mode 100644
index 0000000..6c78c82
--- /dev/null
+++ b/examples/redis-unstable/modules/vector-sets/tests/evict_empty.py
@@ -0,0 +1,27 @@
+from test import TestCase, generate_random_vector
+import struct
+
+class VREM_LastItemDeletesKey(TestCase):
+ def getname(self):
+ return "VREM last item deletes key"
+
+ def test(self):
+ # Generate a random vector
+ vec = generate_random_vector(4)
+ vec_bytes = struct.pack('4f', *vec)
+
+ # Add the vector to the key
+ result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:1')
+ assert result == 1, "VADD should return 1 for first item"
+
+ # Verify the key exists
+ exists = self.redis.exists(self.test_key)
+ assert exists == 1, "Key should exist after VADD"
+
+ # Remove the item
+ result = self.redis.execute_command('VREM', self.test_key, f'{self.test_key}:item:1')
+ assert result == 1, "VREM should return 1 for successful removal"
+
+ # Verify the key no longer exists
+ exists = self.redis.exists(self.test_key)
+ assert exists == 0, "Key should no longer exist after VREM of last item"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/filter_expr.py b/examples/redis-unstable/modules/vector-sets/tests/filter_expr.py
new file mode 100644
index 0000000..364915d
--- /dev/null
+++ b/examples/redis-unstable/modules/vector-sets/tests/filter_expr.py
@@ -0,0 +1,242 @@
+from test import TestCase
+
+class VSIMFilterExpressions(TestCase):
+    """Exercise the VSIM FILTER expression language against a tiny fixed
+    dataset: items 1-3 carry JSON attributes, item 4 has no attribute at
+    all, and item 5 has intentionally malformed JSON. Items lacking valid
+    attributes must never match any filter."""
+
+    def getname(self):
+        """Human-readable test name used by the test runner."""
+        return "VSIM FILTER expressions basic functionality"
+
+    def test(self):
+        # Create a small set of vectors with different attributes
+
+        # Basic vectors for testing - all orthogonal for clear results
+        vec1 = [1, 0, 0, 0]
+        vec2 = [0, 1, 0, 0]
+        vec3 = [0, 0, 1, 0]
+        vec4 = [0, 0, 0, 1]
+        vec5 = [0.5, 0.5, 0, 0]
+
+        # Add vectors with various attributes
+        self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
+                                   *[str(x) for x in vec1], f'{self.test_key}:item:1')
+        self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:1',
+                                   '{"age": 25, "name": "Alice", "active": true, "scores": [85, 90, 95], "city": "New York"}')
+
+        self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
+                                   *[str(x) for x in vec2], f'{self.test_key}:item:2')
+        self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:2',
+                                   '{"age": 30, "name": "Bob", "active": false, "scores": [70, 75, 80], "city": "Boston"}')
+
+        self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
+                                   *[str(x) for x in vec3], f'{self.test_key}:item:3')
+        self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:3',
+                                   '{"age": 35, "name": "Charlie", "scores": [60, 65, 70], "city": "Seattle"}')
+
+        self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
+                                   *[str(x) for x in vec4], f'{self.test_key}:item:4')
+        # Item 4 has no attribute at all
+
+        self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
+                                   *[str(x) for x in vec5], f'{self.test_key}:item:5')
+        self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:5',
+                                   'invalid json') # Intentionally malformed JSON
+
+        # Basic equality with numbers
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '.age == 25')
+        assert len(result) == 1, "Expected 1 result for age == 25"
+        assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 for age == 25"
+
+        # Greater than
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '.age > 25')
+        assert len(result) == 2, "Expected 2 results for age > 25"
+
+        # Less than or equal
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '.age <= 30')
+        assert len(result) == 2, "Expected 2 results for age <= 30"
+
+        # String equality
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '.name == "Alice"')
+        assert len(result) == 1, "Expected 1 result for name == Alice"
+        assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 for name == Alice"
+
+        # String inequality
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '.name != "Alice"')
+        assert len(result) == 2, "Expected 2 results for name != Alice"
+
+        # Boolean value
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '.active')
+        assert len(result) == 1, "Expected 1 result for .active being true"
+
+        # Logical AND
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '.age > 20 and .age < 30')
+        assert len(result) == 1, "Expected 1 result for 20 < age < 30"
+        assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 for 20 < age < 30"
+
+        # Logical OR
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '.age < 30 or .age > 35')
+        assert len(result) == 1, "Expected 1 result for age < 30 or age > 35"
+
+        # Logical NOT
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '!(.age == 25)')
+        assert len(result) == 2, "Expected 2 results for NOT(age == 25)"
+
+        # The "in" operator with array
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '.age in [25, 35]')
+        assert len(result) == 2, "Expected 2 results for age in [25, 35]"
+
+        # The "in" operator with strings in array
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '.name in ["Alice", "David"]')
+        assert len(result) == 1, "Expected 1 result for name in [Alice, David]"
+
+        # The "in" operator for substring matching
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '"lic" in .name')
+        assert len(result) == 1, "Expected 1 result for 'lic' in name"
+        assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 (Alice)"
+
+        # The "in" operator with city substring
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '"ork" in .city')
+        assert len(result) == 1, "Expected 1 result for 'ork' in city"
+        assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 (New York)"
+
+        # The "in" operator with no matches
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '"xyz" in .name')
+        assert len(result) == 0, "Expected 0 results for 'xyz' in name"
+
+        # Off-by-one tests - substring at the beginning
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '"Ali" in .name')
+        assert len(result) == 1, "Expected 1 result for 'Ali' at beginning of 'Alice'"
+        assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1"
+
+        # Off-by-one tests - substring at the end
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '"ice" in .name')
+        assert len(result) == 1, "Expected 1 result for 'ice' at end of 'Alice'"
+        assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1"
+
+        # Off-by-one tests - exact match (entire string)
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '"Alice" in .name')
+        assert len(result) == 1, "Expected 1 result for exact match 'Alice' in 'Alice'"
+        assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1"
+
+        # Off-by-one tests - single character
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '"A" in .name')
+        assert len(result) == 1, "Expected 1 result for single char 'A' in 'Alice'"
+
+        # Off-by-one tests - empty string (should match all strings)
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '"" in .name')
+        assert len(result) == 3, "Expected 3 results for empty string (matches all strings)"
+
+        # Off-by-one tests - non-empty strings are never substrings of ""
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '.name in ""')
+        assert len(result) == 0, "Expected 0 results for empty string on the right of IN operator"
+
+        # Off-by-one tests - empty string match empty string.
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '"" in .name && "" in ""')
+        assert len(result) == 3, "Expected empty string matching empty string"
+
+        # Arithmetic operations - addition
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '.age + 10 > 40')
+        assert len(result) == 1, "Expected 1 result for age + 10 > 40"
+
+        # Arithmetic operations - multiplication
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '.age * 2 > 60')
+        assert len(result) == 1, "Expected 1 result for age * 2 > 60"
+
+        # Arithmetic operations - division
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '.age / 5 == 5')
+        assert len(result) == 1, "Expected 1 result for age / 5 == 5"
+
+        # Arithmetic operations - modulo
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '.age % 2 == 0')
+        assert len(result) == 1, "Expected 1 result for age % 2 == 0"
+
+        # Power operator
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '.age ** 2 > 900')
+        assert len(result) == 1, "Expected 1 result for age^2 > 900"
+
+        # Missing attribute (should exclude items missing that attribute)
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '.missing_field == "value"')
+        assert len(result) == 0, "Expected 0 results for missing_field == value"
+
+        # No attribute set at all
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '.any_field')
+        assert f'{self.test_key}:item:4' not in [item.decode() for item in result], "Item with no attribute should be excluded"
+
+        # Malformed JSON
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '.any_field')
+        assert f'{self.test_key}:item:5' not in [item.decode() for item in result], "Item with malformed JSON should be excluded"
+
+        # Complex expression combining multiple operators
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '(.age > 20 and .age < 40) and (.city == "Boston" or .city == "New York")')
+        assert len(result) == 2, "Expected 2 results for the complex expression"
+        expected_items = [f'{self.test_key}:item:1', f'{self.test_key}:item:2']
+        assert set([item.decode() for item in result]) == set(expected_items), "Expected item:1 and item:2 for the complex expression"
+
+        # Parentheses to control operator precedence
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '.age > (20 + 10)')
+        assert len(result) == 1, "Expected 1 result for age > (20 + 10)"
+
+        # Array access (arrays evaluate to true)
+        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
+                                            *[str(x) for x in vec1],
+                                            'FILTER', '.scores')
+        assert len(result) == 3, "Expected 3 results for .scores (arrays evaluate to true)"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/filter_int.py b/examples/redis-unstable/modules/vector-sets/tests/filter_int.py
new file mode 100644
index 0000000..0fd1dc1
--- /dev/null
+++ b/examples/redis-unstable/modules/vector-sets/tests/filter_int.py
@@ -0,0 +1,668 @@
+from test import TestCase, generate_random_vector
+import struct
+import random
+import math
+import json
+import time
+
+class VSIMFilterAdvanced(TestCase):
+    """Comprehensive VSIM FILTER coverage on a randomized 5000-vector
+    dataset: recall vs. a Python linear scan, filter selectivity,
+    FILTER-EF tuning, complex expressions, attribute types, COUNT
+    interaction, and edge cases (no matches, bad syntax, long filters)."""
+
+    def getname(self):
+        """Human-readable test name used by the test runner."""
+        return "VSIM FILTER comprehensive functionality testing"
+
+    def estimated_runtime(self):
+        """Advisory runtime hint (seconds) for the test scheduler."""
+        return 15  # This test might take up to 15 seconds for the large dataset
+
+    def setup(self):
+        super().setup()
+        self.dim = 32  # Vector dimension
+        self.count = 5000  # Number of vectors for large tests
+        self.small_count = 50  # Number of vectors for small/quick tests
+
+        # Categories for attributes
+        self.categories = ["electronics", "furniture", "clothing", "books", "food"]
+        self.cities = ["New York", "London", "Tokyo", "Paris", "Berlin", "Sydney", "Toronto", "Singapore"]
+        self.price_ranges = [(10, 50), (50, 200), (200, 1000), (1000, 5000)]
+        self.years = list(range(2000, 2025))
+
+    def create_attributes(self, index):
+        """Create realistic attributes for a vector.
+
+        Returns a JSON string; ~2% of the time returns deliberately
+        invalid JSON, and ~10% of entries omit "price" to exercise
+        missing-attribute handling in filters.
+        """
+        category = random.choice(self.categories)
+        city = random.choice(self.cities)
+        min_price, max_price = random.choice(self.price_ranges)
+        price = round(random.uniform(min_price, max_price), 2)
+        year = random.choice(self.years)
+        in_stock = random.random() > 0.3  # 70% chance of being in stock
+        rating = round(random.uniform(1, 5), 1)
+        views = int(random.expovariate(1/1000))  # Exponential distribution for page views
+        tags = random.sample(["popular", "sale", "new", "limited", "exclusive", "clearance"],
+                             k=random.randint(0, 3))
+
+        # Add some specific patterns for testing
+        # Every 10th item has a specific property combination for testing
+        is_premium = (index % 10 == 0)
+
+        # Create attributes dictionary
+        attrs = {
+            "id": index,
+            "category": category,
+            "location": city,
+            "price": price,
+            "year": year,
+            "in_stock": in_stock,
+            "rating": rating,
+            "views": views,
+            "tags": tags
+        }
+
+        if is_premium:
+            attrs["is_premium"] = True
+            attrs["special_features"] = ["premium", "warranty", "support"]
+
+        # Add sub-categories for more complex filters
+        if category == "electronics":
+            attrs["subcategory"] = random.choice(["phones", "computers", "cameras", "audio"])
+        elif category == "furniture":
+            attrs["subcategory"] = random.choice(["chairs", "tables", "sofas", "beds"])
+        elif category == "clothing":
+            attrs["subcategory"] = random.choice(["shirts", "pants", "dresses", "shoes"])
+
+        # Add some intentionally missing fields for testing
+        if random.random() > 0.9:  # 10% chance of missing price
+            del attrs["price"]
+
+        # Some items have promotion field
+        if random.random() > 0.7:  # 30% chance of having a promotion
+            attrs["promotion"] = random.choice(["discount", "bundle", "gift"])
+
+        # Create invalid JSON for a small percentage of vectors
+        if random.random() > 0.98:  # 2% chance of having invalid JSON
+            return "{{invalid json}}"
+
+        return json.dumps(attrs)
+
+    def create_vectors_with_attributes(self, key, count):
+        """Create vectors and add attributes to them.
+
+        Returns (vectors, names, attribute_map); attribute_map values are
+        parsed dicts, or None where the attribute JSON was invalid.
+        """
+        vectors = []
+        names = []
+        attribute_map = {}  # To store attributes for verification
+
+        # Create vectors
+        for i in range(count):
+            vec = generate_random_vector(self.dim)
+            vectors.append(vec)
+            name = f"{key}:item:{i}"
+            names.append(name)
+
+            # Add to Redis
+            vec_bytes = struct.pack(f'{self.dim}f', *vec)
+            self.redis.execute_command('VADD', key, 'FP32', vec_bytes, name)
+
+            # Create and add attributes
+            attrs = self.create_attributes(i)
+            self.redis.execute_command('VSETATTR', key, name, attrs)
+
+            # Store attributes for later verification
+            try:
+                attribute_map[name] = json.loads(attrs) if '{' in attrs else None
+            except json.JSONDecodeError:
+                attribute_map[name] = None
+
+        return vectors, names, attribute_map
+
+    def filter_linear_search(self, vectors, names, query_vector, filter_expr, attribute_map, k=10):
+        """Perform a linear search with filtering for verification.
+
+        Computes cosine similarity rescaled to Redis' [0, 1] convention
+        (1 - distance/2) and returns the top-k (name, similarity) pairs
+        among items whose attributes pass filter_expr.
+        """
+        similarities = []
+        query_norm = math.sqrt(sum(x*x for x in query_vector))
+
+        if query_norm == 0:
+            return []
+
+        for i, vec in enumerate(vectors):
+            name = names[i]
+            attributes = attribute_map.get(name)
+
+            # Skip if doesn't match filter
+            if not self.matches_filter(attributes, filter_expr):
+                continue
+
+            vec_norm = math.sqrt(sum(x*x for x in vec))
+            if vec_norm == 0:
+                continue
+
+            dot_product = sum(a*b for a,b in zip(query_vector, vec))
+            cosine_sim = dot_product / (query_norm * vec_norm)
+            distance = 1.0 - cosine_sim
+            redis_similarity = 1.0 - (distance/2.0)
+            similarities.append((name, redis_similarity))
+
+        similarities.sort(key=lambda x: x[1], reverse=True)
+        return similarities[:k]
+
+    def matches_filter(self, attributes, filter_expr):
+        """Filter matching for verification - uses Python eval to handle complex expressions.
+
+        NOTE(review): eval() is acceptable here only because filter_expr
+        comes from this test file itself, never from untrusted input.
+        """
+        if attributes is None:
+            return False  # No attributes or invalid JSON
+
+        # Replace JSON path selectors with Python dictionary access
+        py_expr = filter_expr
+
+        # Handle `.field` notation (replace with attributes['field'])
+        i = 0
+        while i < len(py_expr):
+            if py_expr[i] == '.' and (i == 0 or not py_expr[i-1].isalnum()):
+                # Find the end of the selector (stops at operators or whitespace)
+                j = i + 1
+                while j < len(py_expr) and (py_expr[j].isalnum() or py_expr[j] == '_'):
+                    j += 1
+
+                if j > i + 1:  # Found a valid selector
+                    field = py_expr[i+1:j]
+                    # Use a safe access pattern that returns a default value based on context
+                    py_expr = py_expr[:i] + f"attributes.get('{field}')" + py_expr[j:]
+                    i = i + len(f"attributes.get('{field}')")
+                else:
+                    i += 1
+            else:
+                i += 1
+
+        # Convert not operator if needed
+        # NOTE(review): a blanket replace of '!' would also mangle a '!='
+        # operator into ' not ='; the expressions used by this test only
+        # ever use '!' as prefix negation, so it happens to work here.
+        py_expr = py_expr.replace('!', ' not ')
+
+        try:
+            # Custom evaluation that handles exceptions for missing fields
+            # by returning False for the entire expression
+
+            # Split the expression on logical operators
+            # NOTE(review): `parts` is computed but never used below; the
+            # whole expression is evaluated in one eval() call regardless.
+            parts = []
+            for op in [' and ', ' or ']:
+                if op in py_expr:
+                    parts = py_expr.split(op)
+                    break
+
+            if not parts:  # No logical operators found
+                parts = [py_expr]
+
+            # Try to evaluate each part - if any part fails,
+            # the whole expression should fail
+            try:
+                result = eval(py_expr, {"attributes": attributes})
+                return bool(result)
+            except (TypeError, AttributeError):
+                # This typically happens when trying to compare None with
+                # numbers or other types, or when an attribute doesn't exist
+                return False
+            except Exception as e:
+                print(f"Error evaluating filter expression '{filter_expr}' as '{py_expr}': {e}")
+                return False
+
+        except Exception as e:
+            print(f"Error evaluating filter expression '{filter_expr}' as '{py_expr}': {e}")
+            return False
+
+    def safe_decode(self,item):
+        # Decode redis reply bytes to str; pass through values that are already str.
+        return item.decode() if isinstance(item, bytes) else item
+
+    def calculate_recall(self, redis_results, linear_results, k=10):
+        """Calculate recall (percentage of correct results retrieved)"""
+        redis_set = set(self.safe_decode(item) for item in redis_results)
+        linear_set = set(item[0] for item in linear_results[:k])
+
+        if not linear_set:
+            return 1.0  # If no linear results, consider it perfect recall
+
+        intersection = redis_set.intersection(linear_set)
+        return len(intersection) / len(linear_set)
+
+    def test_recall_with_filter(self, filter_expr, ef=500, filter_ef=None):
+        """Test recall for a given filter expression.
+
+        Runs VSIM with FILTER (and optional FILTER-EF) against the ground
+        truth from filter_linear_search, asserting selectivity agreement
+        and (for generous EF values) recall >= 0.7 with a verbose debug
+        dump on failure. Returns (recall, selectivity, query_time, count).
+        """
+        # Create query vector
+        query_vec = generate_random_vector(self.dim)
+
+        # First, get ground truth using linear scan
+        linear_results = self.filter_linear_search(
+            self.vectors, self.names, query_vec, filter_expr, self.attribute_map, k=50)
+
+        # Calculate true selectivity from ground truth
+        true_selectivity = len(linear_results) / len(self.names) if self.names else 0
+
+        # Perform Redis search with filter
+        cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
+        cmd_args.extend([str(x) for x in query_vec])
+        cmd_args.extend(['COUNT', 50, 'WITHSCORES', 'EF', ef, 'FILTER', filter_expr])
+        if filter_ef:
+            cmd_args.extend(['FILTER-EF', filter_ef])
+
+        start_time = time.time()
+        redis_results = self.redis.execute_command(*cmd_args)
+        query_time = time.time() - start_time
+
+        # Convert Redis results to dict
+        redis_items = {}
+        for i in range(0, len(redis_results), 2):
+            key = redis_results[i].decode() if isinstance(redis_results[i], bytes) else redis_results[i]
+            score = float(redis_results[i+1])
+            redis_items[key] = score
+
+        # Calculate metrics
+        recall = self.calculate_recall(redis_items.keys(), linear_results)
+        selectivity = len(redis_items) / len(self.names) if redis_items else 0
+
+        # Compare against the true selectivity from linear scan
+        assert abs(selectivity - true_selectivity) < 0.1, \
+            f"Redis selectivity {selectivity:.3f} differs significantly from ground truth {true_selectivity:.3f}"
+
+        # We expect high recall for standard parameters
+        if ef >= 500 and (filter_ef is None or filter_ef >= 1000):
+            try:
+                assert recall >= 0.7, \
+                    f"Low recall {recall:.2f} for filter '{filter_expr}'"
+            except AssertionError as e:
+                # Get items found in each set
+                redis_items_set = set(redis_items.keys())
+                linear_items_set = set(item[0] for item in linear_results)
+
+                # Find items in each set
+                only_in_redis = redis_items_set - linear_items_set
+                only_in_linear = linear_items_set - redis_items_set
+                in_both = redis_items_set & linear_items_set
+
+                # Build comprehensive debug message
+                debug = f"\nGround Truth: {len(linear_results)} matching items (total vectors: {len(self.vectors)})"
+                debug += f"\nRedis Found: {len(redis_items)} items with FILTER-EF: {filter_ef or 'default'}"
+                debug += f"\nItems in both sets: {len(in_both)} (recall: {recall:.4f})"
+                debug += f"\nItems only in Redis: {len(only_in_redis)}"
+                debug += f"\nItems only in Ground Truth: {len(only_in_linear)}"
+
+                # Show some example items from each set with their scores
+                if only_in_redis:
+                    debug += "\n\nTOP 5 ITEMS ONLY IN REDIS:"
+                    sorted_redis = sorted([(k, v) for k, v in redis_items.items()], key=lambda x: x[1], reverse=True)
+                    for i, (item, score) in enumerate(sorted_redis[:5]):
+                        if item in only_in_redis:
+                            debug += f"\n  {i+1}. {item} (Score: {score:.4f})"
+
+                            # Show attribute that should match filter
+                            attr = self.attribute_map.get(item)
+                            if attr:
+                                debug += f" - Attrs: {attr.get('category', 'N/A')}, Price: {attr.get('price', 'N/A')}"
+
+                if only_in_linear:
+                    debug += "\n\nTOP 5 ITEMS ONLY IN GROUND TRUTH:"
+                    for i, (item, score) in enumerate(linear_results[:5]):
+                        if item in only_in_linear:
+                            debug += f"\n  {i+1}. {item} (Score: {score:.4f})"
+
+                            # Show attribute that should match filter
+                            attr = self.attribute_map.get(item)
+                            if attr:
+                                debug += f" - Attrs: {attr.get('category', 'N/A')}, Price: {attr.get('price', 'N/A')}"
+
+                # Help identify parsing issues
+                debug += "\n\nPARSING CHECK:"
+                debug += f"\nRedis command: VSIM {self.test_key} VALUES {self.dim} [...] FILTER '{filter_expr}'"
+
+                # Check for WITHSCORES handling issues
+                if len(redis_results) > 0 and len(redis_results) % 2 == 0:
+                    debug += f"\nRedis returned {len(redis_results)} items (looks like item,score pairs)"
+                    debug += f"\nFirst few results: {redis_results[:4]}"
+
+                # Check the filter implementation
+                debug += "\n\nFILTER IMPLEMENTATION CHECK:"
+                debug += f"\nFilter expression: '{filter_expr}'"
+                debug += "\nSample attribute matches from attribute_map:"
+                count_matching = 0
+                for i, (name, attrs) in enumerate(self.attribute_map.items()):
+                    if attrs and self.matches_filter(attrs, filter_expr):
+                        count_matching += 1
+                        if i < 3:  # Show first 3 matches
+                            debug += f"\n  - {name}: {attrs}"
+                debug += f"\nTotal items matching filter in attribute_map: {count_matching}"
+
+                # Check if results array handling could be wrong
+                debug += "\n\nRESULT ARRAYS CHECK:"
+                if len(linear_results) >= 1:
+                    debug += f"\nlinear_results[0]: {linear_results[0]}"
+                    if isinstance(linear_results[0], tuple) and len(linear_results[0]) == 2:
+                        debug += " (correct tuple format: (name, score))"
+                    else:
+                        debug += " (UNEXPECTED FORMAT!)"
+
+                # Debug sort order
+                debug += "\n\nSORTING CHECK:"
+                if len(linear_results) >= 2:
+                    debug += f"\nGround truth first item score: {linear_results[0][1]}"
+                    debug += f"\nGround truth second item score: {linear_results[1][1]}"
+                    debug += f"\nCorrectly sorted by similarity? {linear_results[0][1] >= linear_results[1][1]}"
+
+                # Re-raise with detailed information
+                raise AssertionError(str(e) + debug)
+
+        return recall, selectivity, query_time, len(redis_items)
+
+    def test(self):
+        # Orchestrates the seven test phases; prints a report as it goes.
+        print(f"\nRunning comprehensive VSIM FILTER tests...")
+
+        # Create a larger dataset for testing
+        print(f"Creating dataset with {self.count} vectors and attributes...")
+        self.vectors, self.names, self.attribute_map = self.create_vectors_with_attributes(
+            self.test_key, self.count)
+
+        # ==== 1. Recall and Precision Testing ====
+        print("Testing recall for various filters...")
+
+        # Test basic filters with different selectivity
+        results = {}
+        results["category"] = self.test_recall_with_filter('.category == "electronics"')
+        results["price_high"] = self.test_recall_with_filter('.price > 1000')
+        results["in_stock"] = self.test_recall_with_filter('.in_stock')
+        results["rating"] = self.test_recall_with_filter('.rating >= 4')
+        results["complex1"] = self.test_recall_with_filter('.category == "electronics" and .price < 500')
+
+        print("Filter | Recall | Selectivity | Time (ms) | Results")
+        print("----------------------------------------------------")
+        for name, (recall, selectivity, time_ms, count) in results.items():
+            print(f"{name:7} | {recall:.3f} | {selectivity:.3f} | {time_ms*1000:.1f} | {count}")
+
+        # ==== 2. Filter Selectivity Performance ====
+        print("\nTesting filter selectivity performance...")
+
+        # High selectivity (very few matches)
+        high_sel_recall, _, high_sel_time, _ = self.test_recall_with_filter('.is_premium')
+
+        # Medium selectivity
+        med_sel_recall, _, med_sel_time, _ = self.test_recall_with_filter('.price > 100 and .price < 1000')
+
+        # Low selectivity (many matches)
+        low_sel_recall, _, low_sel_time, _ = self.test_recall_with_filter('.year > 2000')
+
+        print(f"High selectivity recall: {high_sel_recall:.3f}, time: {high_sel_time*1000:.1f}ms")
+        print(f"Med selectivity recall: {med_sel_recall:.3f}, time: {med_sel_time*1000:.1f}ms")
+        print(f"Low selectivity recall: {low_sel_recall:.3f}, time: {low_sel_time*1000:.1f}ms")
+
+        # ==== 3. FILTER-EF Parameter Testing ====
+        print("\nTesting FILTER-EF parameter...")
+
+        # Test with different FILTER-EF values
+        filter_expr = '.category == "electronics" and .price > 200'
+        ef_values = [100, 500, 2000, 5000]
+
+        print("FILTER-EF | Recall | Time (ms)")
+        print("-----------------------------")
+        for filter_ef in ef_values:
+            recall, _, query_time, _ = self.test_recall_with_filter(
+                filter_expr, ef=500, filter_ef=filter_ef)
+            print(f"{filter_ef:9} | {recall:.3f} | {query_time*1000:.1f}")
+
+        # Assert that higher FILTER-EF generally gives better recall
+        low_ef_recall, _, _, _ = self.test_recall_with_filter(filter_expr, filter_ef=100)
+        high_ef_recall, _, _, _ = self.test_recall_with_filter(filter_expr, filter_ef=5000)
+
+        # This might not always be true due to randomness, but generally holds
+        # We use a softer assertion to avoid flaky tests
+        assert high_ef_recall >= low_ef_recall * 0.8, \
+            f"Higher FILTER-EF should generally give better recall: {high_ef_recall:.3f} vs {low_ef_recall:.3f}"
+
+        # ==== 4. Complex Filter Expressions ====
+        print("\nTesting complex filter expressions...")
+
+        # Test a variety of complex expressions
+        complex_filters = [
+            '.price > 100 and (.category == "electronics" or .category == "furniture")',
+            '(.rating > 4 and .in_stock) or (.price < 50 and .views > 1000)',
+            '.category in ["electronics", "clothing"] and .price > 200 and .rating >= 3',
+            '(.category == "electronics" and .subcategory == "phones") or (.category == "furniture" and .price > 1000)',
+            '.year > 2010 and !(.price < 100) and .in_stock'
+        ]
+
+        print("Expression | Results | Time (ms)")
+        print("-----------------------------")
+        for i, expr in enumerate(complex_filters):
+            try:
+                _, _, query_time, result_count = self.test_recall_with_filter(expr)
+                print(f"Complex {i+1} | {result_count:7} | {query_time*1000:.1f}")
+            except Exception as e:
+                print(f"Complex {i+1} | Error: {str(e)}")
+
+        # ==== 5. Attribute Type Testing ====
+        print("\nTesting different attribute types...")
+
+        type_filters = [
+            ('.price > 500', "Numeric"),
+            ('.category == "books"', "String equality"),
+            ('.in_stock', "Boolean"),
+            ('.tags in ["sale", "new"]', "Array membership"),
+            ('.rating * 2 > 8', "Arithmetic")
+        ]
+
+        for expr, type_name in type_filters:
+            try:
+                _, _, query_time, result_count = self.test_recall_with_filter(expr)
+                print(f"{type_name:16} | {expr:30} | {result_count:5} results | {query_time*1000:.1f}ms")
+            except Exception as e:
+                print(f"{type_name:16} | {expr:30} | Error: {str(e)}")
+
+        # ==== 6. Filter + Count Interaction ====
+        print("\nTesting COUNT parameter with filters...")
+
+        filter_expr = '.category == "electronics"'
+        counts = [5, 20, 100]
+
+        for count in counts:
+            query_vec = generate_random_vector(self.dim)
+            cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
+            cmd_args.extend([str(x) for x in query_vec])
+            cmd_args.extend(['COUNT', count, 'WITHSCORES', 'FILTER', filter_expr])
+
+            results = self.redis.execute_command(*cmd_args)
+            result_count = len(results) // 2  # Divide by 2 because WITHSCORES returns pairs
+
+            # We expect result count to be at most the requested count
+            assert result_count <= count, f"Got {result_count} results with COUNT {count}"
+            print(f"COUNT {count:3} | Got {result_count:3} results")
+
+        # ==== 7. Edge Cases ====
+        print("\nTesting edge cases...")
+
+        # Test with no matching items
+        no_match_expr = '.category == "nonexistent_category"'
+        results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', self.dim,
+                                             *[str(x) for x in generate_random_vector(self.dim)],
+                                             'FILTER', no_match_expr)
+        assert len(results) == 0, f"Expected 0 results for non-matching filter, got {len(results)}"
+        print(f"No matching items: {len(results)} results (expected 0)")
+
+        # Test with invalid filter syntax
+        try:
+            self.redis.execute_command('VSIM', self.test_key, 'VALUES', self.dim,
+                                       *[str(x) for x in generate_random_vector(self.dim)],
+                                       'FILTER', '.category === "books"')  # Triple equals is invalid
+            assert False, "Expected error for invalid filter syntax"
+        except:
+            print("Invalid filter syntax correctly raised an error")
+
+        # Test with extremely long complex expression
+        long_expr = ' and '.join([f'.rating > {i/10}' for i in range(10)])
+        try:
+            results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', self.dim,
+                                                 *[str(x) for x in generate_random_vector(self.dim)],
+                                                 'FILTER', long_expr)
+            print(f"Long expression: {len(results)} results")
+        except Exception as e:
+            print(f"Long expression error: {str(e)}")
+
+        print("\nComprehensive VSIM FILTER tests completed successfully")
+
+
+class VSIMFilterSelectivityTest(TestCase):
+    """Benchmark VSIM FILTER at controlled selectivities: every vector has
+    an "age" attribute cycling 1..100, so '.age <= N' matches exactly N%
+    of the dataset."""
+
+    def getname(self):
+        """Human-readable test name used by the test runner."""
+        return "VSIM FILTER selectivity performance benchmark"
+
+    def estimated_runtime(self):
+        """Advisory runtime hint (seconds) for the test scheduler."""
+        return 8  # This test might take up to 8 seconds
+
+    def setup(self):
+        super().setup()
+        self.dim = 32
+        self.count = 10000
+        self.test_key = f"{self.test_key}:selectivity"  # Use a different key
+
+    def create_vector_with_age_attribute(self, name, age):
+        """Create a vector with a specific age attribute"""
+        vec = generate_random_vector(self.dim)
+        vec_bytes = struct.pack(f'{self.dim}f', *vec)
+        self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, name)
+        self.redis.execute_command('VSETATTR', self.test_key, name, json.dumps({"age": age}))
+
+    def test(self):
+        print("\nRunning VSIM FILTER selectivity benchmark...")
+
+        # Create a dataset where we control the exact selectivity
+        print(f"Creating controlled dataset with {self.count} vectors...")
+
+        # Create vectors with age attributes from 1 to 100
+        for i in range(self.count):
+            age = (i % 100) + 1  # Ages from 1 to 100
+            name = f"{self.test_key}:item:{i}"
+            self.create_vector_with_age_attribute(name, age)
+
+        # Create a query vector
+        query_vec = generate_random_vector(self.dim)
+
+        # Test filters with different selectivities
+        selectivities = [0.01, 0.05, 0.10, 0.25, 0.50, 0.75, 0.99]
+        # NOTE(review): this accumulator list is never appended to; the name
+        # is rebound to the raw VSIM reply inside the loop below.
+        results = []
+
+        print("\nSelectivity | Filter | Results | Time (ms)")
+        print("--------------------------------------------------")
+
+        for target_selectivity in selectivities:
+            # Calculate age threshold for desired selectivity
+            # For example, age <= 10 gives 10% selectivity
+            age_threshold = int(target_selectivity * 100)
+            filter_expr = f'.age <= {age_threshold}'
+
+            # Run query and measure time
+            start_time = time.time()
+            cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
+            cmd_args.extend([str(x) for x in query_vec])
+            cmd_args.extend(['COUNT', 100, 'FILTER', filter_expr])
+
+            results = self.redis.execute_command(*cmd_args)
+            query_time = time.time() - start_time
+
+            # NOTE(review): computed but unused — only printed counts and the
+            # assertions below consume the reply.
+            actual_selectivity = len(results) / min(100, int(target_selectivity * self.count))
+            print(f"{target_selectivity:.2f} | {filter_expr:15} | {len(results):7} | {query_time*1000:.1f}")
+
+            # Add assertion to ensure reasonable performance for different selectivities
+            # For very selective queries (1%), we might need more exploration
+            if target_selectivity <= 0.05:
+                # For very selective queries, ensure we can find some results
+                assert len(results) > 0, f"No results found for {filter_expr}"
+            else:
+                # For less selective queries, performance should be reasonable
+                assert query_time < 1.0, f"Query too slow: {query_time:.3f}s for {filter_expr}"
+
+        print("\nSelectivity benchmark completed successfully")
+
+
+class VSIMFilterComparisonTest(TestCase):
+    """Compare VSIM result counts and latency across FILTER-EF values for a
+    highly selective filter, expecting the largest EF to find at least as
+    many results as the smallest."""
+
+    def getname(self):
+        """Human-readable test name used by the test runner."""
+        return "VSIM FILTER EF parameter comparison"
+
+    def estimated_runtime(self):
+        """Advisory runtime hint (seconds) for the test scheduler."""
+        return 8  # This test might take up to 8 seconds
+
+    def setup(self):
+        super().setup()
+        self.dim = 32
+        self.count = 5000
+        self.test_key = f"{self.test_key}:efparams"  # Use a different key
+
+    def create_dataset(self):
+        """Create a dataset with specific attribute patterns for testing FILTER-EF"""
+        vectors = []
+        names = []
+
+        # Create vectors with category and quality score attributes
+        for i in range(self.count):
+            vec = generate_random_vector(self.dim)
+            name = f"{self.test_key}:item:{i}"
+
+            # Add vector to Redis
+            vec_bytes = struct.pack(f'{self.dim}f', *vec)
+            self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, name)
+
+            # Create attributes - we want a very selective filter
+            # Only 2% of items have category=premium AND quality>90
+            # NOTE(review): with premium at 10% and quality>90 at ~10%, the
+            # joint probability is ~1%, not 2% — comment looks off by 2x.
+            category = "premium" if random.random() < 0.1 else random.choice(["standard", "economy", "basic"])
+            quality = random.randint(1, 100)
+
+            attrs = {
+                "id": i,
+                "category": category,
+                "quality": quality
+            }
+
+            self.redis.execute_command('VSETATTR', self.test_key, name, json.dumps(attrs))
+            vectors.append(vec)
+            names.append(name)
+
+        return vectors, names
+
+    def test(self):
+        print("\nRunning VSIM FILTER-EF parameter comparison...")
+
+        # Create dataset
+        vectors, names = self.create_dataset()
+
+        # Create a selective filter that matches ~2% of items
+        filter_expr = '.category == "premium" and .quality > 90'
+
+        # Create query vector
+        query_vec = generate_random_vector(self.dim)
+
+        # Test different FILTER-EF values
+        ef_values = [50, 100, 500, 1000, 5000]
+        results = []
+
+        print("\nFILTER-EF | Results | Time (ms) | Notes")
+        print("---------------------------------------")
+
+        baseline_count = None
+
+        for ef in ef_values:
+            # Run query and measure time
+            start_time = time.time()
+            cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
+            cmd_args.extend([str(x) for x in query_vec])
+            cmd_args.extend(['COUNT', 100, 'FILTER', filter_expr, 'FILTER-EF', ef])
+
+            query_results = self.redis.execute_command(*cmd_args)
+            query_time = time.time() - start_time
+
+            # Set baseline for comparison
+            # NOTE(review): baseline_count is taken from the FIRST (lowest-EF)
+            # run, even though the table labels EF=5000 as "Baseline" below.
+            if baseline_count is None:
+                baseline_count = len(query_results)
+
+            recall_rate = len(query_results) / max(1, baseline_count) if baseline_count > 0 else 1.0
+
+            notes = ""
+            if ef == 5000:
+                notes = "Baseline"
+            elif recall_rate < 0.5:
+                notes = "Low recall!"
+
+            print(f"{ef:9} | {len(query_results):7} | {query_time*1000:.1f} | {notes}")
+            results.append((ef, len(query_results), query_time))
+
+        # If we have enough results at highest EF, check that recall improves with higher EF
+        if results[-1][1] >= 5:  # At least 5 results for highest EF
+            # Extract result counts
+            result_counts = [r[1] for r in results]
+
+            # The last result (highest EF) should typically find more results than the first (lowest EF)
+            # but we use a soft assertion to avoid flaky tests
+            assert result_counts[-1] >= result_counts[0], \
+                f"Higher FILTER-EF should find at least as many results: {result_counts[-1]} vs {result_counts[0]}"
+
+        print("\nFILTER-EF parameter comparison completed successfully")
diff --git a/examples/redis-unstable/modules/vector-sets/tests/large_scale.py b/examples/redis-unstable/modules/vector-sets/tests/large_scale.py
new file mode 100644
index 0000000..eac5dca
--- /dev/null
+++ b/examples/redis-unstable/modules/vector-sets/tests/large_scale.py
@@ -0,0 +1,56 @@
+from test import TestCase, fill_redis_with_vectors, generate_random_vector
+import random
+
class LargeScale(TestCase):
    def getname(self):
        return "Large Scale Comparison"

    def estimated_runtime(self):
        return 10

    def test(self):
        """Compare HNSW search against an exact linear scan on 20k vectors.

        At least 70% of the approximate top-k must match the exact top-k,
        and scores for the common items must agree closely.
        """
        dim = 300
        count = 20000
        k = 50

        # Seed the RNG so the generated dataset is reproducible run to run.
        random.seed(42)
        data = fill_redis_with_vectors(self.redis, self.test_key, count, dim)

        query_vec = generate_random_vector(dim)

        # Ask Redis for the approximate top-k with a generous exploration factor.
        cmd = ['VSIM', self.test_key, 'VALUES', dim]
        cmd += [str(component) for component in query_vec]
        cmd += ['COUNT', k, 'WITHSCORES', 'EF', 500]
        raw_reply = self.redis.execute_command(*cmd)

        # The WITHSCORES reply alternates item name and score; pair them up.
        redis_results = {
            raw_reply[idx].decode(): float(raw_reply[idx + 1])
            for idx in range(0, len(raw_reply), 2)
        }

        # Ground truth: exact k-NN computed by linear scan over the reference data.
        linear_items = dict(data.find_k_nearest(query_vec, k))

        common = set(redis_results) & set(linear_items)
        overlap = len(common)

        # Dump a side-by-side comparison only when we are about to fail.
        if overlap < k * 0.7:
            data.print_comparison({'items': redis_results, 'query_vector': query_vec}, k)

        assert overlap >= k * 0.7, \
            f"Expected at least 70% overlap in top {k} results, got {overlap/k*100:.1f}%"

        # Items found by both methods must carry nearly identical scores.
        for item in common:
            redis_score = redis_results[item]
            linear_score = linear_items[item]
            assert abs(redis_score - linear_score) < 0.01, \
                f"Score mismatch for {item}: Redis={redis_score:.3f} Linear={linear_score:.3f}"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/memory_usage.py b/examples/redis-unstable/modules/vector-sets/tests/memory_usage.py
new file mode 100644
index 0000000..d0f3f09
--- /dev/null
+++ b/examples/redis-unstable/modules/vector-sets/tests/memory_usage.py
@@ -0,0 +1,36 @@
+from test import TestCase, generate_random_vector
+import struct
+
class MemoryUsageTest(TestCase):
    def getname(self):
        return "[regression] MEMORY USAGE with attributes"

    def test(self):
        """MEMORY USAGE must work on a vector set and grow when attributes grow.

        Regression test for MEMORY USAGE failing on keys whose vector set has
        JSON attributes attached to some items.
        """
        # Two small vectors: one plain item, one carrying a JSON attribute.
        vec1 = generate_random_vector(4)
        vec2 = generate_random_vector(4)
        vec_bytes1 = struct.pack('4f', *vec1)
        vec_bytes2 = struct.pack('4f', *vec2)

        # Add vectors to the key, one with attribute, one without
        self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes1, f'{self.test_key}:item:1')
        self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes2, f'{self.test_key}:item:2', 'SETATTR', '{"color":"red"}')

        # Keep the try block narrow: only the MEMORY USAGE command itself may
        # legitimately raise here. The original wrapped the asserts too, which
        # re-raised assertion failures as "command failed" and hid the real cause.
        try:
            memory_usage = self.redis.execute_command('MEMORY', 'USAGE', self.test_key)
        except Exception as e:
            raise AssertionError(f"MEMORY USAGE command failed: {str(e)}")
        assert memory_usage > 0, "MEMORY USAGE should return a positive value"

        # Enlarge the attribute payload of item 1, then re-measure.
        self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:1', '{"color":"blue","size":10}')

        try:
            new_memory_usage = self.redis.execute_command('MEMORY', 'USAGE', self.test_key)
        except Exception as e:
            raise AssertionError(f"MEMORY USAGE command failed: {str(e)}")
        assert new_memory_usage > 0, "MEMORY USAGE should still return a positive value after setting attributes"

        # The larger attribute must be reflected in the reported footprint.
        assert new_memory_usage > memory_usage, "Memory usage should increase after adding attributes"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/node_update.py b/examples/redis-unstable/modules/vector-sets/tests/node_update.py
new file mode 100644
index 0000000..53aa2dd
--- /dev/null
+++ b/examples/redis-unstable/modules/vector-sets/tests/node_update.py
@@ -0,0 +1,85 @@
+from test import TestCase, generate_random_vector
+import struct
+import math
+import random
+
class VectorUpdateAndClusters(TestCase):
    """Verify that VADD on an existing item replaces its embedding and that
    the item is subsequently found near its new neighborhood by VSIM."""

    def getname(self):
        return "VADD vector update with cluster relocation"

    def estimated_runtime(self):
        return 2.0 # Should take around 2 seconds

    def generate_cluster_vector(self, base_vec, noise=0.1):
        """Generate a vector that's similar to base_vec with some noise."""
        vec = [x + random.gauss(0, noise) for x in base_vec]
        # Normalize to unit length so dot products are cosine similarities.
        norm = math.sqrt(sum(x*x for x in vec))
        return [x/norm for x in vec]

    def test(self):
        dim = 128
        vectors_per_cluster = 5000

        # Create two very different base vectors for our clusters.
        # cluster2 is the exact opposite direction of cluster1, so the two
        # clusters are maximally separated under cosine similarity.
        cluster1_base = generate_random_vector(dim)
        cluster2_base = [-x for x in cluster1_base] # Opposite direction

        # Add vectors from first cluster
        for i in range(vectors_per_cluster):
            vec = self.generate_cluster_vector(cluster1_base)
            vec_bytes = struct.pack(f'{dim}f', *vec)
            self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes,
                                     f'{self.test_key}:cluster1:{i}')

        # Add vectors from second cluster
        for i in range(vectors_per_cluster):
            vec = self.generate_cluster_vector(cluster2_base)
            vec_bytes = struct.pack(f'{dim}f', *vec)
            self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes,
                                     f'{self.test_key}:cluster2:{i}')

        # Pick a test vector from cluster1: this is the item we will move.
        test_key = f'{self.test_key}:cluster1:0'

        # Verify it's in cluster1 using VSIM: querying near the cluster1 base
        # should return mostly cluster1 members.
        initial_vec = self.generate_cluster_vector(cluster1_base)
        results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
                                           *[str(x) for x in initial_vec],
                                           'COUNT', 100, 'WITHSCORES')

        # Count how many cluster1 items are in top results (reply alternates
        # name, score, so item names sit at even indexes).
        cluster1_count = sum(1 for i in range(0, len(results), 2)
                           if b'cluster1' in results[i])
        assert cluster1_count > 80, "Initial clustering check failed"

        # Now update the test vector to be in cluster2 (low noise so it lands
        # very close to the cluster2 base).
        new_vec = self.generate_cluster_vector(cluster2_base, noise=0.05)
        vec_bytes = struct.pack(f'{dim}f', *new_vec)
        self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, test_key)

        # Verify the embedding was actually updated using VEMB
        emb_result = self.redis.execute_command('VEMB', self.test_key, test_key)
        updated_vec = [float(x) for x in emb_result]

        # Verify updated vector matches what we inserted: cosine similarity
        # between stored and submitted vector must be near 1 (allowing for
        # the server's quantization of stored embeddings).
        dot_product = sum(a*b for a,b in zip(updated_vec, new_vec))
        similarity = dot_product / (math.sqrt(sum(x*x for x in updated_vec)) *
                                  math.sqrt(sum(x*x for x in new_vec)))
        assert similarity > 0.9, "Vector was not properly updated"

        # Verify it's now in cluster2 using VSIM
        results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
                                           *[str(x) for x in cluster2_base],
                                           'COUNT', 100, 'WITHSCORES')

        # Verify our updated vector is among top results near cluster2's base.
        found = False
        for i in range(0, len(results), 2):
            if results[i].decode() == test_key:
                found = True
                similarity = float(results[i+1])
                assert similarity > 0.80, f"Updated vector has low similarity: {similarity}"
                break

        assert found, "Updated vector not found in cluster2 proximity"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/persistence.py b/examples/redis-unstable/modules/vector-sets/tests/persistence.py
new file mode 100644
index 0000000..79730f4
--- /dev/null
+++ b/examples/redis-unstable/modules/vector-sets/tests/persistence.py
@@ -0,0 +1,86 @@
+from test import TestCase, fill_redis_with_vectors, generate_random_vector
+import random
+
class HNSWPersistence(TestCase):
    def getname(self):
        return "HNSW Persistence"

    def estimated_runtime(self):
        return 30

    def _verify_results(self, key, dim, query_vec, reduced_dim=None):
        """Run a VSIM query against `key` and return a {item: score} dict.

        The query vector is always expressed in the original dimension `dim`;
        for sets created with REDUCE the server projects it down itself, so
        the command sent is identical either way. `reduced_dim` is kept for
        call-site symmetry only (the original had two byte-identical branches
        on it, which this version collapses).
        """
        k = 10
        args = ['VSIM', key, 'VALUES', dim]
        args.extend([str(x) for x in query_vec])
        args.extend(['COUNT', k, 'WITHSCORES'])
        results = self.redis.execute_command(*args)

        # WITHSCORES replies alternate item name and score. Note: use a local
        # name distinct from the `key` parameter (the original shadowed it).
        results_dict = {}
        for i in range(0, len(results), 2):
            item = results[i].decode()
            results_dict[item] = float(results[i+1])
        return results_dict

    def test(self):
        """Query results and scores must be identical before and after a
        DEBUG RELOAD round-trip, for both plain and REDUCE-projected sets."""
        # Setup dimensions
        dim = 128
        reduced_dim = 32
        count = 5000
        random.seed(42)  # Deterministic dataset

        # Create two datasets - one normal and one with dimension reduction
        normal_data = fill_redis_with_vectors(self.redis, f"{self.test_key}:normal", count, dim)
        projected_data = fill_redis_with_vectors(self.redis, f"{self.test_key}:projected",
                                                 count, dim, reduced_dim)

        # Generate query vectors we'll use before and after reload
        query_vec_normal = generate_random_vector(dim)
        query_vec_projected = generate_random_vector(dim)

        # Get initial results for both sets
        initial_normal = self._verify_results(f"{self.test_key}:normal",
                                              dim, query_vec_normal)
        initial_projected = self._verify_results(f"{self.test_key}:projected",
                                                 dim, query_vec_projected, reduced_dim)

        # Force Redis to save and reload the dataset
        self.redis.execute_command('DEBUG', 'RELOAD')

        # Verify results after reload
        reloaded_normal = self._verify_results(f"{self.test_key}:normal",
                                               dim, query_vec_normal)
        reloaded_projected = self._verify_results(f"{self.test_key}:projected",
                                                  dim, query_vec_projected, reduced_dim)

        # Verify normal vectors results
        assert len(initial_normal) == len(reloaded_normal), \
            "Normal vectors: Result count mismatch before/after reload"

        for key in initial_normal:
            assert key in reloaded_normal, f"Normal vectors: Missing item after reload: {key}"
            assert abs(initial_normal[key] - reloaded_normal[key]) < 0.0001, \
                f"Normal vectors: Score mismatch for {key}: " + \
                f"before={initial_normal[key]:.6f}, after={reloaded_normal[key]:.6f}"

        # Verify projected vectors results
        assert len(initial_projected) == len(reloaded_projected), \
            "Projected vectors: Result count mismatch before/after reload"

        for key in initial_projected:
            assert key in reloaded_projected, \
                f"Projected vectors: Missing item after reload: {key}"
            assert abs(initial_projected[key] - reloaded_projected[key]) < 0.0001, \
                f"Projected vectors: Score mismatch for {key}: " + \
                f"before={initial_projected[key]:.6f}, after={reloaded_projected[key]:.6f}"

        # Clean up the two auxiliary keys created by this test.
        self.redis.delete(f"{self.test_key}:normal")
        self.redis.delete(f"{self.test_key}:projected")
diff --git a/examples/redis-unstable/modules/vector-sets/tests/reduce.py b/examples/redis-unstable/modules/vector-sets/tests/reduce.py
new file mode 100644
index 0000000..e39164f
--- /dev/null
+++ b/examples/redis-unstable/modules/vector-sets/tests/reduce.py
@@ -0,0 +1,71 @@
+from test import TestCase, fill_redis_with_vectors, generate_random_vector
+
class Reduce(TestCase):
    def getname(self):
        return "Dimension Reduction"

    def estimated_runtime(self):
        return 0.2

    def test(self):
        """Random-projection REDUCE must preserve enough neighborhood
        structure that the reduced index's top-k overlaps the exact top-k
        computed on the original vectors."""
        original_dim = 100
        reduced_dim = 80
        count = 1000
        k = 50  # Number of nearest neighbors to check

        # Fill Redis with vectors using REDUCE and get reference data
        data = fill_redis_with_vectors(self.redis, self.test_key, count, original_dim, reduced_dim)

        # Verify dimension is reduced
        dim = self.redis.execute_command('VDIM', self.test_key)
        assert dim == reduced_dim, f"Expected dimension {reduced_dim}, got {dim}"

        # Generate query vector and get nearest neighbors using Redis
        query_vec = generate_random_vector(original_dim)
        redis_raw = self.redis.execute_command('VSIM', self.test_key, 'VALUES',
                                               original_dim, *[str(x) for x in query_vec],
                                               'COUNT', k, 'WITHSCORES')

        # Convert the flat WITHSCORES reply (name, score, name, score, ...) to a dict
        redis_results = {}
        for i in range(0, len(redis_raw), 2):
            key = redis_raw[i].decode()
            score = float(redis_raw[i+1])
            redis_results[key] = score

        # Ground truth: linear scan over the original (unreduced) vectors
        linear_results = data.find_k_nearest(query_vec, k)
        linear_items = {name: score for name, score in linear_results}

        # Compare overlap between reduced and non-reduced results
        redis_set = set(redis_results.keys())
        linear_set = set(linear_items.keys())
        overlap = len(redis_set & linear_set)
        overlap_ratio = overlap / k

        # With random projection, we expect some loss of accuracy but should
        # maintain at least some similarity structure.
        # Note that gaussian data is the worst case for this test, so in
        # real world practice things will be better.
        min_expected_overlap = 0.1  # At least 10% overlap in top-k

        # Print the comparison BEFORE asserting so the details are available
        # when the test fails. (The original placed this block after the
        # assert, which made it unreachable dead code.)
        if overlap_ratio < min_expected_overlap:
            print("\nLow overlap in results. Details:")
            print("\nTop results from linear scan (original vectors):")
            for name, score in linear_results:
                print(f"{name}: {score:.3f}")
            print("\nTop results from Redis (reduced vectors):")
            for item, score in sorted(redis_results.items(), key=lambda x: x[1], reverse=True):
                print(f"{item}: {score:.3f}")

        assert overlap_ratio >= min_expected_overlap, \
            f"Dimension reduction lost too much structure. Only {overlap_ratio*100:.1f}% overlap in top {k}"

        # For items that appear in both results, scores should be reasonably correlated
        for item in redis_set & linear_set:
            redis_score = redis_results[item]
            linear_score = linear_items[item]
            # Allow for some deviation due to dimensionality reduction
            assert abs(redis_score - linear_score) < 0.2, \
                f"Score mismatch too high for {item}: Redis={redis_score:.3f} Linear={linear_score:.3f}"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/replication.py b/examples/redis-unstable/modules/vector-sets/tests/replication.py
new file mode 100644
index 0000000..91dfdf7
--- /dev/null
+++ b/examples/redis-unstable/modules/vector-sets/tests/replication.py
@@ -0,0 +1,92 @@
+from test import TestCase, generate_random_vector
+import struct
+import random
+import time
+
class ComprehensiveReplicationTest(TestCase):
    def getname(self):
        return "Comprehensive Replication Test with mixed operations"

    def estimated_runtime(self):
        # This test will take longer than the default 100ms
        return 20.0  # 20 seconds estimate

    def test(self):
        """Replay a mixed VADD/VADD-CAS/VREM workload on the primary and
        verify the replica converges to the same cardinality, dimension and
        DEBUG DIGEST-VALUE."""
        # Setup replication between primary and replica
        assert self.setup_replication(), "Failed to setup replication"

        # Test parameters
        num_vectors = 5000
        vector_dim = 8
        delete_probability = 0.1
        cas_probability = 0.3

        # Keep track of added items for potential deletion
        added_items = []

        # Add vectors and occasionally delete
        for i in range(num_vectors):
            # Generate a random vector
            vec = generate_random_vector(vector_dim)
            vec_bytes = struct.pack(f'{vector_dim}f', *vec)
            item_name = f"{self.test_key}:item:{i}"

            # Decide whether to use CAS or not. CAS only makes sense once the
            # set is non-empty, hence the added_items check below.
            use_cas = random.random() < cas_probability

            if use_cas and added_items:
                try:
                    # Add with CAS (the original also picked a random existing
                    # item here but never used it - that dead code is removed).
                    result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes,
                                                        item_name, 'CAS')
                    # Only add to our list if actually added (CAS might fail)
                    if result == 1:
                        added_items.append(item_name)
                except Exception as e:
                    print(f"    CAS VADD failed: {e}")
            else:
                try:
                    # Add without CAS
                    result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, item_name)
                    # Only add to our list if actually added
                    if result == 1:
                        added_items.append(item_name)
                except Exception as e:
                    print(f"    VADD failed: {e}")

            # Randomly delete items (with 10% probability)
            if random.random() < delete_probability and added_items:
                try:
                    # Select a random item to delete
                    item_to_delete = random.choice(added_items)
                    # Delete the item using VREM (not VDEL)
                    self.redis.execute_command('VREM', self.test_key, item_to_delete)
                    # Remove from our list
                    added_items.remove(item_to_delete)
                except Exception as e:
                    print(f"    VREM failed: {e}")

        # Allow time for replication to complete
        time.sleep(2.0)

        # Verify final VCARD matches
        primary_card = self.redis.execute_command('VCARD', self.test_key)
        replica_card = self.replica.execute_command('VCARD', self.test_key)
        assert primary_card == replica_card, f"Final VCARD mismatch: primary={primary_card}, replica={replica_card}"

        # Verify VDIM matches
        primary_dim = self.redis.execute_command('VDIM', self.test_key)
        replica_dim = self.replica.execute_command('VDIM', self.test_key)
        assert primary_dim == replica_dim, f"VDIM mismatch: primary={primary_dim}, replica={replica_dim}"

        # Verify digests match using DEBUG DIGEST-VALUE (content hash of the key)
        primary_digest = self.redis.execute_command('DEBUG', 'DIGEST-VALUE', self.test_key)
        replica_digest = self.replica.execute_command('DEBUG', 'DIGEST-VALUE', self.test_key)
        assert primary_digest == replica_digest, f"Digest mismatch: primary={primary_digest}, replica={replica_digest}"

        # Print summary
        print(f"\n    Added and maintained {len(added_items)} vectors with dimension {vector_dim}")
        print(f"    Final vector count: {primary_card}")
        print(f"    Final digest: {primary_digest[0].decode()}")
diff --git a/examples/redis-unstable/modules/vector-sets/tests/threading_config.py b/examples/redis-unstable/modules/vector-sets/tests/threading_config.py
new file mode 100644
index 0000000..dfc931a
--- /dev/null
+++ b/examples/redis-unstable/modules/vector-sets/tests/threading_config.py
@@ -0,0 +1,249 @@
+from test import TestCase, generate_random_vector
+import struct
+
+
class ThreadingConfigTest(TestCase):
    """
    Test suite for vset-force-single-threaded-execution configuration.

    This test validates the behavior of VADD and VSIM commands under different
    threading configurations. The new configuration is MUTABLE and BINARY:
    - false (0): Multi-threaded execution enabled (default)
    - true (1): Force single-threaded execution

    Key behaviors tested:
    - VADD with and without CAS option under both threading modes
    - VSIM with and without NOTHREAD option under both threading modes
    - Configuration reading, validation, and runtime modification
    - Thread behavior switching (multi-threaded vs forced single-threaded)
    """

    def getname(self):
        return "vset-force-single-threaded-execution configuration testing"

    def estimated_runtime(self):
        return 0.5  # Updated for mutable config testing with mode switching

    def get_config_value(self):
        """Get current vset-force-single-threaded-execution config value.

        Returns the 'yes'/'no' string, or None when the config is missing or
        the command fails.
        """
        try:
            result = self.redis.execute_command('CONFIG', 'GET', 'vset-force-single-threaded-execution')
            if len(result) >= 2:
                # Redis returns 'yes'/'no' for boolean configs
                return result[1].decode() if isinstance(result[1], bytes) else result[1]
            return None
        except Exception:
            return None

    def set_config_value(self, value):
        """Set vset-force-single-threaded-execution from a boolean `value`.

        Returns True when Redis acknowledged the change with OK.
        """
        try:
            # Convert boolean to yes/no string
            str_value = 'yes' if value else 'no'
            result = self.redis.execute_command('CONFIG', 'SET', 'vset-force-single-threaded-execution', str_value)
            return result == b'OK' or result == 'OK'
        except Exception as e:
            print(f"Failed to set config: {e}")
            return False

    def test_config_access_and_mutability(self):
        """Test 1: Configuration access and mutability.

        Returns the initial setting as a boolean so the caller can restore it.
        """
        # Get initial value
        initial_value = self.get_config_value()
        assert initial_value is not None, "Should be able to read vset-force-single-threaded-execution config"
        assert initial_value in ['yes', 'no'], f"Config value should be yes/no, got {initial_value}"

        # Test mutability by toggling the value
        new_value = 'no' if initial_value == 'yes' else 'yes'
        assert self.set_config_value(new_value == 'yes'), "Should be able to change config value"

        # Verify the change
        current_value = self.get_config_value()
        assert current_value == new_value, f"Config should be {new_value}, got {current_value}"

        # Restore original value
        assert self.set_config_value(initial_value == 'yes'), "Should be able to restore original value"

        return initial_value == 'yes'

    def test_vadd_without_cas(self, force_single_threaded=False):
        """Test 2: VADD command without CAS option"""
        # Set threading mode
        self.set_config_value(force_single_threaded)

        # Clear test data to avoid dimension conflicts
        self.redis.delete(self.test_key)

        dim = 64
        vec = generate_random_vector(dim)
        vec_bytes = struct.pack(f'{dim}f', *vec)

        result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:1')
        assert result == 1, f"VADD should return 1 for new item, got {result}"

        # Verify the vector was added
        card = self.redis.execute_command('VCARD', self.test_key)
        assert card == 1, f"VCARD should return 1, got {card}"

    def test_vadd_with_cas(self, force_single_threaded=False):
        """Test 3: VADD command with CAS option"""
        # Set threading mode
        self.set_config_value(force_single_threaded)

        # Clear test data to avoid dimension conflicts
        self.redis.delete(self.test_key)

        dim = 64
        vec = generate_random_vector(dim)
        vec_bytes = struct.pack(f'{dim}f', *vec)

        # First insertion with CAS should succeed
        result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:cas', 'CAS')
        assert result == 1, f"First VADD with CAS should return 1, got {result}"

        # Second insertion of same item with CAS should return 0
        result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:cas', 'CAS')
        assert result == 0, f"Duplicate VADD with CAS should return 0, got {result}"

    def test_vsim_without_nothread(self, force_single_threaded=False):
        """Test 4: VSIM command without NOTHREAD"""
        # Set threading mode
        self.set_config_value(force_single_threaded)

        # Clear test data to avoid dimension conflicts
        self.redis.delete(self.test_key)

        dim = 64

        # Add test vectors
        for i in range(5):
            vec = generate_random_vector(dim)
            vec_bytes = struct.pack(f'{dim}f', *vec)
            self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:{i}')

        # Test VSIM without NOTHREAD
        query_vec = generate_random_vector(dim)
        args = ['VSIM', self.test_key, 'VALUES', dim] + [str(x) for x in query_vec] + ['COUNT', 3]
        result = self.redis.execute_command(*args)

        assert isinstance(result, list), f"VSIM should return a list, got {type(result)}"
        assert len(result) <= 3, f"VSIM should return at most 3 results, got {len(result)}"

    def test_vsim_with_nothread(self, force_single_threaded=False):
        """Test 5: VSIM command with NOTHREAD"""
        # Set threading mode
        self.set_config_value(force_single_threaded)

        dim = 64

        # Ensure we have vectors to search (use existing vectors from previous test)
        card = self.redis.execute_command('VCARD', self.test_key)
        if card == 0:
            # Add test vectors if none exist
            for i in range(5):
                vec = generate_random_vector(dim)
                vec_bytes = struct.pack(f'{dim}f', *vec)
                self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:{i}')

        # Test VSIM with NOTHREAD
        query_vec = generate_random_vector(dim)
        args = ['VSIM', self.test_key, 'VALUES', dim] + [str(x) for x in query_vec] + ['COUNT', 3, 'NOTHREAD']
        result = self.redis.execute_command(*args)

        assert isinstance(result, list), f"VSIM with NOTHREAD should return a list, got {type(result)}"
        assert len(result) <= 3, f"VSIM with NOTHREAD should return at most 3 results, got {len(result)}"

    def test_threading_mode_comparison(self):
        """Test 6: Compare behavior between threading modes"""
        # (The original declared an unused `dim` local here; removed.)

        # Clear test data
        self.redis.delete(self.test_key)

        # Test multi-threaded mode (default)
        self.set_config_value(False)  # Multi-threaded
        self.test_vadd_without_cas(False)
        self.test_vadd_with_cas(False)
        multi_threaded_card = self.redis.execute_command('VCARD', self.test_key)

        # Clear and test single-threaded mode
        self.redis.delete(self.test_key)
        self.set_config_value(True)  # Single-threaded
        self.test_vadd_without_cas(True)
        self.test_vadd_with_cas(True)
        single_threaded_card = self.redis.execute_command('VCARD', self.test_key)

        # Both modes should produce same results
        assert multi_threaded_card == single_threaded_card, \
            f"Both modes should produce same results: multi={multi_threaded_card}, single={single_threaded_card}"

    def test_nothread_override_behavior(self):
        """Test 7: NOTHREAD option should work regardless of config"""
        dim = 64

        # Test with both config modes
        for force_single in [False, True]:
            self.set_config_value(force_single)
            self.redis.delete(self.test_key)

            # Add test vectors
            for i in range(3):
                vec = generate_random_vector(dim)
                vec_bytes = struct.pack(f'{dim}f', *vec)
                self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:{i}')

            # NOTHREAD should work regardless of config
            query_vec = generate_random_vector(dim)
            args = ['VSIM', self.test_key, 'VALUES', dim] + [str(x) for x in query_vec] + ['COUNT', 2, 'NOTHREAD']
            result = self.redis.execute_command(*args)

            assert isinstance(result, list), f"NOTHREAD should work with force_single={force_single}"
            assert len(result) <= 2, f"NOTHREAD should return ≤2 results with force_single={force_single}"

    def test(self):
        """Main test method - runs all threading configuration tests"""
        # Get initial configuration
        initial_force_single = self.test_config_access_and_mutability()
        print(f"Initial vset-force-single-threaded-execution: {'yes' if initial_force_single else 'no'}")

        # Clear test data
        self.redis.delete(self.test_key)

        # Test both threading modes
        print("Testing multi-threaded mode...")
        self.set_config_value(False)
        self.test_vadd_without_cas(False)
        self.test_vadd_with_cas(False)
        self.test_vsim_without_nothread(False)
        self.test_vsim_with_nothread(False)

        print("Testing single-threaded mode...")
        self.set_config_value(True)
        self.test_vadd_without_cas(True)
        self.test_vadd_with_cas(True)
        self.test_vsim_without_nothread(True)
        self.test_vsim_with_nothread(True)

        # Test mode comparison and NOTHREAD override
        self.test_threading_mode_comparison()
        self.test_nothread_override_behavior()

        # Restore initial configuration
        self.set_config_value(initial_force_single)

        # Print summary
        self._print_test_summary(initial_force_single)

    def _print_test_summary(self, initial_force_single):
        """Print a summary of what was tested.

        Note: the f-prefix is kept only on lines with placeholders; the
        original used pointless f-strings on constant lines.
        """
        print("\nThreading Configuration Test Summary:")
        print("  Configuration: vset-force-single-threaded-execution")
        print("  Type: Boolean, Mutable")
        print(f"  Initial value: {'yes' if initial_force_single else 'no'}")
        print("  Tested modes: Both multi-threaded (no) and single-threaded (yes)")
        print("  VADD: Works correctly in both modes")
        print("  VADD with CAS: Works correctly in both modes")
        print("  VSIM: Works correctly in both modes")
        print("  NOTHREAD option: Overrides config in both modes")
        print("  Configuration mutability: ✅ Successfully changed at runtime")
        print("  All tests passed successfully!")
diff --git a/examples/redis-unstable/modules/vector-sets/tests/vadd_cas.py b/examples/redis-unstable/modules/vector-sets/tests/vadd_cas.py
new file mode 100644
index 0000000..3cb3508
--- /dev/null
+++ b/examples/redis-unstable/modules/vector-sets/tests/vadd_cas.py
@@ -0,0 +1,98 @@
+from test import TestCase, generate_random_vector
+import threading
+import struct
+import math
+import time
+import random
+from typing import List, Dict
+
class ConcurrentCASTest(TestCase):
    """Add 5000 vectors from 8 concurrent threads using VADD ... CAS, then
    verify cardinality and that every stored vector matches what was sent."""

    def getname(self):
        return "Concurrent VADD with CAS"

    def estimated_runtime(self):
        return 1.5

    def worker(self, vectors: List[List[float]], start_idx: int, end_idx: int,
               dim: int, results: Dict[str, bool]):
        """Worker thread that adds a subset of vectors using VADD CAS"""
        # Each worker owns the disjoint index range [start_idx, end_idx), so
        # no two threads ever write the same item name.
        for i in range(start_idx, end_idx):
            vec = vectors[i]
            name = f"{self.test_key}:item:{i}"
            vec_bytes = struct.pack(f'{dim}f', *vec)

            # Try to add the vector with CAS
            try:
                result = self.redis.execute_command('VADD', self.test_key, 'FP32',
                                                  vec_bytes, name, 'CAS')
                results[name] = (result == 1) # Store if it was actually added
            except Exception as e:
                results[name] = False
                print(f"Error adding {name}: {e}")

    def verify_vector_similarity(self, vec1: List[float], vec2: List[float]) -> float:
        """Calculate cosine similarity between two vectors"""
        dot_product = sum(a*b for a,b in zip(vec1, vec2))
        norm1 = math.sqrt(sum(x*x for x in vec1))
        norm2 = math.sqrt(sum(x*x for x in vec2))
        # Guard against zero-length vectors to avoid division by zero.
        return dot_product / (norm1 * norm2) if norm1 > 0 and norm2 > 0 else 0

    def test(self):
        # Test parameters
        dim = 128
        total_vectors = 5000
        num_threads = 8
        vectors_per_thread = total_vectors // num_threads

        # Generate all vectors upfront
        random.seed(42) # For reproducibility
        vectors = [generate_random_vector(dim) for _ in range(total_vectors)]

        # Prepare threads and results dictionary. Workers write disjoint keys,
        # and CPython dict item assignment is thread-safe, so no lock is needed.
        threads = []
        results = {} # Will store success/failure for each vector

        # Launch threads, giving the last one any remainder of the division.
        for i in range(num_threads):
            start_idx = i * vectors_per_thread
            end_idx = start_idx + vectors_per_thread if i < num_threads-1 else total_vectors
            thread = threading.Thread(target=self.worker,
                                   args=(vectors, start_idx, end_idx, dim, results))
            threads.append(thread)
            thread.start()

        # Wait for all threads to complete
        for thread in threads:
            thread.join()

        # Verify cardinality: every VADD targeted a unique name, so all must land.
        card = self.redis.execute_command('VCARD', self.test_key)
        assert card == total_vectors, \
            f"Expected {total_vectors} elements, but found {card}"

        # Verify each vector
        num_verified = 0
        for i in range(total_vectors):
            name = f"{self.test_key}:item:{i}"

            # Verify the item was successfully added
            assert results[name], f"Vector {name} was not successfully added"

            # Get the stored vector
            stored_vec_raw = self.redis.execute_command('VEMB', self.test_key, name)
            stored_vec = [float(x) for x in stored_vec_raw]

            # Verify vector dimensions
            assert len(stored_vec) == dim, \
                f"Stored vector dimension mismatch for {name}: {len(stored_vec)} != {dim}"

            # Calculate similarity with original vector (0.99 threshold allows
            # for the server's quantization of stored embeddings).
            similarity = self.verify_vector_similarity(vectors[i], stored_vec)
            assert similarity > 0.99, \
                f"Low similarity ({similarity}) for {name}"

            num_verified += 1

        # Final verification
        assert num_verified == total_vectors, \
            f"Only verified {num_verified} out of {total_vectors} vectors"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/vemb.py b/examples/redis-unstable/modules/vector-sets/tests/vemb.py
new file mode 100644
index 0000000..0f4cf77
--- /dev/null
+++ b/examples/redis-unstable/modules/vector-sets/tests/vemb.py
@@ -0,0 +1,41 @@
+from test import TestCase
+import struct
+import math
+
class VEMB(TestCase):
    def getname(self):
        return "VEMB Command"

    def test(self):
        """Insert one normalized vector through both input formats (FP32 blob
        and VALUES list) and check VEMB returns it for each, within
        quantization error; a missing item must yield nil."""
        dim = 4

        # Unit vector along the first axis, explicitly normalized.
        vec = [1, 0, 0, 0]
        norm = math.sqrt(sum(x*x for x in vec))
        vec = [x/norm for x in vec]

        # Insertion path 1: packed FP32 blob.
        packed = struct.pack(f'{dim}f', *vec)
        self.redis.execute_command('VADD', self.test_key, 'FP32', packed, f'{self.test_key}:item:1')

        # Insertion path 2: textual VALUES list.
        components = [str(x) for x in vec]
        self.redis.execute_command('VADD', self.test_key, 'VALUES', dim,
                                   *components, f'{self.test_key}:item:2')

        # Read both items back with VEMB.
        result1 = self.redis.execute_command('VEMB', self.test_key, f'{self.test_key}:item:1')
        result2 = self.redis.execute_command('VEMB', self.test_key, f'{self.test_key}:item:2')

        retrieved_vec1 = [float(x) for x in result1]
        retrieved_vec2 = [float(x) for x in result2]

        # Each retrieved component must match the original within a small
        # quantization tolerance.
        for i in range(dim):
            assert abs(vec[i] - retrieved_vec1[i]) < 0.01, \
                f"FP32 vector component {i} mismatch: expected {vec[i]}, got {retrieved_vec1[i]}"
            assert abs(vec[i] - retrieved_vec2[i]) < 0.01, \
                f"VALUES vector component {i} mismatch: expected {vec[i]}, got {retrieved_vec2[i]}"

        # An item that was never added must come back as nil.
        result = self.redis.execute_command('VEMB', self.test_key, 'nonexistent')
        assert result is None, "Non-existent item should return nil"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/vismember.py b/examples/redis-unstable/modules/vector-sets/tests/vismember.py
new file mode 100644
index 0000000..eabebca
--- /dev/null
+++ b/examples/redis-unstable/modules/vector-sets/tests/vismember.py
@@ -0,0 +1,47 @@
+from test import TestCase, generate_random_vector
+import struct
+
class BasicVISMEMBER(TestCase):
    """Exercise VISMEMBER against existing elements, a missing element,
    a missing key, and an element that has been removed with VREM."""

    def getname(self):
        return "VISMEMBER basic functionality"

    def test(self):
        # Names used throughout the scenario.
        item1 = f'{self.test_key}:item:1'
        item2 = f'{self.test_key}:item:2'
        nonexistent_item = f'{self.test_key}:item:nonexistent'

        # Populate the set with two random FP32 vectors.
        for item in (item1, item2):
            blob = struct.pack('4f', *generate_random_vector(4))
            self.redis.execute_command('VADD', self.test_key, 'FP32', blob, item)

        # Existing elements report membership.
        result1 = self.redis.execute_command('VISMEMBER', self.test_key, item1)
        assert result1 == 1, f"VISMEMBER should return 1 for existing item, got {result1}"

        result2 = self.redis.execute_command('VISMEMBER', self.test_key, item2)
        assert result2 == 1, f"VISMEMBER should return 1 for existing item, got {result2}"

        # Unknown element on an existing key.
        result3 = self.redis.execute_command('VISMEMBER', self.test_key, nonexistent_item)
        assert result3 == 0, f"VISMEMBER should return 0 for non-existent item, got {result3}"

        # Key that does not exist at all.
        nonexistent_key = f'{self.test_key}_nonexistent'
        result4 = self.redis.execute_command('VISMEMBER', nonexistent_key, item1)
        assert result4 == 0, f"VISMEMBER should return 0 for non-existent key, got {result4}"

        # Membership drops to 0 once the element is removed...
        self.redis.execute_command('VREM', self.test_key, item1)
        result5 = self.redis.execute_command('VISMEMBER', self.test_key, item1)
        assert result5 == 0, f"VISMEMBER should return 0 after element removal, got {result5}"

        # ...while the untouched element is still present.
        result6 = self.redis.execute_command('VISMEMBER', self.test_key, item2)
        assert result6 == 1, f"VISMEMBER should still return 1 for remaining item, got {result6}"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/vrand-ping-pong.py b/examples/redis-unstable/modules/vector-sets/tests/vrand-ping-pong.py
new file mode 100644
index 0000000..99d2e9a
--- /dev/null
+++ b/examples/redis-unstable/modules/vector-sets/tests/vrand-ping-pong.py
@@ -0,0 +1,35 @@
+from test import TestCase, generate_random_vector
+import struct
+
class VRANDMEMBERPingPongRegressionTest(TestCase):
    """Regression: with exactly two elements in the set, repeated
    VRANDMEMBER calls must not get stuck returning only one of them
    (the historical "ping-pong" bug)."""

    def getname(self):
        return "[regression] VRANDMEMBER ping-pong"

    def test(self):
        self.redis.delete(self.test_key)  # start from a clean key
        dim = 4

        # Insert exactly two members.
        for name in ("vec1", "vec2"):
            self.redis.execute_command(
                'VADD', self.test_key, 'VALUES', dim,
                *generate_random_vector(dim), name)

        # Sample many times; both members must show up.
        iterations = 100
        samples = [
            self.redis.execute_command('VRANDMEMBER', self.test_key).decode()
            for _ in range(iterations)
        ]

        unique_results = set(samples)
        assert len(unique_results) == 2, f"Ping-pong test failed: should have returned 2 unique members, but got {len(unique_results)}."
diff --git a/examples/redis-unstable/modules/vector-sets/tests/vrandmember.py b/examples/redis-unstable/modules/vector-sets/tests/vrandmember.py
new file mode 100644
index 0000000..ca9e006
--- /dev/null
+++ b/examples/redis-unstable/modules/vector-sets/tests/vrandmember.py
@@ -0,0 +1,55 @@
+from test import TestCase, generate_random_vector, fill_redis_with_vectors
+import struct
+
class VRANDMEMBERTest(TestCase):
    """Cover VRANDMEMBER with no count, a positive count (unique members),
    a count larger than the set, a negative count (duplicates allowed),
    and the zero-count edge case."""

    def getname(self):
        return "VRANDMEMBER basic functionality"

    def test(self):
        # Missing key: nil without a count, empty array with one.
        assert self.redis.execute_command('VRANDMEMBER', self.test_key) is None, \
            "VRANDMEMBER on non-existent key should return NULL"

        empty_reply = self.redis.execute_command('VRANDMEMBER', self.test_key, 5)
        assert isinstance(empty_reply, list) and len(empty_reply) == 0, "VRANDMEMBER with count on non-existent key should return empty array"

        # Populate the set.
        dim = 4
        count = 100
        data = fill_redis_with_vectors(self.redis, self.test_key, count, dim)

        # No count: a single member belonging to the set.
        single = self.redis.execute_command('VRANDMEMBER', self.test_key)
        assert single is not None, "VRANDMEMBER should return a random member"
        assert single.decode() in data.names, "Random member should be in the set"

        # Positive count: members must be unique.
        positive_count = 10
        batch = self.redis.execute_command('VRANDMEMBER', self.test_key, positive_count)
        assert isinstance(batch, list), "VRANDMEMBER with positive count should return an array"
        assert len(batch) == positive_count, f"Should return {positive_count} members"

        names = [b.decode() for b in batch]
        assert len(names) == len(set(names)), "Results should be unique with positive count"
        for item in names:
            assert item in data.names, "All returned items should be in the set"

        # Requesting more than the cardinality caps at the set size.
        overshoot = self.redis.execute_command('VRANDMEMBER', self.test_key, count + 10)
        assert len(overshoot) == count, "Should return only the available members when asking for more than exist"

        # Negative count: duplicates allowed, requested length honoured.
        negative_count = -20
        sampled = self.redis.execute_command('VRANDMEMBER', self.test_key, negative_count)
        assert isinstance(sampled, list), "VRANDMEMBER with negative count should return an array"
        assert len(sampled) == abs(negative_count), f"Should return {abs(negative_count)} members"

        # Every sampled element must still belong to the set.
        for item in (b.decode() for b in sampled):
            assert item in data.names, "All returned items should be in the set"

        # Zero count: empty array.
        zero_reply = self.redis.execute_command('VRANDMEMBER', self.test_key, 0)
        assert isinstance(zero_reply, list) and len(zero_reply) == 0, "VRANDMEMBER with count=0 should return empty array"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/vrange.py b/examples/redis-unstable/modules/vector-sets/tests/vrange.py
new file mode 100644
index 0000000..7e57588
--- /dev/null
+++ b/examples/redis-unstable/modules/vector-sets/tests/vrange.py
@@ -0,0 +1,113 @@
+from test import TestCase, generate_random_vector
+import struct
+
class BasicVRANGE(TestCase):
    """VRANGE lexicographical range queries.

    Covers inclusive/exclusive boundaries, the '-'/'+' infinite bounds,
    cursor-style iteration over the whole set, and edge cases (count 0,
    empty ranges, missing key).

    Fix: the "non-existent key" test previously queried the hard-coded
    global name 'nonexistent_key', which could collide with a real key on
    a shared server; it is now namespaced under self.test_key, matching
    the convention used by the other tests in this suite.
    """

    def getname(self):
        return "VRANGE basic functionality and iteration"

    def test(self):
        # Element names chosen so the expected sorted order is known.
        elements = [
            "apple", "apricot", "banana", "cherry", "date",
            "elderberry", "fig", "grape", "honeydew", "kiwi",
            "lemon", "mango", "nectarine", "orange", "papaya",
            "quince", "raspberry", "strawberry", "tangerine", "watermelon"
        ]

        # Add all elements to the vector set (the vector content itself is
        # irrelevant for VRANGE, which orders by element name).
        for elem in elements:
            vec = generate_random_vector(4)
            vec_bytes = struct.pack('4f', *vec)
            self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, elem)

        # Test 1: Basic range with inclusive boundaries
        result = self.redis.execute_command('VRANGE', self.test_key, '[apple', '[grape', '5')
        result = [r.decode() for r in result]
        assert result == ['apple', 'apricot', 'banana', 'cherry', 'date'], f"Expected first 5 elements from apple, got {result}"

        # Test 2: Exclusive start boundary
        result = self.redis.execute_command('VRANGE', self.test_key, '(apple', '[cherry', '10')
        result = [r.decode() for r in result]
        assert result == ['apricot', 'banana', 'cherry'], f"Expected elements after apple up to cherry inclusive, got {result}"

        # Test 3: Exclusive end boundary
        result = self.redis.execute_command('VRANGE', self.test_key, '[banana', '(cherry', '10')
        result = [r.decode() for r in result]
        assert result == ['banana'], f"Expected only banana (cherry excluded), got {result}"

        # Test 4: Using '-' for minimum element
        result = self.redis.execute_command('VRANGE', self.test_key, '-', '[banana', '10')
        result = [r.decode() for r in result]
        assert result[0] == 'apple', "Should start from the first element"
        assert result[-1] == 'banana', "Should end at banana"

        # Test 5: Using '+' for maximum element
        result = self.redis.execute_command('VRANGE', self.test_key, '[raspberry', '+', '10')
        result = [r.decode() for r in result]
        assert 'raspberry' in result and 'strawberry' in result and 'tangerine' in result and 'watermelon' in result, "Should include all elements from raspberry onwards"

        # Test 6: Full range with '-' and '+'
        result = self.redis.execute_command('VRANGE', self.test_key, '-', '+', '100')
        result = [r.decode() for r in result]
        assert len(result) == len(elements), f"Should return all {len(elements)} elements"
        assert result == sorted(elements), "Elements should be in lexicographical order"

        # Test 7: Iterator pattern - resume each batch exclusively after the
        # last element seen, and verify each element appears exactly once.
        seen = set()
        batch_size = 3
        current = '-'

        while True:
            if current == '-':
                # First iteration
                result = self.redis.execute_command('VRANGE', self.test_key, '-', '+', str(batch_size))
            else:
                # Subsequent iterations - exclusive start from last element
                result = self.redis.execute_command('VRANGE', self.test_key, f'({current}', '+', str(batch_size))

            result = [r.decode() for r in result]

            if not result:
                break

            # Check no duplicates in this batch
            for elem in result:
                assert elem not in seen, f"Element {elem} appeared more than once"
                seen.add(elem)

            # Update current to last element
            current = result[-1]

            # Break if we got less than requested (end of set)
            if len(result) < batch_size:
                break

        # Verify we saw all elements exactly once
        assert seen == set(elements), f"Iterator should visit all elements exactly once. Missing: {set(elements) - seen}, Extra: {seen - set(elements)}"

        # Test 8: Count of 0 returns empty array
        result = self.redis.execute_command('VRANGE', self.test_key, '-', '+', '0')
        assert result == [], f"Count of 0 should return empty array, got {result}"

        # Test 9: Range with no matching elements
        result = self.redis.execute_command('VRANGE', self.test_key, '[zebra', '+', '10')
        assert result == [], f"Range beyond all elements should return empty array, got {result}"

        # Test 10: Non-existent key, namespaced under test_key so a real key
        # with the same literal name cannot make this pass/fail spuriously.
        result = self.redis.execute_command('VRANGE', f'{self.test_key}:nonexistent', '-', '+', '10')
        assert result == [], f"Non-existent key should return empty array, got {result}"

        # Test 11: Partial word boundaries
        result = self.redis.execute_command('VRANGE', self.test_key, '[app', '[apr', '10')
        result = [r.decode() for r in result]
        assert 'apple' in result, "Should include 'apple' which starts with 'app'"
        assert 'apricot' not in result, "Should not include 'apricot' as it's >= 'apr'"

        # Test 12: Single element range
        result = self.redis.execute_command('VRANGE', self.test_key, '[cherry', '[cherry', '10')
        result = [r.decode() for r in result]
        assert result == ['cherry'], f"Inclusive single element range should return that element, got {result}"

        # Test 13: Empty range (start > end)
        result = self.redis.execute_command('VRANGE', self.test_key, '[grape', '[apple', '10')
        assert result == [], f"Range where start > end should return empty array, got {result}"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/vsim_limit_efsearch.py b/examples/redis-unstable/modules/vector-sets/tests/vsim_limit_efsearch.py
new file mode 100644
index 0000000..25b9689
--- /dev/null
+++ b/examples/redis-unstable/modules/vector-sets/tests/vsim_limit_efsearch.py
@@ -0,0 +1,32 @@
+from test import TestCase, generate_random_vector
+import struct
+
class VSIMLimitEFSearch(TestCase):
    """Validate the EF (search effort) argument bounds of VSIM: 1000000 is
    the maximum accepted value and anything above must be rejected with an
    "invalid EF" error.

    Fix: the original placed `assert False, "EF=1000001 should be rejected"`
    inside the `try` block, so that AssertionError was swallowed by
    `except Exception` and re-reported through the misleading "Expected EF
    validation error" message. The failure assert now lives in the `else`
    clause, where it only fires when no exception was raised.
    """

    def getname(self):
        return "VSIM Limit EF Search"

    def estimated_runtime(self):
        return 0.2

    def test(self):
        dim = 32
        vec = generate_random_vector(dim)
        vec_bytes = struct.pack(f'{dim}f', *vec)

        # Add test vector
        self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:1')

        query_vec = generate_random_vector(dim)

        # Test EF upper bound (should accept 1000000)
        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
                                            *[str(x) for x in query_vec], 'EF', 1000000)
        assert isinstance(result, list), "EF=1000000 should be accepted"

        # Test EF over limit (should reject > 1000000)
        try:
            self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
                                       *[str(x) for x in query_vec], 'EF', 1000001)
        except Exception as e:
            assert "invalid EF" in str(e), f"Expected EF validation error, got: {e}"
        else:
            assert False, "EF=1000001 should be rejected"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/with.py b/examples/redis-unstable/modules/vector-sets/tests/with.py
new file mode 100644
index 0000000..d14a23f
--- /dev/null
+++ b/examples/redis-unstable/modules/vector-sets/tests/with.py
@@ -0,0 +1,214 @@
+from test import TestCase, generate_random_vector
+import struct
+import json
+import random
+
class VSIMWithAttribs(TestCase):
    """Verify the VSIM reply shapes produced by WITHSCORES and WITHATTRIBS,
    individually and combined, on both protocol versions.

    NOTE(review): `self.redis` is assumed to be a RESP2 connection and
    `self.redis3` a RESP3 connection, based on the assertions below —
    confirm against the TestCase base class.
    """

    def setup(self):
        """Populate the set with self.count random vectors; every 5th item
        is deliberately left without an attribute so NULL attribute replies
        can be exercised."""
        super().setup()
        self.dim = 8       # vector dimensionality used throughout
        self.count = 20    # number of elements added to the set

        # Create vectors with attributes
        for i in range(self.count):
            vec = generate_random_vector(self.dim)
            vec_bytes = struct.pack(f'{self.dim}f', *vec)

            # Item name
            name = f"{self.test_key}:item:{i}"

            # Add to Redis
            self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, name)

            # Create and add attribute
            if i % 5 == 0:
                # Every 5th item has no attribute (for testing NULL responses)
                continue

            category = random.choice(["electronics", "furniture", "clothing"])
            price = random.randint(50, 1000)
            attrs = {"category": category, "price": price, "id": i}

            self.redis.execute_command('VSETATTR', self.test_key, name, json.dumps(attrs))

    def is_numeric(self, value):
        """Check if a value can be converted to float"""
        # Accepts native numbers plus str/bytes that parse as float; any
        # other type (or unparseable text) is reported as non-numeric.
        try:
            if isinstance(value, (int, float)):
                return True
            if isinstance(value, bytes):
                float(value.decode('utf-8'))
                return True
            if isinstance(value, str):
                float(value)
                return True
            return False
        except (ValueError, TypeError):
            return False

    def test(self):
        """Run six sub-tests comparing RESP2 (flat arrays) and RESP3 (maps)
        reply shapes for every WITHSCORES/WITHATTRIBS combination."""
        # Create query vector
        query_vec = generate_random_vector(self.dim)

        # Test 1: VSIM with no additional options (should be same for RESP2 and RESP3)
        cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
        cmd_args.extend([str(x) for x in query_vec])
        cmd_args.extend(['COUNT', 5])

        results_resp2 = self.redis.execute_command(*cmd_args)
        results_resp3 = self.redis3.execute_command(*cmd_args)

        # Both should return simple arrays of item names
        assert len(results_resp2) == 5, f"RESP2: Expected 5 results, got {len(results_resp2)}"
        assert len(results_resp3) == 5, f"RESP3: Expected 5 results, got {len(results_resp3)}"
        assert all(isinstance(item, bytes) for item in results_resp2), "RESP2: Results should be byte strings"
        assert all(isinstance(item, bytes) for item in results_resp3), "RESP3: Results should be byte strings"

        # Test 2: VSIM with WITHSCORES only
        cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
        cmd_args.extend([str(x) for x in query_vec])
        cmd_args.extend(['COUNT', 5, 'WITHSCORES'])

        results_resp2 = self.redis.execute_command(*cmd_args)
        results_resp3 = self.redis3.execute_command(*cmd_args)

        # RESP2: Should be a flat array alternating item, score
        assert len(results_resp2) == 10, f"RESP2: Expected 10 elements (5 items × 2), got {len(results_resp2)}"
        for i in range(0, len(results_resp2), 2):
            assert isinstance(results_resp2[i], bytes), f"RESP2: Item at {i} should be bytes"
            assert self.is_numeric(results_resp2[i+1]), f"RESP2: Score at {i+1} should be numeric"
            score = float(results_resp2[i+1]) if isinstance(results_resp2[i+1], bytes) else results_resp2[i+1]
            assert 0 <= score <= 1, f"RESP2: Score {score} should be between 0 and 1"

        # RESP3: Should be a dict/map with items as keys and scores as DIRECT values (not arrays)
        assert isinstance(results_resp3, dict), f"RESP3: Expected dict, got {type(results_resp3)}"
        assert len(results_resp3) == 5, f"RESP3: Expected 5 entries, got {len(results_resp3)}"
        for item, score in results_resp3.items():
            assert isinstance(item, bytes), f"RESP3: Key should be bytes"
            # Score should be a direct value, NOT an array
            assert not isinstance(score, list), f"RESP3: With single WITH option, value should not be array"
            assert self.is_numeric(score), f"RESP3: Score should be numeric, got {type(score)}"
            score_val = float(score) if isinstance(score, bytes) else score
            assert 0 <= score_val <= 1, f"RESP3: Score {score_val} should be between 0 and 1"

        # Test 3: VSIM with WITHATTRIBS only
        cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
        cmd_args.extend([str(x) for x in query_vec])
        cmd_args.extend(['COUNT', 5, 'WITHATTRIBS'])

        results_resp2 = self.redis.execute_command(*cmd_args)
        results_resp3 = self.redis3.execute_command(*cmd_args)

        # RESP2: Should be a flat array alternating item, attribute
        assert len(results_resp2) == 10, f"RESP2: Expected 10 elements (5 items × 2), got {len(results_resp2)}"
        for i in range(0, len(results_resp2), 2):
            assert isinstance(results_resp2[i], bytes), f"RESP2: Item at {i} should be bytes"
            attr = results_resp2[i+1]
            # Items created without attributes in setup() come back as None.
            assert attr is None or isinstance(attr, bytes), f"RESP2: Attribute at {i+1} should be None or bytes"
            if attr is not None:
                # Verify it's valid JSON
                json.loads(attr)

        # RESP3: Should be a dict/map with items as keys and attributes as DIRECT values (not arrays)
        assert isinstance(results_resp3, dict), f"RESP3: Expected dict, got {type(results_resp3)}"
        assert len(results_resp3) == 5, f"RESP3: Expected 5 entries, got {len(results_resp3)}"
        for item, attr in results_resp3.items():
            assert isinstance(item, bytes), f"RESP3: Key should be bytes"
            # Attribute should be a direct value, NOT an array
            assert not isinstance(attr, list), f"RESP3: With single WITH option, value should not be array"
            assert attr is None or isinstance(attr, bytes), f"RESP3: Attribute should be None or bytes"
            if attr is not None:
                # Verify it's valid JSON
                json.loads(attr)

        # Test 4: VSIM with both WITHSCORES and WITHATTRIBS
        cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
        cmd_args.extend([str(x) for x in query_vec])
        cmd_args.extend(['COUNT', 5, 'WITHSCORES', 'WITHATTRIBS'])

        results_resp2 = self.redis.execute_command(*cmd_args)
        results_resp3 = self.redis3.execute_command(*cmd_args)

        # RESP2: Should be a flat array with pattern: item, score, attribute
        assert len(results_resp2) == 15, f"RESP2: Expected 15 elements (5 items × 3), got {len(results_resp2)}"
        for i in range(0, len(results_resp2), 3):
            assert isinstance(results_resp2[i], bytes), f"RESP2: Item at {i} should be bytes"
            assert self.is_numeric(results_resp2[i+1]), f"RESP2: Score at {i+1} should be numeric"
            score = float(results_resp2[i+1]) if isinstance(results_resp2[i+1], bytes) else results_resp2[i+1]
            assert 0 <= score <= 1, f"RESP2: Score {score} should be between 0 and 1"
            attr = results_resp2[i+2]
            assert attr is None or isinstance(attr, bytes), f"RESP2: Attribute at {i+2} should be None or bytes"

        # RESP3: Should be a dict where each value is a 2-element array [score, attribute]
        assert isinstance(results_resp3, dict), f"RESP3: Expected dict, got {type(results_resp3)}"
        assert len(results_resp3) == 5, f"RESP3: Expected 5 entries, got {len(results_resp3)}"
        for item, value in results_resp3.items():
            assert isinstance(item, bytes), f"RESP3: Key should be bytes"
            # With BOTH options, value MUST be an array
            assert isinstance(value, list), f"RESP3: With both WITH options, value should be a list, got {type(value)}"
            assert len(value) == 2, f"RESP3: Value should have 2 elements [score, attr], got {len(value)}"

            score, attr = value
            assert self.is_numeric(score), f"RESP3: Score should be numeric"
            score_val = float(score) if isinstance(score, bytes) else score
            assert 0 <= score_val <= 1, f"RESP3: Score {score_val} should be between 0 and 1"
            assert attr is None or isinstance(attr, bytes), f"RESP3: Attribute should be None or bytes"

        # Test 5: Verify consistency - same items returned in same order
        cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
        cmd_args.extend([str(x) for x in query_vec])
        cmd_args.extend(['COUNT', 5, 'WITHSCORES', 'WITHATTRIBS'])

        results_resp2 = self.redis.execute_command(*cmd_args)
        results_resp3 = self.redis3.execute_command(*cmd_args)

        # Extract items from RESP2 (every 3rd element starting from 0)
        items_resp2 = [results_resp2[i] for i in range(0, len(results_resp2), 3)]

        # Extract items from RESP3 (keys of the dict)
        items_resp3 = list(results_resp3.keys())

        # Verify same items returned
        assert set(items_resp2) == set(items_resp3), "RESP2 and RESP3 should return the same items"

        # Build a mapping from items to scores and attributes for comparison
        data_resp2 = {}
        for i in range(0, len(results_resp2), 3):
            item = results_resp2[i]
            score = float(results_resp2[i+1]) if isinstance(results_resp2[i+1], bytes) else results_resp2[i+1]
            attr = results_resp2[i+2]
            data_resp2[item] = (score, attr)

        data_resp3 = {}
        for item, value in results_resp3.items():
            score = float(value[0]) if isinstance(value[0], bytes) else value[0]
            attr = value[1]
            data_resp3[item] = (score, attr)

        # Verify scores and attributes match for each item
        for item in data_resp2:
            score_resp2, attr_resp2 = data_resp2[item]
            score_resp3, attr_resp3 = data_resp3[item]

            # Tolerance covers float formatting differences between protocols.
            assert abs(score_resp2 - score_resp3) < 0.0001, \
                f"Scores for {item} don't match: RESP2={score_resp2}, RESP3={score_resp3}"
            assert attr_resp2 == attr_resp3, \
                f"Attributes for {item} don't match: RESP2={attr_resp2}, RESP3={attr_resp3}"

        # Test 6: Test ordering of WITHSCORES and WITHATTRIBS doesn't matter
        cmd_args1 = ['VSIM', self.test_key, 'VALUES', self.dim]
        cmd_args1.extend([str(x) for x in query_vec])
        cmd_args1.extend(['COUNT', 3, 'WITHSCORES', 'WITHATTRIBS'])

        cmd_args2 = ['VSIM', self.test_key, 'VALUES', self.dim]
        cmd_args2.extend([str(x) for x in query_vec])
        cmd_args2.extend(['COUNT', 3, 'WITHATTRIBS', 'WITHSCORES'])  # Reversed order

        results1_resp3 = self.redis3.execute_command(*cmd_args1)
        results2_resp3 = self.redis3.execute_command(*cmd_args2)

        # Both should return the same structure
        assert results1_resp3 == results2_resp3, "Order of WITH options shouldn't matter"