summaryrefslogtreecommitdiff
path: root/examples/redis-unstable/modules/vector-sets/tests
diff options
context:
space:
mode:
Diffstat (limited to 'examples/redis-unstable/modules/vector-sets/tests')
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/basic_commands.py21
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/basic_similarity.py35
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/concurrent_vadd_cas_del_vsim.py156
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/concurrent_vsim_and_del.py48
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/debug_digest.py39
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/deletion.py173
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/dimension_validation.py67
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/epsilon.py77
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/evict_empty.py27
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/filter_expr.py242
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/filter_int.py668
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/large_scale.py56
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/memory_usage.py36
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/node_update.py85
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/persistence.py86
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/reduce.py71
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/replication.py92
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/threading_config.py249
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/vadd_cas.py98
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/vemb.py41
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/vismember.py47
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/vrand-ping-pong.py35
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/vrandmember.py55
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/vrange.py113
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/vsim_limit_efsearch.py32
-rw-r--r--examples/redis-unstable/modules/vector-sets/tests/with.py214
26 files changed, 0 insertions, 2863 deletions
diff --git a/examples/redis-unstable/modules/vector-sets/tests/basic_commands.py b/examples/redis-unstable/modules/vector-sets/tests/basic_commands.py
deleted file mode 100644
index 8481a36..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/basic_commands.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from test import TestCase, generate_random_vector
-import struct
-
-class BasicCommands(TestCase):
- def getname(self):
- return "VADD, VDIM, VCARD basic usage"
-
- def test(self):
- # Test VADD
- vec = generate_random_vector(4)
- vec_bytes = struct.pack('4f', *vec)
- result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:1')
- assert result == 1, "VADD should return 1 for first item"
-
- # Test VDIM
- dim = self.redis.execute_command('VDIM', self.test_key)
- assert dim == 4, f"VDIM should return 4, got {dim}"
-
- # Test VCARD
- card = self.redis.execute_command('VCARD', self.test_key)
- assert card == 1, f"VCARD should return 1, got {card}"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/basic_similarity.py b/examples/redis-unstable/modules/vector-sets/tests/basic_similarity.py
deleted file mode 100644
index 11c3c9b..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/basic_similarity.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from test import TestCase
-
-class BasicSimilarity(TestCase):
- def getname(self):
- return "VSIM reported distance makes sense with 4D vectors"
-
- def test(self):
- # Add two very similar vectors, one different
- vec1 = [1, 0, 0, 0]
- vec2 = [0.99, 0.01, 0, 0]
- vec3 = [0.1, 1, -1, 0.5]
-
- # Add vectors using VALUES format
- self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1], f'{self.test_key}:item:1')
- self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec2], f'{self.test_key}:item:2')
- self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec3], f'{self.test_key}:item:3')
-
- # Query similarity with vec1
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1], 'WITHSCORES')
-
- # Convert results to dictionary
- results_dict = {}
- for i in range(0, len(result), 2):
- key = result[i].decode()
- score = float(result[i+1])
- results_dict[key] = score
-
- # Verify results
- assert results_dict[f'{self.test_key}:item:1'] > 0.99, "Self-similarity should be very high"
- assert results_dict[f'{self.test_key}:item:2'] > 0.99, "Similar vector should have high similarity"
- assert results_dict[f'{self.test_key}:item:3'] < 0.8, "Not very similar vector should have low similarity"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/concurrent_vadd_cas_del_vsim.py b/examples/redis-unstable/modules/vector-sets/tests/concurrent_vadd_cas_del_vsim.py
deleted file mode 100644
index f4b3a12..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/concurrent_vadd_cas_del_vsim.py
+++ /dev/null
@@ -1,156 +0,0 @@
-from test import TestCase, generate_random_vector
-import threading
-import time
-import struct
-
-class ThreadingStressTest(TestCase):
- def getname(self):
- return "Concurrent VADD/DEL/VSIM operations stress test"
-
- def estimated_runtime(self):
- return 10 # Test runs for 10 seconds
-
- def test(self):
- # Constants - easy to modify if needed
- NUM_VADD_THREADS = 10
- NUM_VSIM_THREADS = 1
- NUM_DEL_THREADS = 1
- TEST_DURATION = 10 # seconds
- VECTOR_DIM = 100
- DEL_INTERVAL = 1 # seconds
-
- # Shared flags and state
- stop_event = threading.Event()
- error_list = []
- error_lock = threading.Lock()
-
- def log_error(thread_name, error):
- with error_lock:
- error_list.append(f"{thread_name}: {error}")
-
- def vadd_worker(thread_id):
- """Thread function to perform VADD operations"""
- thread_name = f"VADD-{thread_id}"
- try:
- vector_count = 0
- while not stop_event.is_set():
- try:
- # Generate random vector
- vec = generate_random_vector(VECTOR_DIM)
- vec_bytes = struct.pack(f'{VECTOR_DIM}f', *vec)
-
- # Add vector with CAS option
- self.redis.execute_command(
- 'VADD',
- self.test_key,
- 'FP32',
- vec_bytes,
- f'{self.test_key}:item:{thread_id}:{vector_count}',
- 'CAS'
- )
-
- vector_count += 1
-
- # Small sleep to reduce CPU pressure
- if vector_count % 10 == 0:
- time.sleep(0.001)
- except Exception as e:
- log_error(thread_name, f"Error: {str(e)}")
- time.sleep(0.1) # Slight backoff on error
- except Exception as e:
- log_error(thread_name, f"Thread error: {str(e)}")
-
- def del_worker():
- """Thread function that deletes the key periodically"""
- thread_name = "DEL"
- try:
- del_count = 0
- while not stop_event.is_set():
- try:
- # Sleep first, then delete
- time.sleep(DEL_INTERVAL)
- if stop_event.is_set():
- break
-
- self.redis.delete(self.test_key)
- del_count += 1
- except Exception as e:
- log_error(thread_name, f"Error: {str(e)}")
- except Exception as e:
- log_error(thread_name, f"Thread error: {str(e)}")
-
- def vsim_worker(thread_id):
- """Thread function to perform VSIM operations"""
- thread_name = f"VSIM-{thread_id}"
- try:
- search_count = 0
- while not stop_event.is_set():
- try:
- # Generate query vector
- query_vec = generate_random_vector(VECTOR_DIM)
- query_str = [str(x) for x in query_vec]
-
- # Perform similarity search
- args = ['VSIM', self.test_key, 'VALUES', VECTOR_DIM]
- args.extend(query_str)
- args.extend(['COUNT', 10])
- self.redis.execute_command(*args)
-
- search_count += 1
-
- # Small sleep to reduce CPU pressure
- if search_count % 10 == 0:
- time.sleep(0.005)
- except Exception as e:
- # Don't log empty array errors, as they're expected when key doesn't exist
- if "empty array" not in str(e).lower():
- log_error(thread_name, f"Error: {str(e)}")
- time.sleep(0.1) # Slight backoff on error
- except Exception as e:
- log_error(thread_name, f"Thread error: {str(e)}")
-
- # Start all threads
- threads = []
-
- # VADD threads
- for i in range(NUM_VADD_THREADS):
- thread = threading.Thread(target=vadd_worker, args=(i,))
- thread.start()
- threads.append(thread)
-
- # DEL threads
- for _ in range(NUM_DEL_THREADS):
- thread = threading.Thread(target=del_worker)
- thread.start()
- threads.append(thread)
-
- # VSIM threads
- for i in range(NUM_VSIM_THREADS):
- thread = threading.Thread(target=vsim_worker, args=(i,))
- thread.start()
- threads.append(thread)
-
- # Let the test run for the specified duration
- time.sleep(TEST_DURATION)
-
- # Signal all threads to stop
- stop_event.set()
-
- # Wait for threads to finish
- for thread in threads:
- thread.join(timeout=2.0)
-
- # Check if Redis is still responsive
- try:
- ping_result = self.redis.ping()
- assert ping_result, "Redis did not respond to PING after stress test"
- except Exception as e:
- assert False, f"Redis connection failed after stress test: {str(e)}"
-
- # Report any errors for diagnosis, but don't fail the test unless PING fails
- if error_list:
- error_count = len(error_list)
- print(f"\nEncountered {error_count} errors during stress test.")
- print("First 5 errors:")
- for error in error_list[:5]:
- print(f"- {error}")
diff --git a/examples/redis-unstable/modules/vector-sets/tests/concurrent_vsim_and_del.py b/examples/redis-unstable/modules/vector-sets/tests/concurrent_vsim_and_del.py
deleted file mode 100644
index 9bbf011..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/concurrent_vsim_and_del.py
+++ /dev/null
@@ -1,48 +0,0 @@
-from test import TestCase, fill_redis_with_vectors, generate_random_vector
-import threading, time
-
-class ConcurrentVSIMAndDEL(TestCase):
- def getname(self):
- return "Concurrent VSIM and DEL operations"
-
- def estimated_runtime(self):
- return 2
-
- def test(self):
- # Fill the key with 5000 random vectors
- dim = 128
- count = 5000
- fill_redis_with_vectors(self.redis, self.test_key, count, dim)
-
- # List to store results from threads
- thread_results = []
-
- def vsim_thread():
- """Thread function to perform VSIM operations until the key is deleted"""
- while True:
- query_vec = generate_random_vector(dim)
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
- *[str(x) for x in query_vec], 'COUNT', 10)
- if not result:
- # Empty array detected, key is deleted
- thread_results.append(True)
- break
-
- # Start multiple threads to perform VSIM operations
- threads = []
- for _ in range(4): # Start 4 threads
- t = threading.Thread(target=vsim_thread)
- t.start()
- threads.append(t)
-
- # Delete the key while threads are still running
- time.sleep(1)
- self.redis.delete(self.test_key)
-
- # Wait for all threads to finish (they will exit once they detect the key is deleted)
- for t in threads:
- t.join()
-
- # Verify that all threads detected an empty array or error
- assert len(thread_results) == len(threads), "Not all threads detected the key deletion"
- assert all(thread_results), "Some threads did not detect an empty array or error after DEL"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/debug_digest.py b/examples/redis-unstable/modules/vector-sets/tests/debug_digest.py
deleted file mode 100644
index 78f06d8..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/debug_digest.py
+++ /dev/null
@@ -1,39 +0,0 @@
-from test import TestCase, generate_random_vector
-import struct
-
-class DebugDigestTest(TestCase):
- def getname(self):
- return "[regression] DEBUG DIGEST-VALUE with attributes"
-
- def test(self):
- # Generate random vectors
- vec1 = generate_random_vector(4)
- vec2 = generate_random_vector(4)
- vec_bytes1 = struct.pack('4f', *vec1)
- vec_bytes2 = struct.pack('4f', *vec2)
-
- # Add vectors to the key, one with attribute, one without
- self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes1, f'{self.test_key}:item:1')
- self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes2, f'{self.test_key}:item:2', 'SETATTR', '{"color":"red"}')
-
- # Call DEBUG DIGEST-VALUE on the key
- try:
- digest1 = self.redis.execute_command('DEBUG', 'DIGEST-VALUE', self.test_key)
- assert digest1 is not None, "DEBUG DIGEST-VALUE should return a value"
-
- # Change attribute and verify digest changes
- self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:2', '{"color":"blue"}')
-
- digest2 = self.redis.execute_command('DEBUG', 'DIGEST-VALUE', self.test_key)
- assert digest2 is not None, "DEBUG DIGEST-VALUE should return a value after attribute change"
- assert digest1 != digest2, "Digest should change when an attribute is modified"
-
- # Remove attribute and verify digest changes again
- self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:2', '')
-
- digest3 = self.redis.execute_command('DEBUG', 'DIGEST-VALUE', self.test_key)
- assert digest3 is not None, "DEBUG DIGEST-VALUE should return a value after attribute removal"
- assert digest2 != digest3, "Digest should change when an attribute is removed"
-
- except Exception as e:
- raise AssertionError(f"DEBUG DIGEST-VALUE command failed: {str(e)}")
diff --git a/examples/redis-unstable/modules/vector-sets/tests/deletion.py b/examples/redis-unstable/modules/vector-sets/tests/deletion.py
deleted file mode 100644
index cb91959..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/deletion.py
+++ /dev/null
@@ -1,173 +0,0 @@
-from test import TestCase, fill_redis_with_vectors, generate_random_vector
-import random
-
-"""
-A note about this test:
-It was experimentally tried to modify hnsw.c in order to
-avoid calling hnsw_reconnect_nodes(). In this case, the test
-fails very often with EF set to 250, while it hardly
-fails at all with the same parameters if hnsw_reconnect_nodes()
-is called.
-
-Note that for the nature of the test (it is very strict) it can
-still fail from time to time, without this signaling any
-actual bug.
-"""
-
-class VREM(TestCase):
- def getname(self):
- return "Deletion and graph state after deletion"
-
- def estimated_runtime(self):
- return 2.0
-
- def format_neighbors_with_scores(self, links_result, old_links=None, items_to_remove=None):
- """Format neighbors with their similarity scores and status indicators"""
- if not links_result:
- return "No neighbors"
-
- output = []
- for level, neighbors in enumerate(links_result):
- level_num = len(links_result) - level - 1
- output.append(f"Level {level_num}:")
-
- # Get neighbors and scores
- neighbors_with_scores = []
- for i in range(0, len(neighbors), 2):
- neighbor = neighbors[i].decode() if isinstance(neighbors[i], bytes) else neighbors[i]
- score = float(neighbors[i+1]) if i+1 < len(neighbors) else None
- status = ""
-
- # For old links, mark deleted ones
- if items_to_remove and neighbor in items_to_remove:
- status = " [lost]"
- # For new links, mark newly added ones
- elif old_links is not None:
- # Check if this neighbor was in the old links at this level
- was_present = False
- if old_links and level < len(old_links):
- old_neighbors = [n.decode() if isinstance(n, bytes) else n
- for n in old_links[level]]
- was_present = neighbor in old_neighbors
- if not was_present:
- status = " [gained]"
-
- if score is not None:
- neighbors_with_scores.append(f"{len(neighbors_with_scores)+1}. {neighbor} ({score:.6f}){status}")
- else:
- neighbors_with_scores.append(f"{len(neighbors_with_scores)+1}. {neighbor}{status}")
-
- output.extend([" " + n for n in neighbors_with_scores])
- return "\n".join(output)
-
- def test(self):
- # 1. Fill server with random elements
- dim = 128
- count = 5000
- data = fill_redis_with_vectors(self.redis, self.test_key, count, dim)
-
- # 2. Do VSIM to get 200 items
- query_vec = generate_random_vector(dim)
- results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
- *[str(x) for x in query_vec],
- 'COUNT', 200, 'WITHSCORES')
-
- # Convert results to list of (item, score) pairs, sorted by score
- items = []
- for i in range(0, len(results), 2):
- item = results[i].decode()
- score = float(results[i+1])
- items.append((item, score))
- items.sort(key=lambda x: x[1], reverse=True) # Sort by similarity
-
- # Store the graph structure for all items before deletion
- neighbors_before = {}
- for item, _ in items:
- links = self.redis.execute_command('VLINKS', self.test_key, item, 'WITHSCORES')
- if links: # Some items might not have links
- neighbors_before[item] = links
-
- # 3. Remove 100 random items
- items_to_remove = set(item for item, _ in random.sample(items, 100))
- # Keep track of top 10 non-removed items
- top_remaining = []
- for item, score in items:
- if item not in items_to_remove:
- top_remaining.append((item, score))
- if len(top_remaining) == 10:
- break
-
- # Remove the items
- for item in items_to_remove:
- result = self.redis.execute_command('VREM', self.test_key, item)
- assert result == 1, f"VREM failed to remove {item}"
-
- # 4. Do VSIM again with same vector
- new_results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
- *[str(x) for x in query_vec],
- 'COUNT', 200, 'WITHSCORES',
- 'EF', 500)
-
- # Convert new results to dict of item -> score
- new_scores = {}
- for i in range(0, len(new_results), 2):
- item = new_results[i].decode()
- score = float(new_results[i+1])
- new_scores[item] = score
-
- failure = False
- failed_item = None
- failed_reason = None
- # 5. Verify all top 10 non-removed items are still found with similar scores
- for item, old_score in top_remaining:
- if item not in new_scores:
- failure = True
- failed_item = item
- failed_reason = "missing"
- break
- new_score = new_scores[item]
- if abs(new_score - old_score) >= 0.01:
- failure = True
- failed_item = item
- failed_reason = f"score changed: {old_score:.6f} -> {new_score:.6f}"
- break
-
- if failure:
- print("\nTest failed!")
- print(f"Problem with item: {failed_item} ({failed_reason})")
-
- print("\nOriginal neighbors (with similarity scores):")
- if failed_item in neighbors_before:
- print(self.format_neighbors_with_scores(
- neighbors_before[failed_item],
- items_to_remove=items_to_remove))
- else:
- print("No neighbors found in original graph")
-
- print("\nCurrent neighbors (with similarity scores):")
- current_links = self.redis.execute_command('VLINKS', self.test_key,
- failed_item, 'WITHSCORES')
- if current_links:
- print(self.format_neighbors_with_scores(
- current_links,
- old_links=neighbors_before.get(failed_item)))
- else:
- print("No neighbors in current graph")
-
- print("\nOriginal results (top 20):")
- for item, score in items[:20]:
- deleted = "[deleted]" if item in items_to_remove else ""
- print(f"{item}: {score:.6f} {deleted}")
-
- print("\nNew results after removal (top 20):")
- new_items = []
- for i in range(0, len(new_results), 2):
- item = new_results[i].decode()
- score = float(new_results[i+1])
- new_items.append((item, score))
- new_items.sort(key=lambda x: x[1], reverse=True)
- for item, score in new_items[:20]:
- print(f"{item}: {score:.6f}")
-
- raise AssertionError(f"Test failed: Problem with item {failed_item} ({failed_reason}). *** IMPORTANT *** This test may fail from time to time without indicating that there is a bug. However normally it should pass. The fact is that it's a quite extreme test where we destroy 50% of nodes of top results and still expect perfect recall, with vectors that are very hostile because of the distribution used.")
-
diff --git a/examples/redis-unstable/modules/vector-sets/tests/dimension_validation.py b/examples/redis-unstable/modules/vector-sets/tests/dimension_validation.py
deleted file mode 100644
index f081152..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/dimension_validation.py
+++ /dev/null
@@ -1,67 +0,0 @@
-from test import TestCase, generate_random_vector
-import struct
-import redis.exceptions
-
-class DimensionValidation(TestCase):
- def getname(self):
- return "[regression] Dimension Validation with Projection"
-
- def estimated_runtime(self):
- return 0.5
-
- def test(self):
- # Test scenario 1: Create a set with projection
- original_dim = 100
- reduced_dim = 50
-
- # Create the initial vector and set with projection
- vec1 = generate_random_vector(original_dim)
- vec1_bytes = struct.pack(f'{original_dim}f', *vec1)
-
- # Add first vector with projection
- result = self.redis.execute_command('VADD', self.test_key,
- 'REDUCE', reduced_dim,
- 'FP32', vec1_bytes, f'{self.test_key}:item:1')
- assert result == 1, "First VADD with REDUCE should return 1"
-
- # Check VINFO returns the correct projection information
- info = self.redis.execute_command('VINFO', self.test_key)
- info_map = {k.decode('utf-8'): v for k, v in zip(info[::2], info[1::2])}
- assert 'vector-dim' in info_map, "VINFO should contain vector-dim"
- assert info_map['vector-dim'] == reduced_dim, f"Expected reduced dimension {reduced_dim}, got {info['vector-dim']}"
- assert 'projection-input-dim' in info_map, "VINFO should contain projection-input-dim"
- assert info_map['projection-input-dim'] == original_dim, f"Expected original dimension {original_dim}, got {info['projection-input-dim']}"
-
- # Test scenario 2: Try adding a mismatched vector - should fail
- wrong_dim = 80
- wrong_vec = generate_random_vector(wrong_dim)
- wrong_vec_bytes = struct.pack(f'{wrong_dim}f', *wrong_vec)
-
- # This should fail with dimension mismatch error
- try:
- self.redis.execute_command('VADD', self.test_key,
- 'REDUCE', reduced_dim,
- 'FP32', wrong_vec_bytes, f'{self.test_key}:item:2')
- assert False, "VADD with wrong dimension should fail"
- except redis.exceptions.ResponseError as e:
- assert "Input dimension mismatch for projection" in str(e), f"Expected dimension mismatch error, got: {e}"
-
- # Test scenario 3: Add a correctly-sized vector
- vec2 = generate_random_vector(original_dim)
- vec2_bytes = struct.pack(f'{original_dim}f', *vec2)
-
- # This should succeed
- result = self.redis.execute_command('VADD', self.test_key,
- 'REDUCE', reduced_dim,
- 'FP32', vec2_bytes, f'{self.test_key}:item:3')
- assert result == 1, "VADD with correct dimensions should succeed"
-
- # Check VSIM also validates input dimensions
- wrong_query = generate_random_vector(wrong_dim)
- try:
- self.redis.execute_command('VSIM', self.test_key,
- 'VALUES', wrong_dim, *[str(x) for x in wrong_query],
- 'COUNT', 10)
- assert False, "VSIM with wrong dimension should fail"
- except redis.exceptions.ResponseError as e:
- assert "Input dimension mismatch for projection" in str(e), f"Expected dimension mismatch error in VSIM, got: {e}"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/epsilon.py b/examples/redis-unstable/modules/vector-sets/tests/epsilon.py
deleted file mode 100644
index 97e11c0..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/epsilon.py
+++ /dev/null
@@ -1,77 +0,0 @@
-from test import TestCase
-
-class EpsilonOption(TestCase):
- def getname(self):
- return "VSIM EPSILON option filtering"
-
- def estimated_runtime(self):
- return 0.1
-
- def test(self):
- # Add vectors as shown in the example
- # Vector 'a' at (1, 1) - normalized to (0.707, 0.707)
- result = self.redis.execute_command('VADD', self.test_key, 'VALUES', '2', '1', '1', 'a')
- assert result == 1, "VADD should return 1 for item 'a'"
-
- # Vector 'b' at (0, 1) - normalized to (0, 1)
- result = self.redis.execute_command('VADD', self.test_key, 'VALUES', '2', '0', '1', 'b')
- assert result == 1, "VADD should return 1 for item 'b'"
-
- # Vector 'c' at (0, 0) - this will be a zero vector, might be handled specially
- result = self.redis.execute_command('VADD', self.test_key, 'VALUES', '2', '0', '0', 'c')
- assert result == 1, "VADD should return 1 for item 'c'"
-
- # Vector 'd' at (0, -1) - normalized to (0, -1)
- result = self.redis.execute_command('VADD', self.test_key, 'VALUES', '2', '0', '-1', 'd')
- assert result == 1, "VADD should return 1 for item 'd'"
-
- # Vector 'e' at (-1, -1) - normalized to (-0.707, -0.707)
- result = self.redis.execute_command('VADD', self.test_key, 'VALUES', '2', '-1', '-1', 'e')
- assert result == 1, "VADD should return 1 for item 'e'"
-
- # Test without EPSILON - should return all items
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', '2', '1', '1', 'WITHSCORES')
- # Result is a flat list: [elem1, score1, elem2, score2, ...]
- elements_all = [result[i].decode() for i in range(0, len(result), 2)]
- scores_all = [float(result[i]) for i in range(1, len(result), 2)]
-
- assert len(elements_all) == 5, f"Should return 5 elements without EPSILON, got {len(elements_all)}"
- assert elements_all[0] == 'a', "First element should be 'a' (most similar)"
- assert scores_all[0] == 1.0, "Score for 'a' should be 1.0 (identical)"
-
- # Test with EPSILON 0.5 - should return only elements with similarity >= 0.5 (distance < 0.5)
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', '2', '1', '1', 'WITHSCORES', 'EPSILON', '0.5')
- elements_epsilon_0_5 = [result[i].decode() for i in range(0, len(result), 2)]
- scores_epsilon_0_5 = [float(result[i]) for i in range(1, len(result), 2)]
-
- assert len(elements_epsilon_0_5) == 3, f"With EPSILON 0.5, should return 3 elements, got {len(elements_epsilon_0_5)}"
- assert set(elements_epsilon_0_5) == {'a', 'b', 'c'}, f"With EPSILON 0.5, should get a, b, c, got {elements_epsilon_0_5}"
-
- # Verify all returned scores are >= 0.5
- for i, score in enumerate(scores_epsilon_0_5):
- assert score >= 0.5, f"Element {elements_epsilon_0_5[i]} has score {score} which is < 0.5"
-
- # Test with EPSILON 0.2 - should return only elements with similarity >= 0.8 (distance < 0.2)
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', '2', '1', '1', 'WITHSCORES', 'EPSILON', '0.2')
- elements_epsilon_0_2 = [result[i].decode() for i in range(0, len(result), 2)]
- scores_epsilon_0_2 = [float(result[i]) for i in range(1, len(result), 2)]
-
- assert len(elements_epsilon_0_2) == 2, f"With EPSILON 0.2, should return 2 elements, got {len(elements_epsilon_0_2)}"
- assert set(elements_epsilon_0_2) == {'a', 'b'}, f"With EPSILON 0.2, should get a, b, got {elements_epsilon_0_2}"
-
- # Verify all returned scores are >= 0.8 (since distance < 0.2 means similarity > 0.8)
- for i, score in enumerate(scores_epsilon_0_2):
- assert score >= 0.8, f"Element {elements_epsilon_0_2[i]} has score {score} which is < 0.8"
-
- # Test with very small EPSILON - should return only the exact match
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', '2', '1', '1', 'WITHSCORES', 'EPSILON', '0.001')
- elements_epsilon_small = [result[i].decode() for i in range(0, len(result), 2)]
-
- assert len(elements_epsilon_small) == 1, f"With EPSILON 0.001, should return only 1 element, got {len(elements_epsilon_small)}"
- assert elements_epsilon_small[0] == 'a', "With very small EPSILON, should only get 'a'"
-
- # Test with EPSILON 1.0 - should return all elements (since all similarities are between 0 and 1)
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', '2', '1', '1', 'WITHSCORES', 'EPSILON', '1.0')
- elements_epsilon_1 = [result[i].decode() for i in range(0, len(result), 2)]
-
- assert len(elements_epsilon_1) == 5, f"With EPSILON 1.0, should return all 5 elements, got {len(elements_epsilon_1)}"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/evict_empty.py b/examples/redis-unstable/modules/vector-sets/tests/evict_empty.py
deleted file mode 100644
index 6c78c82..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/evict_empty.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from test import TestCase, generate_random_vector
-import struct
-
-class VREM_LastItemDeletesKey(TestCase):
- def getname(self):
- return "VREM last item deletes key"
-
- def test(self):
- # Generate a random vector
- vec = generate_random_vector(4)
- vec_bytes = struct.pack('4f', *vec)
-
- # Add the vector to the key
- result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:1')
- assert result == 1, "VADD should return 1 for first item"
-
- # Verify the key exists
- exists = self.redis.exists(self.test_key)
- assert exists == 1, "Key should exist after VADD"
-
- # Remove the item
- result = self.redis.execute_command('VREM', self.test_key, f'{self.test_key}:item:1')
- assert result == 1, "VREM should return 1 for successful removal"
-
- # Verify the key no longer exists
- exists = self.redis.exists(self.test_key)
- assert exists == 0, "Key should no longer exist after VREM of last item"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/filter_expr.py b/examples/redis-unstable/modules/vector-sets/tests/filter_expr.py
deleted file mode 100644
index 364915d..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/filter_expr.py
+++ /dev/null
@@ -1,242 +0,0 @@
-from test import TestCase
-
-class VSIMFilterExpressions(TestCase):
- def getname(self):
- return "VSIM FILTER expressions basic functionality"
-
- def test(self):
- # Create a small set of vectors with different attributes
-
- # Basic vectors for testing - all orthogonal for clear results
- vec1 = [1, 0, 0, 0]
- vec2 = [0, 1, 0, 0]
- vec3 = [0, 0, 1, 0]
- vec4 = [0, 0, 0, 1]
- vec5 = [0.5, 0.5, 0, 0]
-
- # Add vectors with various attributes
- self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1], f'{self.test_key}:item:1')
- self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:1',
- '{"age": 25, "name": "Alice", "active": true, "scores": [85, 90, 95], "city": "New York"}')
-
- self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec2], f'{self.test_key}:item:2')
- self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:2',
- '{"age": 30, "name": "Bob", "active": false, "scores": [70, 75, 80], "city": "Boston"}')
-
- self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec3], f'{self.test_key}:item:3')
- self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:3',
- '{"age": 35, "name": "Charlie", "scores": [60, 65, 70], "city": "Seattle"}')
-
- self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec4], f'{self.test_key}:item:4')
- # Item 4 has no attribute at all
-
- self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec5], f'{self.test_key}:item:5')
- self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:5',
- 'invalid json') # Intentionally malformed JSON
-
- # Basic equality with numbers
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '.age == 25')
- assert len(result) == 1, "Expected 1 result for age == 25"
- assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 for age == 25"
-
- # Greater than
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '.age > 25')
- assert len(result) == 2, "Expected 2 results for age > 25"
-
- # Less than or equal
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '.age <= 30')
- assert len(result) == 2, "Expected 2 results for age <= 30"
-
- # String equality
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '.name == "Alice"')
- assert len(result) == 1, "Expected 1 result for name == Alice"
- assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 for name == Alice"
-
- # String inequality
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '.name != "Alice"')
- assert len(result) == 2, "Expected 2 results for name != Alice"
-
- # Boolean value
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '.active')
- assert len(result) == 1, "Expected 1 result for .active being true"
-
- # Logical AND
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '.age > 20 and .age < 30')
- assert len(result) == 1, "Expected 1 result for 20 < age < 30"
- assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 for 20 < age < 30"
-
- # Logical OR
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '.age < 30 or .age > 35')
- assert len(result) == 1, "Expected 1 result for age < 30 or age > 35"
-
- # Logical NOT
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '!(.age == 25)')
- assert len(result) == 2, "Expected 2 results for NOT(age == 25)"
-
- # The "in" operator with array
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '.age in [25, 35]')
- assert len(result) == 2, "Expected 2 results for age in [25, 35]"
-
- # The "in" operator with strings in array
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '.name in ["Alice", "David"]')
- assert len(result) == 1, "Expected 1 result for name in [Alice, David]"
-
- # The "in" operator for substring matching
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '"lic" in .name')
- assert len(result) == 1, "Expected 1 result for 'lic' in name"
- assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 (Alice)"
-
- # The "in" operator with city substring
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '"ork" in .city')
- assert len(result) == 1, "Expected 1 result for 'ork' in city"
- assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 (New York)"
-
- # The "in" operator with no matches
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '"xyz" in .name')
- assert len(result) == 0, "Expected 0 results for 'xyz' in name"
-
- # Off-by-one tests - substring at the beginning
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '"Ali" in .name')
- assert len(result) == 1, "Expected 1 result for 'Ali' at beginning of 'Alice'"
- assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1"
-
- # Off-by-one tests - substring at the end
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '"ice" in .name')
- assert len(result) == 1, "Expected 1 result for 'ice' at end of 'Alice'"
- assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1"
-
- # Off-by-one tests - exact match (entire string)
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '"Alice" in .name')
- assert len(result) == 1, "Expected 1 result for exact match 'Alice' in 'Alice'"
- assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1"
-
- # Off-by-one tests - single character
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '"A" in .name')
- assert len(result) == 1, "Expected 1 result for single char 'A' in 'Alice'"
-
- # Off-by-one tests - empty string (should match all strings)
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '"" in .name')
- assert len(result) == 3, "Expected 3 results for empty string (matches all strings)"
-
- # Off-by-one tests - non-empty strings are never substrings of ""
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '.name in ""')
- assert len(result) == 0, "Expected 0 results for empty string on the right of IN operator"
-
- # Off-by-one tests - empty string match empty string.
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '"" in .name && "" in ""')
- assert len(result) == 3, "Expected empty string matching empty string"
-
- # Arithmetic operations - addition
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '.age + 10 > 40')
- assert len(result) == 1, "Expected 1 result for age + 10 > 40"
-
- # Arithmetic operations - multiplication
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '.age * 2 > 60')
- assert len(result) == 1, "Expected 1 result for age * 2 > 60"
-
- # Arithmetic operations - division
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '.age / 5 == 5')
- assert len(result) == 1, "Expected 1 result for age / 5 == 5"
-
- # Arithmetic operations - modulo
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '.age % 2 == 0')
- assert len(result) == 1, "Expected 1 result for age % 2 == 0"
-
- # Power operator
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '.age ** 2 > 900')
- assert len(result) == 1, "Expected 1 result for age^2 > 900"
-
- # Missing attribute (should exclude items missing that attribute)
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '.missing_field == "value"')
- assert len(result) == 0, "Expected 0 results for missing_field == value"
-
- # No attribute set at all
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '.any_field')
- assert f'{self.test_key}:item:4' not in [item.decode() for item in result], "Item with no attribute should be excluded"
-
- # Malformed JSON
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '.any_field')
- assert f'{self.test_key}:item:5' not in [item.decode() for item in result], "Item with malformed JSON should be excluded"
-
- # Complex expression combining multiple operators
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '(.age > 20 and .age < 40) and (.city == "Boston" or .city == "New York")')
- assert len(result) == 2, "Expected 2 results for the complex expression"
- expected_items = [f'{self.test_key}:item:1', f'{self.test_key}:item:2']
- assert set([item.decode() for item in result]) == set(expected_items), "Expected item:1 and item:2 for the complex expression"
-
- # Parentheses to control operator precedence
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '.age > (20 + 10)')
- assert len(result) == 1, "Expected 1 result for age > (20 + 10)"
-
- # Array access (arrays evaluate to true)
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
- *[str(x) for x in vec1],
- 'FILTER', '.scores')
- assert len(result) == 3, "Expected 3 results for .scores (arrays evaluate to true)"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/filter_int.py b/examples/redis-unstable/modules/vector-sets/tests/filter_int.py
deleted file mode 100644
index 0fd1dc1..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/filter_int.py
+++ /dev/null
@@ -1,668 +0,0 @@
-from test import TestCase, generate_random_vector
-import struct
-import random
-import math
-import json
-import time
-
-class VSIMFilterAdvanced(TestCase):
- def getname(self):
- return "VSIM FILTER comprehensive functionality testing"
-
- def estimated_runtime(self):
- return 15 # This test might take up to 15 seconds for the large dataset
-
- def setup(self):
- super().setup()
- self.dim = 32 # Vector dimension
- self.count = 5000 # Number of vectors for large tests
- self.small_count = 50 # Number of vectors for small/quick tests
-
- # Categories for attributes
- self.categories = ["electronics", "furniture", "clothing", "books", "food"]
- self.cities = ["New York", "London", "Tokyo", "Paris", "Berlin", "Sydney", "Toronto", "Singapore"]
- self.price_ranges = [(10, 50), (50, 200), (200, 1000), (1000, 5000)]
- self.years = list(range(2000, 2025))
-
- def create_attributes(self, index):
- """Create realistic attributes for a vector"""
- category = random.choice(self.categories)
- city = random.choice(self.cities)
- min_price, max_price = random.choice(self.price_ranges)
- price = round(random.uniform(min_price, max_price), 2)
- year = random.choice(self.years)
- in_stock = random.random() > 0.3 # 70% chance of being in stock
- rating = round(random.uniform(1, 5), 1)
- views = int(random.expovariate(1/1000)) # Exponential distribution for page views
- tags = random.sample(["popular", "sale", "new", "limited", "exclusive", "clearance"],
- k=random.randint(0, 3))
-
- # Add some specific patterns for testing
- # Every 10th item has a specific property combination for testing
- is_premium = (index % 10 == 0)
-
- # Create attributes dictionary
- attrs = {
- "id": index,
- "category": category,
- "location": city,
- "price": price,
- "year": year,
- "in_stock": in_stock,
- "rating": rating,
- "views": views,
- "tags": tags
- }
-
- if is_premium:
- attrs["is_premium"] = True
- attrs["special_features"] = ["premium", "warranty", "support"]
-
- # Add sub-categories for more complex filters
- if category == "electronics":
- attrs["subcategory"] = random.choice(["phones", "computers", "cameras", "audio"])
- elif category == "furniture":
- attrs["subcategory"] = random.choice(["chairs", "tables", "sofas", "beds"])
- elif category == "clothing":
- attrs["subcategory"] = random.choice(["shirts", "pants", "dresses", "shoes"])
-
- # Add some intentionally missing fields for testing
- if random.random() > 0.9: # 10% chance of missing price
- del attrs["price"]
-
- # Some items have promotion field
- if random.random() > 0.7: # 30% chance of having a promotion
- attrs["promotion"] = random.choice(["discount", "bundle", "gift"])
-
- # Create invalid JSON for a small percentage of vectors
- if random.random() > 0.98: # 2% chance of having invalid JSON
- return "{{invalid json}}"
-
- return json.dumps(attrs)
-
- def create_vectors_with_attributes(self, key, count):
- """Create vectors and add attributes to them"""
- vectors = []
- names = []
- attribute_map = {} # To store attributes for verification
-
- # Create vectors
- for i in range(count):
- vec = generate_random_vector(self.dim)
- vectors.append(vec)
- name = f"{key}:item:{i}"
- names.append(name)
-
- # Add to Redis
- vec_bytes = struct.pack(f'{self.dim}f', *vec)
- self.redis.execute_command('VADD', key, 'FP32', vec_bytes, name)
-
- # Create and add attributes
- attrs = self.create_attributes(i)
- self.redis.execute_command('VSETATTR', key, name, attrs)
-
- # Store attributes for later verification
- try:
- attribute_map[name] = json.loads(attrs) if '{' in attrs else None
- except json.JSONDecodeError:
- attribute_map[name] = None
-
- return vectors, names, attribute_map
-
- def filter_linear_search(self, vectors, names, query_vector, filter_expr, attribute_map, k=10):
- """Perform a linear search with filtering for verification"""
- similarities = []
- query_norm = math.sqrt(sum(x*x for x in query_vector))
-
- if query_norm == 0:
- return []
-
- for i, vec in enumerate(vectors):
- name = names[i]
- attributes = attribute_map.get(name)
-
- # Skip if doesn't match filter
- if not self.matches_filter(attributes, filter_expr):
- continue
-
- vec_norm = math.sqrt(sum(x*x for x in vec))
- if vec_norm == 0:
- continue
-
- dot_product = sum(a*b for a,b in zip(query_vector, vec))
- cosine_sim = dot_product / (query_norm * vec_norm)
- distance = 1.0 - cosine_sim
- redis_similarity = 1.0 - (distance/2.0)
- similarities.append((name, redis_similarity))
-
- similarities.sort(key=lambda x: x[1], reverse=True)
- return similarities[:k]
-
- def matches_filter(self, attributes, filter_expr):
- """Filter matching for verification - uses Python eval to handle complex expressions"""
- if attributes is None:
- return False # No attributes or invalid JSON
-
- # Replace JSON path selectors with Python dictionary access
- py_expr = filter_expr
-
- # Handle `.field` notation (replace with attributes['field'])
- i = 0
- while i < len(py_expr):
- if py_expr[i] == '.' and (i == 0 or not py_expr[i-1].isalnum()):
- # Find the end of the selector (stops at operators or whitespace)
- j = i + 1
- while j < len(py_expr) and (py_expr[j].isalnum() or py_expr[j] == '_'):
- j += 1
-
- if j > i + 1: # Found a valid selector
- field = py_expr[i+1:j]
- # Use a safe access pattern that returns a default value based on context
- py_expr = py_expr[:i] + f"attributes.get('{field}')" + py_expr[j:]
- i = i + len(f"attributes.get('{field}')")
- else:
- i += 1
- else:
- i += 1
-
- # Convert not operator if needed
- py_expr = py_expr.replace('!', ' not ')
-
- try:
- # Custom evaluation that handles exceptions for missing fields
- # by returning False for the entire expression
-
- # Split the expression on logical operators
- parts = []
- for op in [' and ', ' or ']:
- if op in py_expr:
- parts = py_expr.split(op)
- break
-
- if not parts: # No logical operators found
- parts = [py_expr]
-
- # Try to evaluate each part - if any part fails,
- # the whole expression should fail
- try:
- result = eval(py_expr, {"attributes": attributes})
- return bool(result)
- except (TypeError, AttributeError):
- # This typically happens when trying to compare None with
- # numbers or other types, or when an attribute doesn't exist
- return False
- except Exception as e:
- print(f"Error evaluating filter expression '{filter_expr}' as '{py_expr}': {e}")
- return False
-
- except Exception as e:
- print(f"Error evaluating filter expression '{filter_expr}' as '{py_expr}': {e}")
- return False
-
- def safe_decode(self,item):
- return item.decode() if isinstance(item, bytes) else item
-
- def calculate_recall(self, redis_results, linear_results, k=10):
- """Calculate recall (percentage of correct results retrieved)"""
- redis_set = set(self.safe_decode(item) for item in redis_results)
- linear_set = set(item[0] for item in linear_results[:k])
-
- if not linear_set:
- return 1.0 # If no linear results, consider it perfect recall
-
- intersection = redis_set.intersection(linear_set)
- return len(intersection) / len(linear_set)
-
- def test_recall_with_filter(self, filter_expr, ef=500, filter_ef=None):
- """Test recall for a given filter expression"""
- # Create query vector
- query_vec = generate_random_vector(self.dim)
-
- # First, get ground truth using linear scan
- linear_results = self.filter_linear_search(
- self.vectors, self.names, query_vec, filter_expr, self.attribute_map, k=50)
-
- # Calculate true selectivity from ground truth
- true_selectivity = len(linear_results) / len(self.names) if self.names else 0
-
- # Perform Redis search with filter
- cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
- cmd_args.extend([str(x) for x in query_vec])
- cmd_args.extend(['COUNT', 50, 'WITHSCORES', 'EF', ef, 'FILTER', filter_expr])
- if filter_ef:
- cmd_args.extend(['FILTER-EF', filter_ef])
-
- start_time = time.time()
- redis_results = self.redis.execute_command(*cmd_args)
- query_time = time.time() - start_time
-
- # Convert Redis results to dict
- redis_items = {}
- for i in range(0, len(redis_results), 2):
- key = redis_results[i].decode() if isinstance(redis_results[i], bytes) else redis_results[i]
- score = float(redis_results[i+1])
- redis_items[key] = score
-
- # Calculate metrics
- recall = self.calculate_recall(redis_items.keys(), linear_results)
- selectivity = len(redis_items) / len(self.names) if redis_items else 0
-
- # Compare against the true selectivity from linear scan
- assert abs(selectivity - true_selectivity) < 0.1, \
- f"Redis selectivity {selectivity:.3f} differs significantly from ground truth {true_selectivity:.3f}"
-
- # We expect high recall for standard parameters
- if ef >= 500 and (filter_ef is None or filter_ef >= 1000):
- try:
- assert recall >= 0.7, \
- f"Low recall {recall:.2f} for filter '{filter_expr}'"
- except AssertionError as e:
- # Get items found in each set
- redis_items_set = set(redis_items.keys())
- linear_items_set = set(item[0] for item in linear_results)
-
- # Find items in each set
- only_in_redis = redis_items_set - linear_items_set
- only_in_linear = linear_items_set - redis_items_set
- in_both = redis_items_set & linear_items_set
-
- # Build comprehensive debug message
- debug = f"\nGround Truth: {len(linear_results)} matching items (total vectors: {len(self.vectors)})"
- debug += f"\nRedis Found: {len(redis_items)} items with FILTER-EF: {filter_ef or 'default'}"
- debug += f"\nItems in both sets: {len(in_both)} (recall: {recall:.4f})"
- debug += f"\nItems only in Redis: {len(only_in_redis)}"
- debug += f"\nItems only in Ground Truth: {len(only_in_linear)}"
-
- # Show some example items from each set with their scores
- if only_in_redis:
- debug += "\n\nTOP 5 ITEMS ONLY IN REDIS:"
- sorted_redis = sorted([(k, v) for k, v in redis_items.items()], key=lambda x: x[1], reverse=True)
- for i, (item, score) in enumerate(sorted_redis[:5]):
- if item in only_in_redis:
- debug += f"\n {i+1}. {item} (Score: {score:.4f})"
-
- # Show attribute that should match filter
- attr = self.attribute_map.get(item)
- if attr:
- debug += f" - Attrs: {attr.get('category', 'N/A')}, Price: {attr.get('price', 'N/A')}"
-
- if only_in_linear:
- debug += "\n\nTOP 5 ITEMS ONLY IN GROUND TRUTH:"
- for i, (item, score) in enumerate(linear_results[:5]):
- if item in only_in_linear:
- debug += f"\n {i+1}. {item} (Score: {score:.4f})"
-
- # Show attribute that should match filter
- attr = self.attribute_map.get(item)
- if attr:
- debug += f" - Attrs: {attr.get('category', 'N/A')}, Price: {attr.get('price', 'N/A')}"
-
- # Help identify parsing issues
- debug += "\n\nPARSING CHECK:"
- debug += f"\nRedis command: VSIM {self.test_key} VALUES {self.dim} [...] FILTER '{filter_expr}'"
-
- # Check for WITHSCORES handling issues
- if len(redis_results) > 0 and len(redis_results) % 2 == 0:
- debug += f"\nRedis returned {len(redis_results)} items (looks like item,score pairs)"
- debug += f"\nFirst few results: {redis_results[:4]}"
-
- # Check the filter implementation
- debug += "\n\nFILTER IMPLEMENTATION CHECK:"
- debug += f"\nFilter expression: '{filter_expr}'"
- debug += "\nSample attribute matches from attribute_map:"
- count_matching = 0
- for i, (name, attrs) in enumerate(self.attribute_map.items()):
- if attrs and self.matches_filter(attrs, filter_expr):
- count_matching += 1
- if i < 3: # Show first 3 matches
- debug += f"\n - {name}: {attrs}"
- debug += f"\nTotal items matching filter in attribute_map: {count_matching}"
-
- # Check if results array handling could be wrong
- debug += "\n\nRESULT ARRAYS CHECK:"
- if len(linear_results) >= 1:
- debug += f"\nlinear_results[0]: {linear_results[0]}"
- if isinstance(linear_results[0], tuple) and len(linear_results[0]) == 2:
- debug += " (correct tuple format: (name, score))"
- else:
- debug += " (UNEXPECTED FORMAT!)"
-
- # Debug sort order
- debug += "\n\nSORTING CHECK:"
- if len(linear_results) >= 2:
- debug += f"\nGround truth first item score: {linear_results[0][1]}"
- debug += f"\nGround truth second item score: {linear_results[1][1]}"
- debug += f"\nCorrectly sorted by similarity? {linear_results[0][1] >= linear_results[1][1]}"
-
- # Re-raise with detailed information
- raise AssertionError(str(e) + debug)
-
- return recall, selectivity, query_time, len(redis_items)
-
- def test(self):
- print(f"\nRunning comprehensive VSIM FILTER tests...")
-
- # Create a larger dataset for testing
- print(f"Creating dataset with {self.count} vectors and attributes...")
- self.vectors, self.names, self.attribute_map = self.create_vectors_with_attributes(
- self.test_key, self.count)
-
- # ==== 1. Recall and Precision Testing ====
- print("Testing recall for various filters...")
-
- # Test basic filters with different selectivity
- results = {}
- results["category"] = self.test_recall_with_filter('.category == "electronics"')
- results["price_high"] = self.test_recall_with_filter('.price > 1000')
- results["in_stock"] = self.test_recall_with_filter('.in_stock')
- results["rating"] = self.test_recall_with_filter('.rating >= 4')
- results["complex1"] = self.test_recall_with_filter('.category == "electronics" and .price < 500')
-
- print("Filter | Recall | Selectivity | Time (ms) | Results")
- print("----------------------------------------------------")
- for name, (recall, selectivity, time_ms, count) in results.items():
- print(f"{name:7} | {recall:.3f} | {selectivity:.3f} | {time_ms*1000:.1f} | {count}")
-
- # ==== 2. Filter Selectivity Performance ====
- print("\nTesting filter selectivity performance...")
-
- # High selectivity (very few matches)
- high_sel_recall, _, high_sel_time, _ = self.test_recall_with_filter('.is_premium')
-
- # Medium selectivity
- med_sel_recall, _, med_sel_time, _ = self.test_recall_with_filter('.price > 100 and .price < 1000')
-
- # Low selectivity (many matches)
- low_sel_recall, _, low_sel_time, _ = self.test_recall_with_filter('.year > 2000')
-
- print(f"High selectivity recall: {high_sel_recall:.3f}, time: {high_sel_time*1000:.1f}ms")
- print(f"Med selectivity recall: {med_sel_recall:.3f}, time: {med_sel_time*1000:.1f}ms")
- print(f"Low selectivity recall: {low_sel_recall:.3f}, time: {low_sel_time*1000:.1f}ms")
-
- # ==== 3. FILTER-EF Parameter Testing ====
- print("\nTesting FILTER-EF parameter...")
-
- # Test with different FILTER-EF values
- filter_expr = '.category == "electronics" and .price > 200'
- ef_values = [100, 500, 2000, 5000]
-
- print("FILTER-EF | Recall | Time (ms)")
- print("-----------------------------")
- for filter_ef in ef_values:
- recall, _, query_time, _ = self.test_recall_with_filter(
- filter_expr, ef=500, filter_ef=filter_ef)
- print(f"{filter_ef:9} | {recall:.3f} | {query_time*1000:.1f}")
-
- # Assert that higher FILTER-EF generally gives better recall
- low_ef_recall, _, _, _ = self.test_recall_with_filter(filter_expr, filter_ef=100)
- high_ef_recall, _, _, _ = self.test_recall_with_filter(filter_expr, filter_ef=5000)
-
- # This might not always be true due to randomness, but generally holds
- # We use a softer assertion to avoid flaky tests
- assert high_ef_recall >= low_ef_recall * 0.8, \
- f"Higher FILTER-EF should generally give better recall: {high_ef_recall:.3f} vs {low_ef_recall:.3f}"
-
- # ==== 4. Complex Filter Expressions ====
- print("\nTesting complex filter expressions...")
-
- # Test a variety of complex expressions
- complex_filters = [
- '.price > 100 and (.category == "electronics" or .category == "furniture")',
- '(.rating > 4 and .in_stock) or (.price < 50 and .views > 1000)',
- '.category in ["electronics", "clothing"] and .price > 200 and .rating >= 3',
- '(.category == "electronics" and .subcategory == "phones") or (.category == "furniture" and .price > 1000)',
- '.year > 2010 and !(.price < 100) and .in_stock'
- ]
-
- print("Expression | Results | Time (ms)")
- print("-----------------------------")
- for i, expr in enumerate(complex_filters):
- try:
- _, _, query_time, result_count = self.test_recall_with_filter(expr)
- print(f"Complex {i+1} | {result_count:7} | {query_time*1000:.1f}")
- except Exception as e:
- print(f"Complex {i+1} | Error: {str(e)}")
-
- # ==== 5. Attribute Type Testing ====
- print("\nTesting different attribute types...")
-
- type_filters = [
- ('.price > 500', "Numeric"),
- ('.category == "books"', "String equality"),
- ('.in_stock', "Boolean"),
- ('.tags in ["sale", "new"]', "Array membership"),
- ('.rating * 2 > 8', "Arithmetic")
- ]
-
- for expr, type_name in type_filters:
- try:
- _, _, query_time, result_count = self.test_recall_with_filter(expr)
- print(f"{type_name:16} | {expr:30} | {result_count:5} results | {query_time*1000:.1f}ms")
- except Exception as e:
- print(f"{type_name:16} | {expr:30} | Error: {str(e)}")
-
- # ==== 6. Filter + Count Interaction ====
- print("\nTesting COUNT parameter with filters...")
-
- filter_expr = '.category == "electronics"'
- counts = [5, 20, 100]
-
- for count in counts:
- query_vec = generate_random_vector(self.dim)
- cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
- cmd_args.extend([str(x) for x in query_vec])
- cmd_args.extend(['COUNT', count, 'WITHSCORES', 'FILTER', filter_expr])
-
- results = self.redis.execute_command(*cmd_args)
- result_count = len(results) // 2 # Divide by 2 because WITHSCORES returns pairs
-
- # We expect result count to be at most the requested count
- assert result_count <= count, f"Got {result_count} results with COUNT {count}"
- print(f"COUNT {count:3} | Got {result_count:3} results")
-
- # ==== 7. Edge Cases ====
- print("\nTesting edge cases...")
-
- # Test with no matching items
- no_match_expr = '.category == "nonexistent_category"'
- results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', self.dim,
- *[str(x) for x in generate_random_vector(self.dim)],
- 'FILTER', no_match_expr)
- assert len(results) == 0, f"Expected 0 results for non-matching filter, got {len(results)}"
- print(f"No matching items: {len(results)} results (expected 0)")
-
- # Test with invalid filter syntax
- try:
- self.redis.execute_command('VSIM', self.test_key, 'VALUES', self.dim,
- *[str(x) for x in generate_random_vector(self.dim)],
- 'FILTER', '.category === "books"') # Triple equals is invalid
- assert False, "Expected error for invalid filter syntax"
- except:
- print("Invalid filter syntax correctly raised an error")
-
- # Test with extremely long complex expression
- long_expr = ' and '.join([f'.rating > {i/10}' for i in range(10)])
- try:
- results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', self.dim,
- *[str(x) for x in generate_random_vector(self.dim)],
- 'FILTER', long_expr)
- print(f"Long expression: {len(results)} results")
- except Exception as e:
- print(f"Long expression error: {str(e)}")
-
- print("\nComprehensive VSIM FILTER tests completed successfully")
-
-
-class VSIMFilterSelectivityTest(TestCase):
- def getname(self):
- return "VSIM FILTER selectivity performance benchmark"
-
- def estimated_runtime(self):
- return 8 # This test might take up to 8 seconds
-
- def setup(self):
- super().setup()
- self.dim = 32
- self.count = 10000
- self.test_key = f"{self.test_key}:selectivity" # Use a different key
-
- def create_vector_with_age_attribute(self, name, age):
- """Create a vector with a specific age attribute"""
- vec = generate_random_vector(self.dim)
- vec_bytes = struct.pack(f'{self.dim}f', *vec)
- self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, name)
- self.redis.execute_command('VSETATTR', self.test_key, name, json.dumps({"age": age}))
-
- def test(self):
- print("\nRunning VSIM FILTER selectivity benchmark...")
-
- # Create a dataset where we control the exact selectivity
- print(f"Creating controlled dataset with {self.count} vectors...")
-
- # Create vectors with age attributes from 1 to 100
- for i in range(self.count):
- age = (i % 100) + 1 # Ages from 1 to 100
- name = f"{self.test_key}:item:{i}"
- self.create_vector_with_age_attribute(name, age)
-
- # Create a query vector
- query_vec = generate_random_vector(self.dim)
-
- # Test filters with different selectivities
- selectivities = [0.01, 0.05, 0.10, 0.25, 0.50, 0.75, 0.99]
- results = []
-
- print("\nSelectivity | Filter | Results | Time (ms)")
- print("--------------------------------------------------")
-
- for target_selectivity in selectivities:
- # Calculate age threshold for desired selectivity
- # For example, age <= 10 gives 10% selectivity
- age_threshold = int(target_selectivity * 100)
- filter_expr = f'.age <= {age_threshold}'
-
- # Run query and measure time
- start_time = time.time()
- cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
- cmd_args.extend([str(x) for x in query_vec])
- cmd_args.extend(['COUNT', 100, 'FILTER', filter_expr])
-
- results = self.redis.execute_command(*cmd_args)
- query_time = time.time() - start_time
-
- actual_selectivity = len(results) / min(100, int(target_selectivity * self.count))
- print(f"{target_selectivity:.2f} | {filter_expr:15} | {len(results):7} | {query_time*1000:.1f}")
-
- # Add assertion to ensure reasonable performance for different selectivities
- # For very selective queries (1%), we might need more exploration
- if target_selectivity <= 0.05:
- # For very selective queries, ensure we can find some results
- assert len(results) > 0, f"No results found for {filter_expr}"
- else:
- # For less selective queries, performance should be reasonable
- assert query_time < 1.0, f"Query too slow: {query_time:.3f}s for {filter_expr}"
-
- print("\nSelectivity benchmark completed successfully")
-
-
-class VSIMFilterComparisonTest(TestCase):
- def getname(self):
- return "VSIM FILTER EF parameter comparison"
-
- def estimated_runtime(self):
- return 8 # This test might take up to 8 seconds
-
- def setup(self):
- super().setup()
- self.dim = 32
- self.count = 5000
- self.test_key = f"{self.test_key}:efparams" # Use a different key
-
- def create_dataset(self):
- """Create a dataset with specific attribute patterns for testing FILTER-EF"""
- vectors = []
- names = []
-
- # Create vectors with category and quality score attributes
- for i in range(self.count):
- vec = generate_random_vector(self.dim)
- name = f"{self.test_key}:item:{i}"
-
- # Add vector to Redis
- vec_bytes = struct.pack(f'{self.dim}f', *vec)
- self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, name)
-
- # Create attributes - we want a very selective filter
- # Only 2% of items have category=premium AND quality>90
- category = "premium" if random.random() < 0.1 else random.choice(["standard", "economy", "basic"])
- quality = random.randint(1, 100)
-
- attrs = {
- "id": i,
- "category": category,
- "quality": quality
- }
-
- self.redis.execute_command('VSETATTR', self.test_key, name, json.dumps(attrs))
- vectors.append(vec)
- names.append(name)
-
- return vectors, names
-
- def test(self):
- print("\nRunning VSIM FILTER-EF parameter comparison...")
-
- # Create dataset
- vectors, names = self.create_dataset()
-
- # Create a selective filter that matches ~2% of items
- filter_expr = '.category == "premium" and .quality > 90'
-
- # Create query vector
- query_vec = generate_random_vector(self.dim)
-
- # Test different FILTER-EF values
- ef_values = [50, 100, 500, 1000, 5000]
- results = []
-
- print("\nFILTER-EF | Results | Time (ms) | Notes")
- print("---------------------------------------")
-
- baseline_count = None
-
- for ef in ef_values:
- # Run query and measure time
- start_time = time.time()
- cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
- cmd_args.extend([str(x) for x in query_vec])
- cmd_args.extend(['COUNT', 100, 'FILTER', filter_expr, 'FILTER-EF', ef])
-
- query_results = self.redis.execute_command(*cmd_args)
- query_time = time.time() - start_time
-
- # Set baseline for comparison
- if baseline_count is None:
- baseline_count = len(query_results)
-
- recall_rate = len(query_results) / max(1, baseline_count) if baseline_count > 0 else 1.0
-
- notes = ""
- if ef == 5000:
- notes = "Baseline"
- elif recall_rate < 0.5:
- notes = "Low recall!"
-
- print(f"{ef:9} | {len(query_results):7} | {query_time*1000:.1f} | {notes}")
- results.append((ef, len(query_results), query_time))
-
- # If we have enough results at highest EF, check that recall improves with higher EF
- if results[-1][1] >= 5: # At least 5 results for highest EF
- # Extract result counts
- result_counts = [r[1] for r in results]
-
- # The last result (highest EF) should typically find more results than the first (lowest EF)
- # but we use a soft assertion to avoid flaky tests
- assert result_counts[-1] >= result_counts[0], \
- f"Higher FILTER-EF should find at least as many results: {result_counts[-1]} vs {result_counts[0]}"
-
- print("\nFILTER-EF parameter comparison completed successfully")
diff --git a/examples/redis-unstable/modules/vector-sets/tests/large_scale.py b/examples/redis-unstable/modules/vector-sets/tests/large_scale.py
deleted file mode 100644
index eac5dca..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/large_scale.py
+++ /dev/null
@@ -1,56 +0,0 @@
-from test import TestCase, fill_redis_with_vectors, generate_random_vector
-import random
-
-class LargeScale(TestCase):
- def getname(self):
- return "Large Scale Comparison"
-
- def estimated_runtime(self):
- return 10
-
- def test(self):
- dim = 300
- count = 20000
- k = 50
-
- # Fill Redis and get reference data for comparison
- random.seed(42) # Make test deterministic
- data = fill_redis_with_vectors(self.redis, self.test_key, count, dim)
-
- # Generate query vector
- query_vec = generate_random_vector(dim)
-
- # Get results from Redis with good exploration factor
- redis_raw = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
- *[str(x) for x in query_vec],
- 'COUNT', k, 'WITHSCORES', 'EF', 500)
-
- # Convert Redis results to dict
- redis_results = {}
- for i in range(0, len(redis_raw), 2):
- key = redis_raw[i].decode()
- score = float(redis_raw[i+1])
- redis_results[key] = score
-
- # Get results from linear scan
- linear_results = data.find_k_nearest(query_vec, k)
- linear_items = {name: score for name, score in linear_results}
-
- # Compare overlap
- redis_set = set(redis_results.keys())
- linear_set = set(linear_items.keys())
- overlap = len(redis_set & linear_set)
-
- # If test fails, print comparison for debugging
- if overlap < k * 0.7:
- data.print_comparison({'items': redis_results, 'query_vector': query_vec}, k)
-
- assert overlap >= k * 0.7, \
- f"Expected at least 70% overlap in top {k} results, got {overlap/k*100:.1f}%"
-
- # Verify scores for common items
- for item in redis_set & linear_set:
- redis_score = redis_results[item]
- linear_score = linear_items[item]
- assert abs(redis_score - linear_score) < 0.01, \
- f"Score mismatch for {item}: Redis={redis_score:.3f} Linear={linear_score:.3f}"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/memory_usage.py b/examples/redis-unstable/modules/vector-sets/tests/memory_usage.py
deleted file mode 100644
index d0f3f09..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/memory_usage.py
+++ /dev/null
@@ -1,36 +0,0 @@
-from test import TestCase, generate_random_vector
-import struct
-
-class MemoryUsageTest(TestCase):
- def getname(self):
- return "[regression] MEMORY USAGE with attributes"
-
- def test(self):
- # Generate random vectors
- vec1 = generate_random_vector(4)
- vec2 = generate_random_vector(4)
- vec_bytes1 = struct.pack('4f', *vec1)
- vec_bytes2 = struct.pack('4f', *vec2)
-
- # Add vectors to the key, one with attribute, one without
- self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes1, f'{self.test_key}:item:1')
- self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes2, f'{self.test_key}:item:2', 'SETATTR', '{"color":"red"}')
-
- # Get memory usage for the key
- try:
- memory_usage = self.redis.execute_command('MEMORY', 'USAGE', self.test_key)
- # If we got here without exception, the command worked
- assert memory_usage > 0, "MEMORY USAGE should return a positive value"
-
- # Add more attributes to increase complexity
- self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:1', '{"color":"blue","size":10}')
-
- # Check memory usage again
- new_memory_usage = self.redis.execute_command('MEMORY', 'USAGE', self.test_key)
- assert new_memory_usage > 0, "MEMORY USAGE should still return a positive value after setting attributes"
-
- # Memory usage should be higher after adding attributes
- assert new_memory_usage > memory_usage, "Memory usage increase after adding attributes"
-
- except Exception as e:
- raise AssertionError(f"MEMORY USAGE command failed: {str(e)}")
diff --git a/examples/redis-unstable/modules/vector-sets/tests/node_update.py b/examples/redis-unstable/modules/vector-sets/tests/node_update.py
deleted file mode 100644
index 53aa2dd..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/node_update.py
+++ /dev/null
@@ -1,85 +0,0 @@
-from test import TestCase, generate_random_vector
-import struct
-import math
-import random
-
-class VectorUpdateAndClusters(TestCase):
- def getname(self):
- return "VADD vector update with cluster relocation"
-
- def estimated_runtime(self):
- return 2.0 # Should take around 2 seconds
-
- def generate_cluster_vector(self, base_vec, noise=0.1):
- """Generate a vector that's similar to base_vec with some noise."""
- vec = [x + random.gauss(0, noise) for x in base_vec]
- # Normalize
- norm = math.sqrt(sum(x*x for x in vec))
- return [x/norm for x in vec]
-
- def test(self):
- dim = 128
- vectors_per_cluster = 5000
-
- # Create two very different base vectors for our clusters
- cluster1_base = generate_random_vector(dim)
- cluster2_base = [-x for x in cluster1_base] # Opposite direction
-
- # Add vectors from first cluster
- for i in range(vectors_per_cluster):
- vec = self.generate_cluster_vector(cluster1_base)
- vec_bytes = struct.pack(f'{dim}f', *vec)
- self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes,
- f'{self.test_key}:cluster1:{i}')
-
- # Add vectors from second cluster
- for i in range(vectors_per_cluster):
- vec = self.generate_cluster_vector(cluster2_base)
- vec_bytes = struct.pack(f'{dim}f', *vec)
- self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes,
- f'{self.test_key}:cluster2:{i}')
-
- # Pick a test vector from cluster1
- test_key = f'{self.test_key}:cluster1:0'
-
- # Verify it's in cluster1 using VSIM
- initial_vec = self.generate_cluster_vector(cluster1_base)
- results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
- *[str(x) for x in initial_vec],
- 'COUNT', 100, 'WITHSCORES')
-
- # Count how many cluster1 items are in top results
- cluster1_count = sum(1 for i in range(0, len(results), 2)
- if b'cluster1' in results[i])
- assert cluster1_count > 80, "Initial clustering check failed"
-
- # Now update the test vector to be in cluster2
- new_vec = self.generate_cluster_vector(cluster2_base, noise=0.05)
- vec_bytes = struct.pack(f'{dim}f', *new_vec)
- self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, test_key)
-
- # Verify the embedding was actually updated using VEMB
- emb_result = self.redis.execute_command('VEMB', self.test_key, test_key)
- updated_vec = [float(x) for x in emb_result]
-
- # Verify updated vector matches what we inserted
- dot_product = sum(a*b for a,b in zip(updated_vec, new_vec))
- similarity = dot_product / (math.sqrt(sum(x*x for x in updated_vec)) *
- math.sqrt(sum(x*x for x in new_vec)))
- assert similarity > 0.9, "Vector was not properly updated"
-
- # Verify it's now in cluster2 using VSIM
- results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
- *[str(x) for x in cluster2_base],
- 'COUNT', 100, 'WITHSCORES')
-
- # Verify our updated vector is among top results
- found = False
- for i in range(0, len(results), 2):
- if results[i].decode() == test_key:
- found = True
- similarity = float(results[i+1])
- assert similarity > 0.80, f"Updated vector has low similarity: {similarity}"
- break
-
- assert found, "Updated vector not found in cluster2 proximity"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/persistence.py b/examples/redis-unstable/modules/vector-sets/tests/persistence.py
deleted file mode 100644
index 79730f4..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/persistence.py
+++ /dev/null
@@ -1,86 +0,0 @@
-from test import TestCase, fill_redis_with_vectors, generate_random_vector
-import random
-
-class HNSWPersistence(TestCase):
- def getname(self):
- return "HNSW Persistence"
-
- def estimated_runtime(self):
- return 30
-
- def _verify_results(self, key, dim, query_vec, reduced_dim=None):
- """Run a query and return results dict"""
- k = 10
- args = ['VSIM', key]
-
- if reduced_dim:
- args.extend(['VALUES', dim])
- args.extend([str(x) for x in query_vec])
- else:
- args.extend(['VALUES', dim])
- args.extend([str(x) for x in query_vec])
-
- args.extend(['COUNT', k, 'WITHSCORES'])
- results = self.redis.execute_command(*args)
-
- results_dict = {}
- for i in range(0, len(results), 2):
- key = results[i].decode()
- score = float(results[i+1])
- results_dict[key] = score
- return results_dict
-
- def test(self):
- # Setup dimensions
- dim = 128
- reduced_dim = 32
- count = 5000
- random.seed(42)
-
- # Create two datasets - one normal and one with dimension reduction
- normal_data = fill_redis_with_vectors(self.redis, f"{self.test_key}:normal", count, dim)
- projected_data = fill_redis_with_vectors(self.redis, f"{self.test_key}:projected",
- count, dim, reduced_dim)
-
- # Generate query vectors we'll use before and after reload
- query_vec_normal = generate_random_vector(dim)
- query_vec_projected = generate_random_vector(dim)
-
- # Get initial results for both sets
- initial_normal = self._verify_results(f"{self.test_key}:normal",
- dim, query_vec_normal)
- initial_projected = self._verify_results(f"{self.test_key}:projected",
- dim, query_vec_projected, reduced_dim)
-
- # Force Redis to save and reload the dataset
- self.redis.execute_command('DEBUG', 'RELOAD')
-
- # Verify results after reload
- reloaded_normal = self._verify_results(f"{self.test_key}:normal",
- dim, query_vec_normal)
- reloaded_projected = self._verify_results(f"{self.test_key}:projected",
- dim, query_vec_projected, reduced_dim)
-
- # Verify normal vectors results
- assert len(initial_normal) == len(reloaded_normal), \
- "Normal vectors: Result count mismatch before/after reload"
-
- for key in initial_normal:
- assert key in reloaded_normal, f"Normal vectors: Missing item after reload: {key}"
- assert abs(initial_normal[key] - reloaded_normal[key]) < 0.0001, \
- f"Normal vectors: Score mismatch for {key}: " + \
- f"before={initial_normal[key]:.6f}, after={reloaded_normal[key]:.6f}"
-
- # Verify projected vectors results
- assert len(initial_projected) == len(reloaded_projected), \
- "Projected vectors: Result count mismatch before/after reload"
-
- for key in initial_projected:
- assert key in reloaded_projected, \
- f"Projected vectors: Missing item after reload: {key}"
- assert abs(initial_projected[key] - reloaded_projected[key]) < 0.0001, \
- f"Projected vectors: Score mismatch for {key}: " + \
- f"before={initial_projected[key]:.6f}, after={reloaded_projected[key]:.6f}"
-
- self.redis.delete(f"{self.test_key}:normal")
- self.redis.delete(f"{self.test_key}:projected")
diff --git a/examples/redis-unstable/modules/vector-sets/tests/reduce.py b/examples/redis-unstable/modules/vector-sets/tests/reduce.py
deleted file mode 100644
index e39164f..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/reduce.py
+++ /dev/null
@@ -1,71 +0,0 @@
-from test import TestCase, fill_redis_with_vectors, generate_random_vector
-
-class Reduce(TestCase):
- def getname(self):
- return "Dimension Reduction"
-
- def estimated_runtime(self):
- return 0.2
-
- def test(self):
- original_dim = 100
- reduced_dim = 80
- count = 1000
- k = 50 # Number of nearest neighbors to check
-
- # Fill Redis with vectors using REDUCE and get reference data
- data = fill_redis_with_vectors(self.redis, self.test_key, count, original_dim, reduced_dim)
-
- # Verify dimension is reduced
- dim = self.redis.execute_command('VDIM', self.test_key)
- assert dim == reduced_dim, f"Expected dimension {reduced_dim}, got {dim}"
-
- # Generate query vector and get nearest neighbors using Redis
- query_vec = generate_random_vector(original_dim)
- redis_raw = self.redis.execute_command('VSIM', self.test_key, 'VALUES',
- original_dim, *[str(x) for x in query_vec],
- 'COUNT', k, 'WITHSCORES')
-
- # Convert Redis results to dict
- redis_results = {}
- for i in range(0, len(redis_raw), 2):
- key = redis_raw[i].decode()
- score = float(redis_raw[i+1])
- redis_results[key] = score
-
- # Get results from linear scan with original vectors
- linear_results = data.find_k_nearest(query_vec, k)
- linear_items = {name: score for name, score in linear_results}
-
- # Compare overlap between reduced and non-reduced results
- redis_set = set(redis_results.keys())
- linear_set = set(linear_items.keys())
- overlap = len(redis_set & linear_set)
- overlap_ratio = overlap / k
-
- # With random projection, we expect some loss of accuracy but should
- # maintain at least some similarity structure.
- # Note that gaussian distribution is the worse with this test, so
- # in real world practice, things will be better.
- min_expected_overlap = 0.1 # At least 10% overlap in top-k
- assert overlap_ratio >= min_expected_overlap, \
- f"Dimension reduction lost too much structure. Only {overlap_ratio*100:.1f}% overlap in top {k}"
-
- # For items that appear in both results, scores should be reasonably correlated
- common_items = redis_set & linear_set
- for item in common_items:
- redis_score = redis_results[item]
- linear_score = linear_items[item]
- # Allow for some deviation due to dimensionality reduction
- assert abs(redis_score - linear_score) < 0.2, \
- f"Score mismatch too high for {item}: Redis={redis_score:.3f} Linear={linear_score:.3f}"
-
- # If test fails, print comparison for debugging
- if overlap_ratio < min_expected_overlap:
- print("\nLow overlap in results. Details:")
- print("\nTop results from linear scan (original vectors):")
- for name, score in linear_results:
- print(f"{name}: {score:.3f}")
- print("\nTop results from Redis (reduced vectors):")
- for item, score in sorted(redis_results.items(), key=lambda x: x[1], reverse=True):
- print(f"{item}: {score:.3f}")
diff --git a/examples/redis-unstable/modules/vector-sets/tests/replication.py b/examples/redis-unstable/modules/vector-sets/tests/replication.py
deleted file mode 100644
index 91dfdf7..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/replication.py
+++ /dev/null
@@ -1,92 +0,0 @@
-from test import TestCase, generate_random_vector
-import struct
-import random
-import time
-
-class ComprehensiveReplicationTest(TestCase):
- def getname(self):
- return "Comprehensive Replication Test with mixed operations"
-
- def estimated_runtime(self):
- # This test will take longer than the default 100ms
- return 20.0 # 20 seconds estimate
-
- def test(self):
- # Setup replication between primary and replica
- assert self.setup_replication(), "Failed to setup replication"
-
- # Test parameters
- num_vectors = 5000
- vector_dim = 8
- delete_probability = 0.1
- cas_probability = 0.3
-
- # Keep track of added items for potential deletion
- added_items = []
-
- # Add vectors and occasionally delete
- for i in range(num_vectors):
- # Generate a random vector
- vec = generate_random_vector(vector_dim)
- vec_bytes = struct.pack(f'{vector_dim}f', *vec)
- item_name = f"{self.test_key}:item:{i}"
-
- # Decide whether to use CAS or not
- use_cas = random.random() < cas_probability
-
- if use_cas and added_items:
- # Get an existing item for CAS reference (if available)
- cas_item = random.choice(added_items)
- try:
- # Add with CAS
- result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes,
- item_name, 'CAS')
- # Only add to our list if actually added (CAS might fail)
- if result == 1:
- added_items.append(item_name)
- except Exception as e:
- print(f" CAS VADD failed: {e}")
- else:
- try:
- # Add without CAS
- result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, item_name)
- # Only add to our list if actually added
- if result == 1:
- added_items.append(item_name)
- except Exception as e:
- print(f" VADD failed: {e}")
-
- # Randomly delete items (with 10% probability)
- if random.random() < delete_probability and added_items:
- try:
- # Select a random item to delete
- item_to_delete = random.choice(added_items)
- # Delete the item using VREM (not VDEL)
- self.redis.execute_command('VREM', self.test_key, item_to_delete)
- # Remove from our list
- added_items.remove(item_to_delete)
- except Exception as e:
- print(f" VREM failed: {e}")
-
- # Allow time for replication to complete
- time.sleep(2.0)
-
- # Verify final VCARD matches
- primary_card = self.redis.execute_command('VCARD', self.test_key)
- replica_card = self.replica.execute_command('VCARD', self.test_key)
- assert primary_card == replica_card, f"Final VCARD mismatch: primary={primary_card}, replica={replica_card}"
-
- # Verify VDIM matches
- primary_dim = self.redis.execute_command('VDIM', self.test_key)
- replica_dim = self.replica.execute_command('VDIM', self.test_key)
- assert primary_dim == replica_dim, f"VDIM mismatch: primary={primary_dim}, replica={replica_dim}"
-
- # Verify digests match using DEBUG DIGEST
- primary_digest = self.redis.execute_command('DEBUG', 'DIGEST-VALUE', self.test_key)
- replica_digest = self.replica.execute_command('DEBUG', 'DIGEST-VALUE', self.test_key)
- assert primary_digest == replica_digest, f"Digest mismatch: primary={primary_digest}, replica={replica_digest}"
-
- # Print summary
- print(f"\n Added and maintained {len(added_items)} vectors with dimension {vector_dim}")
- print(f" Final vector count: {primary_card}")
- print(f" Final digest: {primary_digest[0].decode()}")
diff --git a/examples/redis-unstable/modules/vector-sets/tests/threading_config.py b/examples/redis-unstable/modules/vector-sets/tests/threading_config.py
deleted file mode 100644
index dfc931a..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/threading_config.py
+++ /dev/null
@@ -1,249 +0,0 @@
-from test import TestCase, generate_random_vector
-import struct
-
-
-class ThreadingConfigTest(TestCase):
- """
- Test suite for vset-force-single-threaded-execution configuration.
-
- This test validates the behavior of VADD and VSIM commands under different
- threading configurations. The new configuration is MUTABLE and BINARY:
- - false (0): Multi-threaded execution enabled (default)
- - true (1): Force single-threaded execution
-
- Key behaviors tested:
- - VADD with and without CAS option under both threading modes
- - VSIM with and without NOTHREAD option under both threading modes
- - Configuration reading, validation, and runtime modification
- - Thread behavior switching (multi-threaded vs forced single-threaded)
- """
-
- def getname(self):
- return "vset-force-single-threaded-execution configuration testing"
-
- def estimated_runtime(self):
- return 0.5 # Updated for mutable config testing with mode switching
-
- def get_config_value(self):
- """Get current vset-force-single-threaded-execution config value"""
- try:
- result = self.redis.execute_command('CONFIG', 'GET', 'vset-force-single-threaded-execution')
- if len(result) >= 2:
- # Redis returns 'yes'/'no' for boolean configs
- return result[1].decode() if isinstance(result[1], bytes) else result[1]
- return None
- except Exception:
- return None
-
- def set_config_value(self, value):
- """Set vset-force-single-threaded-execution config value"""
- try:
- # Convert boolean to yes/no string
- str_value = 'yes' if value else 'no'
- result = self.redis.execute_command('CONFIG', 'SET', 'vset-force-single-threaded-execution', str_value)
- return result == b'OK' or result == 'OK'
- except Exception as e:
- print(f"Failed to set config: {e}")
- return False
-
- def test_config_access_and_mutability(self):
- """Test 1: Configuration access and mutability"""
- # Get initial value
- initial_value = self.get_config_value()
- assert initial_value is not None, "Should be able to read vset-force-single-threaded-execution config"
- assert initial_value in ['yes', 'no'], f"Config value should be yes/no, got {initial_value}"
-
- # Test mutability by toggling the value
- new_value = 'no' if initial_value == 'yes' else 'yes'
- assert self.set_config_value(new_value == 'yes'), "Should be able to change config value"
-
- # Verify the change
- current_value = self.get_config_value()
- assert current_value == new_value, f"Config should be {new_value}, got {current_value}"
-
- # Restore original value
- assert self.set_config_value(initial_value == 'yes'), "Should be able to restore original value"
-
- return initial_value == 'yes'
-
- def test_vadd_without_cas(self, force_single_threaded=False):
- """Test 2: VADD command without CAS option"""
- # Set threading mode
- self.set_config_value(force_single_threaded)
-
- # Clear test data to avoid dimension conflicts
- self.redis.delete(self.test_key)
-
- dim = 64
- vec = generate_random_vector(dim)
- vec_bytes = struct.pack(f'{dim}f', *vec)
-
- result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:1')
- assert result == 1, f"VADD should return 1 for new item, got {result}"
-
- # Verify the vector was added
- card = self.redis.execute_command('VCARD', self.test_key)
- assert card == 1, f"VCARD should return 1, got {card}"
-
- def test_vadd_with_cas(self, force_single_threaded=False):
- """Test 3: VADD command with CAS option"""
- # Set threading mode
- self.set_config_value(force_single_threaded)
-
- # Clear test data to avoid dimension conflicts
- self.redis.delete(self.test_key)
-
- dim = 64
- vec = generate_random_vector(dim)
- vec_bytes = struct.pack(f'{dim}f', *vec)
-
- # First insertion with CAS should succeed
- result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:cas', 'CAS')
- assert result == 1, f"First VADD with CAS should return 1, got {result}"
-
- # Second insertion of same item with CAS should return 0
- result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:cas', 'CAS')
- assert result == 0, f"Duplicate VADD with CAS should return 0, got {result}"
-
- def test_vsim_without_nothread(self, force_single_threaded=False):
- """Test 4: VSIM command without NOTHREAD"""
- # Set threading mode
- self.set_config_value(force_single_threaded)
-
- # Clear test data to avoid dimension conflicts
- self.redis.delete(self.test_key)
-
- dim = 64
-
- # Add test vectors
- for i in range(5):
- vec = generate_random_vector(dim)
- vec_bytes = struct.pack(f'{dim}f', *vec)
- self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:{i}')
-
- # Test VSIM without NOTHREAD
- query_vec = generate_random_vector(dim)
- args = ['VSIM', self.test_key, 'VALUES', dim] + [str(x) for x in query_vec] + ['COUNT', 3]
- result = self.redis.execute_command(*args)
-
- assert isinstance(result, list), f"VSIM should return a list, got {type(result)}"
- assert len(result) <= 3, f"VSIM should return at most 3 results, got {len(result)}"
-
- def test_vsim_with_nothread(self, force_single_threaded=False):
- """Test 5: VSIM command with NOTHREAD"""
- # Set threading mode
- self.set_config_value(force_single_threaded)
-
- dim = 64
-
- # Ensure we have vectors to search (use existing vectors from previous test)
- card = self.redis.execute_command('VCARD', self.test_key)
- if card == 0:
- # Add test vectors if none exist
- for i in range(5):
- vec = generate_random_vector(dim)
- vec_bytes = struct.pack(f'{dim}f', *vec)
- self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:{i}')
-
- # Test VSIM with NOTHREAD
- query_vec = generate_random_vector(dim)
- args = ['VSIM', self.test_key, 'VALUES', dim] + [str(x) for x in query_vec] + ['COUNT', 3, 'NOTHREAD']
- result = self.redis.execute_command(*args)
-
- assert isinstance(result, list), f"VSIM with NOTHREAD should return a list, got {type(result)}"
- assert len(result) <= 3, f"VSIM with NOTHREAD should return at most 3 results, got {len(result)}"
-
- def test_threading_mode_comparison(self):
- """Test 6: Compare behavior between threading modes"""
- dim = 64
-
- # Clear test data
- self.redis.delete(self.test_key)
-
- # Test multi-threaded mode (default)
- self.set_config_value(False) # Multi-threaded
- self.test_vadd_without_cas(False)
- self.test_vadd_with_cas(False)
- multi_threaded_card = self.redis.execute_command('VCARD', self.test_key)
-
- # Clear and test single-threaded mode
- self.redis.delete(self.test_key)
- self.set_config_value(True) # Single-threaded
- self.test_vadd_without_cas(True)
- self.test_vadd_with_cas(True)
- single_threaded_card = self.redis.execute_command('VCARD', self.test_key)
-
- # Both modes should produce same results
- assert multi_threaded_card == single_threaded_card, \
- f"Both modes should produce same results: multi={multi_threaded_card}, single={single_threaded_card}"
-
- def test_nothread_override_behavior(self):
- """Test 7: NOTHREAD option should work regardless of config"""
- dim = 64
-
- # Test with both config modes
- for force_single in [False, True]:
- self.set_config_value(force_single)
- self.redis.delete(self.test_key)
-
- # Add test vectors
- for i in range(3):
- vec = generate_random_vector(dim)
- vec_bytes = struct.pack(f'{dim}f', *vec)
- self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:{i}')
-
- # NOTHREAD should work regardless of config
- query_vec = generate_random_vector(dim)
- args = ['VSIM', self.test_key, 'VALUES', dim] + [str(x) for x in query_vec] + ['COUNT', 2, 'NOTHREAD']
- result = self.redis.execute_command(*args)
-
- assert isinstance(result, list), f"NOTHREAD should work with force_single={force_single}"
- assert len(result) <= 2, f"NOTHREAD should return ≤2 results with force_single={force_single}"
-
- def test(self):
- """Main test method - runs all threading configuration tests"""
- # Get initial configuration
- initial_force_single = self.test_config_access_and_mutability()
- print(f"Initial vset-force-single-threaded-execution: {'yes' if initial_force_single else 'no'}")
-
- # Clear test data
- self.redis.delete(self.test_key)
-
- # Test both threading modes
- print("Testing multi-threaded mode...")
- self.set_config_value(False)
- self.test_vadd_without_cas(False)
- self.test_vadd_with_cas(False)
- self.test_vsim_without_nothread(False)
- self.test_vsim_with_nothread(False)
-
- print("Testing single-threaded mode...")
- self.set_config_value(True)
- self.test_vadd_without_cas(True)
- self.test_vadd_with_cas(True)
- self.test_vsim_without_nothread(True)
- self.test_vsim_with_nothread(True)
-
- # Test mode comparison and NOTHREAD override
- self.test_threading_mode_comparison()
- self.test_nothread_override_behavior()
-
- # Restore initial configuration
- self.set_config_value(initial_force_single)
-
- # Print summary
- self._print_test_summary(initial_force_single)
-
- def _print_test_summary(self, initial_force_single):
- """Print a summary of what was tested"""
- print(f"\nThreading Configuration Test Summary:")
- print(f" Configuration: vset-force-single-threaded-execution")
- print(f" Type: Boolean, Mutable")
- print(f" Initial value: {'yes' if initial_force_single else 'no'}")
- print(f" Tested modes: Both multi-threaded (no) and single-threaded (yes)")
- print(f" VADD: Works correctly in both modes")
- print(f" VADD with CAS: Works correctly in both modes")
- print(f" VSIM: Works correctly in both modes")
- print(f" NOTHREAD option: Overrides config in both modes")
- print(f" Configuration mutability: ✅ Successfully changed at runtime")
- print(f" All tests passed successfully!")
diff --git a/examples/redis-unstable/modules/vector-sets/tests/vadd_cas.py b/examples/redis-unstable/modules/vector-sets/tests/vadd_cas.py
deleted file mode 100644
index 3cb3508..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/vadd_cas.py
+++ /dev/null
@@ -1,98 +0,0 @@
-from test import TestCase, generate_random_vector
-import threading
-import struct
-import math
-import time
-import random
-from typing import List, Dict
-
-class ConcurrentCASTest(TestCase):
- def getname(self):
- return "Concurrent VADD with CAS"
-
- def estimated_runtime(self):
- return 1.5
-
- def worker(self, vectors: List[List[float]], start_idx: int, end_idx: int,
- dim: int, results: Dict[str, bool]):
- """Worker thread that adds a subset of vectors using VADD CAS"""
- for i in range(start_idx, end_idx):
- vec = vectors[i]
- name = f"{self.test_key}:item:{i}"
- vec_bytes = struct.pack(f'{dim}f', *vec)
-
- # Try to add the vector with CAS
- try:
- result = self.redis.execute_command('VADD', self.test_key, 'FP32',
- vec_bytes, name, 'CAS')
- results[name] = (result == 1) # Store if it was actually added
- except Exception as e:
- results[name] = False
- print(f"Error adding {name}: {e}")
-
- def verify_vector_similarity(self, vec1: List[float], vec2: List[float]) -> float:
- """Calculate cosine similarity between two vectors"""
- dot_product = sum(a*b for a,b in zip(vec1, vec2))
- norm1 = math.sqrt(sum(x*x for x in vec1))
- norm2 = math.sqrt(sum(x*x for x in vec2))
- return dot_product / (norm1 * norm2) if norm1 > 0 and norm2 > 0 else 0
-
- def test(self):
- # Test parameters
- dim = 128
- total_vectors = 5000
- num_threads = 8
- vectors_per_thread = total_vectors // num_threads
-
- # Generate all vectors upfront
- random.seed(42) # For reproducibility
- vectors = [generate_random_vector(dim) for _ in range(total_vectors)]
-
- # Prepare threads and results dictionary
- threads = []
- results = {} # Will store success/failure for each vector
-
- # Launch threads
- for i in range(num_threads):
- start_idx = i * vectors_per_thread
- end_idx = start_idx + vectors_per_thread if i < num_threads-1 else total_vectors
- thread = threading.Thread(target=self.worker,
- args=(vectors, start_idx, end_idx, dim, results))
- threads.append(thread)
- thread.start()
-
- # Wait for all threads to complete
- for thread in threads:
- thread.join()
-
- # Verify cardinality
- card = self.redis.execute_command('VCARD', self.test_key)
- assert card == total_vectors, \
- f"Expected {total_vectors} elements, but found {card}"
-
- # Verify each vector
- num_verified = 0
- for i in range(total_vectors):
- name = f"{self.test_key}:item:{i}"
-
- # Verify the item was successfully added
- assert results[name], f"Vector {name} was not successfully added"
-
- # Get the stored vector
- stored_vec_raw = self.redis.execute_command('VEMB', self.test_key, name)
- stored_vec = [float(x) for x in stored_vec_raw]
-
- # Verify vector dimensions
- assert len(stored_vec) == dim, \
- f"Stored vector dimension mismatch for {name}: {len(stored_vec)} != {dim}"
-
- # Calculate similarity with original vector
- similarity = self.verify_vector_similarity(vectors[i], stored_vec)
- assert similarity > 0.99, \
- f"Low similarity ({similarity}) for {name}"
-
- num_verified += 1
-
- # Final verification
- assert num_verified == total_vectors, \
- f"Only verified {num_verified} out of {total_vectors} vectors"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/vemb.py b/examples/redis-unstable/modules/vector-sets/tests/vemb.py
deleted file mode 100644
index 0f4cf77..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/vemb.py
+++ /dev/null
@@ -1,41 +0,0 @@
-from test import TestCase
-import struct
-import math
-
-class VEMB(TestCase):
- def getname(self):
- return "VEMB Command"
-
- def test(self):
- dim = 4
-
- # Add same vector in both formats
- vec = [1, 0, 0, 0]
- norm = math.sqrt(sum(x*x for x in vec))
- vec = [x/norm for x in vec] # Normalize the vector
-
- # Add using FP32
- vec_bytes = struct.pack(f'{dim}f', *vec)
- self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:1')
-
- # Add using VALUES
- self.redis.execute_command('VADD', self.test_key, 'VALUES', dim,
- *[str(x) for x in vec], f'{self.test_key}:item:2')
-
- # Get both back with VEMB
- result1 = self.redis.execute_command('VEMB', self.test_key, f'{self.test_key}:item:1')
- result2 = self.redis.execute_command('VEMB', self.test_key, f'{self.test_key}:item:2')
-
- retrieved_vec1 = [float(x) for x in result1]
- retrieved_vec2 = [float(x) for x in result2]
-
- # Compare both vectors with original (allow for small quantization errors)
- for i in range(dim):
- assert abs(vec[i] - retrieved_vec1[i]) < 0.01, \
- f"FP32 vector component {i} mismatch: expected {vec[i]}, got {retrieved_vec1[i]}"
- assert abs(vec[i] - retrieved_vec2[i]) < 0.01, \
- f"VALUES vector component {i} mismatch: expected {vec[i]}, got {retrieved_vec2[i]}"
-
- # Test non-existent item
- result = self.redis.execute_command('VEMB', self.test_key, 'nonexistent')
- assert result is None, "Non-existent item should return nil"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/vismember.py b/examples/redis-unstable/modules/vector-sets/tests/vismember.py
deleted file mode 100644
index eabebca..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/vismember.py
+++ /dev/null
@@ -1,47 +0,0 @@
-from test import TestCase, generate_random_vector
-import struct
-
-class BasicVISMEMBER(TestCase):
- def getname(self):
- return "VISMEMBER basic functionality"
-
- def test(self):
- # Add multiple vectors to the vector set
- vec1 = generate_random_vector(4)
- vec2 = generate_random_vector(4)
- vec_bytes1 = struct.pack('4f', *vec1)
- vec_bytes2 = struct.pack('4f', *vec2)
-
- # Create item keys
- item1 = f'{self.test_key}:item:1'
- item2 = f'{self.test_key}:item:2'
- nonexistent_item = f'{self.test_key}:item:nonexistent'
-
- # Add the vectors
- self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes1, item1)
- self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes2, item2)
-
- # Test VISMEMBER with existing elements
- result1 = self.redis.execute_command('VISMEMBER', self.test_key, item1)
- assert result1 == 1, f"VISMEMBER should return 1 for existing item, got {result1}"
-
- result2 = self.redis.execute_command('VISMEMBER', self.test_key, item2)
- assert result2 == 1, f"VISMEMBER should return 1 for existing item, got {result2}"
-
- # Test VISMEMBER with non-existent element
- result3 = self.redis.execute_command('VISMEMBER', self.test_key, nonexistent_item)
- assert result3 == 0, f"VISMEMBER should return 0 for non-existent item, got {result3}"
-
- # Test VISMEMBER with non-existent key
- nonexistent_key = f'{self.test_key}_nonexistent'
- result4 = self.redis.execute_command('VISMEMBER', nonexistent_key, item1)
- assert result4 == 0, f"VISMEMBER should return 0 for non-existent key, got {result4}"
-
- # Test VISMEMBER after removing an element
- self.redis.execute_command('VREM', self.test_key, item1)
- result5 = self.redis.execute_command('VISMEMBER', self.test_key, item1)
- assert result5 == 0, f"VISMEMBER should return 0 after element removal, got {result5}"
-
- # Verify item2 still exists
- result6 = self.redis.execute_command('VISMEMBER', self.test_key, item2)
- assert result6 == 1, f"VISMEMBER should still return 1 for remaining item, got {result6}"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/vrand-ping-pong.py b/examples/redis-unstable/modules/vector-sets/tests/vrand-ping-pong.py
deleted file mode 100644
index 99d2e9a..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/vrand-ping-pong.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from test import TestCase, generate_random_vector
-import struct
-
-class VRANDMEMBERPingPongRegressionTest(TestCase):
- def getname(self):
- return "[regression] VRANDMEMBER ping-pong"
-
- def test(self):
- """
- This test ensures that when only two vectors exist, VRANDMEMBER
- does not get stuck returning only one of them due to the "ping-pong" issue.
- """
- self.redis.delete(self.test_key) # Clean up before test
- dim = 4
-
- # Add exactly two vectors
- vec1_name = "vec1"
- vec1_data = generate_random_vector(dim)
- self.redis.execute_command('VADD', self.test_key, 'VALUES', dim, *vec1_data, vec1_name)
-
- vec2_name = "vec2"
- vec2_data = generate_random_vector(dim)
- self.redis.execute_command('VADD', self.test_key, 'VALUES', dim, *vec2_data, vec2_name)
-
- # Call VRANDMEMBER many times and check for distribution
- iterations = 100
- results = []
- for _ in range(iterations):
- member = self.redis.execute_command('VRANDMEMBER', self.test_key)
- results.append(member.decode())
-
- # Verify that both members were returned, proving it's not stuck
- unique_results = set(results)
-
- assert len(unique_results) == 2, f"Ping-pong test failed: should have returned 2 unique members, but got {len(unique_results)}."
diff --git a/examples/redis-unstable/modules/vector-sets/tests/vrandmember.py b/examples/redis-unstable/modules/vector-sets/tests/vrandmember.py
deleted file mode 100644
index ca9e006..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/vrandmember.py
+++ /dev/null
@@ -1,55 +0,0 @@
-from test import TestCase, generate_random_vector, fill_redis_with_vectors
-import struct
-
-class VRANDMEMBERTest(TestCase):
- def getname(self):
- return "VRANDMEMBER basic functionality"
-
- def test(self):
- # Test with empty key
- result = self.redis.execute_command('VRANDMEMBER', self.test_key)
- assert result is None, "VRANDMEMBER on non-existent key should return NULL"
-
- result = self.redis.execute_command('VRANDMEMBER', self.test_key, 5)
- assert isinstance(result, list) and len(result) == 0, "VRANDMEMBER with count on non-existent key should return empty array"
-
- # Fill with vectors
- dim = 4
- count = 100
- data = fill_redis_with_vectors(self.redis, self.test_key, count, dim)
-
- # Test single random member
- result = self.redis.execute_command('VRANDMEMBER', self.test_key)
- assert result is not None, "VRANDMEMBER should return a random member"
- assert result.decode() in data.names, "Random member should be in the set"
-
- # Test multiple unique members (positive count)
- positive_count = 10
- result = self.redis.execute_command('VRANDMEMBER', self.test_key, positive_count)
- assert isinstance(result, list), "VRANDMEMBER with positive count should return an array"
- assert len(result) == positive_count, f"Should return {positive_count} members"
-
- # Check for uniqueness
- decoded_results = [r.decode() for r in result]
- assert len(decoded_results) == len(set(decoded_results)), "Results should be unique with positive count"
- for item in decoded_results:
- assert item in data.names, "All returned items should be in the set"
-
- # Test more members than in the set
- result = self.redis.execute_command('VRANDMEMBER', self.test_key, count + 10)
- assert len(result) == count, "Should return only the available members when asking for more than exist"
-
- # Test with duplicates (negative count)
- negative_count = -20
- result = self.redis.execute_command('VRANDMEMBER', self.test_key, negative_count)
- assert isinstance(result, list), "VRANDMEMBER with negative count should return an array"
- assert len(result) == abs(negative_count), f"Should return {abs(negative_count)} members"
-
- # Check that all returned elements are valid
- decoded_results = [r.decode() for r in result]
- for item in decoded_results:
- assert item in data.names, "All returned items should be in the set"
-
- # Test with count = 0 (edge case)
- result = self.redis.execute_command('VRANDMEMBER', self.test_key, 0)
- assert isinstance(result, list) and len(result) == 0, "VRANDMEMBER with count=0 should return empty array"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/vrange.py b/examples/redis-unstable/modules/vector-sets/tests/vrange.py
deleted file mode 100644
index 7e57588..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/vrange.py
+++ /dev/null
@@ -1,113 +0,0 @@
-from test import TestCase, generate_random_vector
-import struct
-
-class BasicVRANGE(TestCase):
- def getname(self):
- return "VRANGE basic functionality and iteration"
-
- def test(self):
- # Add multiple elements with different names for lexicographical ordering
- elements = [
- "apple", "apricot", "banana", "cherry", "date",
- "elderberry", "fig", "grape", "honeydew", "kiwi",
- "lemon", "mango", "nectarine", "orange", "papaya",
- "quince", "raspberry", "strawberry", "tangerine", "watermelon"
- ]
-
- # Add all elements to the vector set
- for elem in elements:
- vec = generate_random_vector(4)
- vec_bytes = struct.pack('4f', *vec)
- self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, elem)
-
- # Test 1: Basic range with inclusive boundaries
- result = self.redis.execute_command('VRANGE', self.test_key, '[apple', '[grape', '5')
- result = [r.decode() for r in result]
- assert result == ['apple', 'apricot', 'banana', 'cherry', 'date'], f"Expected first 5 elements from apple, got {result}"
-
- # Test 2: Exclusive start boundary
- result = self.redis.execute_command('VRANGE', self.test_key, '(apple', '[cherry', '10')
- result = [r.decode() for r in result]
- assert result == ['apricot', 'banana', 'cherry'], f"Expected elements after apple up to cherry inclusive, got {result}"
-
- # Test 3: Exclusive end boundary
- result = self.redis.execute_command('VRANGE', self.test_key, '[banana', '(cherry', '10')
- result = [r.decode() for r in result]
- assert result == ['banana'], f"Expected only banana (cherry excluded), got {result}"
-
- # Test 4: Using '-' for minimum element
- result = self.redis.execute_command('VRANGE', self.test_key, '-', '[banana', '10')
- result = [r.decode() for r in result]
- assert result[0] == 'apple', "Should start from the first element"
- assert result[-1] == 'banana', "Should end at banana"
-
- # Test 5: Using '+' for maximum element
- result = self.redis.execute_command('VRANGE', self.test_key, '[raspberry', '+', '10')
- result = [r.decode() for r in result]
- assert 'raspberry' in result and 'strawberry' in result and 'tangerine' in result and 'watermelon' in result, "Should include all elements from raspberry onwards"
-
- # Test 6: Full range with '-' and '+'
- result = self.redis.execute_command('VRANGE', self.test_key, '-', '+', '100')
- result = [r.decode() for r in result]
- assert len(result) == len(elements), f"Should return all {len(elements)} elements"
- assert result == sorted(elements), "Elements should be in lexicographical order"
-
- # Test 7: Iterator pattern - verify each element appears exactly once
- seen = set()
- batch_size = 3
- current = '-'
-
- while True:
- if current == '-':
- # First iteration
- result = self.redis.execute_command('VRANGE', self.test_key, '-', '+', str(batch_size))
- else:
- # Subsequent iterations - exclusive start from last element
- result = self.redis.execute_command('VRANGE', self.test_key, f'({current}', '+', str(batch_size))
-
- result = [r.decode() for r in result]
-
- if not result:
- break
-
- # Check no duplicates in this batch
- for elem in result:
- assert elem not in seen, f"Element {elem} appeared more than once"
- seen.add(elem)
-
- # Update current to last element
- current = result[-1]
-
- # Break if we got less than requested (end of set)
- if len(result) < batch_size:
- break
-
- # Verify we saw all elements exactly once
- assert seen == set(elements), f"Iterator should visit all elements exactly once. Missing: {set(elements) - seen}, Extra: {seen - set(elements)}"
-
- # Test 8: Count of 0 returns empty array
- result = self.redis.execute_command('VRANGE', self.test_key, '-', '+', '0')
- assert result == [], f"Count of 0 should return empty array, got {result}"
-
- # Test 9: Range with no matching elements
- result = self.redis.execute_command('VRANGE', self.test_key, '[zebra', '+', '10')
- assert result == [], f"Range beyond all elements should return empty array, got {result}"
-
- # Test 10: Non-existent key
- result = self.redis.execute_command('VRANGE', 'nonexistent_key', '-', '+', '10')
- assert result == [], f"Non-existent key should return empty array, got {result}"
-
- # Test 11: Partial word boundaries
- result = self.redis.execute_command('VRANGE', self.test_key, '[app', '[apr', '10')
- result = [r.decode() for r in result]
- assert 'apple' in result, "Should include 'apple' which starts with 'app'"
- assert 'apricot' not in result, "Should not include 'apricot' as it's >= 'apr'"
-
- # Test 12: Single element range
- result = self.redis.execute_command('VRANGE', self.test_key, '[cherry', '[cherry', '10')
- result = [r.decode() for r in result]
- assert result == ['cherry'], f"Inclusive single element range should return that element, got {result}"
-
- # Test 13: Empty range (start > end)
- result = self.redis.execute_command('VRANGE', self.test_key, '[grape', '[apple', '10')
- assert result == [], f"Range where start > end should return empty array, got {result}"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/vsim_limit_efsearch.py b/examples/redis-unstable/modules/vector-sets/tests/vsim_limit_efsearch.py
deleted file mode 100644
index 25b9689..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/vsim_limit_efsearch.py
+++ /dev/null
@@ -1,32 +0,0 @@
-from test import TestCase, generate_random_vector
-import struct
-
-class VSIMLimitEFSearch(TestCase):
- def getname(self):
- return "VSIM Limit EF Search"
-
- def estimated_runtime(self):
- return 0.2
-
- def test(self):
- dim = 32
- vec = generate_random_vector(dim)
- vec_bytes = struct.pack(f'{dim}f', *vec)
-
- # Add test vector
- self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:1')
-
- query_vec = generate_random_vector(dim)
-
- # Test EF upper bound (should accept 1000000)
- result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
- *[str(x) for x in query_vec], 'EF', 1000000)
- assert isinstance(result, list), "EF=1000000 should be accepted"
-
- # Test EF over limit (should reject > 1000000)
- try:
- self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
- *[str(x) for x in query_vec], 'EF', 1000001)
- assert False, "EF=1000001 should be rejected"
- except Exception as e:
- assert "invalid EF" in str(e), f"Expected EF validation error, got: {e}"
diff --git a/examples/redis-unstable/modules/vector-sets/tests/with.py b/examples/redis-unstable/modules/vector-sets/tests/with.py
deleted file mode 100644
index d14a23f..0000000
--- a/examples/redis-unstable/modules/vector-sets/tests/with.py
+++ /dev/null
@@ -1,214 +0,0 @@
-from test import TestCase, generate_random_vector
-import struct
-import json
-import random
-
-class VSIMWithAttribs(TestCase):
- def getname(self):
- return "VSIM WITHATTRIBS/WITHSCORES functionality testing"
-
- def setup(self):
- super().setup()
- self.dim = 8
- self.count = 20
-
- # Create vectors with attributes
- for i in range(self.count):
- vec = generate_random_vector(self.dim)
- vec_bytes = struct.pack(f'{self.dim}f', *vec)
-
- # Item name
- name = f"{self.test_key}:item:{i}"
-
- # Add to Redis
- self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, name)
-
- # Create and add attribute
- if i % 5 == 0:
- # Every 5th item has no attribute (for testing NULL responses)
- continue
-
- category = random.choice(["electronics", "furniture", "clothing"])
- price = random.randint(50, 1000)
- attrs = {"category": category, "price": price, "id": i}
-
- self.redis.execute_command('VSETATTR', self.test_key, name, json.dumps(attrs))
-
- def is_numeric(self, value):
- """Check if a value can be converted to float"""
- try:
- if isinstance(value, (int, float)):
- return True
- if isinstance(value, bytes):
- float(value.decode('utf-8'))
- return True
- if isinstance(value, str):
- float(value)
- return True
- return False
- except (ValueError, TypeError):
- return False
-
- def test(self):
- # Create query vector
- query_vec = generate_random_vector(self.dim)
-
- # Test 1: VSIM with no additional options (should be same for RESP2 and RESP3)
- cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
- cmd_args.extend([str(x) for x in query_vec])
- cmd_args.extend(['COUNT', 5])
-
- results_resp2 = self.redis.execute_command(*cmd_args)
- results_resp3 = self.redis3.execute_command(*cmd_args)
-
- # Both should return simple arrays of item names
- assert len(results_resp2) == 5, f"RESP2: Expected 5 results, got {len(results_resp2)}"
- assert len(results_resp3) == 5, f"RESP3: Expected 5 results, got {len(results_resp3)}"
- assert all(isinstance(item, bytes) for item in results_resp2), "RESP2: Results should be byte strings"
- assert all(isinstance(item, bytes) for item in results_resp3), "RESP3: Results should be byte strings"
-
- # Test 2: VSIM with WITHSCORES only
- cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
- cmd_args.extend([str(x) for x in query_vec])
- cmd_args.extend(['COUNT', 5, 'WITHSCORES'])
-
- results_resp2 = self.redis.execute_command(*cmd_args)
- results_resp3 = self.redis3.execute_command(*cmd_args)
-
- # RESP2: Should be a flat array alternating item, score
- assert len(results_resp2) == 10, f"RESP2: Expected 10 elements (5 items × 2), got {len(results_resp2)}"
- for i in range(0, len(results_resp2), 2):
- assert isinstance(results_resp2[i], bytes), f"RESP2: Item at {i} should be bytes"
- assert self.is_numeric(results_resp2[i+1]), f"RESP2: Score at {i+1} should be numeric"
- score = float(results_resp2[i+1]) if isinstance(results_resp2[i+1], bytes) else results_resp2[i+1]
- assert 0 <= score <= 1, f"RESP2: Score {score} should be between 0 and 1"
-
- # RESP3: Should be a dict/map with items as keys and scores as DIRECT values (not arrays)
- assert isinstance(results_resp3, dict), f"RESP3: Expected dict, got {type(results_resp3)}"
- assert len(results_resp3) == 5, f"RESP3: Expected 5 entries, got {len(results_resp3)}"
- for item, score in results_resp3.items():
- assert isinstance(item, bytes), f"RESP3: Key should be bytes"
- # Score should be a direct value, NOT an array
- assert not isinstance(score, list), f"RESP3: With single WITH option, value should not be array"
- assert self.is_numeric(score), f"RESP3: Score should be numeric, got {type(score)}"
- score_val = float(score) if isinstance(score, bytes) else score
- assert 0 <= score_val <= 1, f"RESP3: Score {score_val} should be between 0 and 1"
-
- # Test 3: VSIM with WITHATTRIBS only
- cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
- cmd_args.extend([str(x) for x in query_vec])
- cmd_args.extend(['COUNT', 5, 'WITHATTRIBS'])
-
- results_resp2 = self.redis.execute_command(*cmd_args)
- results_resp3 = self.redis3.execute_command(*cmd_args)
-
- # RESP2: Should be a flat array alternating item, attribute
- assert len(results_resp2) == 10, f"RESP2: Expected 10 elements (5 items × 2), got {len(results_resp2)}"
- for i in range(0, len(results_resp2), 2):
- assert isinstance(results_resp2[i], bytes), f"RESP2: Item at {i} should be bytes"
- attr = results_resp2[i+1]
- assert attr is None or isinstance(attr, bytes), f"RESP2: Attribute at {i+1} should be None or bytes"
- if attr is not None:
- # Verify it's valid JSON
- json.loads(attr)
-
- # RESP3: Should be a dict/map with items as keys and attributes as DIRECT values (not arrays)
- assert isinstance(results_resp3, dict), f"RESP3: Expected dict, got {type(results_resp3)}"
- assert len(results_resp3) == 5, f"RESP3: Expected 5 entries, got {len(results_resp3)}"
- for item, attr in results_resp3.items():
- assert isinstance(item, bytes), f"RESP3: Key should be bytes"
- # Attribute should be a direct value, NOT an array
- assert not isinstance(attr, list), f"RESP3: With single WITH option, value should not be array"
- assert attr is None or isinstance(attr, bytes), f"RESP3: Attribute should be None or bytes"
- if attr is not None:
- # Verify it's valid JSON
- json.loads(attr)
-
- # Test 4: VSIM with both WITHSCORES and WITHATTRIBS
- cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
- cmd_args.extend([str(x) for x in query_vec])
- cmd_args.extend(['COUNT', 5, 'WITHSCORES', 'WITHATTRIBS'])
-
- results_resp2 = self.redis.execute_command(*cmd_args)
- results_resp3 = self.redis3.execute_command(*cmd_args)
-
- # RESP2: Should be a flat array with pattern: item, score, attribute
- assert len(results_resp2) == 15, f"RESP2: Expected 15 elements (5 items × 3), got {len(results_resp2)}"
- for i in range(0, len(results_resp2), 3):
- assert isinstance(results_resp2[i], bytes), f"RESP2: Item at {i} should be bytes"
- assert self.is_numeric(results_resp2[i+1]), f"RESP2: Score at {i+1} should be numeric"
- score = float(results_resp2[i+1]) if isinstance(results_resp2[i+1], bytes) else results_resp2[i+1]
- assert 0 <= score <= 1, f"RESP2: Score {score} should be between 0 and 1"
- attr = results_resp2[i+2]
- assert attr is None or isinstance(attr, bytes), f"RESP2: Attribute at {i+2} should be None or bytes"
-
- # RESP3: Should be a dict where each value is a 2-element array [score, attribute]
- assert isinstance(results_resp3, dict), f"RESP3: Expected dict, got {type(results_resp3)}"
- assert len(results_resp3) == 5, f"RESP3: Expected 5 entries, got {len(results_resp3)}"
- for item, value in results_resp3.items():
- assert isinstance(item, bytes), f"RESP3: Key should be bytes"
- # With BOTH options, value MUST be an array
- assert isinstance(value, list), f"RESP3: With both WITH options, value should be a list, got {type(value)}"
- assert len(value) == 2, f"RESP3: Value should have 2 elements [score, attr], got {len(value)}"
-
- score, attr = value
- assert self.is_numeric(score), f"RESP3: Score should be numeric"
- score_val = float(score) if isinstance(score, bytes) else score
- assert 0 <= score_val <= 1, f"RESP3: Score {score_val} should be between 0 and 1"
- assert attr is None or isinstance(attr, bytes), f"RESP3: Attribute should be None or bytes"
-
- # Test 5: Verify consistency - same items returned in same order
- cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
- cmd_args.extend([str(x) for x in query_vec])
- cmd_args.extend(['COUNT', 5, 'WITHSCORES', 'WITHATTRIBS'])
-
- results_resp2 = self.redis.execute_command(*cmd_args)
- results_resp3 = self.redis3.execute_command(*cmd_args)
-
- # Extract items from RESP2 (every 3rd element starting from 0)
- items_resp2 = [results_resp2[i] for i in range(0, len(results_resp2), 3)]
-
- # Extract items from RESP3 (keys of the dict)
- items_resp3 = list(results_resp3.keys())
-
- # Verify same items returned
- assert set(items_resp2) == set(items_resp3), "RESP2 and RESP3 should return the same items"
-
- # Build a mapping from items to scores and attributes for comparison
- data_resp2 = {}
- for i in range(0, len(results_resp2), 3):
- item = results_resp2[i]
- score = float(results_resp2[i+1]) if isinstance(results_resp2[i+1], bytes) else results_resp2[i+1]
- attr = results_resp2[i+2]
- data_resp2[item] = (score, attr)
-
- data_resp3 = {}
- for item, value in results_resp3.items():
- score = float(value[0]) if isinstance(value[0], bytes) else value[0]
- attr = value[1]
- data_resp3[item] = (score, attr)
-
- # Verify scores and attributes match for each item
- for item in data_resp2:
- score_resp2, attr_resp2 = data_resp2[item]
- score_resp3, attr_resp3 = data_resp3[item]
-
- assert abs(score_resp2 - score_resp3) < 0.0001, \
- f"Scores for {item} don't match: RESP2={score_resp2}, RESP3={score_resp3}"
- assert attr_resp2 == attr_resp3, \
- f"Attributes for {item} don't match: RESP2={attr_resp2}, RESP3={attr_resp3}"
-
- # Test 6: Test ordering of WITHSCORES and WITHATTRIBS doesn't matter
- cmd_args1 = ['VSIM', self.test_key, 'VALUES', self.dim]
- cmd_args1.extend([str(x) for x in query_vec])
- cmd_args1.extend(['COUNT', 3, 'WITHSCORES', 'WITHATTRIBS'])
-
- cmd_args2 = ['VSIM', self.test_key, 'VALUES', self.dim]
- cmd_args2.extend([str(x) for x in query_vec])
- cmd_args2.extend(['COUNT', 3, 'WITHATTRIBS', 'WITHSCORES']) # Reversed order
-
- results1_resp3 = self.redis3.execute_command(*cmd_args1)
- results2_resp3 = self.redis3.execute_command(*cmd_args2)
-
- # Both should return the same structure
- assert results1_resp3 == results2_resp3, "Order of WITH options shouldn't matter"