redis/tests/large_scale.py

57 lines
2.0 KiB
Python

from test import TestCase, fill_redis_with_vectors, generate_random_vector
import random
class LargeScale(TestCase):
    """Check VSIM against a linear scan on a large set of random vectors."""

    def getname(self):
        """Return the display name of this test."""
        return "Large Scale Comparison"

    def estimated_runtime(self):
        """Return the runtime estimate for this test.

        NOTE(review): the unit is defined by the test runner (presumably
        seconds) -- confirm against the TestCase base class.
        """
        return 10

    def test(self):
        """Compare the top-k VSIM reply with a linear-scan reference.

        Fills the test key with random vectors, queries it with one more
        random vector, and asserts that:

        * at least 70% of the items returned by VSIM also appear in the
          linear-scan top-k, and
        * for every item present in both result sets, the two scores differ
          by less than 0.01.

        Raises:
            AssertionError: if either of the two checks above fails.
        """
        k = 50
        vector_dim = 300
        vector_count = 20000
        required_overlap = k * 0.7

        # Seed before any vector is generated so the run is deterministic.
        # The dataset is filled first and the query generated afterwards;
        # keep that order, since it may affect the random stream.
        random.seed(42)
        reference = fill_redis_with_vectors(
            self.redis, self.test_key, vector_count, vector_dim
        )
        query = generate_random_vector(vector_dim)

        # Ask Redis for the k nearest items, with a good exploration factor.
        raw_reply = self.redis.execute_command(
            'VSIM', self.test_key, 'VALUES', vector_dim,
            *map(str, query),
            'COUNT', k, 'WITHSCORES', 'EF', 500,
        )

        # The reply is read as a flat sequence alternating item name
        # (bytes) and score; fold it into a name -> score mapping.
        redis_results = {
            raw_reply[pos].decode(): float(raw_reply[pos + 1])
            for pos in range(0, len(raw_reply), 2)
        }

        # Reference answer: (name, score) pairs from the linear scan.
        linear_items = dict(reference.find_k_nearest(query, k))

        # Items reported by both Redis and the linear scan.
        shared = set(redis_results) & set(linear_items)
        overlap = len(shared)

        # On failure, print a comparison first to help debugging.
        if overlap < required_overlap:
            reference.print_comparison(
                {'items': redis_results, 'query_vector': query}, k
            )
        assert overlap >= required_overlap, \
            f"Expected at least 70% overlap in top {k} results, got {overlap/k*100:.1f}%"

        # Scores of the shared items must agree between the two sources.
        for item in shared:
            redis_score, linear_score = redis_results[item], linear_items[item]
            assert abs(redis_score - linear_score) < 0.01, \
                f"Score mismatch for {item}: Redis={redis_score:.3f} Linear={linear_score:.3f}"