mirror of https://mirror.osredm.com/root/redis.git
57 lines
2.0 KiB
Python
57 lines
2.0 KiB
Python
from test import TestCase, fill_redis_with_vectors, generate_random_vector
|
|
import random
|
|
|
|
class LargeScale(TestCase):
    """Compare VSIM top-k results with a linear scan on a large vector set."""

    def getname(self):
        """Return the display name of this test."""
        return "Large Scale Comparison"

    def estimated_runtime(self):
        """Return the runtime estimate for this test.

        NOTE(review): the unit is presumably seconds -- confirm against the
        TestCase base class.
        """
        return 10

    def test(self):
        """Fill a key with random vectors and check VSIM against a linear scan.

        The test passes when at least 70% of the top-k elements returned by
        VSIM also appear in the top-k of the reference linear scan, and every
        element found by both has scores that differ by less than 0.01.
        """
        dim = 300
        count = 20000
        k = 50
        min_overlap = k * 0.7  # 70% of the requested results

        # Seed the RNG so the test is deterministic, then populate Redis and
        # keep the returned reference data for the comparison below.
        random.seed(42)
        data = fill_redis_with_vectors(self.redis, self.test_key, count, dim)

        # Query vector used for both the Redis search and the linear scan.
        query_vec = generate_random_vector(dim)

        # Assemble the VSIM call piece by piece: the vector components are
        # sent as strings, and EF 500 asks for a good exploration factor.
        vsim_args = ['VSIM', self.test_key, 'VALUES', dim]
        vsim_args.extend(str(x) for x in query_vec)
        vsim_args.extend(['COUNT', k, 'WITHSCORES', 'EF', 500])
        reply = self.redis.execute_command(*vsim_args)

        # The reply is read as a flat sequence alternating element name and
        # score; fold it into a name -> score mapping.
        redis_results = {
            reply[pos].decode(): float(reply[pos + 1])
            for pos in range(0, len(reply), 2)
        }

        # Reference answer from the linear scan, as a name -> score mapping.
        linear_items = {}
        for name, score in data.find_k_nearest(query_vec, k):
            linear_items[name] = score

        # Elements found by both searches.
        common = set(redis_results) & set(linear_items)
        overlap = len(common)

        # On failure, print the comparison first to help debugging.
        enough_overlap = overlap >= min_overlap
        if not enough_overlap:
            data.print_comparison({'items': redis_results, 'query_vector': query_vec}, k)

        assert enough_overlap, \
            f"Expected at least 70% overlap in top {k} results, got {overlap/k*100:.1f}%"

        # Scores of the shared elements must agree within the tolerance.
        for item in common:
            redis_score, linear_score = redis_results[item], linear_items[item]
            assert abs(redis_score - linear_score) < 0.01, \
                f"Score mismatch for {item}: Redis={redis_score:.3f} Linear={linear_score:.3f}"
|