Test: FILTER integration tests, work in progress.

2025-03-02 12:03:49 +01:00 · 2025-03-02 12:03:49 +01:00 · c8b6cbc6e1
parent 1cb927aef6
commit c8b6cbc6e1
1 changed files with 555 additions and 0 deletions
--- a/tests/filter_int.py
+++ b/tests/filter_int.py
@ -0,0 +1,555 @@
+from test import TestCase, generate_random_vector
+import struct
+import random
+import math
+import json
+import time
+
+class VSIMFilterAdvanced(TestCase):
+    def getname(self):
+        """Return the human-readable title of this test case."""
+        title = "VSIM FILTER comprehensive functionality testing"
+        return title
+
+    def estimated_runtime(self):
+        """Return the expected duration of this test, in seconds."""
+        # The large dataset dominates the runtime; allow up to 15 seconds.
+        seconds = 15
+        return seconds
+
+    def setup(self):
+        """Run the base-class setup, then define dataset sizes and attribute pools."""
+        super().setup()
+
+        # Vector dimension, element count for large tests, element count for
+        # small/quick tests.
+        self.dim, self.count, self.small_count = 32, 5000, 50
+
+        # Value pools sampled by create_attributes()
+        self.categories = ["electronics", "furniture", "clothing", "books", "food"]
+        self.cities = ["New York", "London", "Tokyo", "Paris", "Berlin", "Sydney", "Toronto", "Singapore"]
+        self.price_ranges = [(10, 50), (50, 200), (200, 1000), (1000, 5000)]
+        self.years = [*range(2000, 2025)]
+
+    def create_attributes(self, index):
+        """Build the attribute payload for the vector at position ``index``.
+
+        Returns a JSON string describing a randomly generated product, or the
+        literal (deliberately malformed) string "{{invalid json}}" for roughly
+        2% of the calls. About 10% of the valid payloads lack the "price"
+        field, about 30% carry a "promotion" field, and every 10th index is
+        flagged as premium.
+        """
+        # Draw the base fields. The order of the random calls is kept stable so
+        # a seeded generator always produces the same dataset.
+        category = random.choice(self.categories)
+        city = random.choice(self.cities)
+        low, high = random.choice(self.price_ranges)
+        price = round(random.uniform(low, high), 2)
+        year = random.choice(self.years)
+        in_stock = random.random() > 0.3  # in stock about 70% of the time
+        rating = round(random.uniform(1, 5), 1)
+        views = int(random.expovariate(1/1000))  # exponentially distributed page views
+        tag_count = random.randint(0, 3)
+        tags = random.sample(["popular", "sale", "new", "limited", "exclusive", "clearance"],
+                             k=tag_count)
+
+        attrs = dict(
+            id=index,
+            category=category,
+            location=city,
+            price=price,
+            year=year,
+            in_stock=in_stock,
+            rating=rating,
+            views=views,
+            tags=tags,
+        )
+
+        # Every 10th item carries a fixed "premium" combination for testing
+        if index % 10 == 0:
+            attrs.update(is_premium=True,
+                         special_features=["premium", "warranty", "support"])
+
+        # Only these three categories get a sub-category
+        subcategory_pools = {
+            "electronics": ["phones", "computers", "cameras", "audio"],
+            "furniture": ["chairs", "tables", "sofas", "beds"],
+            "clothing": ["shirts", "pants", "dresses", "shoes"],
+        }
+        pool = subcategory_pools.get(category)
+        if pool is not None:
+            attrs["subcategory"] = random.choice(pool)
+
+        # Intentionally drop the price now and then (about 10% of the items)
+        if random.random() > 0.9:
+            attrs.pop("price")
+
+        # About 30% of the items advertise a promotion
+        if random.random() > 0.7:
+            attrs["promotion"] = random.choice(["discount", "bundle", "gift"])
+
+        # About 2% of the items get a payload that is not valid JSON
+        is_corrupt = random.random() > 0.98
+        return "{{invalid json}}" if is_corrupt else json.dumps(attrs)
+
+    def create_vectors_with_attributes(self, key, count):
+        """Insert ``count`` random vectors under ``key`` and attach attributes.
+
+        Every element is added with VADD (FP32 blob) and then receives the
+        payload produced by create_attributes() through VSETATTR.
+
+        Returns:
+            (vectors, names, attribute_map), where attribute_map maps each
+            element name to its parsed attribute dict, or to None when the
+            payload could not be parsed as JSON.
+        """
+        vectors, names, attribute_map = [], [], {}
+
+        for i in range(count):
+            vec = generate_random_vector(self.dim)
+            vectors.append(vec)
+            name = f"{key}:item:{i}"
+            names.append(name)
+
+            # Store the vector itself
+            blob = struct.pack(f'{self.dim}f', *vec)
+            self.redis.execute_command('VADD', key, 'FP32', blob, name)
+
+            # Attach the (possibly malformed) attribute payload
+            raw_attrs = self.create_attributes(i)
+            self.redis.execute_command('VSETATTR', key, name, raw_attrs)
+
+            # Remember the parsed form for later verification
+            parsed = None
+            if '{' in raw_attrs:
+                try:
+                    parsed = json.loads(raw_attrs)
+                except json.JSONDecodeError:
+                    parsed = None
+            attribute_map[name] = parsed
+
+        return vectors, names, attribute_map
+
+    def filter_linear_search(self, vectors, names, query_vector, filter_expr, attribute_map, k=10):
+        """Brute-force filtered nearest-neighbour search used as ground truth.
+
+        Scores every vector whose attributes pass matches_filter() by cosine
+        similarity rescaled to the 0..1 range, and returns the ``k`` best
+        (name, score) pairs, highest score first. Zero-length vectors are
+        skipped, and a zero-length query yields an empty list.
+        """
+        def norm(values):
+            return math.sqrt(sum(x*x for x in values))
+
+        query_norm = norm(query_vector)
+        if query_norm == 0:
+            return []
+
+        scored = []
+        for idx, candidate in enumerate(vectors):
+            name = names[idx]
+
+            # Elements rejected by the filter never enter the ranking
+            if not self.matches_filter(attribute_map.get(name), filter_expr):
+                continue
+
+            candidate_norm = norm(candidate)
+            if candidate_norm == 0:
+                continue
+
+            dot_product = sum(a*b for a,b in zip(query_vector, candidate))
+            cosine_sim = dot_product / (query_norm * candidate_norm)
+            distance = 1.0 - cosine_sim
+            # Map the cosine distance (0..2) onto a 0..1 similarity score
+            scored.append((name, 1.0 - (distance/2.0)))
+
+        ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)
+        return ranked[:k]
+
+    def matches_filter(self, attributes, filter_expr):
+        """Evaluate ``filter_expr`` against one element's attributes.
+
+        This is not an expression parser: it only recognises the literal filter
+        strings listed below and evaluates the equivalent Python condition.
+
+        Args:
+            attributes: parsed attribute dict of the element, or None when the
+                element has no attributes or its payload was not valid JSON.
+            filter_expr: the exact filter string that is sent to the server.
+
+        Returns:
+            False when ``attributes`` is None; the result of the condition for
+            a recognised expression; True for any expression that is not
+            recognised (in that case the ground truth is unfiltered).
+        """
+        if attributes is None:
+            return False  # No attributes or invalid JSON
+
+        get = attributes.get
+
+        # Defaults are chosen so that a missing field never satisfies a
+        # comparison (0 for ">" / ">=", +inf for "<").
+        known_filters = {
+            '.category == "electronics"':
+                lambda: get('category') == 'electronics',
+            '.category == "books"':
+                lambda: get('category') == 'books',
+            '.price > 1000':
+                lambda: get('price', 0) > 1000,
+            '.price > 500':
+                lambda: get('price', 0) > 500,
+            '.in_stock':
+                lambda: get('in_stock', False),
+            '.rating >= 4':
+                lambda: get('rating', 0) >= 4,
+            '.is_premium':
+                lambda: get('is_premium', False),
+            '.year > 2000':
+                lambda: get('year', 0) > 2000,
+            '.category == "electronics" and .price < 500':
+                lambda: (get('category') == 'electronics' and
+                         get('price', float('inf')) < 500),
+            '.category == "electronics" and .price > 200':
+                lambda: (get('category') == 'electronics' and
+                         get('price', 0) > 200),
+            '.price > 100 and .price < 1000':
+                lambda: 100 < get('price', 0) < 1000,
+        }
+
+        evaluate = known_filters.get(filter_expr)
+        if evaluate is None:
+            # Default case - we can't parse this filter, so nothing is excluded
+            return True
+        return evaluate()
+
+    def safe_decode(self, item):
+        """Return ``item`` as text: bytes are decoded, anything else is returned unchanged."""
+        if isinstance(item, bytes):
+            return item.decode()
+        return item
+
+    def calculate_recall(self, redis_results, linear_results, k=10):
+        """Return the fraction of the top-``k`` ground-truth names found by Redis.
+
+        ``redis_results`` is an iterable of element names (bytes or str);
+        ``linear_results`` is a ranked list of (name, score) pairs. When the
+        ground truth is empty the recall is reported as a perfect 1.0.
+        """
+        found = {self.safe_decode(raw) for raw in redis_results}
+        expected = {entry[0] for entry in linear_results[:k]}
+
+        if len(expected) == 0:
+            return 1.0  # Nothing to find counts as perfect recall
+
+        return len(found & expected) / len(expected)
+
+    def test_recall_with_filter(self, filter_expr, ef=500, filter_ef=None):
+        """Run one filtered VSIM query and compare it with a linear-scan ground truth.
+
+        Relies on self.vectors, self.names and self.attribute_map having been
+        populated (test() does this) and on self.test_key / self.redis.
+
+        Args:
+            filter_expr: expression passed both to the FILTER option and to
+                filter_linear_search().
+            ef: value for the EF option.
+            filter_ef: value for the FILTER-EF option; the option is omitted
+                when this is None or 0.
+
+        Returns:
+            Tuple (recall, selectivity, query_time, result_count) where
+            query_time is the duration of the VSIM command in seconds.
+
+        Raises:
+            AssertionError: when the Redis selectivity differs from the ground
+                truth by 0.1 or more, or when recall is below 0.7 with
+                ef >= 500 and FILTER-EF unset or >= 1000.
+        """
+        # Create query vector
+        query_vec = generate_random_vector(self.dim)
+
+        # First, get ground truth using linear scan
+        linear_results = self.filter_linear_search(
+            self.vectors, self.names, query_vec, filter_expr, self.attribute_map, k=50)
+
+        # NOTE: both selectivity figures are computed from result lists capped
+        # at 50 items, so they measure "how full the result page is" rather
+        # than the fraction of the dataset matching the filter.
+        true_selectivity = len(linear_results) / len(self.names) if self.names else 0
+
+        # Perform Redis search with filter
+        cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
+        cmd_args.extend(str(x) for x in query_vec)
+        cmd_args.extend(['COUNT', 50, 'WITHSCORES', 'EF', ef, 'FILTER', filter_expr])
+        if filter_ef:
+            cmd_args.extend(['FILTER-EF', filter_ef])
+
+        # perf_counter() is monotonic and high resolution, unlike time.time(),
+        # which can jump when the system clock is adjusted.
+        start_time = time.perf_counter()
+        redis_results = self.redis.execute_command(*cmd_args)
+        query_time = time.perf_counter() - start_time
+
+        # WITHSCORES replies are a flat [name, score, name, score, ...] list
+        redis_items = {
+            self.safe_decode(member): float(score)
+            for member, score in zip(redis_results[::2], redis_results[1::2])
+        }
+
+        # Recall is measured against the 10 best ground-truth items (the
+        # default k of calculate_recall), looked up among all returned items.
+        recall = self.calculate_recall(redis_items.keys(), linear_results)
+        selectivity = len(redis_items) / len(self.names) if redis_items else 0
+
+        # Compare against the true selectivity from linear scan
+        assert abs(selectivity - true_selectivity) < 0.1, \
+            f"Redis selectivity {selectivity:.3f} differs significantly from ground truth {true_selectivity:.3f}"
+
+        # We expect high recall for standard parameters
+        if ef >= 500 and (filter_ef is None or filter_ef >= 1000):
+            assert recall >= 0.7, \
+                f"Low recall {recall:.2f} for filter '{filter_expr}'"
+
+        return recall, selectivity, query_time, len(redis_items)
+
+    def test(self):
+        """Run the VSIM FILTER scenarios end to end against the server.
+
+        Builds the attributed dataset once and then walks through seven
+        sections: recall of basic filters, filters of different selectivity,
+        the FILTER-EF option, complex expressions, attribute types, the COUNT
+        option, and edge cases. Sections 4 and 5 only report: any exception
+        raised there (including assertion failures) is printed, not propagated.
+
+        Raises:
+            AssertionError: when one of the enforced checks fails.
+        """
+        print("\nRunning comprehensive VSIM FILTER tests...")
+
+        # Create a larger dataset for testing
+        print(f"Creating dataset with {self.count} vectors and attributes...")
+        self.vectors, self.names, self.attribute_map = self.create_vectors_with_attributes(
+            self.test_key, self.count)
+
+        # ==== 1. Recall and Precision Testing ====
+        print("Testing recall for various filters...")
+
+        # Test basic filters with different selectivity
+        results = {}
+        results["category"] = self.test_recall_with_filter('.category == "electronics"')
+        results["price_high"] = self.test_recall_with_filter('.price > 1000')
+        results["in_stock"] = self.test_recall_with_filter('.in_stock')
+        results["rating"] = self.test_recall_with_filter('.rating >= 4')
+        results["complex1"] = self.test_recall_with_filter('.category == "electronics" and .price < 500')
+
+        print("Filter | Recall | Selectivity | Time (ms) | Results")
+        print("----------------------------------------------------")
+        # The third tuple member is a duration in seconds; it is converted to
+        # milliseconds only for display.
+        for name, (recall, selectivity, elapsed, count) in results.items():
+            print(f"{name:7} | {recall:.3f} | {selectivity:.3f} | {elapsed*1000:.1f} | {count}")
+
+        # ==== 2. Filter Selectivity Performance ====
+        print("\nTesting filter selectivity performance...")
+
+        # High selectivity (very few matches)
+        high_sel_recall, _, high_sel_time, _ = self.test_recall_with_filter('.is_premium')
+
+        # Medium selectivity
+        med_sel_recall, _, med_sel_time, _ = self.test_recall_with_filter('.price > 100 and .price < 1000')
+
+        # Low selectivity (many matches)
+        low_sel_recall, _, low_sel_time, _ = self.test_recall_with_filter('.year > 2000')
+
+        print(f"High selectivity recall: {high_sel_recall:.3f}, time: {high_sel_time*1000:.1f}ms")
+        print(f"Med selectivity recall: {med_sel_recall:.3f}, time: {med_sel_time*1000:.1f}ms")
+        print(f"Low selectivity recall: {low_sel_recall:.3f}, time: {low_sel_time*1000:.1f}ms")
+
+        # ==== 3. FILTER-EF Parameter Testing ====
+        print("\nTesting FILTER-EF parameter...")
+
+        # Test with different FILTER-EF values
+        filter_expr = '.category == "electronics" and .price > 200'
+        ef_values = [100, 500, 2000, 5000]
+
+        print("FILTER-EF | Recall | Time (ms)")
+        print("-----------------------------")
+        for filter_ef in ef_values:
+            recall, _, query_time, _ = self.test_recall_with_filter(
+                filter_expr, ef=500, filter_ef=filter_ef)
+            print(f"{filter_ef:9} | {recall:.3f} | {query_time*1000:.1f}")
+
+        # Assert that higher FILTER-EF generally gives better recall
+        low_ef_recall, _, _, _ = self.test_recall_with_filter(filter_expr, filter_ef=100)
+        high_ef_recall, _, _, _ = self.test_recall_with_filter(filter_expr, filter_ef=5000)
+
+        # Each call uses a fresh random query vector, so the two recalls are
+        # not directly comparable; the 0.8 factor leaves room for that noise.
+        assert high_ef_recall >= low_ef_recall * 0.8, \
+            f"Higher FILTER-EF should generally give better recall: {high_ef_recall:.3f} vs {low_ef_recall:.3f}"
+
+        # ==== 4. Complex Filter Expressions ====
+        print("\nTesting complex filter expressions...")
+
+        # Test a variety of complex expressions
+        complex_filters = [
+            '.price > 100 and (.category == "electronics" or .category == "furniture")',
+            '(.rating > 4 and .in_stock) or (.price < 50 and .views > 1000)',
+            '.category in ["electronics", "clothing"] and .price > 200 and .rating >= 3',
+            '(.category == "electronics" and .subcategory == "phones") or (.category == "furniture" and .price > 1000)',
+            '.year > 2010 and !(.price < 100) and .in_stock'
+        ]
+
+        print("Expression | Results | Time (ms)")
+        print("-----------------------------")
+        for i, expr in enumerate(complex_filters):
+            # Report-only: errors and failed assertions are printed, not raised
+            try:
+                _, _, query_time, result_count = self.test_recall_with_filter(expr)
+                print(f"Complex {i+1} | {result_count:7} | {query_time*1000:.1f}")
+            except Exception as e:
+                print(f"Complex {i+1} | Error: {str(e)}")
+
+        # ==== 5. Attribute Type Testing ====
+        print("\nTesting different attribute types...")
+
+        type_filters = [
+            ('.price > 500', "Numeric"),
+            ('.category == "books"', "String equality"),
+            ('.in_stock', "Boolean"),
+            ('.tags in ["sale", "new"]', "Array membership"),
+            ('.rating * 2 > 8', "Arithmetic")
+        ]
+
+        for expr, type_name in type_filters:
+            # Report-only, same as section 4
+            try:
+                _, _, query_time, result_count = self.test_recall_with_filter(expr)
+                print(f"{type_name:16} | {expr:30} | {result_count:5} results | {query_time*1000:.1f}ms")
+            except Exception as e:
+                print(f"{type_name:16} | {expr:30} | Error: {str(e)}")
+
+        # ==== 6. Filter + Count Interaction ====
+        print("\nTesting COUNT parameter with filters...")
+
+        filter_expr = '.category == "electronics"'
+        counts = [5, 20, 100]
+
+        for count in counts:
+            query_vec = generate_random_vector(self.dim)
+            cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
+            cmd_args.extend([str(x) for x in query_vec])
+            cmd_args.extend(['COUNT', count, 'WITHSCORES', 'FILTER', filter_expr])
+
+            results = self.redis.execute_command(*cmd_args)
+            result_count = len(results) // 2  # Divide by 2 because WITHSCORES returns pairs
+
+            # We expect result count to be at most the requested count
+            assert result_count <= count, f"Got {result_count} results with COUNT {count}"
+            print(f"COUNT {count:3} | Got {result_count:3} results")
+
+        # ==== 7. Edge Cases ====
+        print("\nTesting edge cases...")
+
+        # Test with no matching items
+        no_match_expr = '.category == "nonexistent_category"'
+        results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', self.dim,
+                                           *[str(x) for x in generate_random_vector(self.dim)],
+                                           'FILTER', no_match_expr)
+        assert len(results) == 0, f"Expected 0 results for non-matching filter, got {len(results)}"
+        print(f"No matching items: {len(results)} results (expected 0)")
+
+        # Test with invalid filter syntax. The failure is raised from the
+        # "else" branch so the handler cannot swallow it; a bare "except:"
+        # around an "assert False" would catch the AssertionError itself and
+        # make this check impossible to fail.
+        try:
+            self.redis.execute_command('VSIM', self.test_key, 'VALUES', self.dim,
+                                     *[str(x) for x in generate_random_vector(self.dim)],
+                                     'FILTER', '.category === "books"')  # Triple equals is invalid
+        except Exception:
+            # The client's error class is not imported in this file, so the
+            # handler stays broad; it no longer covers the check itself.
+            print("Invalid filter syntax correctly raised an error")
+        else:
+            raise AssertionError("Expected error for invalid filter syntax")
+
+        # Test with extremely long complex expression
+        long_expr = ' and '.join([f'.rating > {i/10}' for i in range(10)])
+        try:
+            results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', self.dim,
+                                               *[str(x) for x in generate_random_vector(self.dim)],
+                                               'FILTER', long_expr)
+            print(f"Long expression: {len(results)} results")
+        except Exception as e:
+            print(f"Long expression error: {str(e)}")
+
+        print("\nComprehensive VSIM FILTER tests completed successfully")
+
+
+class VSIMFilterSelectivityTest(TestCase):
+    def getname(self):
+        """Return the human-readable title of this benchmark."""
+        title = "VSIM FILTER selectivity performance benchmark"
+        return title
+
+    def estimated_runtime(self):
+        """Return the expected duration of this test, in seconds."""
+        # The benchmark might take up to 8 seconds.
+        seconds = 8
+        return seconds
+
+    def setup(self):
+        """Run the base-class setup, then pick the dataset shape and a dedicated key."""
+        super().setup()
+        self.dim, self.count = 32, 10000
+        # Suffix the key set up by the base class so this test uses its own key
+        self.test_key = "{}:selectivity".format(self.test_key)
+
+    def create_vector_with_age_attribute(self, name, age):
+        """Add a random vector called ``name`` and tag it with {"age": age}."""
+        execute = self.redis.execute_command
+        components = generate_random_vector(self.dim)
+        blob = struct.pack(f'{self.dim}f', *components)
+        execute('VADD', self.test_key, 'FP32', blob, name)
+        execute('VSETATTR', self.test_key, name, json.dumps(dict(age=age)))
+
+    def test(self):
+        """Benchmark filtered VSIM queries at controlled selectivities.
+
+        The dataset cycles the "age" attribute through 1..100, so the filter
+        ".age <= T" matches T% of the elements whenever self.count is a
+        multiple of 100. One query vector is reused for every selectivity.
+
+        Raises:
+            AssertionError: when a very selective filter (<= 5%) returns no
+                result, or when a less selective query takes 1 second or more.
+        """
+        print("\nRunning VSIM FILTER selectivity benchmark...")
+
+        # Create a dataset where we control the exact selectivity
+        print(f"Creating controlled dataset with {self.count} vectors...")
+
+        # Create vectors with age attributes from 1 to 100
+        for i in range(self.count):
+            age = (i % 100) + 1  # Ages from 1 to 100
+            name = f"{self.test_key}:item:{i}"
+            self.create_vector_with_age_attribute(name, age)
+
+        # Create a query vector
+        query_vec = generate_random_vector(self.dim)
+
+        # Test filters with different selectivities
+        selectivities = [0.01, 0.05, 0.10, 0.25, 0.50, 0.75, 0.99]
+
+        print("\nSelectivity | Filter          | Results | Time (ms)")
+        print("--------------------------------------------------")
+
+        for target_selectivity in selectivities:
+            # Calculate age threshold for desired selectivity, e.g. age <= 10
+            # gives 10%. round() rather than int(): a product such as
+            # 0.29 * 100 is 28.999999999999996 and would truncate to 28.
+            age_threshold = round(target_selectivity * 100)
+            filter_expr = f'.age <= {age_threshold}'
+
+            cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
+            cmd_args.extend(str(x) for x in query_vec)
+            cmd_args.extend(['COUNT', 100, 'FILTER', filter_expr])
+
+            # Time only the command itself, with a monotonic clock
+            start_time = time.perf_counter()
+            results = self.redis.execute_command(*cmd_args)
+            query_time = time.perf_counter() - start_time
+
+            print(f"{target_selectivity:.2f}      | {filter_expr:15} | {len(results):7} | {query_time*1000:.1f}")
+
+            if target_selectivity <= 0.05:
+                # For very selective queries, ensure we can find some results
+                assert len(results) > 0, f"No results found for {filter_expr}"
+            else:
+                # For less selective queries, performance should be reasonable
+                assert query_time < 1.0, f"Query too slow: {query_time:.3f}s for {filter_expr}"
+
+        print("\nSelectivity benchmark completed successfully")
+
+
+class VSIMFilterComparisonTest(TestCase):
+    def getname(self):
+        """Return the human-readable title of this test case."""
+        title = "VSIM FILTER EF parameter comparison"
+        return title
+
+    def estimated_runtime(self):
+        """Return the expected duration of this test, in seconds."""
+        # The comparison might take up to 8 seconds.
+        seconds = 8
+        return seconds
+
+    def setup(self):
+        """Run the base-class setup, then pick the dataset shape and a dedicated key."""
+        super().setup()
+        self.dim, self.count = 32, 5000
+        # Suffix the key set up by the base class so this test uses its own key
+        self.test_key = "{}:efparams".format(self.test_key)
+
+    def create_dataset(self):
+        """Fill the test key with vectors tagged with category and quality.
+
+        Every element gets a random vector and the JSON attributes "id",
+        "category" and "quality". About 10% of the elements are "premium" and
+        quality is drawn uniformly from 1..100, so roughly 1% of the elements
+        are premium with a quality above 90.
+
+        Returns:
+            (vectors, names) in insertion order.
+        """
+        vectors, names = [], []
+        regular_categories = ["standard", "economy", "basic"]
+
+        for i in range(self.count):
+            vec = generate_random_vector(self.dim)
+            name = f"{self.test_key}:item:{i}"
+
+            # Store the vector as an FP32 blob
+            packed = struct.pack(f'{self.dim}f', *vec)
+            self.redis.execute_command('VADD', self.test_key, 'FP32', packed, name)
+
+            # Draw the attributes that make the filter very selective
+            if random.random() < 0.1:
+                category = "premium"
+            else:
+                category = random.choice(regular_categories)
+            quality = random.randint(1, 100)
+
+            payload = json.dumps({"id": i, "category": category, "quality": quality})
+            self.redis.execute_command('VSETATTR', self.test_key, name, payload)
+
+            vectors.append(vec)
+            names.append(name)
+
+        return vectors, names
+
+    def test(self):
+        """Compare how many results a selective filter yields at several FILTER-EF values.
+
+        The same query vector and filter are run once per FILTER-EF value. The
+        run with the highest FILTER-EF is the baseline: any other run that
+        returns fewer than half as many results is flagged "Low recall!" in
+        the printed table.
+
+        Raises:
+            AssertionError: when the highest FILTER-EF run returns at least 5
+                results but fewer than the lowest FILTER-EF run.
+        """
+        print("\nRunning VSIM FILTER-EF parameter comparison...")
+
+        # Create dataset (the returned vectors and names are not needed here)
+        self.create_dataset()
+
+        # A selective filter: only a small fraction of the items match
+        filter_expr = '.category == "premium" and .quality > 90'
+
+        # Create query vector
+        query_vec = generate_random_vector(self.dim)
+
+        # FILTER-EF values to test, in ascending order
+        ef_values = [50, 100, 500, 1000, 5000]
+        results = []
+
+        for ef in ef_values:
+            cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
+            cmd_args.extend(str(x) for x in query_vec)
+            cmd_args.extend(['COUNT', 100, 'FILTER', filter_expr, 'FILTER-EF', ef])
+
+            # Time only the command itself, with a monotonic clock
+            start_time = time.perf_counter()
+            query_results = self.redis.execute_command(*cmd_args)
+            query_time = time.perf_counter() - start_time
+
+            results.append((ef, len(query_results), query_time))
+
+        # The baseline can only be known once every run has completed, so the
+        # table is printed after the queries.
+        baseline_ef, baseline_count, _ = results[-1]
+
+        print("\nFILTER-EF | Results | Time (ms) | Notes")
+        print("---------------------------------------")
+
+        for ef, found, query_time in results:
+            if ef == baseline_ef:
+                notes = "Baseline"
+            elif baseline_count > 0 and found / baseline_count < 0.5:
+                notes = "Low recall!"
+            else:
+                notes = ""
+
+            print(f"{ef:9} | {found:7} | {query_time*1000:.1f} | {notes}")
+
+        # If we have enough results at highest EF, check that the result count
+        # does not shrink between the lowest and the highest FILTER-EF
+        if baseline_count >= 5:  # At least 5 results for highest EF
+            result_counts = [r[1] for r in results]
+
+            assert result_counts[-1] >= result_counts[0], \
+                f"Higher FILTER-EF should find at least as many results: {result_counts[-1]} vs {result_counts[0]}"
+
+        print("\nFILTER-EF parameter comparison completed successfully")