Update Default value of Oversample for bbq (#127134)

* Unit test to validate default behavior

* adding default value to oversample for bbq

* Fix code style issue

* Update docs/changelog/127134.yaml

* Update changelog

* Adding index version to support only new indices

* Update index version name to better match

* Adding a simple yaml test to verify the yaml functionality for oversample value

* Refactor knn float to add rescore vector by default when index type is one of bbq

* adding yaml tests to verify oversample default value

* Fixing format issue for not_exists
This commit is contained in:
Samiul Monir 2025-04-28 12:36:03 -04:00 committed by GitHub
parent 352db86c6f
commit cd4fcbff21
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 132 additions and 2 deletions

View File

@ -0,0 +1,5 @@
pr: 127134
summary: Define a default oversample value for dense vectors with bbq_hnsw/bbq_flat
area: Vector Search
type: enhancement
issues: []

View File

@ -568,3 +568,14 @@ setup:
- match: { hits.hits.1._score: $default_rescore1 }
- match: { hits.hits.2._score: $override_score2 }
- match: { hits.hits.2._score: $default_rescore2 }
---
# Fetches the mapping of the bbq_hnsw index and expects rescore_vector.oversample to be reported as 3.0.
# NOTE(review): presumably the setup section creates bbq_hnsw without an explicit rescore_vector - confirm.
"default oversample value":
- requires:
cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"]
reason: "Needs default_oversample_value_for_bbq feature"
- do:
indices.get_mapping:
index: bbq_hnsw
- match: { bbq_hnsw.mappings.properties.vector.index_options.rescore_vector.oversample: 3.0 }

View File

@ -339,3 +339,14 @@ setup:
- match: { hits.hits.0._score: $rescore_score0 }
- match: { hits.hits.1._score: $rescore_score1 }
- match: { hits.hits.2._score: $rescore_score2 }
---
# Fetches the mapping of the bbq_flat index and expects rescore_vector.oversample to be reported as 3.0.
# NOTE(review): presumably the setup section creates bbq_flat without an explicit rescore_vector - confirm.
"default oversample value":
- requires:
cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"]
reason: "Needs default_oversample_value_for_bbq feature"
- do:
indices.get_mapping:
index: bbq_flat
- match: { bbq_flat.mappings.properties.vector.index_options.rescore_vector.oversample: 3.0 }

View File

@ -495,3 +495,14 @@ setup:
- match: { hits.hits.0._score: $rescore_score0 }
- match: { hits.hits.1._score: $rescore_score1 }
- match: { hits.hits.2._score: $rescore_score2 }
---
# Fetches the mapping of the int4_flat index and expects no rescore_vector entry under index_options,
# i.e. the default oversample is not applied to this index type.
"no default oversample value":
- requires:
cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"]
reason: "Needs default_oversample_value_for_bbq feature"
- do:
indices.get_mapping:
index: int4_flat
- not_exists: int4_flat.mappings.properties.vector.index_options.rescore_vector

View File

@ -436,3 +436,14 @@ setup:
- match: { hits.hits.0._score: $rescore_score0 }
- match: { hits.hits.1._score: $rescore_score1 }
- match: { hits.hits.2._score: $rescore_score2 }
---
# Fetches the mapping of the int8_flat index and expects no rescore_vector entry under index_options,
# i.e. the default oversample is not applied to this index type.
"no default oversample value":
- requires:
cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"]
reason: "Needs default_oversample_value_for_bbq feature"
- do:
indices.get_mapping:
index: int8_flat
- not_exists: int8_flat.mappings.properties.vector.index_options.rescore_vector

View File

@ -161,6 +161,7 @@ public class IndexVersions {
public static final IndexVersion USE_LUCENE101_POSTINGS_FORMAT = def(9_021_0_00, Version.LUCENE_10_1_0);
public static final IndexVersion UPGRADE_TO_LUCENE_10_2_0 = def(9_022_00_0, Version.LUCENE_10_2_0);
public static final IndexVersion UPGRADE_TO_LUCENE_10_2_1 = def(9_023_00_0, Version.LUCENE_10_2_1);
public static final IndexVersion DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ = def(9_024_0_00, Version.LUCENE_10_2_1);
/*
* STOP! READ THIS FIRST! No, really,
* ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _

View File

@ -16,6 +16,7 @@ import java.util.Set;
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING;
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING;
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ;
/**
* Spec for mapper-related features.
@ -66,7 +67,8 @@ public class MapperFeatures implements FeatureSpecification {
RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING,
DateFieldMapper.INVALID_DATE_FIX,
NPE_ON_DIMS_UPDATE_FIX,
RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING
RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING,
USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ
);
}
}

View File

@ -117,11 +117,15 @@ public class DenseVectorFieldMapper extends FieldMapper {
// Index version gate: the BBQ index-option parsers only read a rescore vector from the index options
// when the index was created on or after this version.
public static final IndexVersion ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS = IndexVersions.ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS;
public static final IndexVersion RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS =
IndexVersions.RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS;
// Index version gate: on or after this version, bbq_hnsw and bbq_flat index options that do not
// configure a rescore vector get one built from DEFAULT_OVERSAMPLE; older indices keep none.
public static final IndexVersion DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ = IndexVersions.DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ;
public static final NodeFeature RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING = new NodeFeature("mapper.dense_vector.rescore_vector");
public static final NodeFeature RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING = new NodeFeature(
"mapper.dense_vector.rescore_zero_vector"
);
// Node feature that the YAML REST tests list under cluster_features before asserting on the
// default oversample value of BBQ index types.
public static final NodeFeature USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ = new NodeFeature(
"mapper.dense_vector.default_oversample_value_for_bbq"
);
public static final String CONTENT_TYPE = "dense_vector";
public static final short MAX_DIMS_COUNT = 4096; // maximum allowed number of dimensions
@ -131,6 +135,7 @@ public class DenseVectorFieldMapper extends FieldMapper {
// vector
public static final int MAGNITUDE_BYTES = 4;
public static final int OVERSAMPLE_LIMIT = 10_000; // Max oversample allowed
public static final float DEFAULT_OVERSAMPLE = 3.0F; // Default oversample value
private static DenseVectorFieldMapper toType(FieldMapper in) {
return (DenseVectorFieldMapper) in;
@ -1462,6 +1467,9 @@ public class DenseVectorFieldMapper extends FieldMapper {
RescoreVector rescoreVector = null;
if (indexVersion.onOrAfter(ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS)) {
rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion);
if (rescoreVector == null && indexVersion.onOrAfter(DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ)) {
rescoreVector = new RescoreVector(DEFAULT_OVERSAMPLE);
}
}
MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap);
return new BBQHnswIndexOptions(m, efConstruction, rescoreVector);
@ -1483,6 +1491,9 @@ public class DenseVectorFieldMapper extends FieldMapper {
RescoreVector rescoreVector = null;
if (indexVersion.onOrAfter(ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS)) {
rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion);
if (rescoreVector == null && indexVersion.onOrAfter(DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ)) {
rescoreVector = new RescoreVector(DEFAULT_OVERSAMPLE);
}
}
MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap);
return new BBQFlatIndexOptions(rescoreVector);
@ -2311,6 +2322,10 @@ public class DenseVectorFieldMapper extends FieldMapper {
ElementType getElementType() {
return elementType;
}
/**
 * Returns the {@code indexOptions} of this instance.
 * Package-private accessor; the mapper tests use it to inspect the parsed index options.
 */
IndexOptions getIndexOptions() {
return indexOptions;
}
}
private final IndexOptions indexOptions;

View File

@ -1022,6 +1022,60 @@ public class DenseVectorFieldMapperTests extends MapperTestCase {
}
}
/**
 * Checks the {@code rescore_vector} that results when a mapping's {@code index_options} names only a type:
 * {@code bbq_hnsw} and {@code bbq_flat} are expected to carry an oversample of 3.0, whereas
 * {@code int8_hnsw} is expected to have no rescore vector at all.
 */
public void testDefaultOversampleValue() throws IOException {
    // bbq_hnsw: default oversample expected
    DenseVectorFieldMapper.BBQHnswIndexOptions bbqHnswOptions = parseIndexOptionsOfType(
        "bbq_hnsw",
        DenseVectorFieldMapper.BBQHnswIndexOptions.class
    );
    assertEquals(3.0F, bbqHnswOptions.rescoreVector.oversample(), 0.0F);

    // bbq_flat: default oversample expected
    DenseVectorFieldMapper.BBQFlatIndexOptions bbqFlatOptions = parseIndexOptionsOfType(
        "bbq_flat",
        DenseVectorFieldMapper.BBQFlatIndexOptions.class
    );
    assertEquals(3.0F, bbqFlatOptions.rescoreVector.oversample(), 0.0F);

    // int8_hnsw: no rescore vector expected
    DenseVectorFieldMapper.Int8HnswIndexOptions int8HnswOptions = parseIndexOptionsOfType(
        "int8_hnsw",
        DenseVectorFieldMapper.Int8HnswIndexOptions.class
    );
    assertNull(int8HnswOptions.rescoreVector);
}

/**
 * Creates a document mapper for a 128-dimension, indexed, {@code dot_product} {@code dense_vector} field
 * named {@code field} whose {@code index_options} contain only the given type, and returns the parsed
 * index options cast to {@code expectedClass}.
 */
private <T> T parseIndexOptionsOfType(String indexOptionsType, Class<T> expectedClass) throws IOException {
    DocumentMapper documentMapper = createDocumentMapper(fieldMapping(builder -> {
        builder.field("type", "dense_vector");
        builder.field("dims", 128);
        builder.field("index", true);
        builder.field("similarity", "dot_product");
        builder.startObject("index_options");
        builder.field("type", indexOptionsType);
        builder.endObject();
    }));
    DenseVectorFieldMapper vectorMapper = (DenseVectorFieldMapper) documentMapper.mappers().getMapper("field");
    return expectedClass.cast(vectorMapper.fieldType().getIndexOptions());
}
public void testDims() {
{
Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {

View File

@ -46,6 +46,7 @@ import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.DEFAULT_OVERSAMPLE;
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.OVERSAMPLE_LIMIT;
import static org.elasticsearch.search.SearchService.DEFAULT_SIZE;
import static org.hamcrest.Matchers.containsString;
@ -144,7 +145,7 @@ abstract class AbstractKnnVectorQueryBuilderTestCase extends AbstractQueryTestCa
fieldName,
k,
numCands,
randomRescoreVectorBuilder(),
isIndextypeBBQ() ? randomBBQRescoreVectorBuilder() : randomRescoreVectorBuilder(),
randomFloat()
);
@ -161,6 +162,14 @@ abstract class AbstractKnnVectorQueryBuilderTestCase extends AbstractQueryTestCa
return queryBuilder;
}
/**
 * Tells whether the index type under test is one of the BBQ variants, {@code bbq_hnsw} or {@code bbq_flat}.
 */
private boolean isIndextypeBBQ() {
    if (indexType.equals("bbq_hnsw")) {
        return true;
    }
    return indexType.equals("bbq_flat");
}
/**
 * Produces a rescore vector builder for BBQ index types: on a coin flip it uses {@code DEFAULT_OVERSAMPLE},
 * otherwise a random oversample drawn via {@code randomFloatBetween(1.0f, 10.0f, false)}.
 */
protected RescoreVectorBuilder randomBBQRescoreVectorBuilder() {
    final float oversample;
    if (randomBoolean()) {
        oversample = DEFAULT_OVERSAMPLE;
    } else {
        oversample = randomFloatBetween(1.0f, 10.0f, false);
    }
    return new RescoreVectorBuilder(oversample);
}
protected RescoreVectorBuilder randomRescoreVectorBuilder() {
if (randomBoolean()) {
return null;