Update Default value of Oversample for bbq (#127134)
* Unit test to validate default behavior * adding default value to oversample for bbq * Fix code style issue * Update docs/changelog/127134.yaml * Update changelog * Adding index version to support only new indices * Update index version name to better match * Adding a simple yaml test to verify the yaml functionality for oversample value * Refactor knn float to add rescore vector by default when index type is one of bbq * adding yaml tests to verify oversample default value * Fixing format issue for not_exists
This commit is contained in:
parent
352db86c6f
commit
cd4fcbff21
|
@ -0,0 +1,5 @@
|
|||
pr: 127134
|
||||
summary: Define a default oversample value for dense vectors with bbq_hnsw/bbq_flat
|
||||
area: Vector Search
|
||||
type: enhancement
|
||||
issues: []
|
|
@ -568,3 +568,14 @@ setup:
|
|||
- match: { hits.hits.1._score: $default_rescore1 }
|
||||
- match: { hits.hits.2._score: $override_score2 }
|
||||
- match: { hits.hits.2._score: $default_rescore2 }
|
||||
|
||||
---
|
||||
"default oversample value":
|
||||
- requires:
|
||||
cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"]
|
||||
reason: "Needs default_oversample_value_for_bbq feature"
|
||||
- do:
|
||||
indices.get_mapping:
|
||||
index: bbq_hnsw
|
||||
|
||||
- match: { bbq_hnsw.mappings.properties.vector.index_options.rescore_vector.oversample: 3.0 }
|
||||
|
|
|
@ -339,3 +339,14 @@ setup:
|
|||
- match: { hits.hits.0._score: $rescore_score0 }
|
||||
- match: { hits.hits.1._score: $rescore_score1 }
|
||||
- match: { hits.hits.2._score: $rescore_score2 }
|
||||
|
||||
---
|
||||
"default oversample value":
|
||||
- requires:
|
||||
cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"]
|
||||
reason: "Needs default_oversample_value_for_bbq feature"
|
||||
- do:
|
||||
indices.get_mapping:
|
||||
index: bbq_flat
|
||||
|
||||
- match: { bbq_flat.mappings.properties.vector.index_options.rescore_vector.oversample: 3.0 }
|
||||
|
|
|
@ -495,3 +495,14 @@ setup:
|
|||
- match: { hits.hits.0._score: $rescore_score0 }
|
||||
- match: { hits.hits.1._score: $rescore_score1 }
|
||||
- match: { hits.hits.2._score: $rescore_score2 }
|
||||
|
||||
---
|
||||
"no default oversample value":
|
||||
- requires:
|
||||
cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"]
|
||||
reason: "Needs default_oversample_value_for_bbq feature"
|
||||
- do:
|
||||
indices.get_mapping:
|
||||
index: int4_flat
|
||||
|
||||
- not_exists: int4_flat.mappings.properties.vector.index_options.rescore_vector
|
||||
|
|
|
@ -436,3 +436,14 @@ setup:
|
|||
- match: { hits.hits.0._score: $rescore_score0 }
|
||||
- match: { hits.hits.1._score: $rescore_score1 }
|
||||
- match: { hits.hits.2._score: $rescore_score2 }
|
||||
|
||||
---
|
||||
"no default oversample value":
|
||||
- requires:
|
||||
cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"]
|
||||
reason: "Needs default_oversample_value_for_bbq feature"
|
||||
- do:
|
||||
indices.get_mapping:
|
||||
index: int8_flat
|
||||
|
||||
- not_exists: int8_flat.mappings.properties.vector.index_options.rescore_vector
|
||||
|
|
|
@ -161,6 +161,7 @@ public class IndexVersions {
|
|||
public static final IndexVersion USE_LUCENE101_POSTINGS_FORMAT = def(9_021_0_00, Version.LUCENE_10_1_0);
|
||||
public static final IndexVersion UPGRADE_TO_LUCENE_10_2_0 = def(9_022_00_0, Version.LUCENE_10_2_0);
|
||||
public static final IndexVersion UPGRADE_TO_LUCENE_10_2_1 = def(9_023_00_0, Version.LUCENE_10_2_1);
|
||||
public static final IndexVersion DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ = def(9_024_0_00, Version.LUCENE_10_2_1);
|
||||
/*
|
||||
* STOP! READ THIS FIRST! No, really,
|
||||
* ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _
|
||||
|
|
|
@ -16,6 +16,7 @@ import java.util.Set;
|
|||
|
||||
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING;
|
||||
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING;
|
||||
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ;
|
||||
|
||||
/**
|
||||
* Spec for mapper-related features.
|
||||
|
@ -66,7 +67,8 @@ public class MapperFeatures implements FeatureSpecification {
|
|||
RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING,
|
||||
DateFieldMapper.INVALID_DATE_FIX,
|
||||
NPE_ON_DIMS_UPDATE_FIX,
|
||||
RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING
|
||||
RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING,
|
||||
USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -117,11 +117,15 @@ public class DenseVectorFieldMapper extends FieldMapper {
|
|||
public static final IndexVersion ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS = IndexVersions.ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS;
|
||||
public static final IndexVersion RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS =
|
||||
IndexVersions.RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS;
|
||||
public static final IndexVersion DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ = IndexVersions.DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ;
|
||||
|
||||
public static final NodeFeature RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING = new NodeFeature("mapper.dense_vector.rescore_vector");
|
||||
public static final NodeFeature RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING = new NodeFeature(
|
||||
"mapper.dense_vector.rescore_zero_vector"
|
||||
);
|
||||
public static final NodeFeature USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ = new NodeFeature(
|
||||
"mapper.dense_vector.default_oversample_value_for_bbq"
|
||||
);
|
||||
|
||||
public static final String CONTENT_TYPE = "dense_vector";
|
||||
public static final short MAX_DIMS_COUNT = 4096; // maximum allowed number of dimensions
|
||||
|
@ -131,6 +135,7 @@ public class DenseVectorFieldMapper extends FieldMapper {
|
|||
// vector
|
||||
public static final int MAGNITUDE_BYTES = 4;
|
||||
public static final int OVERSAMPLE_LIMIT = 10_000; // Max oversample allowed
|
||||
public static final float DEFAULT_OVERSAMPLE = 3.0F; // Default oversample value
|
||||
|
||||
private static DenseVectorFieldMapper toType(FieldMapper in) {
|
||||
return (DenseVectorFieldMapper) in;
|
||||
|
@ -1462,6 +1467,9 @@ public class DenseVectorFieldMapper extends FieldMapper {
|
|||
RescoreVector rescoreVector = null;
|
||||
if (indexVersion.onOrAfter(ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS)) {
|
||||
rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion);
|
||||
if (rescoreVector == null && indexVersion.onOrAfter(DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ)) {
|
||||
rescoreVector = new RescoreVector(DEFAULT_OVERSAMPLE);
|
||||
}
|
||||
}
|
||||
MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap);
|
||||
return new BBQHnswIndexOptions(m, efConstruction, rescoreVector);
|
||||
|
@ -1483,6 +1491,9 @@ public class DenseVectorFieldMapper extends FieldMapper {
|
|||
RescoreVector rescoreVector = null;
|
||||
if (indexVersion.onOrAfter(ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS)) {
|
||||
rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion);
|
||||
if (rescoreVector == null && indexVersion.onOrAfter(DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ)) {
|
||||
rescoreVector = new RescoreVector(DEFAULT_OVERSAMPLE);
|
||||
}
|
||||
}
|
||||
MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap);
|
||||
return new BBQFlatIndexOptions(rescoreVector);
|
||||
|
@ -2311,6 +2322,10 @@ public class DenseVectorFieldMapper extends FieldMapper {
|
|||
ElementType getElementType() {
|
||||
return elementType;
|
||||
}
|
||||
|
||||
IndexOptions getIndexOptions() {
|
||||
return indexOptions;
|
||||
}
|
||||
}
|
||||
|
||||
private final IndexOptions indexOptions;
|
||||
|
|
|
@ -1022,6 +1022,60 @@ public class DenseVectorFieldMapperTests extends MapperTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testDefaultOversampleValue() throws IOException {
|
||||
{
|
||||
DocumentMapper mapperService = createDocumentMapper(fieldMapping(b -> {
|
||||
b.field("type", "dense_vector");
|
||||
b.field("dims", 128);
|
||||
b.field("index", true);
|
||||
b.field("similarity", "dot_product");
|
||||
b.startObject("index_options");
|
||||
b.field("type", "bbq_hnsw");
|
||||
b.endObject();
|
||||
}));
|
||||
|
||||
DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) mapperService.mappers().getMapper("field");
|
||||
DenseVectorFieldMapper.BBQHnswIndexOptions indexOptions = (DenseVectorFieldMapper.BBQHnswIndexOptions) denseVectorFieldMapper
|
||||
.fieldType()
|
||||
.getIndexOptions();
|
||||
assertEquals(3.0F, indexOptions.rescoreVector.oversample(), 0.0F);
|
||||
}
|
||||
{
|
||||
DocumentMapper mapperService = createDocumentMapper(fieldMapping(b -> {
|
||||
b.field("type", "dense_vector");
|
||||
b.field("dims", 128);
|
||||
b.field("index", true);
|
||||
b.field("similarity", "dot_product");
|
||||
b.startObject("index_options");
|
||||
b.field("type", "bbq_flat");
|
||||
b.endObject();
|
||||
}));
|
||||
|
||||
DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) mapperService.mappers().getMapper("field");
|
||||
DenseVectorFieldMapper.BBQFlatIndexOptions indexOptions = (DenseVectorFieldMapper.BBQFlatIndexOptions) denseVectorFieldMapper
|
||||
.fieldType()
|
||||
.getIndexOptions();
|
||||
assertEquals(3.0F, indexOptions.rescoreVector.oversample(), 0.0F);
|
||||
}
|
||||
{
|
||||
DocumentMapper mapperService = createDocumentMapper(fieldMapping(b -> {
|
||||
b.field("type", "dense_vector");
|
||||
b.field("dims", 128);
|
||||
b.field("index", true);
|
||||
b.field("similarity", "dot_product");
|
||||
b.startObject("index_options");
|
||||
b.field("type", "int8_hnsw");
|
||||
b.endObject();
|
||||
}));
|
||||
|
||||
DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) mapperService.mappers().getMapper("field");
|
||||
DenseVectorFieldMapper.Int8HnswIndexOptions indexOptions = (DenseVectorFieldMapper.Int8HnswIndexOptions) denseVectorFieldMapper
|
||||
.fieldType()
|
||||
.getIndexOptions();
|
||||
assertNull(indexOptions.rescoreVector);
|
||||
}
|
||||
}
|
||||
|
||||
public void testDims() {
|
||||
{
|
||||
Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
|
||||
|
|
|
@ -46,6 +46,7 @@ import java.util.Set;
|
|||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.DEFAULT_OVERSAMPLE;
|
||||
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.OVERSAMPLE_LIMIT;
|
||||
import static org.elasticsearch.search.SearchService.DEFAULT_SIZE;
|
||||
import static org.hamcrest.Matchers.containsString;
|
||||
|
@ -144,7 +145,7 @@ abstract class AbstractKnnVectorQueryBuilderTestCase extends AbstractQueryTestCa
|
|||
fieldName,
|
||||
k,
|
||||
numCands,
|
||||
randomRescoreVectorBuilder(),
|
||||
isIndextypeBBQ() ? randomBBQRescoreVectorBuilder() : randomRescoreVectorBuilder(),
|
||||
randomFloat()
|
||||
);
|
||||
|
||||
|
@ -161,6 +162,14 @@ abstract class AbstractKnnVectorQueryBuilderTestCase extends AbstractQueryTestCa
|
|||
return queryBuilder;
|
||||
}
|
||||
|
||||
private boolean isIndextypeBBQ() {
|
||||
return indexType.equals("bbq_hnsw") || indexType.equals("bbq_flat");
|
||||
}
|
||||
|
||||
protected RescoreVectorBuilder randomBBQRescoreVectorBuilder() {
|
||||
return new RescoreVectorBuilder(randomBoolean() ? DEFAULT_OVERSAMPLE : randomFloatBetween(1.0f, 10.0f, false));
|
||||
}
|
||||
|
||||
protected RescoreVectorBuilder randomRescoreVectorBuilder() {
|
||||
if (randomBoolean()) {
|
||||
return null;
|
||||
|
|
Loading…
Reference in New Issue