From b50bb6b5fb5d3a30a22e3636b529ed9af15d7ecc Mon Sep 17 00:00:00 2001 From: Samiul Monir <150824886+Samiul-TheSoccerFan@users.noreply.github.com> Date: Wed, 11 Jun 2025 14:31:11 -0400 Subject: [PATCH] Adding support to exclude semantic_text subfields (#127664) * Adding support to exclude semantic_text subfields * Update docs/changelog/127664.yaml * Updating changelog file * remove duplicate test from yaml file * Adding support to exclude semantic_text subfields from mapper builders * Adding support for generic field types * refactoring to use builder and setting exclude value from semantic_text mapper * update in semantic_text mapper and fetcher to incorporate the support functionality * Fix code style issue * adding node feature for yaml tests * Adding more restrictive checks on yaml tests and few refactoring * Returns metadata fields from metadata mappers * returns all source fields for fieldcaps * gather all fields and iterate to process for fieldcaps api * revert back all changes from MappedFieldtype and subclasses * revert back exclude logic from semantic_text mapper * fix lint issues * fix lint issues * Adding runtime fields into fieldCaps * Fix linting issue * removing unused functions that used in previous implementation * fix multifield tests failure * getting alias fields for field caps * adding support for query time runtime fields * [CI] Auto commit changes from spotless * Fix empty mapping fieldCaps call * Address passthrough behavior for mappers * Fix SearchAsYoutype mapper failures * rename abstract method to have more meaningful name * Rename mapper function to match its functionality * Adding filtering for inference subfields * revert back previous implementation changes * Adding yaml test for field caps not filtering multi-field * Fixing yaml test * Adding comment why .inference filter is added --------- Co-authored-by: elasticsearchmachine Co-authored-by: Elastic Machine --- docs/changelog/127664.yaml | 5 ++ 
.../fieldcaps/FieldCapabilitiesFetcher.java | 10 +++ .../xpack/inference/InferenceFeatures.java | 4 +- .../mapper/SemanticTextFieldMapper.java | 3 + .../10_semantic_text_field_mapping.yml | 73 +++++++++++++++++++ .../10_semantic_text_field_mapping_bwc.yml | 23 ++++++ 6 files changed, 117 insertions(+), 1 deletion(-) create mode 100644 docs/changelog/127664.yaml diff --git a/docs/changelog/127664.yaml b/docs/changelog/127664.yaml new file mode 100644 index 000000000000..6290607b987f --- /dev/null +++ b/docs/changelog/127664.yaml @@ -0,0 +1,5 @@ +pr: 127664 +summary: Exclude `semantic_text` subfields from field capabilities API +area: "Mapping" +type: enhancement +issues: [] diff --git a/server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesFetcher.java b/server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesFetcher.java index e6d7af11d06c..dc73be9ed755 100644 --- a/server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesFetcher.java +++ b/server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesFetcher.java @@ -9,6 +9,7 @@ package org.elasticsearch.action.fieldcaps; +import org.elasticsearch.cluster.metadata.InferenceFieldMetadata; import org.elasticsearch.cluster.metadata.MappingMetadata; import org.elasticsearch.core.Booleans; import org.elasticsearch.core.Nullable; @@ -30,6 +31,7 @@ import org.elasticsearch.search.internal.ShardSearchRequest; import org.elasticsearch.tasks.CancellableTask; import java.io.IOException; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.Map; @@ -256,6 +258,14 @@ class FieldCapabilitiesFetcher { Set acceptedTypes = Set.of(fieldTypes); fcf = ft -> acceptedTypes.contains(ft.familyTypeName()); } + + // Exclude internal ".inference" subfields of semantic_text fields from the field capabilities response + Collection inferenceFields = context.getMappingLookup().inferenceFields().values(); + for (InferenceFieldMetadata 
inferenceField : inferenceFields) { + Predicate next = ft -> ft.name().startsWith(inferenceField.getName() + ".inference") == false; + fcf = fcf == null ? next : fcf.and(next); + } + for (String filter : filters) { if ("parent".equals(filter) || "-parent".equals(filter)) { continue; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java index 669e29ba7deb..d724bae4ecb6 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java @@ -15,6 +15,7 @@ import org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankR import java.util.Set; +import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.SEMANTIC_TEXT_EXCLUDE_SUB_FIELDS_FROM_FIELD_CAPS; import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.SEMANTIC_TEXT_SUPPORT_CHUNKING_CONFIG; import static org.elasticsearch.xpack.inference.queries.SemanticKnnVectorQueryRewriteInterceptor.SEMANTIC_KNN_FILTER_FIX; import static org.elasticsearch.xpack.inference.queries.SemanticKnnVectorQueryRewriteInterceptor.SEMANTIC_KNN_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED; @@ -59,7 +60,8 @@ public class InferenceFeatures implements FeatureSpecification { SemanticTextFieldMapper.SEMANTIC_TEXT_HANDLE_EMPTY_INPUT, TEST_RULE_RETRIEVER_WITH_INDICES_THAT_DONT_RETURN_RANK_DOCS, SEMANTIC_TEXT_SUPPORT_CHUNKING_CONFIG, - SEMANTIC_TEXT_MATCH_ALL_HIGHLIGHTER + SEMANTIC_TEXT_MATCH_ALL_HIGHLIGHTER, + SEMANTIC_TEXT_EXCLUDE_SUB_FIELDS_FROM_FIELD_CAPS ); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index 
d15414e34aef..8e4f3ed4e67a 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ -134,6 +134,9 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie public static final NodeFeature SEMANTIC_TEXT_SKIP_INFERENCE_FIELDS = new NodeFeature("semantic_text.skip_inference_fields"); public static final NodeFeature SEMANTIC_TEXT_BIT_VECTOR_SUPPORT = new NodeFeature("semantic_text.bit_vector_support"); public static final NodeFeature SEMANTIC_TEXT_SUPPORT_CHUNKING_CONFIG = new NodeFeature("semantic_text.support_chunking_config"); + public static final NodeFeature SEMANTIC_TEXT_EXCLUDE_SUB_FIELDS_FROM_FIELD_CAPS = new NodeFeature( + "semantic_text.exclude_sub_fields_from_field_caps" + ); public static final String CONTENT_TYPE = "semantic_text"; public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID; diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml index fcbeab9262b2..a1c2663b22cc 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml @@ -359,3 +359,76 @@ setup: index: test-always-include-inference-id-index - exists: test-always-include-inference-id-index.mappings.properties.semantic_field.inference_id + +--- +"Field caps exclude chunks and embedding fields": + - requires: + cluster_features: "semantic_text.exclude_sub_fields_from_field_caps" + reason: field caps api exclude semantic_text subfields from 9.1.0 & 8.19.0 + + - do: + field_caps: + include_empty_fields: 
true + index: test-index + fields: "*" + + - match: { indices: [ "test-index" ] } + - exists: fields.sparse_field + - exists: fields.dense_field + - not_exists: fields.sparse_field.inference.chunks.embeddings + - not_exists: fields.sparse_field.inference.chunks.offset + - not_exists: fields.sparse_field.inference.chunks + - not_exists: fields.sparse_field.inference + - not_exists: fields.dense_field.inference.chunks.embeddings + - not_exists: fields.dense_field.inference.chunks.offset + - not_exists: fields.dense_field.inference.chunks + - not_exists: fields.dense_field.inference + +--- +"Field caps does not exclude multi-fields under semantic_text": + - requires: + cluster_features: "semantic_text.exclude_sub_fields_from_field_caps" + reason: field caps api exclude semantic_text subfields from 9.1.0 & 8.19.0 + - do: + indices.create: + index: test-multi-field-index + body: + settings: + index: + mapping: + semantic_text: + use_legacy_format: false + mappings: + properties: + sparse_field: + type: semantic_text + inference_id: sparse-inference-id + fields: + sparse_keyword_field: + type: keyword + dense_field: + type: semantic_text + inference_id: dense-inference-id + fields: + dense_keyword_field: + type: keyword + + - do: + field_caps: + include_empty_fields: true + index: test-multi-field-index + fields: "*" + + - match: { indices: [ "test-multi-field-index" ] } + - exists: fields.sparse_field + - exists: fields.dense_field + - exists: fields.sparse_field\.sparse_keyword_field + - exists: fields.dense_field\.dense_keyword_field + - not_exists: fields.sparse_field.inference.chunks.embeddings + - not_exists: fields.sparse_field.inference.chunks.offset + - not_exists: fields.sparse_field.inference.chunks + - not_exists: fields.sparse_field.inference + - not_exists: fields.dense_field.inference.chunks.embeddings + - not_exists: fields.dense_field.inference.chunks.offset + - not_exists: fields.dense_field.inference.chunks + - not_exists: fields.dense_field.inference 
diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping_bwc.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping_bwc.yml index 7a0c5b912de2..fa935ac450f8 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping_bwc.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping_bwc.yml @@ -307,3 +307,26 @@ setup: another_field: type: keyword +--- +"Field caps exclude chunks embedding and text fields": + - requires: + cluster_features: "semantic_text.exclude_sub_fields_from_field_caps" + reason: field caps api exclude semantic_text subfields from 9.1.0 & 8.19.0 + + - do: + field_caps: + include_empty_fields: true + index: test-index + fields: "*" + + - match: { indices: [ "test-index" ] } + - exists: fields.sparse_field + - exists: fields.dense_field + - not_exists: fields.sparse_field.inference.chunks.embeddings + - not_exists: fields.sparse_field.inference.chunks.text + - not_exists: fields.sparse_field.inference.chunks + - not_exists: fields.sparse_field.inference + - not_exists: fields.dense_field.inference.chunks.embeddings + - not_exists: fields.dense_field.inference.chunks.text + - not_exists: fields.dense_field.inference.chunks + - not_exists: fields.dense_field.inference