ESQL Add esql hash function (#117989)

This change introduces esql hash(alg, input) function that relies on the Java MessageDigest to compute the hash.
This commit is contained in:
Ievgen Degtiarenko 2024-12-18 09:56:42 +01:00 committed by GitHub
parent 140beb1184
commit 7cf28a910e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
21 changed files with 991 additions and 2 deletions

View File

@ -0,0 +1,5 @@
pr: 117989
summary: ESQL Add esql hash function
area: ES|QL
type: enhancement
issues: []

View File

@ -0,0 +1,5 @@
// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
*Description*
Computes the hash of the input using various algorithms such as MD5, SHA, SHA-224, SHA-256, SHA-384, SHA-512.

View File

@ -0,0 +1,82 @@
{
"comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.",
"type" : "eval",
"name" : "hash",
"description" : "Computes the hash of the input using various algorithms such as MD5, SHA, SHA-224, SHA-256, SHA-384, SHA-512.",
"signatures" : [
{
"params" : [
{
"name" : "algorithm",
"type" : "keyword",
"optional" : false,
"description" : "Hash algorithm to use."
},
{
"name" : "input",
"type" : "keyword",
"optional" : false,
"description" : "Input to hash."
}
],
"variadic" : false,
"returnType" : "keyword"
},
{
"params" : [
{
"name" : "algorithm",
"type" : "keyword",
"optional" : false,
"description" : "Hash algorithm to use."
},
{
"name" : "input",
"type" : "text",
"optional" : false,
"description" : "Input to hash."
}
],
"variadic" : false,
"returnType" : "keyword"
},
{
"params" : [
{
"name" : "algorithm",
"type" : "text",
"optional" : false,
"description" : "Hash algorithm to use."
},
{
"name" : "input",
"type" : "keyword",
"optional" : false,
"description" : "Input to hash."
}
],
"variadic" : false,
"returnType" : "keyword"
},
{
"params" : [
{
"name" : "algorithm",
"type" : "text",
"optional" : false,
"description" : "Hash algorithm to use."
},
{
"name" : "input",
"type" : "text",
"optional" : false,
"description" : "Input to hash."
}
],
"variadic" : false,
"returnType" : "keyword"
}
],
"preview" : false,
"snapshot_only" : false
}

View File

@ -0,0 +1,7 @@
<!--
This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
-->
### HASH
Computes the hash of the input using various algorithms such as MD5, SHA, SHA-224, SHA-256, SHA-384, SHA-512.

View File

@ -0,0 +1,14 @@
// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
[discrete]
[[esql-hash]]
=== `HASH`
*Syntax*
[.text-center]
image::esql/functions/signature/hash.svg[Embedded,opts=inline]
include::../parameters/hash.asciidoc[]
include::../description/hash.asciidoc[]
include::../types/hash.asciidoc[]

View File

@ -0,0 +1,9 @@
// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
*Parameters*
`algorithm`::
Hash algorithm to use.
`input`::
Input to hash.

View File

@ -0,0 +1 @@
<svg version="1.1" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns="http://www.w3.org/2000/svg" width="432" height="46" viewbox="0 0 432 46"><defs><style type="text/css">#guide .c{fill:none;stroke:#222222;}#guide .k{fill:#000000;font-family:Roboto Mono,Sans-serif;font-size:20px;}#guide .s{fill:#e4f4ff;stroke:#222222;}#guide .syn{fill:#8D8D8D;font-family:Roboto Mono,Sans-serif;font-size:20px;}</style></defs><path class="c" d="M0 31h5m68 0h10m32 0h10m128 0h10m32 0h10m80 0h10m32 0h5"/><rect class="s" x="5" y="5" width="68" height="36"/><text class="k" x="15" y="31">HASH</text><rect class="s" x="83" y="5" width="32" height="36" rx="7"/><text class="syn" x="93" y="31">(</text><rect class="s" x="125" y="5" width="128" height="36" rx="7"/><text class="k" x="135" y="31">algorithm</text><rect class="s" x="263" y="5" width="32" height="36" rx="7"/><text class="syn" x="273" y="31">,</text><rect class="s" x="305" y="5" width="80" height="36" rx="7"/><text class="k" x="315" y="31">input</text><rect class="s" x="395" y="5" width="32" height="36" rx="7"/><text class="syn" x="405" y="31">)</text></svg>

After

Width:  |  Height:  |  Size: 1.1 KiB

View File

@ -13,6 +13,7 @@
* <<esql-concat>>
* <<esql-ends_with>>
* <<esql-from_base64>>
* <<esql-hash>>
* <<esql-left>>
* <<esql-length>>
* <<esql-locate>>
@ -37,6 +38,7 @@ include::layout/byte_length.asciidoc[]
include::layout/concat.asciidoc[]
include::layout/ends_with.asciidoc[]
include::layout/from_base64.asciidoc[]
include::layout/hash.asciidoc[]
include::layout/left.asciidoc[]
include::layout/length.asciidoc[]
include::layout/locate.asciidoc[]

View File

@ -0,0 +1,12 @@
// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
*Supported types*
[%header.monospaced.styled,format=dsv,separator=|]
|===
algorithm | input | result
keyword | keyword | keyword
keyword | text | keyword
text | keyword | keyword
text | text | keyword
|===

View File

@ -0,0 +1,105 @@
hash
required_capability: hash_function
FROM sample_data
| WHERE message != "Connection error"
| EVAL md5 = hash("md5", message), sha256 = hash("sha256", message)
| KEEP message, md5, sha256;
ignoreOrder:true
message:keyword | md5:keyword | sha256:keyword
Connected to 10.1.0.1 | abd7d1ce2bb636842a29246b3512dcae | 6d8372129ad78770f7185554dd39864749a62690216460752d6c075fa38ad85c
Connected to 10.1.0.2 | 8f8f1cb60832d153f5b9ec6dc828b93f | b0db24720f15857091b3c99f4c4833586d0ea3229911b8777efb8d917cf27e9a
Connected to 10.1.0.3 | 912b6dc13503165a15de43304bb77c78 | 75b0480188db8acc4d5cc666a51227eb2bc5b989cd8ca912609f33e0846eff57
Disconnected | ef70e46fd3bbc21e3e1f0b6815e750c0 | 04dfac3671b494ad53fcd152f7a14511bfb35747278aad8ce254a0d6e4ba4718
;
hashOfConvertedType
required_capability: hash_function
FROM sample_data
| WHERE message != "Connection error"
| EVAL input = event_duration::STRING, md5 = hash("md5", input), sha256 = hash("sha256", input)
| KEEP message, input, md5, sha256;
ignoreOrder:true
message:keyword | input:keyword | md5:keyword | sha256:keyword
Connected to 10.1.0.1 | 1756467 | c4fc1c57ee9b1d2b2023b70c8c167b54 | 8376a50a7ba7e6bd1bf9ad0c32d27d2f49fd0fa422573f98f239e21048b078f3
Connected to 10.1.0.2 | 2764889 | 8e8cf005e11a7b5df1d9478a4715a444 | 1031f2bef8eaecbf47319505422300b27ea1f7c38b6717d41332325062f9a56a
Connected to 10.1.0.3 | 3450233 | 09f2c64f5a55e9edf8ffbad336b561d8 | f77d7545769c4ecc85092f4f0b7ec8c20f467e4beb15fe67ca29f9aa8e9a6900
Disconnected | 1232382 | 6beac1485638d51e13c2c53990a2f611 | 9a03c1274a3ebb6c1cb85d170ce0a6fdb9d2232724e06b9f5e7cb9274af3cad6
;
hashOfEmptyInput
required_capability: hash_function
ROW input="" | EVAL md5 = hash("md5", input), sha256 = hash("sha256", input);
input:keyword | md5:keyword | sha256:keyword
| d41d8cd98f00b204e9800998ecf8427e | e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
;
hashOfNullInput
required_capability: hash_function
ROW input=null::STRING | EVAL md5 = hash("md5", input), sha256 = hash("sha256", input);
input:keyword | md5:keyword | sha256:keyword
null | null | null
;
hashWithNullAlgorithm
required_capability: hash_function
ROW input="input" | EVAL hash = hash(null, input);
input:keyword | hash:keyword
input | null
;
hashWithMv
required_capability: hash_function
ROW input=["foo", "bar"] | mv_expand input | EVAL md5 = hash("md5", input), sha256 = hash("sha256", input);
input:keyword | md5:keyword | sha256:keyword
foo | acbd18db4cc2f85cedef654fccc4a4d8 | 2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae
bar | 37b51d194a7513e45b56f6524f2d51f2 | fcde2b2edba56bf408601fb721fe9b5c338d10ee429ea04fae5511b68fbf8fb9
;
hashWithNestedFunctions
required_capability: hash_function
ROW input=["foo", "bar"] | EVAL hash = concat(hash("md5", mv_concat(input, "-")), "-", hash("sha256", mv_concat(input, "-")));
input:keyword | hash:keyword
["foo", "bar"] | e5f9ec048d1dbe19c70f720e002f9cb1-7d89c4f517e3bd4b5e8e76687937005b602ea00c5cba3e25ef1fc6575a55103e
;
hashWithConvertedTypes
required_capability: hash_function
ROW input=42 | EVAL md5 = hash("md5", input::STRING), sha256 = hash("sha256", to_string(input));
input:integer | md5:keyword | sha256:keyword
42 | a1d0c6e83f027327d8461063f4ac58a6 | 73475cb40a568e8da8a045ced110137e159f890ac4da883b6b17dc651b3a8049
;
hashWithStats
required_capability: hash_function
FROM sample_data
| EVAL md5="md5"
| STATS count = count(*) by hash(md5, message)
| WHERE count > 1;
count:long | hash(md5, message):keyword
3 | 2e92ae79ff32b37fee4368a594792183
;

View File

@ -0,0 +1,142 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License
// 2.0; you may not use this file except in compliance with the Elastic License
// 2.0.
package org.elasticsearch.xpack.esql.expression.function.scalar.string;
import java.lang.IllegalArgumentException;
import java.lang.Override;
import java.lang.String;
import java.util.function.Function;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.BytesRefBlock;
import org.elasticsearch.compute.data.BytesRefVector;
import org.elasticsearch.compute.data.Page;
import org.elasticsearch.compute.operator.BreakingBytesRefBuilder;
import org.elasticsearch.compute.operator.DriverContext;
import org.elasticsearch.compute.operator.EvalOperator;
import org.elasticsearch.compute.operator.Warnings;
import org.elasticsearch.core.Releasables;
import org.elasticsearch.xpack.esql.core.tree.Source;
/**
 * {@link EvalOperator.ExpressionEvaluator} implementation for {@link Hash},
 * specialized for the case where the algorithm argument is a constant and was
 * resolved to a {@link Hash.HashFunction} before the evaluator was built.
 * This class is generated. Do not edit it.
 */
public final class HashConstantEvaluator implements EvalOperator.ExpressionEvaluator {
  private final Source source;

  // Circuit-breaker-tracked buffer reused across rows to build each hex digest.
  private final BreakingBytesRefBuilder scratch;

  // Pre-resolved hash function: validated and instantiated once, not per row.
  private final Hash.HashFunction algorithm;

  private final EvalOperator.ExpressionEvaluator input;

  private final DriverContext driverContext;

  // Created lazily on the first warning; see warnings().
  private Warnings warnings;

  public HashConstantEvaluator(Source source, BreakingBytesRefBuilder scratch,
      Hash.HashFunction algorithm, EvalOperator.ExpressionEvaluator input,
      DriverContext driverContext) {
    this.source = source;
    this.scratch = scratch;
    this.algorithm = algorithm;
    this.input = input;
    this.driverContext = driverContext;
  }

  @Override
  public Block eval(Page page) {
    try (BytesRefBlock inputBlock = (BytesRefBlock) input.eval(page)) {
      BytesRefVector inputVector = inputBlock.asVector();
      if (inputVector == null) {
        // The block contains nulls or multi-values; take the null-aware path.
        return eval(page.getPositionCount(), inputBlock);
      }
      // Dense vector: fast path with no per-position null checks.
      return eval(page.getPositionCount(), inputVector).asBlock();
    }
  }

  /**
   * Null-aware path: emits null for null positions, and null plus a warning for
   * multi-valued positions.
   */
  public BytesRefBlock eval(int positionCount, BytesRefBlock inputBlock) {
    try(BytesRefBlock.Builder result = driverContext.blockFactory().newBytesRefBlockBuilder(positionCount)) {
      BytesRef inputScratch = new BytesRef();
      position: for (int p = 0; p < positionCount; p++) {
        if (inputBlock.isNull(p)) {
          result.appendNull();
          continue position;
        }
        if (inputBlock.getValueCount(p) != 1) {
          if (inputBlock.getValueCount(p) > 1) {
            warnings().registerException(new IllegalArgumentException("single-value function encountered multi-value"));
          }
          result.appendNull();
          continue position;
        }
        // No try/catch needed: the constant algorithm was validated up front.
        result.appendBytesRef(Hash.processConstant(this.scratch, this.algorithm, inputBlock.getBytesRef(inputBlock.getFirstValueIndex(p), inputScratch)));
      }
      return result.build();
    }
  }

  /**
   * Fast path for a dense input vector; returns a vector because the constant
   * algorithm cannot fail per row, so no nulls can be produced here.
   */
  public BytesRefVector eval(int positionCount, BytesRefVector inputVector) {
    try(BytesRefVector.Builder result = driverContext.blockFactory().newBytesRefVectorBuilder(positionCount)) {
      BytesRef inputScratch = new BytesRef();
      position: for (int p = 0; p < positionCount; p++) {
        result.appendBytesRef(Hash.processConstant(this.scratch, this.algorithm, inputVector.getBytesRef(p, inputScratch)));
      }
      return result.build();
    }
  }

  @Override
  public String toString() {
    return "HashConstantEvaluator[" + "algorithm=" + algorithm + ", input=" + input + "]";
  }

  @Override
  public void close() {
    // Releases both the breaker-tracked scratch buffer and the input evaluator.
    Releasables.closeExpectNoException(scratch, input);
  }

  // Built lazily so the warning machinery costs nothing when no warning occurs.
  private Warnings warnings() {
    if (warnings == null) {
      this.warnings = Warnings.createWarnings(
          driverContext.warningsMode(),
          source.source().getLineNumber(),
          source.source().getColumnNumber(),
          source.text()
      );
    }
    return warnings;
  }

  /** Builds a {@link HashConstantEvaluator} per driver, giving each its own scratch and hash function. */
  static class Factory implements EvalOperator.ExpressionEvaluator.Factory {
    private final Source source;

    private final Function<DriverContext, BreakingBytesRefBuilder> scratch;

    // Supplies a per-driver MessageDigest copy; MessageDigest instances are stateful.
    private final Function<DriverContext, Hash.HashFunction> algorithm;

    private final EvalOperator.ExpressionEvaluator.Factory input;

    public Factory(Source source, Function<DriverContext, BreakingBytesRefBuilder> scratch,
        Function<DriverContext, Hash.HashFunction> algorithm,
        EvalOperator.ExpressionEvaluator.Factory input) {
      this.source = source;
      this.scratch = scratch;
      this.algorithm = algorithm;
      this.input = input;
    }

    @Override
    public HashConstantEvaluator get(DriverContext context) {
      return new HashConstantEvaluator(source, scratch.apply(context), algorithm.apply(context), input.get(context), context);
    }

    @Override
    public String toString() {
      return "HashConstantEvaluator[" + "algorithm=" + algorithm + ", input=" + input + "]";
    }
  }
}

View File

@ -0,0 +1,174 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License
// 2.0; you may not use this file except in compliance with the Elastic License
// 2.0.
package org.elasticsearch.xpack.esql.expression.function.scalar.string;
import java.lang.IllegalArgumentException;
import java.lang.Override;
import java.lang.String;
import java.security.NoSuchAlgorithmException;
import java.util.function.Function;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.BytesRefBlock;
import org.elasticsearch.compute.data.BytesRefVector;
import org.elasticsearch.compute.data.Page;
import org.elasticsearch.compute.operator.BreakingBytesRefBuilder;
import org.elasticsearch.compute.operator.DriverContext;
import org.elasticsearch.compute.operator.EvalOperator;
import org.elasticsearch.compute.operator.Warnings;
import org.elasticsearch.core.Releasables;
import org.elasticsearch.xpack.esql.core.tree.Source;
/**
 * {@link EvalOperator.ExpressionEvaluator} implementation for {@link Hash},
 * used when the algorithm argument is not a constant and must be resolved by
 * name for every row. This class is generated. Do not edit it.
 */
public final class HashEvaluator implements EvalOperator.ExpressionEvaluator {
  private final Source source;

  // Circuit-breaker-tracked buffer reused across rows to build each hex digest.
  private final BreakingBytesRefBuilder scratch;

  private final EvalOperator.ExpressionEvaluator algorithm;

  private final EvalOperator.ExpressionEvaluator input;

  private final DriverContext driverContext;

  // Created lazily on the first warning; see warnings().
  private Warnings warnings;

  public HashEvaluator(Source source, BreakingBytesRefBuilder scratch,
      EvalOperator.ExpressionEvaluator algorithm, EvalOperator.ExpressionEvaluator input,
      DriverContext driverContext) {
    this.source = source;
    this.scratch = scratch;
    this.algorithm = algorithm;
    this.input = input;
    this.driverContext = driverContext;
  }

  @Override
  public Block eval(Page page) {
    try (BytesRefBlock algorithmBlock = (BytesRefBlock) algorithm.eval(page)) {
      try (BytesRefBlock inputBlock = (BytesRefBlock) input.eval(page)) {
        BytesRefVector algorithmVector = algorithmBlock.asVector();
        if (algorithmVector == null) {
          // Nulls or multi-values in the algorithm block; take the null-aware path.
          return eval(page.getPositionCount(), algorithmBlock, inputBlock);
        }
        BytesRefVector inputVector = inputBlock.asVector();
        if (inputVector == null) {
          // Nulls or multi-values in the input block; take the null-aware path.
          return eval(page.getPositionCount(), algorithmBlock, inputBlock);
        }
        // Both arguments are dense vectors: fast path.
        return eval(page.getPositionCount(), algorithmVector, inputVector);
      }
    }
  }

  /**
   * Null-aware path: emits null for null arguments, null plus a warning for
   * multi-valued arguments, and null plus a warning for unknown algorithm names.
   */
  public BytesRefBlock eval(int positionCount, BytesRefBlock algorithmBlock,
      BytesRefBlock inputBlock) {
    try(BytesRefBlock.Builder result = driverContext.blockFactory().newBytesRefBlockBuilder(positionCount)) {
      BytesRef algorithmScratch = new BytesRef();
      BytesRef inputScratch = new BytesRef();
      position: for (int p = 0; p < positionCount; p++) {
        if (algorithmBlock.isNull(p)) {
          result.appendNull();
          continue position;
        }
        if (algorithmBlock.getValueCount(p) != 1) {
          if (algorithmBlock.getValueCount(p) > 1) {
            warnings().registerException(new IllegalArgumentException("single-value function encountered multi-value"));
          }
          result.appendNull();
          continue position;
        }
        if (inputBlock.isNull(p)) {
          result.appendNull();
          continue position;
        }
        if (inputBlock.getValueCount(p) != 1) {
          if (inputBlock.getValueCount(p) > 1) {
            warnings().registerException(new IllegalArgumentException("single-value function encountered multi-value"));
          }
          result.appendNull();
          continue position;
        }
        try {
          result.appendBytesRef(Hash.process(this.scratch, algorithmBlock.getBytesRef(algorithmBlock.getFirstValueIndex(p), algorithmScratch), inputBlock.getBytesRef(inputBlock.getFirstValueIndex(p), inputScratch)));
        } catch (NoSuchAlgorithmException e) {
          // Unknown algorithm for this row: record a warning and emit null.
          warnings().registerException(e);
          result.appendNull();
        }
      }
      return result.build();
    }
  }

  /**
   * Fast path for dense vectors. Still returns a block (not a vector) because
   * an unknown algorithm name produces a null entry.
   */
  public BytesRefBlock eval(int positionCount, BytesRefVector algorithmVector,
      BytesRefVector inputVector) {
    try(BytesRefBlock.Builder result = driverContext.blockFactory().newBytesRefBlockBuilder(positionCount)) {
      BytesRef algorithmScratch = new BytesRef();
      BytesRef inputScratch = new BytesRef();
      position: for (int p = 0; p < positionCount; p++) {
        try {
          result.appendBytesRef(Hash.process(this.scratch, algorithmVector.getBytesRef(p, algorithmScratch), inputVector.getBytesRef(p, inputScratch)));
        } catch (NoSuchAlgorithmException e) {
          warnings().registerException(e);
          result.appendNull();
        }
      }
      return result.build();
    }
  }

  @Override
  public String toString() {
    return "HashEvaluator[" + "algorithm=" + algorithm + ", input=" + input + "]";
  }

  @Override
  public void close() {
    // Releases the breaker-tracked scratch buffer and both argument evaluators.
    Releasables.closeExpectNoException(scratch, algorithm, input);
  }

  // Built lazily so the warning machinery costs nothing when no warning occurs.
  private Warnings warnings() {
    if (warnings == null) {
      this.warnings = Warnings.createWarnings(
          driverContext.warningsMode(),
          source.source().getLineNumber(),
          source.source().getColumnNumber(),
          source.text()
      );
    }
    return warnings;
  }

  /** Builds a {@link HashEvaluator} per driver, giving each its own scratch buffer. */
  static class Factory implements EvalOperator.ExpressionEvaluator.Factory {
    private final Source source;

    private final Function<DriverContext, BreakingBytesRefBuilder> scratch;

    private final EvalOperator.ExpressionEvaluator.Factory algorithm;

    private final EvalOperator.ExpressionEvaluator.Factory input;

    public Factory(Source source, Function<DriverContext, BreakingBytesRefBuilder> scratch,
        EvalOperator.ExpressionEvaluator.Factory algorithm,
        EvalOperator.ExpressionEvaluator.Factory input) {
      this.source = source;
      this.scratch = scratch;
      this.algorithm = algorithm;
      this.input = input;
    }

    @Override
    public HashEvaluator get(DriverContext context) {
      return new HashEvaluator(source, scratch.apply(context), algorithm.get(context), input.get(context), context);
    }

    @Override
    public String toString() {
      return "HashEvaluator[" + "algorithm=" + algorithm + ", input=" + input + "]";
    }
  }
}

View File

@ -449,6 +449,11 @@ public class EsqlCapabilities {
*/
KQL_FUNCTION(Build.current().isSnapshot()),
/**
* Hash function
*/
HASH_FUNCTION,
/**
* Don't optimize CASE IS NOT NULL function by not requiring the fields to be not null as well.
* https://github.com/elastic/elasticsearch/issues/112704

View File

@ -129,6 +129,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.string.BitLength;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.ByteLength;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.EndsWith;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Hash;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.LTrim;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Left;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Length;
@ -327,6 +328,7 @@ public class EsqlFunctionRegistry {
def(ByteLength.class, ByteLength::new, "byte_length"),
def(Concat.class, Concat::new, "concat"),
def(EndsWith.class, EndsWith::new, "ends_with"),
def(Hash.class, Hash::new, "hash"),
def(LTrim.class, LTrim::new, "ltrim"),
def(Left.class, Left::new, "left"),
def(Length.class, Length::new, "length"),

View File

@ -34,6 +34,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.nulls.Coalesce;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.BitLength;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.EndsWith;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Hash;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Left;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Locate;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Repeat;
@ -64,6 +65,7 @@ public class ScalarFunctionWritables {
entries.add(E.ENTRY);
entries.add(EndsWith.ENTRY);
entries.add(Greatest.ENTRY);
entries.add(Hash.ENTRY);
entries.add(Hypot.ENTRY);
entries.add(In.ENTRY);
entries.add(InsensitiveEquals.ENTRY);

View File

@ -0,0 +1,217 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
package org.elasticsearch.xpack.esql.expression.function.scalar.string;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.compute.ann.Evaluator;
import org.elasticsearch.compute.ann.Fixed;
import org.elasticsearch.compute.operator.BreakingBytesRefBuilder;
import org.elasticsearch.compute.operator.DriverContext;
import org.elasticsearch.compute.operator.EvalOperator;
import org.elasticsearch.xpack.esql.core.InvalidArgumentException;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
import org.elasticsearch.xpack.esql.expression.function.Param;
import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction;
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
import java.io.IOException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.List;
import java.util.function.Function;
import static org.elasticsearch.compute.ann.Fixed.Scope.THREAD_LOCAL;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString;
/**
 * ES|QL scalar function {@code hash(algorithm, input)} backed by
 * {@link java.security.MessageDigest}. The result is the digest of {@code input}
 * rendered as lowercase hex and returned as a keyword. When the algorithm is a
 * foldable constant it is validated at evaluator-creation time and a specialized
 * evaluator is used; otherwise the algorithm is resolved by name per row.
 */
public class Hash extends EsqlScalarFunction {
    public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Hash", Hash::new);

    // First argument: name of the MessageDigest algorithm (e.g. "md5", "sha256").
    private final Expression algorithm;
    // Second argument: the value to hash.
    private final Expression input;

    @FunctionInfo(
        returnType = "keyword",
        description = "Computes the hash of the input using various algorithms such as MD5, SHA, SHA-224, SHA-256, SHA-384, SHA-512."
    )
    public Hash(
        Source source,
        @Param(name = "algorithm", type = { "keyword", "text" }, description = "Hash algorithm to use.") Expression algorithm,
        @Param(name = "input", type = { "keyword", "text" }, description = "Input to hash.") Expression input
    ) {
        super(source, List.of(algorithm, input));
        this.algorithm = algorithm;
        this.input = input;
    }

    // Deserialization constructor used by the ENTRY named-writeable registration.
    private Hash(StreamInput in) throws IOException {
        this(Source.readFrom((PlanStreamInput) in), in.readNamedWriteable(Expression.class), in.readNamedWriteable(Expression.class));
    }

    @Override
    public void writeTo(StreamOutput out) throws IOException {
        // Must mirror the StreamInput constructor: source, then both children.
        source().writeTo(out);
        out.writeNamedWriteable(algorithm);
        out.writeNamedWriteable(input);
    }

    @Override
    public String getWriteableName() {
        return ENTRY.name;
    }

    @Override
    public DataType dataType() {
        // The hex digest is always returned as a keyword.
        return DataType.KEYWORD;
    }

    @Override
    protected TypeResolution resolveType() {
        if (childrenResolved() == false) {
            return new TypeResolution("Unresolved children");
        }
        // Both arguments must be string-typed (keyword or text).
        TypeResolution resolution = isString(algorithm, sourceText(), FIRST);
        if (resolution.unresolved()) {
            return resolution;
        }
        return isString(input, sourceText(), SECOND);
    }

    @Override
    public boolean foldable() {
        return algorithm.foldable() && input.foldable();
    }

    /**
     * Row-level implementation for the non-constant-algorithm evaluator: the
     * algorithm name is resolved per row and may fail with
     * {@link NoSuchAlgorithmException}, which the generated evaluator turns into
     * a warning plus a null result.
     */
    @Evaluator(warnExceptions = NoSuchAlgorithmException.class)
    static BytesRef process(
        @Fixed(includeInToString = false, scope = THREAD_LOCAL) BreakingBytesRefBuilder scratch,
        BytesRef algorithm,
        BytesRef input
    ) throws NoSuchAlgorithmException {
        return hash(scratch, MessageDigest.getInstance(algorithm.utf8ToString()), input);
    }

    /**
     * Row-level implementation for the constant-algorithm evaluator: the
     * {@link HashFunction} was validated up front, so this cannot fail per row.
     */
    @Evaluator(extraName = "Constant")
    static BytesRef processConstant(
        @Fixed(includeInToString = false, scope = THREAD_LOCAL) BreakingBytesRefBuilder scratch,
        @Fixed(scope = THREAD_LOCAL) HashFunction algorithm,
        BytesRef input
    ) {
        return hash(scratch, algorithm.digest, input);
    }

    // Computes the digest of input and writes it as lowercase hex into the reused
    // breaker-tracked scratch buffer; the returned BytesRef is a view over scratch.
    private static BytesRef hash(BreakingBytesRefBuilder scratch, MessageDigest algorithm, BytesRef input) {
        algorithm.reset();
        algorithm.update(input.bytes, input.offset, input.length);
        var digest = algorithm.digest();
        scratch.clear();
        // Two hex characters per digest byte.
        scratch.grow(digest.length * 2);
        appendUtf8HexDigest(scratch, digest);
        return scratch.bytesRefView();
    }

    // ASCII codes for '0'-'9' and 'a'-'f', indexed by nibble value.
    private static final byte[] ASCII_HEX_BYTES = new byte[] { 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 97, 98, 99, 100, 101, 102 };

    /**
     * This function allows appending hex bytes directly to the {@link BreakingBytesRefBuilder},
     * bypassing unnecessary array allocations and byte array copying.
     */
    private static void appendUtf8HexDigest(BreakingBytesRefBuilder scratch, byte[] bytes) {
        for (byte b : bytes) {
            // High nibble then low nibble; the & 0xf masks sign extension of negative bytes.
            scratch.append(ASCII_HEX_BYTES[b >> 4 & 0xf]);
            scratch.append(ASCII_HEX_BYTES[b & 0xf]);
        }
    }

    @Override
    public EvalOperator.ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
        if (algorithm.foldable()) {
            try {
                // hash function is created here in order to validate the algorithm is valid before evaluator is created
                var hf = HashFunction.create((BytesRef) algorithm.fold());
                return new HashConstantEvaluator.Factory(
                    source(),
                    context -> new BreakingBytesRefBuilder(context.breaker(), "hash"),
                    // Each driver gets its own copy because MessageDigest is stateful.
                    new Function<>() {
                        @Override
                        public HashFunction apply(DriverContext context) {
                            return hf.copy();
                        }

                        @Override
                        public String toString() {
                            return hf.toString();
                        }
                    },
                    toEvaluator.apply(input)
                );
            } catch (NoSuchAlgorithmException e) {
                throw new InvalidArgumentException(e, "invalid algorithm for [{}]: {}", sourceText(), e.getMessage());
            }
        } else {
            // Algorithm varies per row; resolution failures surface as warnings at runtime.
            return new HashEvaluator.Factory(
                source(),
                context -> new BreakingBytesRefBuilder(context.breaker(), "hash"),
                toEvaluator.apply(algorithm),
                toEvaluator.apply(input)
            );
        }
    }

    @Override
    public Expression replaceChildren(List<Expression> newChildren) {
        return new Hash(source(), newChildren.get(0), newChildren.get(1));
    }

    @Override
    protected NodeInfo<? extends Expression> info() {
        return NodeInfo.create(this, Hash::new, children().get(0), children().get(1));
    }

    /**
     * Pairs an algorithm name with a {@link MessageDigest} instance.
     * The digest is stateful, so {@link #copy()} creates a fresh instance per driver.
     */
    public record HashFunction(String algorithm, MessageDigest digest) {

        public static HashFunction create(BytesRef literal) throws NoSuchAlgorithmException {
            var algorithm = literal.utf8ToString();
            var digest = MessageDigest.getInstance(algorithm);
            return new HashFunction(algorithm, digest);
        }

        public HashFunction copy() {
            try {
                return new HashFunction(algorithm, MessageDigest.getInstance(algorithm));
            } catch (NoSuchAlgorithmException e) {
                // create() already validated the name, so this should be unreachable.
                assert false : "Algorithm should be valid at this point";
                throw new IllegalStateException(e);
            }
        }

        @Override
        public String toString() {
            return algorithm;
        }
    }

    Expression algorithm() {
        return algorithm;
    }

    Expression input() {
        return input;
    }
}

View File

@ -14,10 +14,15 @@ import org.elasticsearch.xpack.esql.expression.function.ReferenceAttributeTests;
import org.elasticsearch.xpack.esql.plan.AbstractNodeSerializationTests;
public abstract class AbstractExpressionSerializationTests<T extends Expression> extends AbstractNodeSerializationTests<T> {
/** Creates a random reference attribute to use as a child expression in serialization tests. */
public static Expression randomChild() {
    return ReferenceAttributeTests.randomReferenceAttribute(false);
}
/** Returns a random child expression that is guaranteed to differ from {@code expression}. */
public static Expression mutateExpression(Expression expression) {
    return randomValueOtherThan(expression, () -> randomChild());
}
@Override
protected final NamedWriteableRegistry getNamedWriteableRegistry() {
return new NamedWriteableRegistry(ExpressionWritables.getNamedWriteables());

View File

@ -0,0 +1,27 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
package org.elasticsearch.xpack.esql.expression.function.scalar.string;
import org.elasticsearch.xpack.esql.expression.AbstractExpressionSerializationTests;
import java.io.IOException;
/**
 * Round-trip serialization tests for the {@code Hash} function.
 */
public class HashSerializationTests extends AbstractExpressionSerializationTests<Hash> {

    @Override
    protected Hash createTestInstance() {
        return new Hash(randomSource(), randomChild(), randomChild());
    }

    @Override
    protected Hash mutateInstance(Hash instance) throws IOException {
        // Mutate exactly one of the two children so the result always differs from the input.
        if (randomBoolean()) {
            return new Hash(instance.source(), mutateExpression(instance.algorithm()), instance.input());
        }
        return new Hash(instance.source(), instance.algorithm(), mutateExpression(instance.input()));
    }
}

View File

@ -0,0 +1,66 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
package org.elasticsearch.xpack.esql.expression.function.scalar.string;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.breaker.CircuitBreaker;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.MockBigArrays;
import org.elasticsearch.common.util.PageCacheRecycler;
import org.elasticsearch.compute.data.BlockFactory;
import org.elasticsearch.compute.operator.DriverContext;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.esql.core.InvalidArgumentException;
import org.elasticsearch.xpack.esql.core.expression.Literal;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.junit.After;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import static org.elasticsearch.xpack.esql.expression.function.AbstractFunctionTestCase.evaluator;
import static org.elasticsearch.xpack.esql.expression.function.AbstractFunctionTestCase.field;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.startsWith;
/**
 * Tests for {@code Hash} behavior that does not fit the parameterized
 * AbstractScalarFunctionTestCase framework.
 */
public class HashStaticTests extends ESTestCase {

    /**
     * A constant-folded algorithm argument is validated eagerly: building the
     * evaluator for an unknown algorithm must throw before any row is processed.
     * Note: the source text previously read "hast(...)"; fixed to "hash(...)".
     */
    public void testInvalidAlgorithmLiteral() {
        Source source = new Source(0, 0, "hash(\"invalid\", input)");
        DriverContext driverContext = driverContext();
        InvalidArgumentException e = expectThrows(
            InvalidArgumentException.class,
            () -> evaluator(
                new Hash(source, new Literal(source, new BytesRef("invalid"), DataType.KEYWORD), field("input", DataType.KEYWORD))
            ).get(driverContext)
        );
        // The message embeds the source text above, so both strings must stay in sync.
        assertThat(e.getMessage(), startsWith("invalid algorithm for [hash(\"invalid\", input)]: invalid MessageDigest not available"));
    }

    /**
     * The following fields and methods were borrowed from AbstractScalarFunctionTestCase
     */
    private final List<CircuitBreaker> breakers = Collections.synchronizedList(new ArrayList<>());

    // Creates a breaker-tracked DriverContext and remembers its breaker so
    // allMemoryReleased() can verify everything was freed.
    private DriverContext driverContext() {
        BigArrays bigArrays = new MockBigArrays(PageCacheRecycler.NON_RECYCLING_INSTANCE, ByteSizeValue.ofMb(256)).withCircuitBreaking();
        CircuitBreaker breaker = bigArrays.breakerService().getBreaker(CircuitBreaker.REQUEST);
        breakers.add(breaker);
        return new DriverContext(bigArrays, new BlockFactory(breaker, bigArrays));
    }

    @After
    public void allMemoryReleased() {
        for (CircuitBreaker breaker : breakers) {
            assertThat(breaker.getUsed(), equalTo(0L));
        }
    }
}

View File

@ -0,0 +1,107 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
package org.elasticsearch.xpack.esql.expression.function.scalar.string;
import com.carrotsearch.randomizedtesting.annotations.Name;
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.xpack.esql.core.InvalidArgumentException;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase;
import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.HexFormat;
import java.util.List;
import java.util.function.Supplier;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.nullValue;
/**
 * Parameterized tests for the ESQL {@code hash(algorithm, input)} function.
 * Generates cases for every supported digest algorithm across all combinations of
 * literal/non-literal algorithm arguments and keyword/text argument types, plus a
 * case asserting the warning/folding behavior for an unknown algorithm.
 */
public class HashTests extends AbstractScalarFunctionTestCase {

    public HashTests(@Name("TestCase") Supplier<TestCaseSupplier.TestCase> testCaseSupplier) {
        this.testCase = testCaseSupplier.get();
    }

    @ParametersFactory
    public static Iterable<Object[]> parameters() {
        List<TestCaseSupplier> suppliers = new ArrayList<>();
        for (String algorithm : List.of("MD5", "SHA", "SHA-224", "SHA-256", "SHA-384", "SHA-512")) {
            suppliers.addAll(createTestCases(algorithm));
        }
        // An unrecognized algorithm evaluates to null with warnings, and folds to an InvalidArgumentException.
        suppliers.add(new TestCaseSupplier("Invalid algorithm", List.of(DataType.KEYWORD, DataType.KEYWORD), () -> {
            var input = randomAlphaOfLength(10);
            return new TestCaseSupplier.TestCase(
                List.of(
                    new TestCaseSupplier.TypedData(new BytesRef("invalid"), DataType.KEYWORD, "algorithm"),
                    new TestCaseSupplier.TypedData(new BytesRef(input), DataType.KEYWORD, "input")
                ),
                "HashEvaluator[algorithm=Attribute[channel=0], input=Attribute[channel=1]]",
                DataType.KEYWORD,
                is(nullValue())
            ).withWarning("Line -1:-1: evaluation of [] failed, treating result as null. Only first 20 failures recorded.")
                .withWarning("Line -1:-1: java.security.NoSuchAlgorithmException: invalid MessageDigest not available")
                .withFoldingException(InvalidArgumentException.class, "invalid algorithm for []: invalid MessageDigest not available");
        }));
        return parameterSuppliersFromTypedDataWithDefaultChecks(true, suppliers, (v, p) -> "string");
    }

    // Covers every combination of literal vs. channel-supplied algorithm and keyword/text argument types.
    private static List<TestCaseSupplier> createTestCases(String algorithm) {
        List<TestCaseSupplier> suppliers = new ArrayList<>();
        for (boolean forceLiteral : new boolean[] { false, true }) {
            for (DataType algorithmType : List.of(DataType.KEYWORD, DataType.TEXT)) {
                for (DataType inputType : List.of(DataType.KEYWORD, DataType.TEXT)) {
                    suppliers.add(createTestCase(algorithm, forceLiteral, algorithmType, inputType));
                }
            }
        }
        return suppliers;
    }

    private static TestCaseSupplier createTestCase(String algorithm, boolean forceLiteral, DataType algorithmType, DataType inputType) {
        return new TestCaseSupplier(algorithm, List.of(algorithmType, inputType), () -> {
            var inputData = randomFrom(TestCaseSupplier.stringCases(inputType)).get();
            // A literal algorithm is folded into the specialized constant evaluator.
            String expectedEvaluator = forceLiteral
                ? "HashConstantEvaluator[algorithm=" + algorithm + ", input=Attribute[channel=0]]"
                : "HashEvaluator[algorithm=Attribute[channel=0], input=Attribute[channel=1]]";
            return new TestCaseSupplier.TestCase(
                List.of(createTypedData(algorithm, forceLiteral, algorithmType, "algorithm"), inputData),
                expectedEvaluator,
                DataType.KEYWORD,
                equalTo(new BytesRef(hash(algorithm, BytesRefs.toString(inputData.data()))))
            );
        });
    }

    // Wraps the value as TypedData, optionally marking it as a literal so it is constant-folded.
    private static TestCaseSupplier.TypedData createTypedData(String value, boolean forceLiteral, DataType type, String name) {
        var typedData = new TestCaseSupplier.TypedData(new BytesRef(value), type, name);
        if (forceLiteral) {
            return typedData.forceLiteral();
        }
        return typedData;
    }

    // Reference implementation: lowercase-hex digest of the UTF-8 bytes of the input.
    private static String hash(String algorithm, String input) {
        try {
            MessageDigest digest = MessageDigest.getInstance(algorithm);
            byte[] digested = digest.digest(input.getBytes(StandardCharsets.UTF_8));
            return HexFormat.of().formatHex(digested);
        } catch (NoSuchAlgorithmException e) {
            throw new IllegalArgumentException("Unknown algorithm: " + algorithm);
        }
    }

    @Override
    protected Expression build(Source source, List<Expression> args) {
        return new Hash(source, args.get(0), args.get(1));
    }
}

View File

@ -92,7 +92,7 @@ setup:
- gt: {esql.functions.to_long: $functions_to_long}
- match: {esql.functions.coalesce: $functions_coalesce}
# Testing for the entire function set isn't feasible, so we just check that we return the correct count as an approximation.
- length: {esql.functions: 129} # check the "sister" test below for a likely update to the same esql.functions length check
- length: {esql.functions: 130} # check the "sister" test below for a likely update to the same esql.functions length check
---
"Basic ESQL usage output (telemetry) non-snapshot version":
@ -163,4 +163,4 @@ setup:
- match: {esql.functions.cos: $functions_cos}
- gt: {esql.functions.to_long: $functions_to_long}
- match: {esql.functions.coalesce: $functions_coalesce}
- length: {esql.functions: 125} # check the "sister" test above for a likely update to the same esql.functions length check
- length: {esql.functions: 126} # check the "sister" test above for a likely update to the same esql.functions length check