From 034e4b6ffd4fc35db76063cb5807a898c839fa4c Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Mon, 3 Mar 2025 15:26:54 -0500 Subject: [PATCH 1/2] ESQL: Begin documenting MV behaviors on non-aggs Most functions turn multivalue fields into `null`, but a few "obvious" ones do the "obvious" things. This starts to document these behaviors. --- .../src/main/resources/date.csv-spec | 59 +++++++++++++++++++ .../src/main/resources/docs.csv-spec | 13 ---- .../src/main/resources/math.csv-spec | 25 +++++++- .../src/main/resources/null.csv-spec | 13 ++++ .../function/scalar/conditional/Greatest.java | 7 ++- .../function/scalar/conditional/Least.java | 7 ++- .../function/scalar/date/DateDiff.java | 4 +- .../function/scalar/date/DateFormat.java | 2 +- .../function/scalar/date/DateParse.java | 4 +- .../function/scalar/nulls/Coalesce.java | 4 +- 10 files changed, 114 insertions(+), 24 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date.csv-spec index 1cb4784923e10..e0dc318c6c6b8 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date.csv-spec @@ -461,6 +461,22 @@ ROW end_23=TO_DATETIME("2023-12-31T23:59:59.999Z"), // end::evalDateDiffYearForDocs-result[] ; +dateDiffMv +// tag::date-diff-mv[] +ROW lhs=TO_DATETIME(["2024-01-01", "2024-02-01"]), + rhs=TO_DATETIME("2024-03-01") +| EVAL diff=DATE_DIFF("year", lhs, rhs) +// end::date-diff-mv[] +; +warning:Line 3:13: evaluation of [DATE_DIFF(\"year\", lhs, rhs)] failed, treating result as null. Only first 20 failures recorded. 
+warning:Line 3:13: java.lang.IllegalArgumentException: single-value function encountered multi-value + +// tag::date-diff-mv-result[] + lhs:date | rhs:date | diff:integer +["2024-01-01", "2024-02-01"]| 2024-03-01T00:00:00.000Z| null +// end::date-diff-mv-result[] +; + evalDateParseWithSimpleDate row a = "2023-02-01" | eval b = date_parse("yyyy-MM-dd", a) | keep b; @@ -619,6 +635,34 @@ emp_no:integer | new_date:datetime | birth_date:datetime | bool: 10050 | 1958-05-21T00:00:00.000Z | 1958-05-21T00:00:00.000Z | true ; +dateParseSimple +// tag::date-parse[] +ROW v = "2022-05-06" +| EVAL date = DATE_PARSE("yyyy-MM-dd", v) +// end::date-parse[] +; + +//tag::date-parse-result[] + v:keyword | date:date +2022-05-06 | 2022-05-06T00:00:00.000Z +// end::date-parse-result[] +; + +dateParseMv +// tag::date-parse-mv[] +ROW v = ["2022-05-06", "2022-06-06"] +| EVAL date = DATE_PARSE("yyyy-MM-dd", v) +// end::date-parse-mv[] +; +warning:Line 2:15: evaluation of [DATE_PARSE(\"yyyy-MM-dd\", v)] failed, treating result as null. Only first 20 failures recorded. +warning:Line 2:15: java.lang.IllegalArgumentException: single-value function encountered multi-value + +//tag::date-parse-mv-result[] + v:keyword | date:date +["2022-05-06", "2022-06-06"]| null +// end::date-parse-mv-result[] +; + dateFields from employees | where emp_no == 10049 or emp_no == 10050 | eval year = date_extract("year", birth_date), month = date_extract("month_of_year", birth_date), day = date_extract("day_of_month", birth_date) @@ -1061,6 +1105,21 @@ a:integer | df:keyword 1 | 1989-06-02 ; +dateFormatMv +// tag::date-format-mv[] +ROW v = TO_DATETIME(["2024-01-01", "2024-02-01"]) +| EVAL fmt = DATE_FORMAT("yyyy-MM-dd", v) +// end::date-format-mv[] +; +warning:Line 2:14: evaluation of [DATE_FORMAT(\"yyyy-MM-dd\", v)] failed, treating result as null. Only first 20 failures recorded. 
+warning:Line 2:14: java.lang.IllegalArgumentException: single-value function encountered multi-value + +// tag::date-format-mv-result[] + v:date | fmt:keyword +["2024-01-01", "2024-02-01"] | null +// end::date-format-mv-result[] +; + docsDateTrunc // tag::docsDateTrunc[] FROM employees diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec index aa89c775da4cf..a65216d02366b 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec @@ -310,19 +310,6 @@ Saniya |Kalloufi |2.1 |6.9 // end::round-result[] ; -dateParse -// tag::dateParse[] -ROW date_string = "2022-05-06" -| EVAL date = DATE_PARSE("yyyy-MM-dd", date_string) -// end::dateParse[] -; - -//tag::dateParse-result[] -date_string:keyword | date:date -2022-05-06 | 2022-05-06T00:00:00.000Z -// end::dateParse-result[] -; - docsReplace //tag::replaceString[] ROW str = "Hello World" diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/math.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/math.csv-spec index f2d5451c16316..18356a2903a43 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/math.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/math.csv-spec @@ -1492,6 +1492,19 @@ l:integer -10000 ; +leastMv +// tag::least-mv[] +ROW a = 10, b = [20, 1] +| EVAL g = LEAST(a, b) +// end::least-mv[] +; + +// tag::least-mv-result[] +a:integer | b:integer | g:integer + 10 | [20, 1] | 1 +// end::least-mv-result[] +; + greatest // tag::greatest[] @@ -1521,10 +1534,16 @@ g:integer ; greatestMv -ROW g=GREATEST([10, 4], 1); +// tag::greatest-mv[] +ROW a = 10, b = [20, 1] +| EVAL g = GREATEST(a, b) +// end::greatest-mv[] +; -g:integer -10 +// tag::greatest-mv-result[] +a:integer | b:integer | g:integer + 10 | [20, 1] | 20 +// end::greatest-mv-result[] ; leastGreatestMany diff 
--git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/null.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/null.csv-spec index 7bf3bc7613e01..430637af7a1b3 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/null.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/null.csv-spec @@ -69,6 +69,19 @@ a:null | b:keyword | COALESCE(a, b):keyword // end::coalesce-result[] ; +coalesceMv#[skip:-8.12.99,reason:expression spaces are maintained since 8.13] +// tag::coalesce-mv[] +ROW a=null, b=["1", "2"] +| EVAL COALESCE(a, b) +// end::coalesce-mv[] +; + +// tag::coalesce-mv-result[] +a:null | b:keyword | COALESCE(a, b):keyword + null |["1", "2"] | ["1", "2"] +// end::coalesce-mv-result[] +; + coalesce FROM employees | EVAL first_name = COALESCE(first_name, "X") diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/Greatest.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/Greatest.java index abc2ea85198fa..3157b6dba43d4 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/Greatest.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/Greatest.java @@ -35,7 +35,8 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.NULL; /** - * Returns the maximum value of multiple columns. + * Returns the maximum value of multiple columns. This will correctly return the + * greatest value across multivalue fields. 
*/ public class Greatest extends EsqlScalarFunction implements OptionalArgument { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Greatest", Greatest::new); @@ -48,7 +49,9 @@ public class Greatest extends EsqlScalarFunction implements OptionalArgument { + "except it is intended to run on multiple columns at once.", note = "When run on `keyword` or `text` fields, this returns the last string in alphabetical order. " + "When run on `boolean` columns this will return `true` if any values are `true`.", - examples = @Example(file = "math", tag = "greatest") + examples = { @Example(file = "math", tag = "greatest"), @Example(description = """ + Returns the maximum value from multivalued fields. + """, file = "math", tag = "greatest-mv"), } ) public Greatest( Source source, diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/Least.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/Least.java index a49fff0aa888b..9cc14a87ad6c8 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/Least.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/Least.java @@ -35,7 +35,8 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.NULL; /** - * Returns the minimum value of multiple columns. + * Returns the minimum value of multiple columns. This will correctly return the + * least value across multivalue fields. 
*/ public class Least extends EsqlScalarFunction implements OptionalArgument { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Least", Least::new); @@ -46,7 +47,9 @@ public class Least extends EsqlScalarFunction implements OptionalArgument { returnType = { "boolean", "date", "date_nanos", "double", "integer", "ip", "keyword", "long", "version" }, description = "Returns the minimum value from multiple columns. " + "This is similar to <> except it is intended to run on multiple columns at once.", - examples = @Example(file = "math", tag = "least") + examples = { @Example(file = "math", tag = "least"), @Example(description = """ + Returns the minimum value from multivalued fields. + """, file = "math", tag = "least-mv"), } ) public Least( Source source, diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateDiff.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateDiff.java index f3da7e07f09c9..0e37412310dd9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateDiff.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateDiff.java @@ -164,7 +164,9 @@ public static Part resolve(String dateTimeUnit) { examples = { @Example(file = "date", tag = "docsDateDiff"), @Example(description = """ When subtracting in calendar units - like year, month a.s.o. - only the fully elapsed units are counted. To avoid this and obtain also remainders, simply switch to the next smaller unit and do the date math accordingly. - """, file = "date", tag = "evalDateDiffYearForDocs") } + """, file = "date", tag = "evalDateDiffYearForDocs"), @Example(description = """ + If any column is multivalued, this will return a `null` result. 
+ """, file = "date", tag = "date-diff-mv") } ) public DateDiff( Source source, diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateFormat.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateFormat.java index d30e99794a44e..4201504e13af8 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateFormat.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateFormat.java @@ -54,7 +54,7 @@ public class DateFormat extends EsqlConfigurationFunction implements OptionalArg @FunctionInfo( returnType = "keyword", description = "Returns a string representation of a date, in the provided format.", - examples = @Example(file = "date", tag = "docsDateFormat") + examples = { @Example(file = "date", tag = "docsDateFormat"), @Example(file = "date", tag = "docsDateFormat"), } ) public DateFormat( Source source, diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateParse.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateParse.java index 7c38b54ed232b..52eff6e9a29ff 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateParse.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateParse.java @@ -51,7 +51,9 @@ public class DateParse extends EsqlScalarFunction implements OptionalArgument { @FunctionInfo( returnType = "date", description = "Returns a date by parsing the second argument using the format specified in the first argument.", - examples = @Example(file = "docs", tag = "dateParse") + examples = { @Example(file = "date", tag = "date-parse"), @Example(description = """ + If any column is multivalued, this will return a `null` result. 
+ """, file = "date", tag = "date-parse-mv"), } ) public DateParse( Source source, diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/Coalesce.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/Coalesce.java index a426a14b0a319..d9370ceaf834b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/Coalesce.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/Coalesce.java @@ -70,7 +70,9 @@ public class Coalesce extends EsqlScalarFunction implements OptionalArgument { "long", "version" }, description = "Returns the first of its arguments that is not null. If all arguments are null, it returns `null`.", - examples = { @Example(file = "null", tag = "coalesce") } + examples = { @Example(file = "null", tag = "coalesce"), @Example(description = """ + COALESCE keeps multivalued fields. + """, file = "null", tag = "coalesce-mv") } ) public Coalesce( Source source, From 0a13132100d19770e2e1dde7853d48dcb62ca6a9 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Mon, 3 Mar 2025 15:44:25 -0500 Subject: [PATCH 2/2] NOCOMMIT We need to decide something important before merging. 
--- .../xpack/esql/expression/function/scalar/date/DateDiff.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateDiff.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateDiff.java index 0e37412310dd9..df641965bd999 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateDiff.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/date/DateDiff.java @@ -167,7 +167,7 @@ public static Part resolve(String dateTimeUnit) { """, file = "date", tag = "evalDateDiffYearForDocs"), @Example(description = """ If any column is multivalued, this will return a `null` result. """, file = "date", tag = "date-diff-mv") } - ) + ) // NOCOMMIT can we reserve the right to change the null-and-warn behavior? public DateDiff( Source source, @Param(name = "unit", type = { "keyword", "text" }, description = "Time difference unit") Expression unit,