From ddd9eb48e79c641dffcfdf126e67aeb056863cc5 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Wed, 5 Nov 2025 09:39:33 -0800 Subject: [PATCH 01/14] Fix binning udf resolution Signed-off-by: Kai Huang --- .../sql/calcite/utils/PPLOperandTypes.java | 44 ++++++ .../binning/handlers/CountBinHandler.java | 12 +- .../binning/handlers/MinSpanBinHandler.java | 12 +- .../binning/handlers/NumericSpanHelper.java | 6 +- .../binning/handlers/RangeBinHandler.java | 13 +- .../function/BuiltinFunctionName.java | 6 + .../expression/function/PPLFuncImpTable.java | 8 + .../udf/binning/WidthBucketFunction.java | 2 +- .../rest-api-spec/test/issues/4740.yml | 144 ++++++++++++++++++ .../sql/ppl/calcite/CalcitePPLBinTest.java | 52 ++++++- 10 files changed, 285 insertions(+), 14 deletions(-) create mode 100644 integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4740.yml diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java index e2bdc2f02e3..f6188d4c6ff 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java @@ -134,6 +134,50 @@ private PPLOperandTypes() {} SqlTypeFamily.NUMERIC, SqlTypeFamily.NUMERIC, SqlTypeFamily.NUMERIC)); + public static final UDFOperandMetadata WIDTH_BUCKET_OPERAND = + UDFOperandMetadata.wrap( + (CompositeOperandTypeChecker) + OperandTypes.family( + SqlTypeFamily.NUMERIC, + SqlTypeFamily.INTEGER, + SqlTypeFamily.NUMERIC, + SqlTypeFamily.NUMERIC) + .or( + OperandTypes.family( + SqlTypeFamily.TIMESTAMP, + SqlTypeFamily.INTEGER, + SqlTypeFamily.CHARACTER, + SqlTypeFamily.TIMESTAMP)) + .or( + OperandTypes.family( + SqlTypeFamily.TIMESTAMP, + SqlTypeFamily.INTEGER, + SqlTypeFamily.TIMESTAMP, + SqlTypeFamily.TIMESTAMP)) + .or( + OperandTypes.family( + SqlTypeFamily.DATE, + SqlTypeFamily.INTEGER, + SqlTypeFamily.CHARACTER, + SqlTypeFamily.DATE)) + .or( + OperandTypes.family( + SqlTypeFamily.DATE, + SqlTypeFamily.INTEGER, + SqlTypeFamily.DATE, + SqlTypeFamily.DATE)) + .or( + OperandTypes.family( + SqlTypeFamily.TIME, + SqlTypeFamily.INTEGER, + SqlTypeFamily.CHARACTER, + SqlTypeFamily.TIME)) + .or( + OperandTypes.family( + SqlTypeFamily.TIME, + SqlTypeFamily.INTEGER, + SqlTypeFamily.TIME, + SqlTypeFamily.TIME))); public static final UDFOperandMetadata NUMERIC_NUMERIC_NUMERIC_NUMERIC_NUMERIC = UDFOperandMetadata.wrap( OperandTypes.family( diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/CountBinHandler.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/CountBinHandler.java index 7422a26f0b7..d680c453ff2 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/CountBinHandler.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/CountBinHandler.java @@ -16,7 +16,8 @@ import org.opensearch.sql.calcite.utils.binning.BinFieldValidator; import org.opensearch.sql.calcite.utils.binning.BinHandler; import org.opensearch.sql.calcite.utils.binning.BinnableField; -import org.opensearch.sql.expression.function.PPLBuiltinOperators; +import org.opensearch.sql.expression.function.BuiltinFunctionName; +import org.opensearch.sql.expression.function.PPLFuncImpTable; /** Handler for bins-based (count) binning operations. */ public class CountBinHandler implements BinHandler { @@ -49,8 +50,13 @@ public RexNode createExpression( // WIDTH_BUCKET(field_value, num_bins, data_range, max_value) RexNode numBins = context.relBuilder.literal(requestedBins); - return context.rexBuilder.makeCall( - PPLBuiltinOperators.WIDTH_BUCKET, fieldExpr, numBins, dataRange, maxValue); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.WIDTH_BUCKET, + fieldExpr, + numBins, + dataRange, + maxValue); } private RexNode convertParameter( diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/MinSpanBinHandler.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/MinSpanBinHandler.java index 16e11b7abce..1236caa942e 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/MinSpanBinHandler.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/MinSpanBinHandler.java @@ -18,7 +18,8 @@ import org.opensearch.sql.calcite.utils.binning.BinFieldValidator; import org.opensearch.sql.calcite.utils.binning.BinHandler; import org.opensearch.sql.calcite.utils.binning.BinnableField; -import org.opensearch.sql.expression.function.PPLBuiltinOperators; +import org.opensearch.sql.expression.function.BuiltinFunctionName; +import org.opensearch.sql.expression.function.PPLFuncImpTable; /** Handler for minspan-based binning operations. */ public class MinSpanBinHandler implements BinHandler { @@ -60,8 +61,13 @@ public RexNode createExpression( // MINSPAN_BUCKET(field_value, min_span, data_range, max_value) RexNode minSpanParam = context.relBuilder.literal(minspan); - return context.rexBuilder.makeCall( - PPLBuiltinOperators.MINSPAN_BUCKET, fieldExpr, minSpanParam, dataRange, maxValue); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.MINSPAN_BUCKET, + fieldExpr, + minSpanParam, + dataRange, + maxValue); } private RexNode convertParameter( diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/NumericSpanHelper.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/NumericSpanHelper.java index 76494dc0435..e14acfb729d 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/NumericSpanHelper.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/NumericSpanHelper.java @@ -7,7 +7,8 @@ import org.apache.calcite.rex.RexNode; import org.opensearch.sql.calcite.CalcitePlanContext; -import org.opensearch.sql.expression.function.PPLBuiltinOperators; +import org.opensearch.sql.expression.function.BuiltinFunctionName; +import org.opensearch.sql.expression.function.PPLFuncImpTable; /** Helper for creating numeric span expressions. */ public class NumericSpanHelper { @@ -32,6 +33,7 @@ private RexNode createExpression( RexNode fieldExpr, RexNode spanValue, CalcitePlanContext context) { // SPAN_BUCKET(field_value, span_value) - return context.rexBuilder.makeCall(PPLBuiltinOperators.SPAN_BUCKET, fieldExpr, spanValue); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.SPAN_BUCKET, fieldExpr, spanValue); } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/RangeBinHandler.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/RangeBinHandler.java index aa726cb9dbb..585e9234dc6 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/RangeBinHandler.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/RangeBinHandler.java @@ -13,7 +13,8 @@ import org.opensearch.sql.calcite.utils.binning.BinFieldValidator; import org.opensearch.sql.calcite.utils.binning.BinHandler; import org.opensearch.sql.calcite.utils.binning.BinnableField; -import org.opensearch.sql.expression.function.PPLBuiltinOperators; +import org.opensearch.sql.expression.function.BuiltinFunctionName; +import org.opensearch.sql.expression.function.PPLFuncImpTable; /** Handler for range-based binning (start/end parameters only). */ public class RangeBinHandler implements BinHandler { @@ -43,8 +44,14 @@ public RexNode createExpression( RexNode endParam = convertParameter(rangeBin.getEnd(), context, visitor); // Use RANGE_BUCKET with data bounds and user parameters - return context.rexBuilder.makeCall( - PPLBuiltinOperators.RANGE_BUCKET, fieldExpr, dataMin, dataMax, startParam, endParam); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.RANGE_BUCKET, + fieldExpr, + dataMin, + dataMax, + startParam, + endParam); } private RexNode convertParameter( diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index ced98022ca9..73c5e613415 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -59,6 +59,12 @@ public enum BuiltinFunctionName { TAN(FunctionName.of("tan")), SPAN(FunctionName.of("span")), + /** Binning Functions. */ + SPAN_BUCKET(FunctionName.of("span_bucket")), + WIDTH_BUCKET(FunctionName.of("width_bucket")), + MINSPAN_BUCKET(FunctionName.of("minspan_bucket")), + RANGE_BUCKET(FunctionName.of("range_bucket")), + /** Collection functions */ ARRAY(FunctionName.of("array")), ARRAY_LENGTH(FunctionName.of("array_length")), diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index 0c98ce2e699..cc76b3553c0 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -136,6 +136,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.MEDIAN; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MICROSECOND; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MIN; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MINSPAN_BUCKET; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MINUTE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MINUTE_OF_DAY; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MINUTE_OF_HOUR; @@ -166,6 +167,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.QUERY_STRING; import static org.opensearch.sql.expression.function.BuiltinFunctionName.RADIANS; import static org.opensearch.sql.expression.function.BuiltinFunctionName.RAND; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.RANGE_BUCKET; import static org.opensearch.sql.expression.function.BuiltinFunctionName.REDUCE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.REGEXP; import static org.opensearch.sql.expression.function.BuiltinFunctionName.REGEX_MATCH; @@ -189,6 +191,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.SIN; import static org.opensearch.sql.expression.function.BuiltinFunctionName.SINH; import static org.opensearch.sql.expression.function.BuiltinFunctionName.SPAN; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.SPAN_BUCKET; import static org.opensearch.sql.expression.function.BuiltinFunctionName.SQRT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.STDDEV_POP; import static org.opensearch.sql.expression.function.BuiltinFunctionName.STDDEV_SAMP; @@ -230,6 +233,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.WEEKDAY; import static org.opensearch.sql.expression.function.BuiltinFunctionName.WEEKOFYEAR; import static org.opensearch.sql.expression.function.BuiltinFunctionName.WEEK_OF_YEAR; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.WIDTH_BUCKET; import static org.opensearch.sql.expression.function.BuiltinFunctionName.XOR; import static org.opensearch.sql.expression.function.BuiltinFunctionName.YEAR; import static org.opensearch.sql.expression.function.BuiltinFunctionName.YEARWEEK; @@ -849,6 +853,10 @@ void populate() { registerOperator(EXPM1, PPLBuiltinOperators.EXPM1); registerOperator(RINT, PPLBuiltinOperators.RINT); registerOperator(SPAN, PPLBuiltinOperators.SPAN); + registerOperator(SPAN_BUCKET, PPLBuiltinOperators.SPAN_BUCKET); + registerOperator(WIDTH_BUCKET, PPLBuiltinOperators.WIDTH_BUCKET); + registerOperator(MINSPAN_BUCKET, PPLBuiltinOperators.MINSPAN_BUCKET); + registerOperator(RANGE_BUCKET, PPLBuiltinOperators.RANGE_BUCKET); registerOperator(E, PPLBuiltinOperators.E); registerOperator(CONV, PPLBuiltinOperators.CONV); registerOperator(MOD, PPLBuiltinOperators.MOD); diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/WidthBucketFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/WidthBucketFunction.java index c1962266248..08daf9c314b 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/WidthBucketFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/WidthBucketFunction.java @@ -66,7 +66,7 @@ public static boolean dateRelatedType(RelDataType type) { @Override public UDFOperandMetadata getOperandMetadata() { - return PPLOperandTypes.NUMERIC_NUMERIC_NUMERIC_NUMERIC; + return PPLOperandTypes.WIDTH_BUCKET_OPERAND; } public static class WidthBucketImplementor implements NotNullImplementor { diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4740.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4740.yml new file mode 100644 index 00000000000..cf580579c40 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4740.yml @@ -0,0 +1,144 @@ +setup: + - do: + indices.create: + index: test_binning_4740 + body: + mappings: + properties: + "@timestamp": + type: date + "age": + type: integer + "balance": + type: double + "name": + type: keyword + - do: + bulk: + index: test_binning_4740 + refresh: true + body: + - '{"index":{}}' + - '{"@timestamp":"2024-01-01T00:00:00.000Z","age":25,"balance":1000.0,"name":"Alice"}' + - '{"index":{}}' + - '{"@timestamp":"2024-01-01T00:05:00.000Z","age":30,"balance":2000.0,"name":"Bob"}' + - '{"index":{}}' + - '{"@timestamp":"2024-01-01T00:10:00.000Z","age":35,"balance":3000.0,"name":"Charlie"}' + - '{"index":{}}' + - '{"@timestamp":"2024-01-01T00:15:00.000Z","age":40,"balance":4000.0,"name":"David"}' + - '{"index":{}}' + - '{"@timestamp":"2024-01-01T00:20:00.000Z","age":45,"balance":5000.0,"name":"Eve"}' + +--- +"bin with numeric field using WIDTH_BUCKET - issue 4740": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_binning_4740 | bin age bins=2 | stats count() by age | sort age + + - is_true: datarows + - gte: { total: 1 } + - gte: { size: 1 } + +--- +"bin with timestamp field using WIDTH_BUCKET - issue 4740": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_binning_4740 | bin @timestamp bins=3 | stats count() by @timestamp | sort @timestamp + + - is_true: datarows + - gte: { total: 1 } + - gte: { size: 1 } + +--- +"bin with numeric span using SPAN_BUCKET - issue 4740": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_binning_4740 | bin age span=10 | stats count() by age | sort age + + - is_true: datarows + - gte: { total: 1 } + - gte: { size: 1 } + +--- +"bin with timestamp span using SPAN_BUCKET - issue 4740": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_binning_4740 | bin @timestamp span=10m | stats count() by @timestamp | sort @timestamp + + - is_true: datarows + - gte: { total: 1 } + - gte: { size: 1 } + +--- +"bin with minspan using MINSPAN_BUCKET - issue 4740": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_binning_4740 | bin balance minspan=1000 | stats count() by balance | sort balance + + - is_true: datarows + - gte: { total: 1 } + - gte: { size: 1 } + +--- +"bin with start and end using RANGE_BUCKET - issue 4740": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_binning_4740 | bin age start=20 end=50 | stats count() by age | sort age + + - is_true: datarows + - gte: { total: 1 } + - gte: { size: 1 } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java index ddb22092c99..e9c12d5954d 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java @@ -39,8 +39,7 @@ public void testBinWithBins() { String ppl = "source=EMP | bin SAL bins=10"; RelNode root = getRelNode(ppl); - // Note: WIDTH_BUCKET uses window functions without ROWS UNBOUNDED PRECEDING in the actual - // output + // Note: WIDTH_BUCKET uses window functions and now properly resolves via PPLFuncImpTable verifyLogical( root, "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], " @@ -48,6 +47,15 @@ public void testBinWithBins() { + "-(MAX($5) OVER (), MIN($5) OVER ()), " + "MAX($5) OVER ())])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `COMM`, `DEPTNO`, `WIDTH_BUCKET`(`SAL`," + + " 10, (MAX(`SAL`) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)) -" + + " (MIN(`SAL`) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING))," + + " MAX(`SAL`) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING))" + + " `SAL`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); } @Test @@ -64,6 +72,15 @@ public void testBinWithMinspan() { + "-(MAX($5) OVER (), MIN($5) OVER ()), " + "MAX($5) OVER ())])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `COMM`, `DEPTNO`," + + " `MINSPAN_BUCKET`(`SAL`, 1.000E2, (MAX(`SAL`) OVER (RANGE BETWEEN UNBOUNDED" + + " PRECEDING AND UNBOUNDED FOLLOWING)) - (MIN(`SAL`) OVER (RANGE BETWEEN UNBOUNDED" + + " PRECEDING AND UNBOUNDED FOLLOWING)), MAX(`SAL`) OVER (RANGE BETWEEN UNBOUNDED" + + " PRECEDING AND UNBOUNDED FOLLOWING)) `SAL`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); } @Test @@ -79,6 +96,37 @@ public void testBinWithStartEnd() { + "MIN($5) OVER (), MAX($5) OVER (), " + "1000, 5000)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `COMM`, `DEPTNO`, `RANGE_BUCKET`(`SAL`," + + " MIN(`SAL`) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)," + + " MAX(`SAL`) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), 1000," + + " 5000) `SAL`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testBinWithTimestampFieldUsingBins() { + String ppl = "source=products_temporal | bin SYS_START bins=10"; + RelNode root = getRelNode(ppl); + + // WIDTH_BUCKET with timestamp field - tests the fix for issue #4740 + // The third parameter (data_range) is a STRING interval, not numeric + verifyLogical( + root, + "LogicalProject(ID=[$0], SUPPLIER=[$1], SYS_END=[$3], SYS_START=[WIDTH_BUCKET($2, 10, " + + "-(MAX($2) OVER (), MIN($2) OVER ()), " + + "MAX($2) OVER ())])\n" + + " LogicalTableScan(table=[[scott, products_temporal]])\n"); + + String expectedSparkSql = + "SELECT `ID`, `SUPPLIER`, `SYS_END`, `WIDTH_BUCKET`(`SYS_START`, 10, (MAX(`SYS_START`)" + + " OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)) - (MIN(`SYS_START`)" + + " OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)), MAX(`SYS_START`)" + + " OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)) `SYS_START`\n" + + "FROM `scott`.`products_temporal`"; + verifyPPLToSparkSQL(root, expectedSparkSql); } @Test From e7507d376cf8e26ecc488968af7f187f104f9eda Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Wed, 5 Nov 2025 11:08:31 -0800 Subject: [PATCH 02/14] spotless fix Signed-off-by: Kai Huang --- .../org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java index e9c12d5954d..09401b6ea0a 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java @@ -121,8 +121,8 @@ public void testBinWithTimestampFieldUsingBins() { + " LogicalTableScan(table=[[scott, products_temporal]])\n"); String expectedSparkSql = - "SELECT `ID`, `SUPPLIER`, `SYS_END`, `WIDTH_BUCKET`(`SYS_START`, 10, (MAX(`SYS_START`)" - + " OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)) - (MIN(`SYS_START`)" + "SELECT `ID`, `SUPPLIER`, `SYS_END`, `WIDTH_BUCKET`(`SYS_START`, 10, (MAX(`SYS_START`) OVER" + + " (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)) - (MIN(`SYS_START`)" + " OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)), MAX(`SYS_START`)" + " OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)) `SYS_START`\n" + "FROM `scott`.`products_temporal`"; From fb38725a703b51c5efc309b5ca887dc8a37cfb07 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Wed, 5 Nov 2025 11:17:59 -0800 Subject: [PATCH 03/14] update tes Signed-off-by: Kai Huang --- .../java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java index 09401b6ea0a..a482e2f2895 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java @@ -111,7 +111,7 @@ public void testBinWithTimestampFieldUsingBins() { String ppl = "source=products_temporal | bin SYS_START bins=10"; RelNode root = getRelNode(ppl); - // WIDTH_BUCKET with timestamp field - tests the fix for issue #4740 + // WIDTH_BUCKET with timestamp field // The third parameter (data_range) is a STRING interval, not numeric verifyLogical( root, From d6b35d519a334eabd9926deadbdad729a53e7982 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Wed, 5 Nov 2025 13:43:07 -0800 Subject: [PATCH 04/14] add comments Signed-off-by: Kai Huang --- .../sql/calcite/utils/PPLOperandTypes.java | 59 +++++++++++++++++-- 1 file changed, 53 insertions(+), 6 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java index f6188d4c6ff..5ef831e7be5 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java @@ -134,50 +134,97 @@ private PPLOperandTypes() {} SqlTypeFamily.NUMERIC, SqlTypeFamily.NUMERIC, SqlTypeFamily.NUMERIC)); + + /** + * Operand type checker for WIDTH_BUCKET function. + * + *

WIDTH_BUCKET(field_value, num_bins, data_range, max_value) requires multiple type variants + * because the data_range parameter (result of MAX(field) - MIN(field)) has different types + * depending on the type system in use: + * + *

1. Numeric fields: (NUMERIC, INTEGER, NUMERIC, NUMERIC) - Standard binning for numeric data + * (e.g., `bin age bins=10`) + * + *

2. Timestamp fields with OpenSearch type system: (TIMESTAMP, INTEGER, STRING, TIMESTAMP) - + * OpenSearchTypeFactory treats (TIMESTAMP - TIMESTAMP) as INTERVAL, which is represented as + * VARCHAR/STRING (e.g., "PT5H" for 5 hours). This variant is used in: + * + *

    + *
  • Production queries with real OpenSearch data + *
  • Integration tests (e.g., CalciteBinCommandIT with pushdown enabled) + *
+ * + *

3. Timestamp fields with Calcite default type system: (TIMESTAMP, INTEGER, TIMESTAMP, + * TIMESTAMP) - Calcite's SCOTT schema TypeFactory treats (TIMESTAMP - TIMESTAMP) as returning a + * TIMESTAMP value representing the duration. This variant is used in: + * + *

    + *
  • Unit tests with CalciteAssert.SchemaSpec (e.g., CalcitePPLBinTest using + * SCOTT_WITH_TEMPORAL) + *
+ * + *

The same code (SqlStdOperatorTable.MINUS on two TIMESTAMP values) produces different result + * types depending on which TypeFactory is active, requiring both variants to support all use + * cases. + */ public static final UDFOperandMetadata WIDTH_BUCKET_OPERAND = UDFOperandMetadata.wrap( (CompositeOperandTypeChecker) + // 1. Numeric fields: bin age span=10 OperandTypes.family( SqlTypeFamily.NUMERIC, SqlTypeFamily.INTEGER, SqlTypeFamily.NUMERIC, SqlTypeFamily.NUMERIC) + // 2. Timestamp fields with OpenSearch type system + // Used in: Production + Integration tests (CalciteBinCommandIT) .or( OperandTypes.family( SqlTypeFamily.TIMESTAMP, SqlTypeFamily.INTEGER, - SqlTypeFamily.CHARACTER, + SqlTypeFamily.CHARACTER, // TIMESTAMP - TIMESTAMP = INTERVAL (as STRING) SqlTypeFamily.TIMESTAMP)) + // 3. Timestamp fields with Calcite SCOTT schema + // Used in: Unit tests (CalcitePPLBinTest) .or( OperandTypes.family( SqlTypeFamily.TIMESTAMP, SqlTypeFamily.INTEGER, - SqlTypeFamily.TIMESTAMP, + SqlTypeFamily.TIMESTAMP, // TIMESTAMP - TIMESTAMP = TIMESTAMP (duration) SqlTypeFamily.TIMESTAMP)) + // DATE field with OpenSearch type system + // Used in: Production + Integration tests (CalciteBinCommandIT) .or( OperandTypes.family( SqlTypeFamily.DATE, SqlTypeFamily.INTEGER, - SqlTypeFamily.CHARACTER, + SqlTypeFamily.CHARACTER, // DATE - DATE = INTERVAL (as STRING) SqlTypeFamily.DATE)) + // DATE field with Calcite SCOTT schema + // Used in: Unit tests (CalcitePPLBinTest) .or( OperandTypes.family( SqlTypeFamily.DATE, SqlTypeFamily.INTEGER, - SqlTypeFamily.DATE, + SqlTypeFamily.DATE, // DATE - DATE = DATE (duration) SqlTypeFamily.DATE)) + // TIME field with OpenSearch type system + // Used in: Production + Integration tests (CalciteBinCommandIT) .or( OperandTypes.family( SqlTypeFamily.TIME, SqlTypeFamily.INTEGER, - SqlTypeFamily.CHARACTER, + SqlTypeFamily.CHARACTER, // TIME - TIME = INTERVAL (as STRING) SqlTypeFamily.TIME)) + // TIME field with Calcite SCOTT schema + // Used in: Unit tests (CalcitePPLBinTest) .or( OperandTypes.family( SqlTypeFamily.TIME, SqlTypeFamily.INTEGER, - SqlTypeFamily.TIME, + SqlTypeFamily.TIME, // TIME - TIME = TIME (duration) SqlTypeFamily.TIME))); + public static final UDFOperandMetadata NUMERIC_NUMERIC_NUMERIC_NUMERIC_NUMERIC = UDFOperandMetadata.wrap( OperandTypes.family( From cc941d70faaf6237f4ab38082d60ba0a34526adb Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Wed, 5 Nov 2025 14:03:44 -0800 Subject: [PATCH 05/14] removal Signed-off-by: Kai Huang --- .../sql/calcite/utils/PPLOperandTypes.java | 32 ------------------- 1 file changed, 32 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java index 5ef831e7be5..95a683c1d83 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java @@ -135,38 +135,6 @@ private PPLOperandTypes() {} SqlTypeFamily.NUMERIC, SqlTypeFamily.NUMERIC)); - /** - * Operand type checker for WIDTH_BUCKET function. - * - *

WIDTH_BUCKET(field_value, num_bins, data_range, max_value) requires multiple type variants - * because the data_range parameter (result of MAX(field) - MIN(field)) has different types - * depending on the type system in use: - * - *

1. Numeric fields: (NUMERIC, INTEGER, NUMERIC, NUMERIC) - Standard binning for numeric data - * (e.g., `bin age bins=10`) - * - *

2. Timestamp fields with OpenSearch type system: (TIMESTAMP, INTEGER, STRING, TIMESTAMP) - - * OpenSearchTypeFactory treats (TIMESTAMP - TIMESTAMP) as INTERVAL, which is represented as - * VARCHAR/STRING (e.g., "PT5H" for 5 hours). This variant is used in: - * - *

    - *
  • Production queries with real OpenSearch data - *
  • Integration tests (e.g., CalciteBinCommandIT with pushdown enabled) - *
- * - *

3. Timestamp fields with Calcite default type system: (TIMESTAMP, INTEGER, TIMESTAMP, - * TIMESTAMP) - Calcite's SCOTT schema TypeFactory treats (TIMESTAMP - TIMESTAMP) as returning a - * TIMESTAMP value representing the duration. This variant is used in: - * - *

    - *
  • Unit tests with CalciteAssert.SchemaSpec (e.g., CalcitePPLBinTest using - * SCOTT_WITH_TEMPORAL) - *
- * - *

The same code (SqlStdOperatorTable.MINUS on two TIMESTAMP values) produces different result - * types depending on which TypeFactory is active, requiring both variants to support all use - * cases. - */ public static final UDFOperandMetadata WIDTH_BUCKET_OPERAND = UDFOperandMetadata.wrap( (CompositeOperandTypeChecker) From cc2232211d234c05151c8ae31233ca38479ef992 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Wed, 5 Nov 2025 17:03:39 -0800 Subject: [PATCH 06/14] update yaml test Signed-off-by: Kai Huang --- .../rest-api-spec/test/issues/4740.yml | 32 ++++++++----------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4740.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4740.yml index cf580579c40..1f94649e835 100644 --- a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4740.yml +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4740.yml @@ -42,11 +42,10 @@ setup: Content-Type: 'application/json' ppl: body: - query: source=test_binning_4740 | bin age bins=2 | stats count() by age | sort age + query: source=test_binning_4740 | bin age bins=3 | stats count() by age | sort age - - is_true: datarows - - gte: { total: 1 } - - gte: { size: 1 } + - match: { "schema": [ { "name": "count()", "type": "bigint" }, { "name": "age", "type": "string" } ] } + - match: { "datarows": [ [ 1, "20-30" ], [ 2, "30-40" ], [ 2, "40-50" ] ] } --- "bin with timestamp field using WIDTH_BUCKET - issue 4740": @@ -63,9 +62,8 @@ setup: body: query: source=test_binning_4740 | bin @timestamp bins=3 | stats count() by @timestamp | sort @timestamp - - is_true: datarows - - gte: { total: 1 } - - gte: { size: 1 } + - match: { "schema": [ { "name": "count()", "type": "bigint" }, { "name": "@timestamp", "type": "timestamp" } ] } + - match: { "datarows": [ [ 2, "2024-01-01 00:00:00" ], [ 2, "2024-01-01 00:10:00" ], [ 1, "2024-01-01 00:20:00" ] ] } --- "bin with numeric span using SPAN_BUCKET - issue 4740": @@ -82,9 +80,8 @@ setup: body: query: source=test_binning_4740 | bin age span=10 | stats count() by age | sort age - - is_true: datarows - - gte: { total: 1 } - - gte: { size: 1 } + - match: { "schema": [ { "name": "count()", "type": "bigint" }, { "name": "age", "type": "string" } ] } + - match: { "datarows": [ [ 1, "20-30" ], [ 2, "30-40" ], [ 2, "40-50" ] ] } --- "bin with timestamp span using SPAN_BUCKET - issue 4740": @@ -101,9 +98,8 @@ setup: body: query: source=test_binning_4740 | bin @timestamp span=10m | stats count() by @timestamp | sort @timestamp - - is_true: datarows - - gte: { total: 1 } - - gte: { size: 1 } + - match: { "schema": [ { "name": "count()", "type": "bigint" }, { "name": "@timestamp", "type": "timestamp" } ] } + - match: { "datarows": [ [ 2, "2024-01-01 00:00:00" ], [ 2, "2024-01-01 00:10:00" ], [ 1, "2024-01-01 00:20:00" ] ] } --- "bin with minspan using MINSPAN_BUCKET - issue 4740": @@ -120,9 +116,8 @@ setup: body: query: source=test_binning_4740 | bin balance minspan=1000 | stats count() by balance | sort balance - - is_true: datarows - - gte: { total: 1 } - - gte: { size: 1 } + - match: { "schema": [ { "name": "count()", "type": "bigint" }, { "name": "balance", "type": "string" } ] } + - match: { "datarows": [ [ 1, "1000-2000" ], [ 1, "2000-3000" ], [ 1, "3000-4000" ], [ 1, "4000-5000" ], [ 1, "5000-6000" ] ] } --- "bin with start and end using RANGE_BUCKET - issue 4740": @@ -139,6 +134,5 @@ setup: body: query: source=test_binning_4740 | bin age start=20 end=50 | stats count() by age | sort age - - is_true: datarows - - gte: { total: 1 } - - gte: { size: 1 } + - match: { "schema": [ { "name": "count()", "type": "bigint" }, { "name": "age", "type": "string" } ] } + - match: { "datarows": [ [ 1, "20-30" ], [ 2, "30-40" ], [ 2, "40-50" ] ] } From a93dcf8b84b0b394e69baebfda00e7111ba6677e Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Thu, 6 Nov 2025 09:43:17 -0800 Subject: [PATCH 07/14] rerun Signed-off-by: Kai Huang --- .../org/opensearch/sql/calcite/utils/PPLOperandTypes.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java index 95a683c1d83..dfdebb233c9 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java @@ -158,7 +158,7 @@ private PPLOperandTypes() {} OperandTypes.family( SqlTypeFamily.TIMESTAMP, SqlTypeFamily.INTEGER, - SqlTypeFamily.TIMESTAMP, // TIMESTAMP - TIMESTAMP = TIMESTAMP (duration) + SqlTypeFamily.TIMESTAMP, // TIMESTAMP - TIMESTAMP = TIMESTAMP SqlTypeFamily.TIMESTAMP)) // DATE field with OpenSearch type system // Used in: Production + Integration tests (CalciteBinCommandIT) @@ -174,7 +174,7 @@ private PPLOperandTypes() {} OperandTypes.family( SqlTypeFamily.DATE, SqlTypeFamily.INTEGER, - SqlTypeFamily.DATE, // DATE - DATE = DATE (duration) + SqlTypeFamily.DATE, // DATE - DATE = DATE SqlTypeFamily.DATE)) // TIME field with OpenSearch type system // Used in: Production + Integration tests (CalciteBinCommandIT) @@ -190,7 +190,7 @@ private PPLOperandTypes() {} OperandTypes.family( SqlTypeFamily.TIME, SqlTypeFamily.INTEGER, - SqlTypeFamily.TIME, // TIME - TIME = TIME (duration) + SqlTypeFamily.TIME, // TIME - TIME = TIME SqlTypeFamily.TIME))); public static final UDFOperandMetadata NUMERIC_NUMERIC_NUMERIC_NUMERIC_NUMERIC = From d0ddb196fdf969c26c18184d4af3c2058798c541 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Thu, 6 Nov 2025 16:28:42 -0800 Subject: [PATCH 08/14] update support for type coercion Signed-off-by: Kai Huang --- .../calcite/utils/binning/BinnableField.java | 22 +-- .../binning/handlers/CountBinHandler.java | 5 +- .../expression/function/PPLFuncImpTable.java | 5 + .../calcite/remote/CalciteBinCommandIT.java | 139 ------------------ .../rest-api-spec/test/issues/4740.yml | 48 +----- .../sql/ppl/calcite/CalcitePPLBinTest.java | 31 ---- 6 files changed, 18 insertions(+), 232 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/BinnableField.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/BinnableField.java index c8c73ce3a99..d8a8b081eb7 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/BinnableField.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/BinnableField.java @@ -9,14 +9,10 @@ import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rex.RexNode; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; -import org.opensearch.sql.exception.SemanticCheckException; /** - * Represents a validated field that supports binning operations. The existence of this class - * guarantees that validation has been run - the field is either numeric or time-based. - * - *

This design encodes validation in the type system, preventing downstream code from forgetting - * to validate or running validation multiple times. + * Represents a field that supports binning operations. Supports numeric, time-based, and string + * fields. Type coercion for string fields is handled automatically by Calcite's type system. */ @Getter public class BinnableField { @@ -27,13 +23,12 @@ public class BinnableField { private final boolean isNumeric; /** - * Creates a validated BinnableField. Throws SemanticCheckException if the field is neither - * numeric nor time-based. + * Creates a BinnableField for binning operations. Supports numeric, time-based, and string + * fields. Type coercion for string fields is handled by Calcite's type system. * * @param fieldExpr The Rex expression for the field * @param fieldType The relational data type of the field * @param fieldName The name of the field (for error messages) - * @throws SemanticCheckException if the field is neither numeric nor time-based */ public BinnableField(RexNode fieldExpr, RelDataType fieldType, String fieldName) { this.fieldExpr = fieldExpr; @@ -42,15 +37,6 @@ public BinnableField(RexNode fieldExpr, RelDataType fieldType, String fieldName) this.isTimeBased = OpenSearchTypeFactory.isTimeBasedType(fieldType); this.isNumeric = OpenSearchTypeFactory.isNumericType(fieldType); - - // Validation: field must be either numeric or time-based - if (!isNumeric && !isTimeBased) { - throw new SemanticCheckException( - String.format( - "Cannot apply binning: field '%s' is non-numeric and not time-related, expected" - + " numeric or time-related type", - fieldName)); - } } /** diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/CountBinHandler.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/CountBinHandler.java index d680c453ff2..bfc91a9a2de 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/CountBinHandler.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/CountBinHandler.java @@ -6,7 +6,6 @@ package org.opensearch.sql.calcite.utils.binning.handlers; import org.apache.calcite.rex.RexNode; -import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.tree.Bin; import org.opensearch.sql.ast.tree.CountBin; @@ -41,7 +40,9 @@ public RexNode createExpression( // Calculate data range using window functions RexNode minValue = context.relBuilder.min(fieldExpr).over().toRex(); RexNode maxValue = context.relBuilder.max(fieldExpr).over().toRex(); - RexNode dataRange = context.relBuilder.call(SqlStdOperatorTable.MINUS, maxValue, minValue); + RexNode dataRange = + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.SUBTRACT, maxValue, minValue); // Convert start/end parameters RexNode startValue = convertParameter(countBin.getStart(), context); diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index cc76b3553c0..5cbbc209a3f 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -711,6 +711,11 @@ void populate() { SUBTRACT, SqlStdOperatorTable.MINUS, PPLTypeChecker.wrapFamily((FamilyOperandTypeChecker) OperandTypes.NUMERIC_NUMERIC)); + // Add DATETIME-DATETIME variant for timestamp binning support + registerOperator( + SUBTRACT, + SqlStdOperatorTable.MINUS, + PPLTypeChecker.family(SqlTypeFamily.DATETIME, SqlTypeFamily.DATETIME)); registerOperator(MULTIPLY, SqlStdOperatorTable.MULTIPLY); registerOperator(MULTIPLYFUNCTION, SqlStdOperatorTable.MULTIPLY); registerOperator(TRUNCATE, SqlStdOperatorTable.TRUNCATE); diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java index ef8abc3cba0..1b4225958b3 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java @@ -507,145 +507,6 @@ public void testBinWithNonExistentField() { errorMessage.contains("non_existent_field") || errorMessage.contains("not found")); } - @Test - public void testBinWithMinspanOnNonNumericField() { - // Test that bin command with minspan throws clear error for non-numeric field - ResponseException exception = - assertThrows( - ResponseException.class, - () -> { - executeQuery( - String.format( - "source=%s | bin firstname minspan=10 | head 1", TEST_INDEX_ACCOUNT)); - }); - - // Get the full error message - String errorMessage = exception.getMessage(); - - // Verify the error message is clear and specific - String expectedMessage = - "Cannot apply binning: field 'firstname' is non-numeric and not time-related, expected" - + " numeric or time-related type"; - assertTrue( - "Error message should contain: '" + expectedMessage + "'", - errorMessage.contains(expectedMessage)); - } - - @Test - public void testBinWithSpanOnNonNumericField() { - // Test that bin command with span throws clear error for non-numeric field - ResponseException exception = - assertThrows( - ResponseException.class, - () -> { - executeQuery( - String.format("source=%s | bin lastname span=5 | head 1", TEST_INDEX_ACCOUNT)); - }); - - // Get the full error message - String errorMessage = exception.getMessage(); - - // Verify the error message is clear and specific - String expectedMessage = - "Cannot apply binning: field 'lastname' is non-numeric and not time-related, expected" - + " numeric or time-related type"; - assertTrue( - "Error message should contain: '" + expectedMessage + "'", - errorMessage.contains(expectedMessage)); - } - - @Test - public void testBinWithBinsOnNonNumericField() { - // Test that bin command with bins throws clear error for non-numeric field - ResponseException exception = - assertThrows( - ResponseException.class, - () -> { - executeQuery( - String.format("source=%s | bin state bins=10 | head 1", TEST_INDEX_ACCOUNT)); - }); - - // Get the full error message - String errorMessage = exception.getMessage(); - - // Verify the error message is clear and specific - String expectedMessage = - "Cannot apply binning: field 'state' is non-numeric and not time-related, expected numeric" - + " or time-related type"; - assertTrue( - "Error message should contain: '" + expectedMessage + "'", - errorMessage.contains(expectedMessage)); - } - - @Test - public void testBinWithStartEndOnNonNumericField() { - // Test that bin command with start/end throws clear error for non-numeric field - ResponseException exception = - assertThrows( - ResponseException.class, - () -> { - executeQuery( - String.format( - "source=%s | bin city start=0 end=100 | head 1", TEST_INDEX_ACCOUNT)); - }); - - // Get the full error message - String errorMessage = exception.getMessage(); - - // Verify the error message is clear and specific - String expectedMessage = - "Cannot apply binning: field 'city' is non-numeric and not time-related, expected numeric" - + " or time-related type"; - assertTrue( - "Error message should contain: '" + expectedMessage + "'", - errorMessage.contains(expectedMessage)); - } - - @Test - public void testBinDefaultOnNonNumericField() { - // Test that default bin (no parameters) throws clear error for non-numeric field - ResponseException exception = - assertThrows( - ResponseException.class, - () -> { - executeQuery(String.format("source=%s | bin email | head 1", TEST_INDEX_ACCOUNT)); - }); - - // Get the full error message - String errorMessage = exception.getMessage(); - - // Verify the error message is clear and specific - String expectedMessage = - "Cannot apply binning: field 'email' is non-numeric and not time-related, expected numeric" - + " or time-related type"; - assertTrue( - "Error message should contain: '" + expectedMessage + "'", - errorMessage.contains(expectedMessage)); - } - - @Test - public void testBinLogSpanOnNonNumericField() { - // Test that bin command with log span throws clear error for non-numeric field - ResponseException exception = - assertThrows( - ResponseException.class, - () -> { - executeQuery( - String.format("source=%s | bin gender span=log10 | head 1", TEST_INDEX_ACCOUNT)); - }); - - // Get the full error message - String errorMessage = exception.getMessage(); - - // Verify the error message is clear and specific - String expectedMessage = - "Cannot apply binning: field 'gender' is non-numeric and not time-related, expected numeric" - + " or time-related type"; - assertTrue( - "Error message should contain: '" + expectedMessage + "'", - errorMessage.contains(expectedMessage)); - } - @Test public void testBinSpanWithStartEndNeverShrinkRange() throws IOException { JSONObject result = diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4740.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4740.yml index 1f94649e835..a5d5a171b67 100644 --- a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4740.yml +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4740.yml @@ -8,7 +8,7 @@ setup: "@timestamp": type: date "age": - type: integer + type: keyword "balance": type: double "name": @@ -19,15 +19,15 @@ setup: refresh: true body: - '{"index":{}}' - - '{"@timestamp":"2024-01-01T00:00:00.000Z","age":25,"balance":1000.0,"name":"Alice"}' + - '{"@timestamp":"2024-01-01T00:00:00.000Z","age":"25","balance":1000.0,"name":"Alice"}' - '{"index":{}}' - - '{"@timestamp":"2024-01-01T00:05:00.000Z","age":30,"balance":2000.0,"name":"Bob"}' + - '{"@timestamp":"2024-01-01T00:05:00.000Z","age":"30","balance":2000.0,"name":"Bob"}' - '{"index":{}}' - - '{"@timestamp":"2024-01-01T00:10:00.000Z","age":35,"balance":3000.0,"name":"Charlie"}' + - '{"@timestamp":"2024-01-01T00:10:00.000Z","age":"35","balance":3000.0,"name":"Charlie"}' - '{"index":{}}' - - '{"@timestamp":"2024-01-01T00:15:00.000Z","age":40,"balance":4000.0,"name":"David"}' + - '{"@timestamp":"2024-01-01T00:15:00.000Z","age":"40","balance":4000.0,"name":"David"}' - '{"index":{}}' - - '{"@timestamp":"2024-01-01T00:20:00.000Z","age":45,"balance":5000.0,"name":"Eve"}' + - '{"@timestamp":"2024-01-01T00:20:00.000Z","age":"45","balance":5000.0,"name":"Eve"}' --- "bin with numeric field using WIDTH_BUCKET - issue 4740": @@ -47,24 +47,6 @@ setup: - match: { "schema": [ { "name": "count()", "type": "bigint" }, { "name": "age", "type": "string" } ] } - match: { "datarows": [ [ 1, "20-30" ], [ 2, "30-40" ], [ 2, "40-50" ] ] } ---- -"bin with timestamp field using WIDTH_BUCKET - issue 4740": - - skip: - features: - - headers - - allowed_warnings - - do: - allowed_warnings: - - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' - headers: - Content-Type: 'application/json' - ppl: - body: - query: source=test_binning_4740 | bin @timestamp bins=3 | stats count() by @timestamp | sort @timestamp - - - match: { "schema": [ { "name": "count()", "type": "bigint" }, { "name": "@timestamp", "type": "timestamp" } ] } - - match: { "datarows": [ [ 2, "2024-01-01 00:00:00" ], [ 2, "2024-01-01 00:10:00" ], [ 1, "2024-01-01 00:20:00" ] ] } - --- "bin with numeric span using SPAN_BUCKET - issue 4740": - skip: @@ -83,24 +65,6 @@ setup: - match: { "schema": [ { "name": "count()", "type": "bigint" }, { "name": "age", "type": "string" } ] } - match: { "datarows": [ [ 1, "20-30" ], [ 2, "30-40" ], [ 2, "40-50" ] ] } ---- -"bin with timestamp span using SPAN_BUCKET - issue 4740": - - skip: - features: - - headers - - allowed_warnings - - do: - allowed_warnings: - - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' - headers: - Content-Type: 'application/json' - ppl: - body: - query: source=test_binning_4740 | bin @timestamp span=10m | stats count() by @timestamp | sort @timestamp - - - match: { "schema": [ { "name": "count()", "type": "bigint" }, { "name": "@timestamp", "type": "timestamp" } ] } - - match: { "datarows": [ [ 2, "2024-01-01 00:00:00" ], [ 2, "2024-01-01 00:10:00" ], [ 1, "2024-01-01 00:20:00" ] ] } - --- "bin with minspan using MINSPAN_BUCKET - issue 4740": - skip: diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java index a482e2f2895..01ee5c6694a 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java @@ -8,7 +8,6 @@ import org.apache.calcite.rel.RelNode; import org.apache.calcite.test.CalciteAssert; import org.junit.Test; -import org.opensearch.sql.exception.SemanticCheckException; public class CalcitePPLBinTest extends CalcitePPLAbstractTest { @@ -167,34 +166,4 @@ public void testBinWithAligntime() { + " 3600 / 1) * 3600) `SYS_START`\n" + "FROM `scott`.`products_temporal`"); } - - @Test(expected = SemanticCheckException.class) - public void testBinWithMinspanOnNonNumericField() { - String ppl = "source=EMP | bin ENAME minspan=10"; - getRelNode(ppl); // Should throw SemanticCheckException - } - - @Test(expected = SemanticCheckException.class) - public void testBinWithSpanOnNonNumericField() { - String ppl = "source=EMP | bin JOB span=5"; - getRelNode(ppl); // Should throw SemanticCheckException - } - - @Test(expected = SemanticCheckException.class) - public void testBinWithBinsOnNonNumericField() { - String ppl = "source=EMP | bin ENAME bins=10"; - getRelNode(ppl); // Should throw SemanticCheckException - } - - @Test(expected = SemanticCheckException.class) - public void testBinWithStartEndOnNonNumericField() { - String ppl = "source=EMP | bin JOB start=1 end=10"; - getRelNode(ppl); // Should throw SemanticCheckException - } - - @Test(expected = SemanticCheckException.class) - public void testBinDefaultOnNonNumericField() { - String ppl = "source=EMP | bin ENAME"; - getRelNode(ppl); // Should throw SemanticCheckException - } } From 5f5c9e9d791fa6d72b7c6a5e57379fd809e17c42 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Thu, 6 Nov 2025 16:38:00 -0800 Subject: [PATCH 09/14] update doc Signed-off-by: Kai Huang --- docs/user/ppl/cmd/bin.rst | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/docs/user/ppl/cmd/bin.rst b/docs/user/ppl/cmd/bin.rst index 1ebdc3f897e..11a338b598c 100644 --- a/docs/user/ppl/cmd/bin.rst +++ b/docs/user/ppl/cmd/bin.rst @@ -16,13 +16,13 @@ bin Description ============ -| The ``bin`` command groups numeric values into buckets of equal intervals, making it useful for creating histograms and analyzing data distribution. It takes a numeric field and generates a new field with values that represent the lower bound of each bucket. +| The ``bin`` command groups numeric values into buckets of equal intervals, making it useful for creating histograms and analyzing data distribution. It takes a numeric or time-based field and generates a new field with values that represent the lower bound of each bucket. String fields containing numeric values are automatically converted to numeric types through type coercion. Syntax ============ bin [span=] [minspan=] [bins=] [aligntime=(earliest | latest | )] [start=] [end=] -* field: mandatory. The numeric field to bin. +* field: mandatory. The field to bin. Accepts numeric fields, time-based fields, or string fields containing numeric values (automatically coerced to numeric type). * span: optional. The interval size for each bin. Cannot be used with bins or minspan parameters. * minspan: optional. The minimum interval size for automatic span calculation. Cannot be used with span or bins parameters. * bins: optional. The maximum number of equal-width bins to create. Cannot be used with span or minspan parameters. @@ -530,3 +530,22 @@ PPL query:: | 28.0-29.0 | 13 | +-----------+----------------+ + +Example 20: Type coercion with string fields +============================================== + +The ``bin`` command automatically converts string fields containing numeric values to numeric types, enabling binning operations on keyword/text fields. + +PPL query:: + + os> source=accounts | eval age_str = CAST(age AS STRING) | bin age_str bins=3 | stats count() by age_str | sort age_str; + fetched rows / total rows = 2/2 + +---------+---------+ + | count() | age_str | + |---------+---------| + | 1 | 20-30 | + | 3 | 30-40 | + +---------+---------+ + +In this example, even though ``age_str`` is a string field (keyword type), the bin command automatically coerces it to numeric values for binning. + From ed6d2089da9c03a4ce3d091ae431f56044742073 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Thu, 6 Nov 2025 17:15:45 -0800 Subject: [PATCH 10/14] enhance error handling and doc Signed-off-by: Kai Huang --- .../calcite/utils/OpenSearchTypeFactory.java | 14 ++++++++++---- .../calcite/utils/binning/BinnableField.java | 16 ++++++++++------ .../udf/binning/MinspanBucketFunction.java | 9 ++++++++- .../udf/binning/RangeBucketFunction.java | 9 ++++++++- .../udf/binning/SpanBucketFunction.java | 9 ++++++++- .../udf/binning/WidthBucketFunction.java | 9 ++++++++- docs/user/ppl/cmd/bin.rst | 8 ++++---- .../calcite/remote/CalciteBinCommandIT.java | 18 ++++++++++++++++++ .../sql/ppl/calcite/CalcitePPLBinTest.java | 15 +++++++++++++++ 9 files changed, 89 insertions(+), 18 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java index 914f5acdfc7..dc386fd33ad 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java @@ -338,12 +338,13 @@ public static boolean isUserDefinedType(RelDataType type) { } /** - * Checks if the RelDataType represents a numeric field. Supports both standard SQL numeric types - * (INTEGER, BIGINT, SMALLINT, TINYINT, FLOAT, DOUBLE, DECIMAL, REAL) and OpenSearch UDT numeric - * types. + * Checks if the RelDataType represents a numeric field or a string field that can be coerced to + * numeric. Supports standard SQL numeric types (INTEGER, BIGINT, SMALLINT, TINYINT, FLOAT, + * DOUBLE, DECIMAL, REAL), OpenSearch UDT numeric types, and string types (VARCHAR, CHAR) which + * can contain numeric values and will be automatically coerced. * * @param fieldType the RelDataType to check - * @return true if the type is numeric, false otherwise + * @return true if the type is numeric or a coercible string type, false otherwise */ public static boolean isNumericType(RelDataType fieldType) { // Check standard SQL numeric types @@ -359,6 +360,11 @@ public static boolean isNumericType(RelDataType fieldType) { return true; } + // Check string types (VARCHAR, CHAR) which can be coerced to numeric for binning + if (sqlType == SqlTypeName.VARCHAR || sqlType == SqlTypeName.CHAR) { + return true; + } + // Check for OpenSearch UDT numeric types if (isUserDefinedType(fieldType)) { AbstractExprRelDataType exprType = (AbstractExprRelDataType) fieldType; diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/BinnableField.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/BinnableField.java index d8a8b081eb7..a4e924b631c 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/BinnableField.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/BinnableField.java @@ -9,11 +9,9 @@ import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rex.RexNode; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; +import org.opensearch.sql.exception.SemanticCheckException; -/** - * Represents a field that supports binning operations. Supports numeric, time-based, and string - * fields. Type coercion for string fields is handled automatically by Calcite's type system. - */ +/** Represents a field that supports binning operations. */ @Getter public class BinnableField { private final RexNode fieldExpr; @@ -23,12 +21,12 @@ public class BinnableField { private final boolean isNumeric; /** - * Creates a BinnableField for binning operations. Supports numeric, time-based, and string - * fields. Type coercion for string fields is handled by Calcite's type system. + * Creates a BinnableField. Validates that the field type is compatible with binning operations. * * @param fieldExpr The Rex expression for the field * @param fieldType The relational data type of the field * @param fieldName The name of the field (for error messages) + * @throws SemanticCheckException if the field type is not supported for binning */ public BinnableField(RexNode fieldExpr, RelDataType fieldType, String fieldName) { this.fieldExpr = fieldExpr; @@ -37,6 +35,12 @@ public BinnableField(RexNode fieldExpr, RelDataType fieldType, String fieldName) this.isTimeBased = OpenSearchTypeFactory.isTimeBasedType(fieldType); this.isNumeric = OpenSearchTypeFactory.isNumericType(fieldType); + + // Reject truly unsupported types (e.g., BOOLEAN, ARRAY, MAP) + if (!isNumeric && !isTimeBased) { + throw new SemanticCheckException( + String.format("Cannot apply binning to field '%s': unsupported type", fieldName)); + } } /** diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java index 11e1a33afbd..d0e24b2d003 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java @@ -73,7 +73,14 @@ public Expression implement( /** Minspan bucket calculation. */ public static String calculateMinspanBucket( Number fieldValue, Number minSpanParam, Number dataRange, Number maxValue) { - if (fieldValue == null || minSpanParam == null || dataRange == null || maxValue == null) { + // Detect NULL from failed type coercion (e.g., CAST("abc" AS DOUBLE) returns NULL) + if (dataRange == null || maxValue == null) { + throw new IllegalArgumentException( + "Cannot apply binning: field contains non-numeric string values. " + + "Only numeric types or string types with valid numeric values are supported."); + } + + if (fieldValue == null || minSpanParam == null) { return null; } diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/RangeBucketFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/RangeBucketFunction.java index a8f2625b20f..a1671ef1d9a 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/RangeBucketFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/RangeBucketFunction.java @@ -79,7 +79,14 @@ public Expression implement( /** Range bucket calculation with expansion algorithm and magnitude-based width. */ public static String calculateRangeBucket( Number fieldValue, Number dataMin, Number dataMax, Number startParam, Number endParam) { - if (fieldValue == null || dataMin == null || dataMax == null) { + // Detect NULL from failed type coercion (e.g., CAST("abc" AS DOUBLE) returns NULL) + if (dataMin == null || dataMax == null) { + throw new IllegalArgumentException( + "Cannot apply binning: field contains non-numeric string values. " + + "Only numeric types or string types with valid numeric values are supported."); + } + + if (fieldValue == null) { return null; } diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/SpanBucketFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/SpanBucketFunction.java index 6970e485525..3330ad44137 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/SpanBucketFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/SpanBucketFunction.java @@ -66,7 +66,14 @@ public Expression implement( /** Span bucket calculation. */ public static String calculateSpanBucket(Number fieldValue, Number spanParam) { - if (fieldValue == null || spanParam == null) { + // Detect NULL from failed type coercion (e.g., CAST("abc" AS DOUBLE) returns NULL) + if (fieldValue == null) { + throw new IllegalArgumentException( + "Cannot apply binning: field contains non-numeric string values. " + + "Only numeric types or string types with valid numeric values are supported."); + } + + if (spanParam == null) { return null; } diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/WidthBucketFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/WidthBucketFunction.java index 08daf9c314b..3f5fbe796b9 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/WidthBucketFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/WidthBucketFunction.java @@ -91,7 +91,14 @@ public Expression implement( /** Width bucket calculation using nice number algorithm. */ public static String calculateWidthBucket( Number fieldValue, Number numBinsParam, Number dataRange, Number maxValue) { - if (fieldValue == null || numBinsParam == null || dataRange == null || maxValue == null) { + // Detect NULL from failed type coercion (e.g., CAST("abc" AS DOUBLE) returns NULL) + if (dataRange == null || maxValue == null) { + throw new IllegalArgumentException( + "Cannot apply binning: field contains non-numeric string values. " + + "Only numeric types or string types with valid numeric values are supported."); + } + + if (fieldValue == null || numBinsParam == null) { return null; } diff --git a/docs/user/ppl/cmd/bin.rst b/docs/user/ppl/cmd/bin.rst index 11a338b598c..6484fea9224 100644 --- a/docs/user/ppl/cmd/bin.rst +++ b/docs/user/ppl/cmd/bin.rst @@ -16,13 +16,13 @@ bin Description ============ -| The ``bin`` command groups numeric values into buckets of equal intervals, making it useful for creating histograms and analyzing data distribution. It takes a numeric or time-based field and generates a new field with values that represent the lower bound of each bucket. String fields containing numeric values are automatically converted to numeric types through type coercion. +| The ``bin`` command groups numeric values into buckets of equal intervals, making it useful for creating histograms and analyzing data distribution. It takes a numeric or time-based field and generates a new field with values that represent the lower bound of each bucket. String fields containing valid numeric values (e.g., "25", "30.5") are automatically converted to numeric types through type coercion. Syntax ============ bin [span=] [minspan=] [bins=] [aligntime=(earliest | latest | )] [start=] [end=] -* field: mandatory. The field to bin. Accepts numeric fields, time-based fields, or string fields containing numeric values (automatically coerced to numeric type). +* field: mandatory. The field to bin. Accepts numeric fields, time-based fields, or string fields containing valid numeric values (e.g., "25", "30.5"). String fields with numeric values are automatically coerced to numeric type. Non-numeric strings will result in an error. * span: optional. The interval size for each bin. Cannot be used with bins or minspan parameters. * minspan: optional. The minimum interval size for automatic span calculation. Cannot be used with span or bins parameters. * bins: optional. The maximum number of equal-width bins to create. Cannot be used with span or minspan parameters. @@ -534,7 +534,7 @@ PPL query:: Example 20: Type coercion with string fields ============================================== -The ``bin`` command automatically converts string fields containing numeric values to numeric types, enabling binning operations on keyword/text fields. +The ``bin`` command automatically converts string fields containing valid numeric values to numeric types, enabling binning operations on keyword/text fields. Only strings that represent valid numbers (e.g., "25", "30.5", "100") are supported. Non-numeric strings will cause an error. PPL query:: @@ -547,5 +547,5 @@ PPL query:: | 3 | 30-40 | +---------+---------+ -In this example, even though ``age_str`` is a string field (keyword type), the bin command automatically coerces it to numeric values for binning. +In this example, even though ``age_str`` is a string field (keyword type) containing numeric values like "28", "32", etc., the bin command automatically coerces these numeric strings to numeric values for binning. If ``age_str`` contained non-numeric strings like "young" or "old", the query would fail. diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java index 1b4225958b3..a9c01273784 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java @@ -6,6 +6,7 @@ package org.opensearch.sql.calcite.remote; import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertThrows; import static org.opensearch.sql.legacy.TestsConstants.*; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; @@ -1067,4 +1068,21 @@ public void testBinWithEvalCreatedDottedFieldName() throws IOException { rows(false, "go", "opentelemetry", 16, 1, "12-14"), rows(true, "rust", "opentelemetry", 12, 1, "14-16")); } + + @Test + public void testBinOnNonNumericStringField() { + // Test that binning on a string field with non-numeric values fails with clear error + ResponseException exception = + assertThrows( + ResponseException.class, + () -> { + executeQuery( + String.format("source=%s | bin firstname bins=3 | head 1", TEST_INDEX_ACCOUNT)); + }); + + String errorMessage = exception.getMessage(); + assertTrue( + "Error should indicate non-numeric string values", + errorMessage.contains("non-numeric string values")); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java index 01ee5c6694a..48b7bb17f3f 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java @@ -5,9 +5,13 @@ package org.opensearch.sql.ppl.calcite; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; + import org.apache.calcite.rel.RelNode; import org.apache.calcite.test.CalciteAssert; import org.junit.Test; +import org.opensearch.sql.exception.SemanticCheckException; public class CalcitePPLBinTest extends CalcitePPLAbstractTest { @@ -166,4 +170,15 @@ public void testBinWithAligntime() { + " 3600 / 1) * 3600) `SYS_START`\n" + "FROM `scott`.`products_temporal`"); } + + @Test + public void testBinOnNonBinnableType() { + // Test that binning on truly unsupported types (not numeric, time, or string) fails + String ppl = "source=products_temporal | eval bool_field = true | bin bool_field bins=3"; + + SemanticCheckException exception = + assertThrows(SemanticCheckException.class, () -> getRelNode(ppl)); + assertTrue(exception.getMessage().contains("Cannot apply binning")); + assertTrue(exception.getMessage().contains("unsupported type")); + } } From 84e09c2865435b345d5b2e7eaee206f5f1f66f63 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Thu, 6 Nov 2025 18:04:05 -0800 Subject: [PATCH 11/14] fixes Signed-off-by: Kai Huang --- .../function/udf/binning/MinspanBucketFunction.java | 2 +- .../function/udf/binning/RangeBucketFunction.java | 2 +- .../expression/function/udf/binning/SpanBucketFunction.java | 2 +- .../function/udf/binning/WidthBucketFunction.java | 2 +- .../opensearch/sql/calcite/remote/CalciteBinCommandIT.java | 6 +++--- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java index d0e24b2d003..5591e0a3451 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java @@ -38,7 +38,7 @@ public class MinspanBucketFunction extends ImplementorUDF { public MinspanBucketFunction() { - super(new MinspanBucketImplementor(), NullPolicy.ANY); + super(new MinspanBucketImplementor(), NullPolicy.NONE); } @Override diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/RangeBucketFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/RangeBucketFunction.java index a1671ef1d9a..222228f795a 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/RangeBucketFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/RangeBucketFunction.java @@ -42,7 +42,7 @@ public class RangeBucketFunction extends ImplementorUDF { public RangeBucketFunction() { - super(new RangeBucketImplementor(), NullPolicy.ANY); + super(new RangeBucketImplementor(), NullPolicy.NONE); } @Override diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/SpanBucketFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/SpanBucketFunction.java index 3330ad44137..bbfafb1cc1a 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/SpanBucketFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/SpanBucketFunction.java @@ -36,7 +36,7 @@ public class SpanBucketFunction extends ImplementorUDF { public SpanBucketFunction() { - super(new SpanBucketImplementor(), NullPolicy.ANY); + super(new SpanBucketImplementor(), NullPolicy.NONE); } @Override diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/WidthBucketFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/WidthBucketFunction.java index 3f5fbe796b9..98faa7a5824 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/WidthBucketFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/WidthBucketFunction.java @@ -43,7 +43,7 @@ public class WidthBucketFunction extends ImplementorUDF { public WidthBucketFunction() { - super(new WidthBucketImplementor(), NullPolicy.ANY); + super(new WidthBucketImplementor(), NullPolicy.NONE); } @Override diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java index a9c01273784..7ff8e55f0a0 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java @@ -1076,13 +1076,13 @@ public void testBinOnNonNumericStringField() { assertThrows( ResponseException.class, () -> { - executeQuery( - String.format("source=%s | bin firstname bins=3 | head 1", TEST_INDEX_ACCOUNT)); + executeQuery(String.format("source=%s | bin firstname bins=3", TEST_INDEX_ACCOUNT)); }); + // Verify the error message contains information about non-numeric string values String errorMessage = exception.getMessage(); assertTrue( - "Error should indicate non-numeric string values", + "Error message should indicate non-numeric string values: " + errorMessage, errorMessage.contains("non-numeric string values")); } } From 192b9db879364090517b090c12e5de35013ffee9 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Fri, 7 Nov 2025 13:40:14 -0800 Subject: [PATCH 12/14] fixes Signed-off-by: Kai Huang --- .../calcite/utils/OpenSearchTypeFactory.java | 11 ++++--- .../binning/handlers/DefaultBinHandler.java | 16 +++++++--- .../binning/handlers/MinSpanBinHandler.java | 5 ++-- .../udf/binning/MinspanBucketFunction.java | 11 ++----- .../udf/binning/RangeBucketFunction.java | 11 ++----- .../udf/binning/SpanBucketFunction.java | 11 ++----- .../udf/binning/WidthBucketFunction.java | 11 ++----- docs/user/ppl/cmd/bin.rst | 10 ++----- .../calcite/remote/CalciteBinCommandIT.java | 17 ----------- .../rest-api-spec/test/issues/4740.yml | 30 +++++++++++++++---- 10 files changed, 55 insertions(+), 78 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java index dc386fd33ad..d225a797285 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java @@ -338,13 +338,12 @@ public static boolean isUserDefinedType(RelDataType type) { } /** - * Checks if the RelDataType represents a numeric field or a string field that can be coerced to - * numeric. Supports standard SQL numeric types (INTEGER, BIGINT, SMALLINT, TINYINT, FLOAT, - * DOUBLE, DECIMAL, REAL), OpenSearch UDT numeric types, and string types (VARCHAR, CHAR) which - * can contain numeric values and will be automatically coerced. + * Checks if the RelDataType represents a numeric type. Supports standard SQL numeric types + * (INTEGER, BIGINT, SMALLINT, TINYINT, FLOAT, DOUBLE, DECIMAL, REAL), OpenSearch UDT numeric + * types, and string types (VARCHAR, CHAR). * * @param fieldType the RelDataType to check - * @return true if the type is numeric or a coercible string type, false otherwise + * @return true if the type is numeric or string, false otherwise */ public static boolean isNumericType(RelDataType fieldType) { // Check standard SQL numeric types @@ -360,7 +359,7 @@ public static boolean isNumericType(RelDataType fieldType) { return true; } - // Check string types (VARCHAR, CHAR) which can be coerced to numeric for binning + // Check string types (VARCHAR, CHAR) if (sqlType == SqlTypeName.VARCHAR || sqlType == SqlTypeName.CHAR) { return true; } diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/DefaultBinHandler.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/DefaultBinHandler.java index e68477a9566..376e458049a 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/DefaultBinHandler.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/DefaultBinHandler.java @@ -16,6 +16,8 @@ import org.opensearch.sql.calcite.utils.binning.BinHandler; import org.opensearch.sql.calcite.utils.binning.BinnableField; import org.opensearch.sql.calcite.utils.binning.RangeFormatter; +import org.opensearch.sql.expression.function.BuiltinFunctionName; +import org.opensearch.sql.expression.function.PPLFuncImpTable; /** Handler for default binning when no parameters are specified. */ public class DefaultBinHandler implements BinHandler { @@ -45,7 +47,9 @@ private RexNode createNumericDefaultBinning(RexNode fieldExpr, CalcitePlanContex // Calculate data range RexNode minValue = context.relBuilder.min(fieldExpr).over().toRex(); RexNode maxValue = context.relBuilder.max(fieldExpr).over().toRex(); - RexNode dataRange = context.relBuilder.call(SqlStdOperatorTable.MINUS, maxValue, minValue); + RexNode dataRange = + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.SUBTRACT, maxValue, minValue); // Calculate magnitude-based width RexNode log10Range = context.relBuilder.call(SqlStdOperatorTable.LOG10, dataRange); @@ -60,17 +64,21 @@ private RexNode createNumericDefaultBinning(RexNode fieldExpr, CalcitePlanContex // Calculate bin value RexNode binStartValue = calculateBinValue(fieldExpr, widthInt, context); RexNode binEndValue = - context.relBuilder.call(SqlStdOperatorTable.PLUS, binStartValue, widthInt); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.ADD, binStartValue, widthInt); return RangeFormatter.createRangeString(binStartValue, binEndValue, context); } private RexNode calculateBinValue(RexNode fieldExpr, RexNode width, CalcitePlanContext context) { - RexNode divided = context.relBuilder.call(SqlStdOperatorTable.DIVIDE, fieldExpr, width); + RexNode divided = + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.DIVIDE, fieldExpr, width); RexNode floored = context.relBuilder.call(SqlStdOperatorTable.FLOOR, divided); - return context.relBuilder.call(SqlStdOperatorTable.MULTIPLY, floored, width); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.MULTIPLY, floored, width); } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/MinSpanBinHandler.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/MinSpanBinHandler.java index 1236caa942e..a193d7e5c91 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/MinSpanBinHandler.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/MinSpanBinHandler.java @@ -9,7 +9,6 @@ import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; -import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.tree.Bin; import org.opensearch.sql.ast.tree.MinSpanBin; @@ -52,7 +51,9 @@ public RexNode createExpression( // Calculate data range using window functions RexNode minValue = context.relBuilder.min(fieldExpr).over().toRex(); RexNode maxValue = context.relBuilder.max(fieldExpr).over().toRex(); - RexNode dataRange = context.relBuilder.call(SqlStdOperatorTable.MINUS, maxValue, minValue); + RexNode dataRange = + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.SUBTRACT, maxValue, minValue); // Convert start/end parameters RexNode startValue = convertParameter(minSpanBin.getStart(), context); diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java index 5591e0a3451..11e1a33afbd 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java @@ -38,7 +38,7 @@ public class MinspanBucketFunction extends ImplementorUDF { public MinspanBucketFunction() { - super(new MinspanBucketImplementor(), NullPolicy.NONE); + super(new MinspanBucketImplementor(), NullPolicy.ANY); } @Override @@ -73,14 +73,7 @@ public Expression implement( /** Minspan bucket calculation. */ public static String calculateMinspanBucket( Number fieldValue, Number minSpanParam, Number dataRange, Number maxValue) { - // Detect NULL from failed type coercion (e.g., CAST("abc" AS DOUBLE) returns NULL) - if (dataRange == null || maxValue == null) { - throw new IllegalArgumentException( - "Cannot apply binning: field contains non-numeric string values. " - + "Only numeric types or string types with valid numeric values are supported."); - } - - if (fieldValue == null || minSpanParam == null) { + if (fieldValue == null || minSpanParam == null || dataRange == null || maxValue == null) { return null; } diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/RangeBucketFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/RangeBucketFunction.java index 222228f795a..a8f2625b20f 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/RangeBucketFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/RangeBucketFunction.java @@ -42,7 +42,7 @@ public class RangeBucketFunction extends ImplementorUDF { public RangeBucketFunction() { - super(new RangeBucketImplementor(), NullPolicy.NONE); + super(new RangeBucketImplementor(), NullPolicy.ANY); } @Override @@ -79,14 +79,7 @@ public Expression implement( /** Range bucket calculation with expansion algorithm and magnitude-based width. */ public static String calculateRangeBucket( Number fieldValue, Number dataMin, Number dataMax, Number startParam, Number endParam) { - // Detect NULL from failed type coercion (e.g., CAST("abc" AS DOUBLE) returns NULL) - if (dataMin == null || dataMax == null) { - throw new IllegalArgumentException( - "Cannot apply binning: field contains non-numeric string values. " - + "Only numeric types or string types with valid numeric values are supported."); - } - - if (fieldValue == null) { + if (fieldValue == null || dataMin == null || dataMax == null) { return null; } diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/SpanBucketFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/SpanBucketFunction.java index bbfafb1cc1a..6970e485525 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/SpanBucketFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/SpanBucketFunction.java @@ -36,7 +36,7 @@ public class SpanBucketFunction extends ImplementorUDF { public SpanBucketFunction() { - super(new SpanBucketImplementor(), NullPolicy.NONE); + super(new SpanBucketImplementor(), NullPolicy.ANY); } @Override @@ -66,14 +66,7 @@ public Expression implement( /** Span bucket calculation. */ public static String calculateSpanBucket(Number fieldValue, Number spanParam) { - // Detect NULL from failed type coercion (e.g., CAST("abc" AS DOUBLE) returns NULL) - if (fieldValue == null) { - throw new IllegalArgumentException( - "Cannot apply binning: field contains non-numeric string values. " - + "Only numeric types or string types with valid numeric values are supported."); - } - - if (spanParam == null) { + if (fieldValue == null || spanParam == null) { return null; } diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/WidthBucketFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/WidthBucketFunction.java index 98faa7a5824..08daf9c314b 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/WidthBucketFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/WidthBucketFunction.java @@ -43,7 +43,7 @@ public class WidthBucketFunction extends ImplementorUDF { public WidthBucketFunction() { - super(new WidthBucketImplementor(), NullPolicy.NONE); + super(new WidthBucketImplementor(), NullPolicy.ANY); } @Override @@ -91,14 +91,7 @@ public Expression implement( /** Width bucket calculation using nice number algorithm. */ public static String calculateWidthBucket( Number fieldValue, Number numBinsParam, Number dataRange, Number maxValue) { - // Detect NULL from failed type coercion (e.g., CAST("abc" AS DOUBLE) returns NULL) - if (dataRange == null || maxValue == null) { - throw new IllegalArgumentException( - "Cannot apply binning: field contains non-numeric string values. " - + "Only numeric types or string types with valid numeric values are supported."); - } - - if (fieldValue == null || numBinsParam == null) { + if (fieldValue == null || numBinsParam == null || dataRange == null || maxValue == null) { return null; } diff --git a/docs/user/ppl/cmd/bin.rst b/docs/user/ppl/cmd/bin.rst index 6484fea9224..13fad6527fb 100644 --- a/docs/user/ppl/cmd/bin.rst +++ b/docs/user/ppl/cmd/bin.rst @@ -16,13 +16,13 @@ bin Description ============ -| The ``bin`` command groups numeric values into buckets of equal intervals, making it useful for creating histograms and analyzing data distribution. It takes a numeric or time-based field and generates a new field with values that represent the lower bound of each bucket. String fields containing valid numeric values (e.g., "25", "30.5") are automatically converted to numeric types through type coercion. +| The ``bin`` command groups numeric values into buckets of equal intervals, making it useful for creating histograms and analyzing data distribution. It takes a numeric or time-based field and generates a new field with values that represent the lower bound of each bucket. Syntax ============ bin [span=] [minspan=] [bins=] [aligntime=(earliest | latest | )] [start=] [end=] -* field: mandatory. The field to bin. Accepts numeric fields, time-based fields, or string fields containing valid numeric values (e.g., "25", "30.5"). String fields with numeric values are automatically coerced to numeric type. Non-numeric strings will result in an error. +* field: mandatory. The field to bin. Accepts numeric or time-based fields. * span: optional. The interval size for each bin. Cannot be used with bins or minspan parameters. * minspan: optional. The minimum interval size for automatic span calculation. Cannot be used with span or bins parameters. * bins: optional. The maximum number of equal-width bins to create. Cannot be used with span or minspan parameters. @@ -531,11 +531,9 @@ PPL query:: +-----------+----------------+ -Example 20: Type coercion with string fields +Example 20: Binning with string fields ============================================== -The ``bin`` command automatically converts string fields containing valid numeric values to numeric types, enabling binning operations on keyword/text fields. Only strings that represent valid numbers (e.g., "25", "30.5", "100") are supported. Non-numeric strings will cause an error. - PPL query:: os> source=accounts | eval age_str = CAST(age AS STRING) | bin age_str bins=3 | stats count() by age_str | sort age_str; @@ -547,5 +545,3 @@ PPL query:: | 3 | 30-40 | +---------+---------+ -In this example, even though ``age_str`` is a string field (keyword type) containing numeric values like "28", "32", etc., the bin command automatically coerces these numeric strings to numeric values for binning. If ``age_str`` contained non-numeric strings like "young" or "old", the query would fail. - diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java index 7ff8e55f0a0..bb326dc39a7 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java @@ -1068,21 +1068,4 @@ public void testBinWithEvalCreatedDottedFieldName() throws IOException { rows(false, "go", "opentelemetry", 16, 1, "12-14"), rows(true, "rust", "opentelemetry", 12, 1, "14-16")); } - - @Test - public void testBinOnNonNumericStringField() { - // Test that binning on a string field with non-numeric values fails with clear error - ResponseException exception = - assertThrows( - ResponseException.class, - () -> { - executeQuery(String.format("source=%s | bin firstname bins=3", TEST_INDEX_ACCOUNT)); - }); - - // Verify the error message contains information about non-numeric string values - String errorMessage = exception.getMessage(); - assertTrue( - "Error message should indicate non-numeric string values: " + errorMessage, - errorMessage.contains("non-numeric string values")); - } } diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4740.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4740.yml index a5d5a171b67..5fdb4198abe 100644 --- a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4740.yml +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4740.yml @@ -10,7 +10,7 @@ setup: "age": type: keyword "balance": - type: double + type: keyword "name": type: keyword - do: @@ -19,15 +19,15 @@ setup: refresh: true body: - '{"index":{}}' - - '{"@timestamp":"2024-01-01T00:00:00.000Z","age":"25","balance":1000.0,"name":"Alice"}' + - '{"@timestamp":"2024-01-01T00:00:00.000Z","age":"25","balance":"1000.0","name":"Alice"}' - '{"index":{}}' - - '{"@timestamp":"2024-01-01T00:05:00.000Z","age":"30","balance":2000.0,"name":"Bob"}' + - '{"@timestamp":"2024-01-01T00:05:00.000Z","age":"30","balance":"2000.0","name":"Bob"}' - '{"index":{}}' - - '{"@timestamp":"2024-01-01T00:10:00.000Z","age":"35","balance":3000.0,"name":"Charlie"}' + - '{"@timestamp":"2024-01-01T00:10:00.000Z","age":"35","balance":"3000.0","name":"Charlie"}' - '{"index":{}}' - - '{"@timestamp":"2024-01-01T00:15:00.000Z","age":"40","balance":4000.0,"name":"David"}' + - '{"@timestamp":"2024-01-01T00:15:00.000Z","age":"40","balance":"4000.0","name":"David"}' - '{"index":{}}' - - '{"@timestamp":"2024-01-01T00:20:00.000Z","age":"45","balance":5000.0,"name":"Eve"}' + - '{"@timestamp":"2024-01-01T00:20:00.000Z","age":"45","balance":"5000.0","name":"Eve"}' --- "bin with numeric field using WIDTH_BUCKET - issue 4740": @@ -100,3 +100,21 @@ setup: - match: { "schema": [ { "name": "count()", "type": "bigint" }, { "name": "age", "type": "string" } ] } - match: { "datarows": [ [ 1, "20-30" ], [ 2, "30-40" ], [ 2, "40-50" ] ] } + +--- +"bin with default binning (no parameters) on string field - issue 4740": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_binning_4740 | bin balance | stats count() by balance | sort balance + + - match: { "schema": [ { "name": "count()", "type": "bigint" }, { "name": "balance", "type": "string" } ] } + - match: { "datarows": [ [ 1, "1000.0-2000.0" ], [ 1, "2000.0-3000.0" ], [ 1, "3000.0-4000.0" ], [ 1, "4000.0-5000.0" ], [ 1, "5000.0-6000.0" ] ] } From 7320c3e21a31e1bb197bf495768a49b4afec2ac0 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Fri, 7 Nov 2025 13:56:58 -0800 Subject: [PATCH 13/14] fixes Signed-off-by: Kai Huang --- .../utils/binning/handlers/LogSpanHelper.java | 26 ++++++-- .../utils/binning/time/AlignmentHandler.java | 47 ++++++++----- .../utils/binning/time/DaySpanHandler.java | 5 +- .../utils/binning/time/MonthSpanHandler.java | 63 ++++++++++++------ .../binning/time/StandardTimeSpanHandler.java | 66 ++++++++++++------- 5 files changed, 141 insertions(+), 66 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/LogSpanHelper.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/LogSpanHelper.java index 9bad71c52f9..660530bb251 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/LogSpanHelper.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/LogSpanHelper.java @@ -11,6 +11,8 @@ import org.opensearch.sql.calcite.utils.binning.BinConstants; import org.opensearch.sql.calcite.utils.binning.RangeFormatter; import org.opensearch.sql.calcite.utils.binning.SpanInfo; +import org.opensearch.sql.expression.function.BuiltinFunctionName; +import org.opensearch.sql.expression.function.PPLFuncImpTable; /** Helper for creating logarithmic span expressions. */ public class LogSpanHelper { @@ -31,14 +33,19 @@ public RexNode createLogSpanExpression( RexNode adjustedField = fieldExpr; if (coefficient != 1.0) { adjustedField = - context.relBuilder.call( - SqlStdOperatorTable.DIVIDE, fieldExpr, context.relBuilder.literal(coefficient)); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.DIVIDE, + fieldExpr, + context.relBuilder.literal(coefficient)); } // Calculate log_base(adjusted_field) RexNode lnField = context.relBuilder.call(SqlStdOperatorTable.LN, adjustedField); RexNode lnBase = context.relBuilder.literal(Math.log(base)); - RexNode logValue = context.relBuilder.call(SqlStdOperatorTable.DIVIDE, lnField, lnBase); + RexNode logValue = + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.DIVIDE, lnField, lnBase); // Get bin number RexNode binNumber = context.relBuilder.call(SqlStdOperatorTable.FLOOR, logValue); @@ -49,15 +56,20 @@ public RexNode createLogSpanExpression( RexNode basePowerBin = context.relBuilder.call(SqlStdOperatorTable.POWER, baseNode, binNumber); RexNode lowerBound = - context.relBuilder.call(SqlStdOperatorTable.MULTIPLY, coefficientNode, basePowerBin); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.MULTIPLY, coefficientNode, basePowerBin); RexNode binPlusOne = - context.relBuilder.call( - SqlStdOperatorTable.PLUS, binNumber, context.relBuilder.literal(1.0)); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.ADD, + binNumber, + context.relBuilder.literal(1.0)); RexNode basePowerBinPlusOne = context.relBuilder.call(SqlStdOperatorTable.POWER, baseNode, binPlusOne); RexNode upperBound = - context.relBuilder.call(SqlStdOperatorTable.MULTIPLY, coefficientNode, basePowerBinPlusOne); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.MULTIPLY, coefficientNode, basePowerBinPlusOne); // Create range string RexNode rangeStr = RangeFormatter.createRangeString(lowerBound, upperBound, context); diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/AlignmentHandler.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/AlignmentHandler.java index 7bdcf269593..7227bc732b3 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/AlignmentHandler.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/AlignmentHandler.java @@ -8,7 +8,9 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.opensearch.sql.calcite.CalcitePlanContext; +import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.expression.function.PPLBuiltinOperators; +import org.opensearch.sql.expression.function.PPLFuncImpTable; /** Handler for time alignment operations (@d, @d+offset, epoch alignment). */ public class AlignmentHandler { @@ -30,18 +32,22 @@ public static RexNode createEpochAlignedSpan( // SPL Universal Formula: bin_start = reference + floor((timestamp - reference) / span) * span RexNode timeOffset = - context.relBuilder.call(SqlStdOperatorTable.MINUS, epochSeconds, referenceTimestamp); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.SUBTRACT, epochSeconds, referenceTimestamp); RexNode binNumber = context.relBuilder.call( SqlStdOperatorTable.FLOOR, - context.relBuilder.call(SqlStdOperatorTable.DIVIDE, timeOffset, intervalLiteral)); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.DIVIDE, timeOffset, intervalLiteral)); RexNode binOffset = - context.relBuilder.call(SqlStdOperatorTable.MULTIPLY, binNumber, intervalLiteral); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.MULTIPLY, binNumber, intervalLiteral); RexNode binStartSeconds = - context.relBuilder.call(SqlStdOperatorTable.PLUS, referenceTimestamp, binOffset); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.ADD, referenceTimestamp, binOffset); return context.rexBuilder.makeCall(PPLBuiltinOperators.FROM_UNIXTIME, binStartSeconds); } @@ -75,19 +81,24 @@ public static RexNode createTimeModifierAlignedSpan( RexNode daysSinceEpoch = context.relBuilder.call( SqlStdOperatorTable.FLOOR, - context.relBuilder.call( - SqlStdOperatorTable.DIVIDE, earliestTimestamp, secondsPerDay)); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.DIVIDE, + earliestTimestamp, + secondsPerDay)); RexNode startOfEarliestDay = - context.relBuilder.call(SqlStdOperatorTable.MULTIPLY, daysSinceEpoch, secondsPerDay); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.MULTIPLY, daysSinceEpoch, secondsPerDay); // Calculate alignment reference point RexNode alignmentReference; if (offsetMillis != 0) { long offsetSeconds = offsetMillis / 1000L; alignmentReference = - context.relBuilder.call( - SqlStdOperatorTable.PLUS, + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.ADD, startOfEarliestDay, context.relBuilder.literal(offsetSeconds)); } else { @@ -96,27 +107,33 @@ public static RexNode createTimeModifierAlignedSpan( // Apply SPL Universal Formula RexNode timeOffset = - context.relBuilder.call(SqlStdOperatorTable.MINUS, epochSeconds, alignmentReference); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.SUBTRACT, epochSeconds, alignmentReference); RexNode binNumber = context.relBuilder.call( SqlStdOperatorTable.FLOOR, - context.relBuilder.call(SqlStdOperatorTable.DIVIDE, timeOffset, intervalLiteral)); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.DIVIDE, timeOffset, intervalLiteral)); RexNode binOffset = - context.relBuilder.call(SqlStdOperatorTable.MULTIPLY, binNumber, intervalLiteral); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.MULTIPLY, binNumber, intervalLiteral); RexNode binStartSeconds = - context.relBuilder.call(SqlStdOperatorTable.PLUS, alignmentReference, binOffset); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.ADD, alignmentReference, binOffset); return context.rexBuilder.makeCall(PPLBuiltinOperators.FROM_UNIXTIME, binStartSeconds); } else { // No day alignment RexNode divided = - context.relBuilder.call(SqlStdOperatorTable.DIVIDE, epochSeconds, intervalLiteral); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.DIVIDE, epochSeconds, intervalLiteral); RexNode binNumber = context.relBuilder.call(SqlStdOperatorTable.FLOOR, divided); RexNode binStartSeconds = - context.relBuilder.call(SqlStdOperatorTable.MULTIPLY, binNumber, intervalLiteral); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.MULTIPLY, binNumber, intervalLiteral); return context.rexBuilder.makeCall(PPLBuiltinOperators.FROM_UNIXTIME, binStartSeconds); } diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/DaySpanHandler.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/DaySpanHandler.java index d233d14c42c..d3f160eb15a 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/DaySpanHandler.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/DaySpanHandler.java @@ -9,7 +9,9 @@ import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.calcite.utils.binning.BinConstants; +import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.expression.function.PPLBuiltinOperators; +import org.opensearch.sql.expression.function.PPLFuncImpTable; /** Handler for day-based time spans. */ public class DaySpanHandler { @@ -38,6 +40,7 @@ private RexNode calculateBinStart(RexNode value, int interval, CalcitePlanContex RexNode intervalLiteral = context.relBuilder.literal(interval); RexNode positionInCycle = context.relBuilder.call(SqlStdOperatorTable.MOD, value, intervalLiteral); - return context.relBuilder.call(SqlStdOperatorTable.MINUS, value, positionInCycle); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.SUBTRACT, value, positionInCycle); } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/MonthSpanHandler.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/MonthSpanHandler.java index 7317ef565c9..a92dea0b5d5 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/MonthSpanHandler.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/MonthSpanHandler.java @@ -9,7 +9,9 @@ import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.calcite.utils.binning.BinConstants; +import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.expression.function.PPLBuiltinOperators; +import org.opensearch.sql.expression.function.PPLFuncImpTable; /** Handler for month-based time spans using SPL Monthly Binning Algorithm. */ public class MonthSpanHandler { @@ -36,12 +38,17 @@ public RexNode createExpression( context.rexBuilder.makeCall( PPLBuiltinOperators.MAKEDATE, binStartYear, - context.rexBuilder.makeCall( - SqlStdOperatorTable.PLUS, - context.rexBuilder.makeCall( - SqlStdOperatorTable.MULTIPLY, - context.rexBuilder.makeCall( - SqlStdOperatorTable.MINUS, binStartMonth, context.relBuilder.literal(1)), + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.ADD, + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.MULTIPLY, + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.SUBTRACT, + binStartMonth, + context.relBuilder.literal(1)), context.relBuilder.literal(31)), context.relBuilder.literal(1))); @@ -52,38 +59,52 @@ public RexNode createExpression( private RexNode calculateMonthsSinceEpoch( RexNode inputYear, RexNode inputMonth, CalcitePlanContext context) { RexNode yearsSinceEpoch = - context.relBuilder.call( - SqlStdOperatorTable.MINUS, + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.SUBTRACT, inputYear, context.relBuilder.literal(BinConstants.UNIX_EPOCH_YEAR)); RexNode monthsFromYears = - context.relBuilder.call( - SqlStdOperatorTable.MULTIPLY, yearsSinceEpoch, context.relBuilder.literal(12)); - return context.relBuilder.call( - SqlStdOperatorTable.PLUS, + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.MULTIPLY, + yearsSinceEpoch, + context.relBuilder.literal(12)); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.ADD, monthsFromYears, - context.relBuilder.call( - SqlStdOperatorTable.MINUS, inputMonth, context.relBuilder.literal(1))); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.SUBTRACT, + inputMonth, + context.relBuilder.literal(1))); } private RexNode calculateBinStart(RexNode value, int interval, CalcitePlanContext context) { RexNode intervalLiteral = context.relBuilder.literal(interval); RexNode positionInCycle = context.relBuilder.call(SqlStdOperatorTable.MOD, value, intervalLiteral); - return context.relBuilder.call(SqlStdOperatorTable.MINUS, value, positionInCycle); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.SUBTRACT, value, positionInCycle); } private RexNode calculateBinStartYear(RexNode binStartMonths, CalcitePlanContext context) { - return context.relBuilder.call( - SqlStdOperatorTable.PLUS, + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.ADD, context.relBuilder.literal(BinConstants.UNIX_EPOCH_YEAR), - context.relBuilder.call( - SqlStdOperatorTable.DIVIDE, binStartMonths, context.relBuilder.literal(12))); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.DIVIDE, + binStartMonths, + context.relBuilder.literal(12))); } private RexNode calculateBinStartMonth(RexNode binStartMonths, CalcitePlanContext context) { - return context.relBuilder.call( - SqlStdOperatorTable.PLUS, + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.ADD, context.relBuilder.call( SqlStdOperatorTable.MOD, binStartMonths, context.relBuilder.literal(12)), context.relBuilder.literal(1)); diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/StandardTimeSpanHandler.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/StandardTimeSpanHandler.java index dd0150a0b54..3eaaa7d2082 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/StandardTimeSpanHandler.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/StandardTimeSpanHandler.java @@ -9,7 +9,9 @@ import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.calcite.utils.binning.BinConstants; +import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.expression.function.PPLBuiltinOperators; +import org.opensearch.sql.expression.function.PPLFuncImpTable; /** Handler for standard time units (microseconds through hours). */ public class StandardTimeSpanHandler { @@ -34,8 +36,11 @@ public RexNode createExpression( // Add back alignment offset if (alignmentOffset != 0) { binValue = - context.relBuilder.call( - SqlStdOperatorTable.PLUS, binValue, context.relBuilder.literal(alignmentOffset)); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.ADD, + binValue, + context.relBuilder.literal(alignmentOffset)); } // Convert back to timestamp @@ -51,20 +56,25 @@ private RexNode convertToTargetUnit( // For sub-second units, work in milliseconds if (isSubSecondUnit(config)) { RexNode epochMillis = - context.relBuilder.call( - SqlStdOperatorTable.MULTIPLY, epochSeconds, context.relBuilder.literal(1000L)); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.MULTIPLY, + epochSeconds, + context.relBuilder.literal(1000L)); if (config.getDivisionFactor() == 1) { return epochMillis; } else if (config.getDivisionFactor() > 1) { - return context.relBuilder.call( - SqlStdOperatorTable.DIVIDE, + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.DIVIDE, epochMillis, context.relBuilder.literal(config.getDivisionFactor())); } else { // Microseconds - return context.relBuilder.call( - SqlStdOperatorTable.MULTIPLY, + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.MULTIPLY, epochMillis, context.relBuilder.literal(BinConstants.MICROS_PER_MILLI)); } @@ -73,8 +83,9 @@ private RexNode convertToTargetUnit( if (config.getDivisionFactor() == 1) { return epochSeconds; } else { - return context.relBuilder.call( - SqlStdOperatorTable.DIVIDE, + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.DIVIDE, epochSeconds, context.relBuilder.literal(config.getDivisionFactor())); } @@ -90,22 +101,27 @@ private RexNode convertFromTargetUnit( binMillis = binValue; } else if (config.getDivisionFactor() > 1) { binMillis = - context.relBuilder.call( - SqlStdOperatorTable.MULTIPLY, + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.MULTIPLY, binValue, context.relBuilder.literal(config.getDivisionFactor())); } else { // Microseconds binMillis = - context.relBuilder.call( - SqlStdOperatorTable.DIVIDE, + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.DIVIDE, binValue, context.relBuilder.literal(BinConstants.MICROS_PER_MILLI)); } RexNode binSeconds = - context.relBuilder.call( - SqlStdOperatorTable.DIVIDE, binMillis, context.relBuilder.literal(1000L)); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.DIVIDE, + binMillis, + context.relBuilder.literal(1000L)); return context.rexBuilder.makeCall(PPLBuiltinOperators.FROM_UNIXTIME, binSeconds); } else { @@ -114,8 +130,9 @@ private RexNode convertFromTargetUnit( binSeconds = binValue; } else { binSeconds = - context.relBuilder.call( - SqlStdOperatorTable.MULTIPLY, + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.MULTIPLY, binValue, context.relBuilder.literal(config.getDivisionFactor())); } @@ -129,17 +146,22 @@ private RexNode applyAlignmentOffset( if (alignmentOffset == 0) { return epochValue; } - return context.relBuilder.call( - SqlStdOperatorTable.MINUS, epochValue, context.relBuilder.literal(alignmentOffset)); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.SUBTRACT, + epochValue, + context.relBuilder.literal(alignmentOffset)); } private RexNode performBinning( RexNode adjustedValue, int intervalValue, CalcitePlanContext context) { RexNode intervalLiteral = context.relBuilder.literal(intervalValue); RexNode divided = - context.relBuilder.call(SqlStdOperatorTable.DIVIDE, adjustedValue, intervalLiteral); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.DIVIDE, adjustedValue, intervalLiteral); RexNode floored = context.relBuilder.call(SqlStdOperatorTable.FLOOR, divided); - return context.relBuilder.call(SqlStdOperatorTable.MULTIPLY, floored, intervalLiteral); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.MULTIPLY, floored, intervalLiteral); } private long convertAlignmentOffset(long offsetMillis, TimeUnitConfig config) { From 534c99b405ac545dc03c903b295fc2fe2d35eff4 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Fri, 7 Nov 2025 14:25:55 -0800 Subject: [PATCH 14/14] Fix tests Signed-off-by: Kai Huang --- .../calcite/explain_bin_aligntime.yaml | 6 +++--- .../explain_bin_aligntime.yaml | 6 +++--- .../sql/ppl/calcite/CalcitePPLBinTest.java | 16 ++++++++++------ 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_aligntime.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_aligntime.yaml index 53cd91e02ca..9a82afe29ec 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_aligntime.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_aligntime.yaml @@ -3,8 +3,8 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(category=[$0], value=[$1], timestamp=[$2], @timestamp=[$9]) LogicalSort(fetch=[5]) - LogicalProject(category=[$1], value=[$2], timestamp=[$3], _id=[$4], _index=[$5], _score=[$6], _maxscore=[$7], _sort=[$8], _routing=[$9], @timestamp=[FROM_UNIXTIME(*(*(FLOOR(/(/(UNIX_TIMESTAMP($0), 3600), 2)), 2), 3600))]) + LogicalProject(category=[$1], value=[$2], timestamp=[$3], _id=[$4], _index=[$5], _score=[$6], _maxscore=[$7], _sort=[$8], _routing=[$9], @timestamp=[FROM_UNIXTIME(*(*(FLOOR(DIVIDE(DIVIDE(UNIX_TIMESTAMP($0), 3600), 2)), 2), 3600))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) physical: | - EnumerableCalc(expr#0..3=[{inputs}], expr#4=[UNIX_TIMESTAMP($t3)], expr#5=[3600], expr#6=[/($t4, $t5)], expr#7=[2], expr#8=[/($t6, $t7)], expr#9=[FLOOR($t8)], expr#10=[*($t9, $t7)], expr#11=[*($t10, $t5)], expr#12=[FROM_UNIXTIME($t11)], proj#0..2=[{exprs}], $f3=[$t12]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[category, value, timestamp, @timestamp], LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["category","value","timestamp","@timestamp"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[UNIX_TIMESTAMP($t3)], expr#5=[3600], expr#6=[DIVIDE($t4, $t5)], expr#7=[2], expr#8=[DIVIDE($t6, $t7)], expr#9=[FLOOR($t8)], expr#10=[*($t9, $t7)], expr#11=[*($t10, $t5)], expr#12=[FROM_UNIXTIME($t11)], proj#0..2=[{exprs}], $f3=[$t12]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[category, value, timestamp, @timestamp], LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["category","value","timestamp","@timestamp"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_bin_aligntime.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_bin_aligntime.yaml index 282a080733e..e8a727cf1e3 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_bin_aligntime.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_bin_aligntime.yaml @@ -3,10 +3,10 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(category=[$0], value=[$1], timestamp=[$2], @timestamp=[$9]) LogicalSort(fetch=[5]) - LogicalProject(category=[$1], value=[$2], timestamp=[$3], _id=[$4], _index=[$5], _score=[$6], _maxscore=[$7], _sort=[$8], _routing=[$9], @timestamp=[FROM_UNIXTIME(*(*(FLOOR(/(/(UNIX_TIMESTAMP($0), 3600), 2)), 2), 3600))]) + LogicalProject(category=[$1], value=[$2], timestamp=[$3], _id=[$4], _index=[$5], _score=[$6], _maxscore=[$7], _sort=[$8], _routing=[$9], @timestamp=[FROM_UNIXTIME(*(*(FLOOR(DIVIDE(DIVIDE(UNIX_TIMESTAMP($0), 3600), 2)), 2), 3600))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) physical: | EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..9=[{inputs}], expr#10=[UNIX_TIMESTAMP($t0)], expr#11=[3600], expr#12=[/($t10, $t11)], expr#13=[2], expr#14=[/($t12, $t13)], expr#15=[FLOOR($t14)], expr#16=[*($t15, $t13)], expr#17=[*($t16, $t11)], expr#18=[FROM_UNIXTIME($t17)], category=[$t1], value=[$t2], timestamp=[$t3], @timestamp=[$t18]) + EnumerableCalc(expr#0..9=[{inputs}], expr#10=[UNIX_TIMESTAMP($t0)], expr#11=[3600], expr#12=[DIVIDE($t10, $t11)], expr#13=[2], expr#14=[DIVIDE($t12, $t13)], expr#15=[FLOOR($t14)], expr#16=[*($t15, $t13)], expr#17=[*($t16, $t11)], expr#18=[FROM_UNIXTIME($t17)], category=[$t1], value=[$t2], timestamp=[$t3], @timestamp=[$t18]) EnumerableLimit(fetch=[5]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) \ No newline at end of file diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java index 48b7bb17f3f..0d933fe1649 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java @@ -141,13 +141,15 @@ public void testBinWithTimeSpan() { verifyLogical( root, "LogicalProject(ID=[$0], SUPPLIER=[$1], SYS_END=[$3]," - + " SYS_START=[FROM_UNIXTIME(*(FLOOR(/(/(UNIX_TIMESTAMP($2), 3600), 1)), 3600))])\n" + + " SYS_START=[FROM_UNIXTIME(*(FLOOR(DIVIDE(DIVIDE(UNIX_TIMESTAMP($2), 3600), 1))," + + " 3600))])\n" + " LogicalTableScan(table=[[scott, products_temporal]])\n"); verifyPPLToSparkSQL( root, - "SELECT `ID`, `SUPPLIER`, `SYS_END`, `FROM_UNIXTIME`(FLOOR(`UNIX_TIMESTAMP`(`SYS_START`) /" - + " 3600 / 1) * 3600) `SYS_START`\n" + "SELECT `ID`, `SUPPLIER`, `SYS_END`," + + " `FROM_UNIXTIME`(FLOOR(`DIVIDE`(`DIVIDE`(`UNIX_TIMESTAMP`(`SYS_START`), 3600), 1)) *" + + " 3600) `SYS_START`\n" + "FROM `scott`.`products_temporal`"); } @@ -161,13 +163,15 @@ public void testBinWithAligntime() { verifyLogical( root, "LogicalProject(ID=[$0], SUPPLIER=[$1], SYS_END=[$3]," - + " SYS_START=[FROM_UNIXTIME(*(FLOOR(/(/(UNIX_TIMESTAMP($2), 3600), 1)), 3600))])\n" + + " SYS_START=[FROM_UNIXTIME(*(FLOOR(DIVIDE(DIVIDE(UNIX_TIMESTAMP($2), 3600), 1))," + + " 3600))])\n" + " LogicalTableScan(table=[[scott, products_temporal]])\n"); verifyPPLToSparkSQL( root, - "SELECT `ID`, `SUPPLIER`, `SYS_END`, `FROM_UNIXTIME`(FLOOR(`UNIX_TIMESTAMP`(`SYS_START`) /" - + " 3600 / 1) * 3600) `SYS_START`\n" + "SELECT `ID`, `SUPPLIER`, `SYS_END`," + + " `FROM_UNIXTIME`(FLOOR(`DIVIDE`(`DIVIDE`(`UNIX_TIMESTAMP`(`SYS_START`), 3600), 1)) *" + + " 3600) `SYS_START`\n" + "FROM `scott`.`products_temporal`"); }