From e057ddc5501aa33461a890bb9b9757fe5cb8911e Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 11 Nov 2025 22:24:30 +0000 Subject: [PATCH] Fix binning udf resolution / Add type coercion support for binning UDFs (#4742) * Fix binning udf resolution Signed-off-by: Kai Huang * spotless fix Signed-off-by: Kai Huang * update tes Signed-off-by: Kai Huang * add comments Signed-off-by: Kai Huang * removal Signed-off-by: Kai Huang * update yaml test Signed-off-by: Kai Huang * rerun Signed-off-by: Kai Huang * update support for type coercion Signed-off-by: Kai Huang * update doc Signed-off-by: Kai Huang * enhance error handling and doc Signed-off-by: Kai Huang * fixes Signed-off-by: Kai Huang * fixes Signed-off-by: Kai Huang * fixes Signed-off-by: Kai Huang * Fix tests Signed-off-by: Kai Huang --------- Signed-off-by: Kai Huang (cherry picked from commit 20f2234002b1a3c29cd6aa7ecba30a4dc047870a) Signed-off-by: github-actions[bot] --- .../calcite/utils/OpenSearchTypeFactory.java | 13 +- .../sql/calcite/utils/PPLOperandTypes.java | 59 ++++++++ .../calcite/utils/binning/BinnableField.java | 20 +-- .../binning/handlers/CountBinHandler.java | 17 ++- .../binning/handlers/DefaultBinHandler.java | 16 +- .../utils/binning/handlers/LogSpanHelper.java | 26 +++- .../binning/handlers/MinSpanBinHandler.java | 17 ++- .../binning/handlers/NumericSpanHelper.java | 6 +- .../binning/handlers/RangeBinHandler.java | 13 +- .../utils/binning/time/AlignmentHandler.java | 47 ++++-- .../utils/binning/time/DaySpanHandler.java | 5 +- .../utils/binning/time/MonthSpanHandler.java | 63 +++++--- .../binning/time/StandardTimeSpanHandler.java | 66 ++++++--- .../function/BuiltinFunctionName.java | 6 + .../expression/function/PPLFuncImpTable.java | 13 ++ .../udf/binning/WidthBucketFunction.java | 2 +- docs/user/ppl/cmd/bin.rst | 19 ++- .../calcite/remote/CalciteBinCommandIT.java | 140 +----------------- .../calcite/explain_bin_aligntime.yaml | 6 +- .../explain_bin_aligntime.yaml | 6 +- .../rest-api-spec/test/issues/4740.yml | 120 +++++++++++++++ .../sql/ppl/calcite/CalcitePPLBinTest.java | 108 +++++++++----- 22 files changed, 500 insertions(+), 288 deletions(-) create mode 100644 integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4740.yml diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java index bbe162fb6d7..cbae32b0388 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java @@ -357,12 +357,12 @@ public static boolean isUserDefinedType(RelDataType type) { } /** - * Checks if the RelDataType represents a numeric field. Supports both standard SQL numeric types - * (INTEGER, BIGINT, SMALLINT, TINYINT, FLOAT, DOUBLE, DECIMAL, REAL) and OpenSearch UDT numeric - * types. + * Checks if the RelDataType represents a numeric type. Supports standard SQL numeric types + * (INTEGER, BIGINT, SMALLINT, TINYINT, FLOAT, DOUBLE, DECIMAL, REAL), OpenSearch UDT numeric + * types, and string types (VARCHAR, CHAR). * * @param fieldType the RelDataType to check - * @return true if the type is numeric, false otherwise + * @return true if the type is numeric or string, false otherwise */ public static boolean isNumericType(RelDataType fieldType) { // Check standard SQL numeric types @@ -378,6 +378,11 @@ public static boolean isNumericType(RelDataType fieldType) { return true; } + // Check string types (VARCHAR, CHAR) + if (sqlType == SqlTypeName.VARCHAR || sqlType == SqlTypeName.CHAR) { + return true; + } + // Check for OpenSearch UDT numeric types if (isUserDefinedType(fieldType)) { AbstractExprRelDataType exprType = (AbstractExprRelDataType) fieldType; diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java index 07fb39c889a..ceae1ca7b51 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java @@ -136,6 +136,65 @@ private PPLOperandTypes() {} SqlTypeFamily.NUMERIC, SqlTypeFamily.NUMERIC, SqlTypeFamily.NUMERIC)); + + public static final UDFOperandMetadata WIDTH_BUCKET_OPERAND = + UDFOperandMetadata.wrap( + (CompositeOperandTypeChecker) + // 1. Numeric fields: bin age span=10 + OperandTypes.family( + SqlTypeFamily.NUMERIC, + SqlTypeFamily.INTEGER, + SqlTypeFamily.NUMERIC, + SqlTypeFamily.NUMERIC) + // 2. Timestamp fields with OpenSearch type system + // Used in: Production + Integration tests (CalciteBinCommandIT) + .or( + OperandTypes.family( + SqlTypeFamily.TIMESTAMP, + SqlTypeFamily.INTEGER, + SqlTypeFamily.CHARACTER, // TIMESTAMP - TIMESTAMP = INTERVAL (as STRING) + SqlTypeFamily.TIMESTAMP)) + // 3. Timestamp fields with Calcite SCOTT schema + // Used in: Unit tests (CalcitePPLBinTest) + .or( + OperandTypes.family( + SqlTypeFamily.TIMESTAMP, + SqlTypeFamily.INTEGER, + SqlTypeFamily.TIMESTAMP, // TIMESTAMP - TIMESTAMP = TIMESTAMP + SqlTypeFamily.TIMESTAMP)) + // DATE field with OpenSearch type system + // Used in: Production + Integration tests (CalciteBinCommandIT) + .or( + OperandTypes.family( + SqlTypeFamily.DATE, + SqlTypeFamily.INTEGER, + SqlTypeFamily.CHARACTER, // DATE - DATE = INTERVAL (as STRING) + SqlTypeFamily.DATE)) + // DATE field with Calcite SCOTT schema + // Used in: Unit tests (CalcitePPLBinTest) + .or( + OperandTypes.family( + SqlTypeFamily.DATE, + SqlTypeFamily.INTEGER, + SqlTypeFamily.DATE, // DATE - DATE = DATE + SqlTypeFamily.DATE)) + // TIME field with OpenSearch type system + // Used in: Production + Integration tests (CalciteBinCommandIT) + .or( + OperandTypes.family( + SqlTypeFamily.TIME, + SqlTypeFamily.INTEGER, + SqlTypeFamily.CHARACTER, // TIME - TIME = INTERVAL (as STRING) + SqlTypeFamily.TIME)) + // TIME field with Calcite SCOTT schema + // Used in: Unit tests (CalcitePPLBinTest) + .or( + OperandTypes.family( + SqlTypeFamily.TIME, + SqlTypeFamily.INTEGER, + SqlTypeFamily.TIME, // TIME - TIME = TIME + SqlTypeFamily.TIME))); + public static final UDFOperandMetadata NUMERIC_NUMERIC_NUMERIC_NUMERIC_NUMERIC = UDFOperandMetadata.wrap( OperandTypes.family( diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/BinnableField.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/BinnableField.java index c8c73ce3a99..a4e924b631c 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/BinnableField.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/BinnableField.java @@ -11,13 +11,7 @@ import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; import org.opensearch.sql.exception.SemanticCheckException; -/** - * Represents a validated field that supports binning operations. The existence of this class - * guarantees that validation has been run - the field is either numeric or time-based. - * - *

This design encodes validation in the type system, preventing downstream code from forgetting - * to validate or running validation multiple times. - */ +/** Represents a field that supports binning operations. */ @Getter public class BinnableField { private final RexNode fieldExpr; @@ -27,13 +21,12 @@ public class BinnableField { private final boolean isNumeric; /** - * Creates a validated BinnableField. Throws SemanticCheckException if the field is neither - * numeric nor time-based. + * Creates a BinnableField. Validates that the field type is compatible with binning operations. * * @param fieldExpr The Rex expression for the field * @param fieldType The relational data type of the field * @param fieldName The name of the field (for error messages) - * @throws SemanticCheckException if the field is neither numeric nor time-based + * @throws SemanticCheckException if the field type is not supported for binning */ public BinnableField(RexNode fieldExpr, RelDataType fieldType, String fieldName) { this.fieldExpr = fieldExpr; @@ -43,13 +36,10 @@ public BinnableField(RexNode fieldExpr, RelDataType fieldType, String fieldName) this.isTimeBased = OpenSearchTypeFactory.isTimeBasedType(fieldType); this.isNumeric = OpenSearchTypeFactory.isNumericType(fieldType); - // Validation: field must be either numeric or time-based + // Reject truly unsupported types (e.g., BOOLEAN, ARRAY, MAP) if (!isNumeric && !isTimeBased) { throw new SemanticCheckException( - String.format( - "Cannot apply binning: field '%s' is non-numeric and not time-related, expected" - + " numeric or time-related type", - fieldName)); + String.format("Cannot apply binning to field '%s': unsupported type", fieldName)); } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/CountBinHandler.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/CountBinHandler.java index 7422a26f0b7..bfc91a9a2de 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/CountBinHandler.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/CountBinHandler.java @@ -6,7 +6,6 @@ package org.opensearch.sql.calcite.utils.binning.handlers; import org.apache.calcite.rex.RexNode; -import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.tree.Bin; import org.opensearch.sql.ast.tree.CountBin; @@ -16,7 +15,8 @@ import org.opensearch.sql.calcite.utils.binning.BinFieldValidator; import org.opensearch.sql.calcite.utils.binning.BinHandler; import org.opensearch.sql.calcite.utils.binning.BinnableField; -import org.opensearch.sql.expression.function.PPLBuiltinOperators; +import org.opensearch.sql.expression.function.BuiltinFunctionName; +import org.opensearch.sql.expression.function.PPLFuncImpTable; /** Handler for bins-based (count) binning operations. */ public class CountBinHandler implements BinHandler { @@ -40,7 +40,9 @@ public RexNode createExpression( // Calculate data range using window functions RexNode minValue = context.relBuilder.min(fieldExpr).over().toRex(); RexNode maxValue = context.relBuilder.max(fieldExpr).over().toRex(); - RexNode dataRange = context.relBuilder.call(SqlStdOperatorTable.MINUS, maxValue, minValue); + RexNode dataRange = + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.SUBTRACT, maxValue, minValue); // Convert start/end parameters RexNode startValue = convertParameter(countBin.getStart(), context); @@ -49,8 +51,13 @@ public RexNode createExpression( // WIDTH_BUCKET(field_value, num_bins, data_range, max_value) RexNode numBins = context.relBuilder.literal(requestedBins); - return context.rexBuilder.makeCall( - PPLBuiltinOperators.WIDTH_BUCKET, fieldExpr, numBins, dataRange, maxValue); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.WIDTH_BUCKET, + fieldExpr, + numBins, + dataRange, + maxValue); } private RexNode convertParameter( diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/DefaultBinHandler.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/DefaultBinHandler.java index e68477a9566..376e458049a 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/DefaultBinHandler.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/DefaultBinHandler.java @@ -16,6 +16,8 @@ import org.opensearch.sql.calcite.utils.binning.BinHandler; import org.opensearch.sql.calcite.utils.binning.BinnableField; import org.opensearch.sql.calcite.utils.binning.RangeFormatter; +import org.opensearch.sql.expression.function.BuiltinFunctionName; +import org.opensearch.sql.expression.function.PPLFuncImpTable; /** Handler for default binning when no parameters are specified. */ public class DefaultBinHandler implements BinHandler { @@ -45,7 +47,9 @@ private RexNode createNumericDefaultBinning(RexNode fieldExpr, CalcitePlanContex // Calculate data range RexNode minValue = context.relBuilder.min(fieldExpr).over().toRex(); RexNode maxValue = context.relBuilder.max(fieldExpr).over().toRex(); - RexNode dataRange = context.relBuilder.call(SqlStdOperatorTable.MINUS, maxValue, minValue); + RexNode dataRange = + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.SUBTRACT, maxValue, minValue); // Calculate magnitude-based width RexNode log10Range = context.relBuilder.call(SqlStdOperatorTable.LOG10, dataRange); @@ -60,17 +64,21 @@ private RexNode createNumericDefaultBinning(RexNode fieldExpr, CalcitePlanContex // Calculate bin value RexNode binStartValue = calculateBinValue(fieldExpr, widthInt, context); RexNode binEndValue = - context.relBuilder.call(SqlStdOperatorTable.PLUS, binStartValue, widthInt); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.ADD, binStartValue, widthInt); return RangeFormatter.createRangeString(binStartValue, binEndValue, context); } private RexNode calculateBinValue(RexNode fieldExpr, RexNode width, CalcitePlanContext context) { - RexNode divided = context.relBuilder.call(SqlStdOperatorTable.DIVIDE, fieldExpr, width); + RexNode divided = + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.DIVIDE, fieldExpr, width); RexNode floored = context.relBuilder.call(SqlStdOperatorTable.FLOOR, divided); - return context.relBuilder.call(SqlStdOperatorTable.MULTIPLY, floored, width); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.MULTIPLY, floored, width); } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/LogSpanHelper.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/LogSpanHelper.java index 9bad71c52f9..660530bb251 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/LogSpanHelper.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/LogSpanHelper.java @@ -11,6 +11,8 @@ import org.opensearch.sql.calcite.utils.binning.BinConstants; import org.opensearch.sql.calcite.utils.binning.RangeFormatter; import org.opensearch.sql.calcite.utils.binning.SpanInfo; +import org.opensearch.sql.expression.function.BuiltinFunctionName; +import org.opensearch.sql.expression.function.PPLFuncImpTable; /** Helper for creating logarithmic span expressions. */ public class LogSpanHelper { @@ -31,14 +33,19 @@ public RexNode createLogSpanExpression( RexNode adjustedField = fieldExpr; if (coefficient != 1.0) { adjustedField = - context.relBuilder.call( - SqlStdOperatorTable.DIVIDE, fieldExpr, context.relBuilder.literal(coefficient)); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.DIVIDE, + fieldExpr, + context.relBuilder.literal(coefficient)); } // Calculate log_base(adjusted_field) RexNode lnField = context.relBuilder.call(SqlStdOperatorTable.LN, adjustedField); RexNode lnBase = context.relBuilder.literal(Math.log(base)); - RexNode logValue = context.relBuilder.call(SqlStdOperatorTable.DIVIDE, lnField, lnBase); + RexNode logValue = + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.DIVIDE, lnField, lnBase); // Get bin number RexNode binNumber = context.relBuilder.call(SqlStdOperatorTable.FLOOR, logValue); @@ -49,15 +56,20 @@ public RexNode createLogSpanExpression( RexNode basePowerBin = context.relBuilder.call(SqlStdOperatorTable.POWER, baseNode, binNumber); RexNode lowerBound = - context.relBuilder.call(SqlStdOperatorTable.MULTIPLY, coefficientNode, basePowerBin); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.MULTIPLY, coefficientNode, basePowerBin); RexNode binPlusOne = - context.relBuilder.call( - SqlStdOperatorTable.PLUS, binNumber, context.relBuilder.literal(1.0)); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.ADD, + binNumber, + context.relBuilder.literal(1.0)); RexNode basePowerBinPlusOne = context.relBuilder.call(SqlStdOperatorTable.POWER, baseNode, binPlusOne); RexNode upperBound = - context.relBuilder.call(SqlStdOperatorTable.MULTIPLY, coefficientNode, basePowerBinPlusOne); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.MULTIPLY, coefficientNode, basePowerBinPlusOne); // Create range string RexNode rangeStr = RangeFormatter.createRangeString(lowerBound, upperBound, context); diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/MinSpanBinHandler.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/MinSpanBinHandler.java index 16e11b7abce..a193d7e5c91 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/MinSpanBinHandler.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/MinSpanBinHandler.java @@ -9,7 +9,6 @@ import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; -import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.tree.Bin; import org.opensearch.sql.ast.tree.MinSpanBin; @@ -18,7 +17,8 @@ import org.opensearch.sql.calcite.utils.binning.BinFieldValidator; import org.opensearch.sql.calcite.utils.binning.BinHandler; import org.opensearch.sql.calcite.utils.binning.BinnableField; -import org.opensearch.sql.expression.function.PPLBuiltinOperators; +import org.opensearch.sql.expression.function.BuiltinFunctionName; +import org.opensearch.sql.expression.function.PPLFuncImpTable; /** Handler for minspan-based binning operations. */ public class MinSpanBinHandler implements BinHandler { @@ -51,7 +51,9 @@ public RexNode createExpression( // Calculate data range using window functions RexNode minValue = context.relBuilder.min(fieldExpr).over().toRex(); RexNode maxValue = context.relBuilder.max(fieldExpr).over().toRex(); - RexNode dataRange = context.relBuilder.call(SqlStdOperatorTable.MINUS, maxValue, minValue); + RexNode dataRange = + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.SUBTRACT, maxValue, minValue); // Convert start/end parameters RexNode startValue = convertParameter(minSpanBin.getStart(), context); @@ -60,8 +62,13 @@ public RexNode createExpression( // MINSPAN_BUCKET(field_value, min_span, data_range, max_value) RexNode minSpanParam = context.relBuilder.literal(minspan); - return context.rexBuilder.makeCall( - PPLBuiltinOperators.MINSPAN_BUCKET, fieldExpr, minSpanParam, dataRange, maxValue); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.MINSPAN_BUCKET, + fieldExpr, + minSpanParam, + dataRange, + maxValue); } private RexNode convertParameter( diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/NumericSpanHelper.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/NumericSpanHelper.java index 76494dc0435..e14acfb729d 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/NumericSpanHelper.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/NumericSpanHelper.java @@ -7,7 +7,8 @@ import org.apache.calcite.rex.RexNode; import org.opensearch.sql.calcite.CalcitePlanContext; -import org.opensearch.sql.expression.function.PPLBuiltinOperators; +import org.opensearch.sql.expression.function.BuiltinFunctionName; +import org.opensearch.sql.expression.function.PPLFuncImpTable; /** Helper for creating numeric span expressions. */ public class NumericSpanHelper { @@ -32,6 +33,7 @@ private RexNode createExpression( RexNode fieldExpr, RexNode spanValue, CalcitePlanContext context) { // SPAN_BUCKET(field_value, span_value) - return context.rexBuilder.makeCall(PPLBuiltinOperators.SPAN_BUCKET, fieldExpr, spanValue); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.SPAN_BUCKET, fieldExpr, spanValue); } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/RangeBinHandler.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/RangeBinHandler.java index aa726cb9dbb..585e9234dc6 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/RangeBinHandler.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/RangeBinHandler.java @@ -13,7 +13,8 @@ import org.opensearch.sql.calcite.utils.binning.BinFieldValidator; import org.opensearch.sql.calcite.utils.binning.BinHandler; import org.opensearch.sql.calcite.utils.binning.BinnableField; -import org.opensearch.sql.expression.function.PPLBuiltinOperators; +import org.opensearch.sql.expression.function.BuiltinFunctionName; +import org.opensearch.sql.expression.function.PPLFuncImpTable; /** Handler for range-based binning (start/end parameters only). */ public class RangeBinHandler implements BinHandler { @@ -43,8 +44,14 @@ public RexNode createExpression( RexNode endParam = convertParameter(rangeBin.getEnd(), context, visitor); // Use RANGE_BUCKET with data bounds and user parameters - return context.rexBuilder.makeCall( - PPLBuiltinOperators.RANGE_BUCKET, fieldExpr, dataMin, dataMax, startParam, endParam); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.RANGE_BUCKET, + fieldExpr, + dataMin, + dataMax, + startParam, + endParam); } private RexNode convertParameter( diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/AlignmentHandler.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/AlignmentHandler.java index 7bdcf269593..7227bc732b3 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/AlignmentHandler.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/AlignmentHandler.java @@ -8,7 +8,9 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.opensearch.sql.calcite.CalcitePlanContext; +import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.expression.function.PPLBuiltinOperators; +import org.opensearch.sql.expression.function.PPLFuncImpTable; /** Handler for time alignment operations (@d, @d+offset, epoch alignment). */ public class AlignmentHandler { @@ -30,18 +32,22 @@ public static RexNode createEpochAlignedSpan( // SPL Universal Formula: bin_start = reference + floor((timestamp - reference) / span) * span RexNode timeOffset = - context.relBuilder.call(SqlStdOperatorTable.MINUS, epochSeconds, referenceTimestamp); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.SUBTRACT, epochSeconds, referenceTimestamp); RexNode binNumber = context.relBuilder.call( SqlStdOperatorTable.FLOOR, - context.relBuilder.call(SqlStdOperatorTable.DIVIDE, timeOffset, intervalLiteral)); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.DIVIDE, timeOffset, intervalLiteral)); RexNode binOffset = - context.relBuilder.call(SqlStdOperatorTable.MULTIPLY, binNumber, intervalLiteral); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.MULTIPLY, binNumber, intervalLiteral); RexNode binStartSeconds = - context.relBuilder.call(SqlStdOperatorTable.PLUS, referenceTimestamp, binOffset); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.ADD, referenceTimestamp, binOffset); return context.rexBuilder.makeCall(PPLBuiltinOperators.FROM_UNIXTIME, binStartSeconds); } @@ -75,19 +81,24 @@ public static RexNode createTimeModifierAlignedSpan( RexNode daysSinceEpoch = context.relBuilder.call( SqlStdOperatorTable.FLOOR, - context.relBuilder.call( - SqlStdOperatorTable.DIVIDE, earliestTimestamp, secondsPerDay)); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.DIVIDE, + earliestTimestamp, + secondsPerDay)); RexNode startOfEarliestDay = - context.relBuilder.call(SqlStdOperatorTable.MULTIPLY, daysSinceEpoch, secondsPerDay); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.MULTIPLY, daysSinceEpoch, secondsPerDay); // Calculate alignment reference point RexNode alignmentReference; if (offsetMillis != 0) { long offsetSeconds = offsetMillis / 1000L; alignmentReference = - context.relBuilder.call( - SqlStdOperatorTable.PLUS, + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.ADD, startOfEarliestDay, context.relBuilder.literal(offsetSeconds)); } else { @@ -96,27 +107,33 @@ public static RexNode createTimeModifierAlignedSpan( // Apply SPL Universal Formula RexNode timeOffset = - context.relBuilder.call(SqlStdOperatorTable.MINUS, epochSeconds, alignmentReference); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.SUBTRACT, epochSeconds, alignmentReference); RexNode binNumber = context.relBuilder.call( SqlStdOperatorTable.FLOOR, - context.relBuilder.call(SqlStdOperatorTable.DIVIDE, timeOffset, intervalLiteral)); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.DIVIDE, timeOffset, intervalLiteral)); RexNode binOffset = - context.relBuilder.call(SqlStdOperatorTable.MULTIPLY, binNumber, intervalLiteral); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.MULTIPLY, binNumber, intervalLiteral); RexNode binStartSeconds = - context.relBuilder.call(SqlStdOperatorTable.PLUS, alignmentReference, binOffset); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.ADD, alignmentReference, binOffset); return context.rexBuilder.makeCall(PPLBuiltinOperators.FROM_UNIXTIME, binStartSeconds); } else { // No day alignment RexNode divided = - context.relBuilder.call(SqlStdOperatorTable.DIVIDE, epochSeconds, intervalLiteral); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.DIVIDE, epochSeconds, intervalLiteral); RexNode binNumber = context.relBuilder.call(SqlStdOperatorTable.FLOOR, divided); RexNode binStartSeconds = - context.relBuilder.call(SqlStdOperatorTable.MULTIPLY, binNumber, intervalLiteral); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.MULTIPLY, binNumber, intervalLiteral); return context.rexBuilder.makeCall(PPLBuiltinOperators.FROM_UNIXTIME, binStartSeconds); } diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/DaySpanHandler.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/DaySpanHandler.java index d233d14c42c..d3f160eb15a 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/DaySpanHandler.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/DaySpanHandler.java @@ -9,7 +9,9 @@ import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.calcite.utils.binning.BinConstants; +import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.expression.function.PPLBuiltinOperators; +import org.opensearch.sql.expression.function.PPLFuncImpTable; /** Handler for day-based time spans. */ public class DaySpanHandler { @@ -38,6 +40,7 @@ private RexNode calculateBinStart(RexNode value, int interval, CalcitePlanContex RexNode intervalLiteral = context.relBuilder.literal(interval); RexNode positionInCycle = context.relBuilder.call(SqlStdOperatorTable.MOD, value, intervalLiteral); - return context.relBuilder.call(SqlStdOperatorTable.MINUS, value, positionInCycle); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.SUBTRACT, value, positionInCycle); } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/MonthSpanHandler.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/MonthSpanHandler.java index 7317ef565c9..a92dea0b5d5 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/MonthSpanHandler.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/MonthSpanHandler.java @@ -9,7 +9,9 @@ import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.calcite.utils.binning.BinConstants; +import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.expression.function.PPLBuiltinOperators; +import org.opensearch.sql.expression.function.PPLFuncImpTable; /** Handler for month-based time spans using SPL Monthly Binning Algorithm. */ public class MonthSpanHandler { @@ -36,12 +38,17 @@ public RexNode createExpression( context.rexBuilder.makeCall( PPLBuiltinOperators.MAKEDATE, binStartYear, - context.rexBuilder.makeCall( - SqlStdOperatorTable.PLUS, - context.rexBuilder.makeCall( - SqlStdOperatorTable.MULTIPLY, - context.rexBuilder.makeCall( - SqlStdOperatorTable.MINUS, binStartMonth, context.relBuilder.literal(1)), + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.ADD, + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.MULTIPLY, + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.SUBTRACT, + binStartMonth, + context.relBuilder.literal(1)), context.relBuilder.literal(31)), context.relBuilder.literal(1))); @@ -52,38 +59,52 @@ public RexNode createExpression( private RexNode calculateMonthsSinceEpoch( RexNode inputYear, RexNode inputMonth, CalcitePlanContext context) { RexNode yearsSinceEpoch = - context.relBuilder.call( - SqlStdOperatorTable.MINUS, + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.SUBTRACT, inputYear, context.relBuilder.literal(BinConstants.UNIX_EPOCH_YEAR)); RexNode monthsFromYears = - context.relBuilder.call( - SqlStdOperatorTable.MULTIPLY, yearsSinceEpoch, context.relBuilder.literal(12)); - return context.relBuilder.call( - SqlStdOperatorTable.PLUS, + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.MULTIPLY, + yearsSinceEpoch, + context.relBuilder.literal(12)); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.ADD, monthsFromYears, - context.relBuilder.call( - SqlStdOperatorTable.MINUS, inputMonth, context.relBuilder.literal(1))); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.SUBTRACT, + inputMonth, + context.relBuilder.literal(1))); } private RexNode calculateBinStart(RexNode value, int interval, CalcitePlanContext context) { RexNode intervalLiteral = context.relBuilder.literal(interval); RexNode positionInCycle = context.relBuilder.call(SqlStdOperatorTable.MOD, value, intervalLiteral); - return context.relBuilder.call(SqlStdOperatorTable.MINUS, value, positionInCycle); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.SUBTRACT, value, positionInCycle); } private RexNode calculateBinStartYear(RexNode binStartMonths, CalcitePlanContext context) { - return context.relBuilder.call( - SqlStdOperatorTable.PLUS, + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.ADD, context.relBuilder.literal(BinConstants.UNIX_EPOCH_YEAR), - context.relBuilder.call( - SqlStdOperatorTable.DIVIDE, binStartMonths, context.relBuilder.literal(12))); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.DIVIDE, + binStartMonths, + context.relBuilder.literal(12))); } private RexNode calculateBinStartMonth(RexNode binStartMonths, CalcitePlanContext context) { - return context.relBuilder.call( - SqlStdOperatorTable.PLUS, + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.ADD, context.relBuilder.call( SqlStdOperatorTable.MOD, binStartMonths, context.relBuilder.literal(12)), context.relBuilder.literal(1)); diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/StandardTimeSpanHandler.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/StandardTimeSpanHandler.java index 51d3d78d770..ffa57138884 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/StandardTimeSpanHandler.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/StandardTimeSpanHandler.java @@ -9,7 +9,9 @@ import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.calcite.utils.binning.BinConstants; +import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.expression.function.PPLBuiltinOperators; +import org.opensearch.sql.expression.function.PPLFuncImpTable; /** Handler for standard time units (microseconds through hours). */ public class StandardTimeSpanHandler { @@ -34,8 +36,11 @@ public RexNode createExpression( // Add back alignment offset if (alignmentOffset != 0) { binValue = - context.relBuilder.call( - SqlStdOperatorTable.PLUS, binValue, context.relBuilder.literal(alignmentOffset)); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.ADD, + binValue, + context.relBuilder.literal(alignmentOffset)); } // Convert back to timestamp @@ -51,20 +56,25 @@ private RexNode convertToTargetUnit( // For sub-second units, work in milliseconds if (isSubSecondUnit(config)) { RexNode epochMillis = - context.relBuilder.call( - SqlStdOperatorTable.MULTIPLY, epochSeconds, context.relBuilder.literal(1000L)); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.MULTIPLY, + epochSeconds, + context.relBuilder.literal(1000L)); if (config.getDivisionFactor() == 1) { return epochMillis; } else if (config.getDivisionFactor() > 1) { - return context.relBuilder.call( - SqlStdOperatorTable.DIVIDE, + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.DIVIDE, epochMillis, context.relBuilder.literal(config.getDivisionFactor())); } else { // Microseconds - return context.relBuilder.call( - SqlStdOperatorTable.MULTIPLY, + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.MULTIPLY, epochMillis, context.relBuilder.literal(BinConstants.MICROS_PER_MILLI)); } @@ -73,8 +83,9 @@ private RexNode convertToTargetUnit( if (config.getDivisionFactor() == 1) { return epochSeconds; } else { - return context.relBuilder.call( - SqlStdOperatorTable.DIVIDE, + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.DIVIDE, epochSeconds, context.relBuilder.literal(config.getDivisionFactor())); } @@ -90,22 +101,27 @@ private RexNode convertFromTargetUnit( binMillis = binValue; } else if (config.getDivisionFactor() > 1) { binMillis = - context.relBuilder.call( - SqlStdOperatorTable.MULTIPLY, + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.MULTIPLY, binValue, context.relBuilder.literal(config.getDivisionFactor())); } else { // Microseconds binMillis = - context.relBuilder.call( - SqlStdOperatorTable.DIVIDE, + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.DIVIDE, binValue, context.relBuilder.literal(BinConstants.MICROS_PER_MILLI)); } RexNode binSeconds = - context.relBuilder.call( - SqlStdOperatorTable.DIVIDE, binMillis, context.relBuilder.literal(1000L)); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.DIVIDE, + binMillis, + context.relBuilder.literal(1000L)); return context.rexBuilder.makeCall(PPLBuiltinOperators.FROM_UNIXTIME, binSeconds); } else { @@ -114,8 +130,9 @@ private RexNode convertFromTargetUnit( binSeconds = binValue; } else { binSeconds = - context.relBuilder.call( - SqlStdOperatorTable.MULTIPLY, + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.MULTIPLY, binValue, context.relBuilder.literal(config.getDivisionFactor())); } @@ -129,17 +146,22 @@ private RexNode applyAlignmentOffset( if (alignmentOffset == 0) { return epochValue; } - return context.relBuilder.call( - SqlStdOperatorTable.MINUS, epochValue, context.relBuilder.literal(alignmentOffset)); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.SUBTRACT, + epochValue, + context.relBuilder.literal(alignmentOffset)); } private RexNode performBinning( RexNode adjustedValue, int intervalValue, CalcitePlanContext context) { RexNode intervalLiteral = context.relBuilder.literal(intervalValue); RexNode divided = - context.relBuilder.call(SqlStdOperatorTable.DIVIDE, adjustedValue, intervalLiteral); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.DIVIDE, adjustedValue, intervalLiteral); RexNode floored = context.relBuilder.call(SqlStdOperatorTable.FLOOR, divided); - return context.relBuilder.call(SqlStdOperatorTable.MULTIPLY, floored, intervalLiteral); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.MULTIPLY, floored, intervalLiteral); } private long convertAlignmentOffset(long offsetMillis, TimeUnitConfig config) { diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index ced98022ca9..73c5e613415 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -59,6 +59,12 @@ public enum BuiltinFunctionName { TAN(FunctionName.of("tan")), SPAN(FunctionName.of("span")), + /** Binning Functions. */ + SPAN_BUCKET(FunctionName.of("span_bucket")), + WIDTH_BUCKET(FunctionName.of("width_bucket")), + MINSPAN_BUCKET(FunctionName.of("minspan_bucket")), + RANGE_BUCKET(FunctionName.of("range_bucket")), + /** Collection functions */ ARRAY(FunctionName.of("array")), ARRAY_LENGTH(FunctionName.of("array_length")), diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index 0ee1863b540..350ce99bf49 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -136,6 +136,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.MEDIAN; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MICROSECOND; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MIN; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MINSPAN_BUCKET; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MINUTE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MINUTE_OF_DAY; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MINUTE_OF_HOUR; @@ -166,6 +167,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.QUERY_STRING; import static org.opensearch.sql.expression.function.BuiltinFunctionName.RADIANS; import static org.opensearch.sql.expression.function.BuiltinFunctionName.RAND; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.RANGE_BUCKET; import static org.opensearch.sql.expression.function.BuiltinFunctionName.REDUCE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.REGEXP; import static org.opensearch.sql.expression.function.BuiltinFunctionName.REGEX_MATCH; @@ -189,6 +191,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.SIN; import static org.opensearch.sql.expression.function.BuiltinFunctionName.SINH; import static org.opensearch.sql.expression.function.BuiltinFunctionName.SPAN; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.SPAN_BUCKET; import static org.opensearch.sql.expression.function.BuiltinFunctionName.SQRT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.STDDEV_POP; import static org.opensearch.sql.expression.function.BuiltinFunctionName.STDDEV_SAMP; @@ -230,6 +233,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.WEEKDAY; import static org.opensearch.sql.expression.function.BuiltinFunctionName.WEEKOFYEAR; import static org.opensearch.sql.expression.function.BuiltinFunctionName.WEEK_OF_YEAR; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.WIDTH_BUCKET; import static org.opensearch.sql.expression.function.BuiltinFunctionName.XOR; import static org.opensearch.sql.expression.function.BuiltinFunctionName.YEAR; import static org.opensearch.sql.expression.function.BuiltinFunctionName.YEARWEEK; @@ -716,6 +720,11 @@ void populate() { SUBTRACT, SqlStdOperatorTable.MINUS, PPLTypeChecker.wrapFamily((FamilyOperandTypeChecker) OperandTypes.NUMERIC_NUMERIC)); + // Add DATETIME-DATETIME variant for timestamp binning support + registerOperator( + SUBTRACT, + SqlStdOperatorTable.MINUS, + PPLTypeChecker.family(SqlTypeFamily.DATETIME, SqlTypeFamily.DATETIME)); registerOperator(MULTIPLY, SqlStdOperatorTable.MULTIPLY); registerOperator(MULTIPLYFUNCTION, SqlStdOperatorTable.MULTIPLY); registerOperator(TRUNCATE, SqlStdOperatorTable.TRUNCATE); @@ -858,6 +867,10 @@ void populate() { registerOperator(EXPM1, PPLBuiltinOperators.EXPM1); registerOperator(RINT, PPLBuiltinOperators.RINT); registerOperator(SPAN, PPLBuiltinOperators.SPAN); + registerOperator(SPAN_BUCKET, PPLBuiltinOperators.SPAN_BUCKET); + registerOperator(WIDTH_BUCKET, PPLBuiltinOperators.WIDTH_BUCKET); + registerOperator(MINSPAN_BUCKET, PPLBuiltinOperators.MINSPAN_BUCKET); + registerOperator(RANGE_BUCKET, PPLBuiltinOperators.RANGE_BUCKET); registerOperator(E, PPLBuiltinOperators.E); registerOperator(CONV, PPLBuiltinOperators.CONV); registerOperator(MOD, PPLBuiltinOperators.MOD); diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/WidthBucketFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/WidthBucketFunction.java index cdfe417cb2e..bc78ca7de33 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/WidthBucketFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/WidthBucketFunction.java @@ -66,7 +66,7 @@ public static boolean dateRelatedType(RelDataType type) { @Override public UDFOperandMetadata getOperandMetadata() { - return PPLOperandTypes.NUMERIC_NUMERIC_NUMERIC_NUMERIC; + return PPLOperandTypes.WIDTH_BUCKET_OPERAND; } public static class WidthBucketImplementor implements NotNullImplementor { diff --git a/docs/user/ppl/cmd/bin.rst b/docs/user/ppl/cmd/bin.rst index c2591ad8998..412cd9bff0e 100644 --- a/docs/user/ppl/cmd/bin.rst +++ b/docs/user/ppl/cmd/bin.rst @@ -11,13 +11,13 @@ bin Description ============ -| The ``bin`` command groups numeric values into buckets of equal intervals, making it useful for creating histograms and analyzing data distribution. It takes a numeric field and generates a new field with values that represent the lower bound of each bucket. +| The ``bin`` command groups numeric values into buckets of equal intervals, making it useful for creating histograms and analyzing data distribution. It takes a numeric or time-based field and generates a new field with values that represent the lower bound of each bucket. Syntax ============ bin [span=] [minspan=] [bins=] [aligntime=(earliest | latest | )] [start=] [end=] -* field: mandatory. The numeric field to bin. +* field: mandatory. The field to bin. Accepts numeric or time-based fields. * span: optional. The interval size for each bin. Cannot be used with bins or minspan parameters. * minspan: optional. The minimum interval size for automatic span calculation. Cannot be used with span or bins parameters. * bins: optional. The maximum number of equal-width bins to create. Cannot be used with span or minspan parameters. @@ -525,3 +525,18 @@ PPL query:: | 28.0-29.0 | 13 | +-----------+----------------+ + +Example 20: Binning with string fields +============================================== + +PPL query:: + + os> source=accounts | eval age_str = CAST(age AS STRING) | bin age_str bins=3 | stats count() by age_str | sort age_str; + fetched rows / total rows = 2/2 + +---------+---------+ + | count() | age_str | + |---------+---------| + | 1 | 20-30 | + | 3 | 30-40 | + +---------+---------+ + diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java index ef8abc3cba0..bb326dc39a7 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java @@ -6,6 +6,7 @@ package org.opensearch.sql.calcite.remote; import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertThrows; import static org.opensearch.sql.legacy.TestsConstants.*; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; @@ -507,145 +508,6 @@ public void testBinWithNonExistentField() { errorMessage.contains("non_existent_field") || errorMessage.contains("not found")); } - @Test - public void testBinWithMinspanOnNonNumericField() { - // Test that bin command with minspan throws clear error for non-numeric field - ResponseException exception = - assertThrows( - ResponseException.class, - () -> { - executeQuery( - String.format( - "source=%s | bin firstname minspan=10 | head 1", TEST_INDEX_ACCOUNT)); - }); - - // Get the full error message - String errorMessage = exception.getMessage(); - - // Verify the error message is clear and specific - String expectedMessage = - "Cannot apply binning: field 'firstname' is non-numeric and not time-related, expected" - + " numeric or time-related type"; - assertTrue( - "Error message should contain: '" + expectedMessage + "'", - errorMessage.contains(expectedMessage)); - } - - @Test - public void testBinWithSpanOnNonNumericField() { - // Test that bin command with span throws clear error for non-numeric field - ResponseException exception = - assertThrows( - ResponseException.class, - () -> { - executeQuery( - String.format("source=%s | bin lastname span=5 | head 1", TEST_INDEX_ACCOUNT)); - }); - - // Get the full error message - String errorMessage = exception.getMessage(); - - // Verify the error message is clear and specific - String expectedMessage = - "Cannot apply binning: field 'lastname' is non-numeric and not time-related, expected" - + " numeric or time-related type"; - assertTrue( - "Error message should contain: '" + expectedMessage + "'", - errorMessage.contains(expectedMessage)); - } - - @Test - public void testBinWithBinsOnNonNumericField() { - // Test that bin command with bins throws clear error for non-numeric field - ResponseException exception = - assertThrows( - ResponseException.class, - () -> { - executeQuery( - String.format("source=%s | bin state bins=10 | head 1", TEST_INDEX_ACCOUNT)); - }); - - // Get the full error message - String errorMessage = exception.getMessage(); - - // Verify the error message is clear and specific - String expectedMessage = - "Cannot apply binning: field 'state' is non-numeric and not time-related, expected numeric" - + " or time-related type"; - assertTrue( - "Error message should contain: '" + expectedMessage + "'", - errorMessage.contains(expectedMessage)); - } - - @Test - public void testBinWithStartEndOnNonNumericField() { - // Test that bin command with start/end throws clear error for non-numeric field - ResponseException exception = - assertThrows( - ResponseException.class, - () -> { - executeQuery( - String.format( - "source=%s | bin city start=0 end=100 | head 1", TEST_INDEX_ACCOUNT)); - }); - - // Get the full error message - String errorMessage = exception.getMessage(); - - // Verify the error message is clear and specific - String expectedMessage = - "Cannot apply binning: field 'city' is non-numeric and not time-related, expected numeric" - + " or time-related type"; - assertTrue( - "Error message should contain: '" + expectedMessage + "'", - errorMessage.contains(expectedMessage)); - } - - @Test - public void testBinDefaultOnNonNumericField() { - // Test that default bin (no parameters) throws clear error for non-numeric field - ResponseException exception = - assertThrows( - ResponseException.class, - () -> { - executeQuery(String.format("source=%s | bin email | head 1", TEST_INDEX_ACCOUNT)); - }); - - // Get the full error message - String errorMessage = exception.getMessage(); - - // Verify the error message is clear and specific - String expectedMessage = - "Cannot apply binning: field 'email' is non-numeric and not time-related, expected numeric" - + " or time-related type"; - assertTrue( - "Error message should contain: '" + expectedMessage + "'", - errorMessage.contains(expectedMessage)); - } - - @Test - public void testBinLogSpanOnNonNumericField() { - // Test that bin command with log span throws clear error for non-numeric field - ResponseException exception = - assertThrows( - ResponseException.class, - () -> { - executeQuery( - String.format("source=%s | bin gender span=log10 | head 1", TEST_INDEX_ACCOUNT)); - }); - - // Get the full error message - String errorMessage = exception.getMessage(); - - // Verify the error message is clear and specific - String expectedMessage = - "Cannot apply binning: field 'gender' is non-numeric and not time-related, expected numeric" - + " or time-related type"; - assertTrue( - "Error message should contain: '" + expectedMessage + "'", - errorMessage.contains(expectedMessage)); - } - @Test public void testBinSpanWithStartEndNeverShrinkRange() throws IOException { JSONObject result = diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_aligntime.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_aligntime.yaml index 53cd91e02ca..9a82afe29ec 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_aligntime.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_aligntime.yaml @@ -3,8 +3,8 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(category=[$0], value=[$1], timestamp=[$2], @timestamp=[$9]) LogicalSort(fetch=[5]) - LogicalProject(category=[$1], value=[$2], timestamp=[$3], _id=[$4], _index=[$5], _score=[$6], _maxscore=[$7], _sort=[$8], _routing=[$9], @timestamp=[FROM_UNIXTIME(*(*(FLOOR(/(/(UNIX_TIMESTAMP($0), 3600), 2)), 2), 3600))]) + LogicalProject(category=[$1], value=[$2], timestamp=[$3], _id=[$4], _index=[$5], _score=[$6], _maxscore=[$7], _sort=[$8], _routing=[$9], @timestamp=[FROM_UNIXTIME(*(*(FLOOR(DIVIDE(DIVIDE(UNIX_TIMESTAMP($0), 3600), 2)), 2), 3600))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) physical: | - EnumerableCalc(expr#0..3=[{inputs}], expr#4=[UNIX_TIMESTAMP($t3)], expr#5=[3600], expr#6=[/($t4, $t5)], expr#7=[2], expr#8=[/($t6, $t7)], expr#9=[FLOOR($t8)], expr#10=[*($t9, $t7)], expr#11=[*($t10, $t5)], expr#12=[FROM_UNIXTIME($t11)], proj#0..2=[{exprs}], $f3=[$t12]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[category, value, timestamp, @timestamp], LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["category","value","timestamp","@timestamp"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[UNIX_TIMESTAMP($t3)], expr#5=[3600], expr#6=[DIVIDE($t4, $t5)], expr#7=[2], expr#8=[DIVIDE($t6, $t7)], expr#9=[FLOOR($t8)], expr#10=[*($t9, $t7)], expr#11=[*($t10, $t5)], expr#12=[FROM_UNIXTIME($t11)], proj#0..2=[{exprs}], $f3=[$t12]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[category, value, timestamp, @timestamp], LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["category","value","timestamp","@timestamp"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_bin_aligntime.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_bin_aligntime.yaml index 282a080733e..e8a727cf1e3 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_bin_aligntime.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_bin_aligntime.yaml @@ -3,10 +3,10 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(category=[$0], value=[$1], timestamp=[$2], @timestamp=[$9]) LogicalSort(fetch=[5]) - LogicalProject(category=[$1], value=[$2], timestamp=[$3], _id=[$4], _index=[$5], _score=[$6], _maxscore=[$7], _sort=[$8], _routing=[$9], @timestamp=[FROM_UNIXTIME(*(*(FLOOR(/(/(UNIX_TIMESTAMP($0), 3600), 2)), 2), 3600))]) + LogicalProject(category=[$1], value=[$2], timestamp=[$3], _id=[$4], _index=[$5], _score=[$6], _maxscore=[$7], _sort=[$8], _routing=[$9], @timestamp=[FROM_UNIXTIME(*(*(FLOOR(DIVIDE(DIVIDE(UNIX_TIMESTAMP($0), 3600), 2)), 2), 3600))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) physical: | EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..9=[{inputs}], expr#10=[UNIX_TIMESTAMP($t0)], expr#11=[3600], expr#12=[/($t10, $t11)], expr#13=[2], expr#14=[/($t12, $t13)], expr#15=[FLOOR($t14)], expr#16=[*($t15, $t13)], expr#17=[*($t16, $t11)], expr#18=[FROM_UNIXTIME($t17)], category=[$t1], value=[$t2], timestamp=[$t3], @timestamp=[$t18]) + EnumerableCalc(expr#0..9=[{inputs}], expr#10=[UNIX_TIMESTAMP($t0)], expr#11=[3600], expr#12=[DIVIDE($t10, $t11)], expr#13=[2], expr#14=[DIVIDE($t12, $t13)], expr#15=[FLOOR($t14)], expr#16=[*($t15, $t13)], expr#17=[*($t16, $t11)], expr#18=[FROM_UNIXTIME($t17)], category=[$t1], value=[$t2], timestamp=[$t3], @timestamp=[$t18]) EnumerableLimit(fetch=[5]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) \ No newline at end of file diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4740.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4740.yml new file mode 100644 index 00000000000..5fdb4198abe --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4740.yml @@ -0,0 +1,120 @@ +setup: + - do: + indices.create: + index: test_binning_4740 + body: + mappings: + properties: + "@timestamp": + type: date + "age": + type: keyword + "balance": + type: keyword + "name": + type: keyword + - do: + bulk: + index: test_binning_4740 + refresh: true + body: + - '{"index":{}}' + - '{"@timestamp":"2024-01-01T00:00:00.000Z","age":"25","balance":"1000.0","name":"Alice"}' + - '{"index":{}}' + - '{"@timestamp":"2024-01-01T00:05:00.000Z","age":"30","balance":"2000.0","name":"Bob"}' + - '{"index":{}}' + - '{"@timestamp":"2024-01-01T00:10:00.000Z","age":"35","balance":"3000.0","name":"Charlie"}' + - '{"index":{}}' + - '{"@timestamp":"2024-01-01T00:15:00.000Z","age":"40","balance":"4000.0","name":"David"}' + - '{"index":{}}' + - '{"@timestamp":"2024-01-01T00:20:00.000Z","age":"45","balance":"5000.0","name":"Eve"}' + +--- +"bin with numeric field using WIDTH_BUCKET - issue 4740": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_binning_4740 | bin age bins=3 | stats count() by age | sort age + + - match: { "schema": [ { "name": "count()", "type": "bigint" }, { "name": "age", "type": "string" } ] } + - match: { "datarows": [ [ 1, "20-30" ], [ 2, "30-40" ], [ 2, "40-50" ] ] } + +--- +"bin with numeric span using SPAN_BUCKET - issue 4740": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_binning_4740 | bin age span=10 | stats count() by age | sort age + + - match: { "schema": [ { "name": "count()", "type": "bigint" }, { "name": "age", "type": "string" } ] } + - match: { "datarows": [ [ 1, "20-30" ], [ 2, "30-40" ], [ 2, "40-50" ] ] } + +--- +"bin with minspan using MINSPAN_BUCKET - issue 4740": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_binning_4740 | bin balance minspan=1000 | stats count() by balance | sort balance + + - match: { "schema": [ { "name": "count()", "type": "bigint" }, { "name": "balance", "type": "string" } ] } + - match: { "datarows": [ [ 1, "1000-2000" ], [ 1, "2000-3000" ], [ 1, "3000-4000" ], [ 1, "4000-5000" ], [ 1, "5000-6000" ] ] } + +--- +"bin with start and end using RANGE_BUCKET - issue 4740": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_binning_4740 | bin age start=20 end=50 | stats count() by age | sort age + + - match: { "schema": [ { "name": "count()", "type": "bigint" }, { "name": "age", "type": "string" } ] } + - match: { "datarows": [ [ 1, "20-30" ], [ 2, "30-40" ], [ 2, "40-50" ] ] } + +--- +"bin with default binning (no parameters) on string field - issue 4740": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_binning_4740 | bin balance | stats count() by balance | sort balance + + - match: { "schema": [ { "name": "count()", "type": "bigint" }, { "name": "balance", "type": "string" } ] } + - match: { "datarows": [ [ 1, "1000.0-2000.0" ], [ 1, "2000.0-3000.0" ], [ 1, "3000.0-4000.0" ], [ 1, "4000.0-5000.0" ], [ 1, "5000.0-6000.0" ] ] } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java index ddb22092c99..0d933fe1649 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java @@ -5,6 +5,9 @@ package org.opensearch.sql.ppl.calcite; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; + import org.apache.calcite.rel.RelNode; import org.apache.calcite.test.CalciteAssert; import org.junit.Test; @@ -39,8 +42,7 @@ public void testBinWithBins() { String ppl = "source=EMP | bin SAL bins=10"; RelNode root = getRelNode(ppl); - // Note: WIDTH_BUCKET uses window functions without ROWS UNBOUNDED PRECEDING in the actual - // output + // Note: WIDTH_BUCKET uses window functions and now properly resolves via PPLFuncImpTable verifyLogical( root, "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], " @@ -48,6 +50,15 @@ public void testBinWithBins() { + "-(MAX($5) OVER (), MIN($5) OVER ()), " + "MAX($5) OVER ())])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `COMM`, `DEPTNO`, `WIDTH_BUCKET`(`SAL`," + + " 10, (MAX(`SAL`) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)) -" + + " (MIN(`SAL`) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING))," + + " MAX(`SAL`) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING))" + + " `SAL`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); } @Test @@ -64,6 +75,15 @@ public void testBinWithMinspan() { + "-(MAX($5) OVER (), MIN($5) OVER ()), " + "MAX($5) OVER ())])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `COMM`, `DEPTNO`," + + " `MINSPAN_BUCKET`(`SAL`, 1.000E2, (MAX(`SAL`) OVER (RANGE BETWEEN UNBOUNDED" + + " PRECEDING AND UNBOUNDED FOLLOWING)) - (MIN(`SAL`) OVER (RANGE BETWEEN UNBOUNDED" + + " PRECEDING AND UNBOUNDED FOLLOWING)), MAX(`SAL`) OVER (RANGE BETWEEN UNBOUNDED" + + " PRECEDING AND UNBOUNDED FOLLOWING)) `SAL`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); } @Test @@ -79,6 +99,37 @@ public void testBinWithStartEnd() { + "MIN($5) OVER (), MAX($5) OVER (), " + "1000, 5000)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `COMM`, `DEPTNO`, `RANGE_BUCKET`(`SAL`," + + " MIN(`SAL`) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)," + + " MAX(`SAL`) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), 1000," + + " 5000) `SAL`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testBinWithTimestampFieldUsingBins() { + String ppl = "source=products_temporal | bin SYS_START bins=10"; + RelNode root = getRelNode(ppl); + + // WIDTH_BUCKET with timestamp field + // The third parameter (data_range) is a STRING interval, not numeric + verifyLogical( + root, + "LogicalProject(ID=[$0], SUPPLIER=[$1], SYS_END=[$3], SYS_START=[WIDTH_BUCKET($2, 10, " + + "-(MAX($2) OVER (), MIN($2) OVER ()), " + + "MAX($2) OVER ())])\n" + + " LogicalTableScan(table=[[scott, products_temporal]])\n"); + + String expectedSparkSql = + "SELECT `ID`, `SUPPLIER`, `SYS_END`, `WIDTH_BUCKET`(`SYS_START`, 10, (MAX(`SYS_START`) OVER" + + " (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)) - (MIN(`SYS_START`)" + + " OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)), MAX(`SYS_START`)" + + " OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)) `SYS_START`\n" + + "FROM `scott`.`products_temporal`"; + verifyPPLToSparkSQL(root, expectedSparkSql); } @Test @@ -90,13 +141,15 @@ public void testBinWithTimeSpan() { verifyLogical( root, "LogicalProject(ID=[$0], SUPPLIER=[$1], SYS_END=[$3]," - + " SYS_START=[FROM_UNIXTIME(*(FLOOR(/(/(UNIX_TIMESTAMP($2), 3600), 1)), 3600))])\n" + + " SYS_START=[FROM_UNIXTIME(*(FLOOR(DIVIDE(DIVIDE(UNIX_TIMESTAMP($2), 3600), 1))," + + " 3600))])\n" + " LogicalTableScan(table=[[scott, products_temporal]])\n"); verifyPPLToSparkSQL( root, - "SELECT `ID`, `SUPPLIER`, `SYS_END`, `FROM_UNIXTIME`(FLOOR(`UNIX_TIMESTAMP`(`SYS_START`) /" - + " 3600 / 1) * 3600) `SYS_START`\n" + "SELECT `ID`, `SUPPLIER`, `SYS_END`," + + " `FROM_UNIXTIME`(FLOOR(`DIVIDE`(`DIVIDE`(`UNIX_TIMESTAMP`(`SYS_START`), 3600), 1)) *" + + " 3600) `SYS_START`\n" + "FROM `scott`.`products_temporal`"); } @@ -110,43 +163,26 @@ public void testBinWithAligntime() { verifyLogical( root, "LogicalProject(ID=[$0], SUPPLIER=[$1], SYS_END=[$3]," - + " SYS_START=[FROM_UNIXTIME(*(FLOOR(/(/(UNIX_TIMESTAMP($2), 3600), 1)), 3600))])\n" + + " SYS_START=[FROM_UNIXTIME(*(FLOOR(DIVIDE(DIVIDE(UNIX_TIMESTAMP($2), 3600), 1))," + + " 3600))])\n" + " LogicalTableScan(table=[[scott, products_temporal]])\n"); verifyPPLToSparkSQL( root, - "SELECT `ID`, `SUPPLIER`, `SYS_END`, `FROM_UNIXTIME`(FLOOR(`UNIX_TIMESTAMP`(`SYS_START`) /" - + " 3600 / 1) * 3600) `SYS_START`\n" + "SELECT `ID`, `SUPPLIER`, `SYS_END`," + + " `FROM_UNIXTIME`(FLOOR(`DIVIDE`(`DIVIDE`(`UNIX_TIMESTAMP`(`SYS_START`), 3600), 1)) *" + + " 3600) `SYS_START`\n" + "FROM `scott`.`products_temporal`"); } - @Test(expected = SemanticCheckException.class) - public void testBinWithMinspanOnNonNumericField() { - String ppl = "source=EMP | bin ENAME minspan=10"; - getRelNode(ppl); // Should throw SemanticCheckException - } - - @Test(expected = SemanticCheckException.class) - public void testBinWithSpanOnNonNumericField() { - String ppl = "source=EMP | bin JOB span=5"; - getRelNode(ppl); // Should throw SemanticCheckException - } - - @Test(expected = SemanticCheckException.class) - public void testBinWithBinsOnNonNumericField() { - String ppl = "source=EMP | bin ENAME bins=10"; - getRelNode(ppl); // Should throw SemanticCheckException - } - - @Test(expected = SemanticCheckException.class) - public void testBinWithStartEndOnNonNumericField() { - String ppl = "source=EMP | bin JOB start=1 end=10"; - getRelNode(ppl); // Should throw SemanticCheckException - } - - @Test(expected = SemanticCheckException.class) - public void testBinDefaultOnNonNumericField() { - String ppl = "source=EMP | bin ENAME"; - getRelNode(ppl); // Should throw SemanticCheckException + @Test + public void testBinOnNonBinnableType() { + // Test that binning on truly unsupported types (not numeric, time, or string) fails + String ppl = "source=products_temporal | eval bool_field = true | bin bool_field bins=3"; + + SemanticCheckException exception = + assertThrows(SemanticCheckException.class, () -> getRelNode(ppl)); + assertTrue(exception.getMessage().contains("Cannot apply binning")); + assertTrue(exception.getMessage().contains("unsupported type")); } }