Skip to content

Commit ed6d208

Browse files
committed
enhance error handling and doc
Signed-off-by: Kai Huang <ahkcs@amazon.com>
1 parent 5f5c9e9 commit ed6d208

File tree

9 files changed

+89
-18
lines changed

9 files changed

+89
-18
lines changed

core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -338,12 +338,13 @@ public static boolean isUserDefinedType(RelDataType type) {
338338
}
339339

340340
/**
341-
* Checks if the RelDataType represents a numeric field. Supports both standard SQL numeric types
342-
* (INTEGER, BIGINT, SMALLINT, TINYINT, FLOAT, DOUBLE, DECIMAL, REAL) and OpenSearch UDT numeric
343-
* types.
341+
* Checks if the RelDataType represents a numeric field or a string field that can be coerced to
342+
* numeric. Supports standard SQL numeric types (INTEGER, BIGINT, SMALLINT, TINYINT, FLOAT,
343+
* DOUBLE, DECIMAL, REAL), OpenSearch UDT numeric types, and string types (VARCHAR, CHAR) which
344+
* can contain numeric values and will be automatically coerced.
344345
*
345346
* @param fieldType the RelDataType to check
346-
* @return true if the type is numeric, false otherwise
347+
* @return true if the type is numeric or a coercible string type, false otherwise
347348
*/
348349
public static boolean isNumericType(RelDataType fieldType) {
349350
// Check standard SQL numeric types
@@ -359,6 +360,11 @@ public static boolean isNumericType(RelDataType fieldType) {
359360
return true;
360361
}
361362

363+
// Check string types (VARCHAR, CHAR) which can be coerced to numeric for binning
364+
if (sqlType == SqlTypeName.VARCHAR || sqlType == SqlTypeName.CHAR) {
365+
return true;
366+
}
367+
362368
// Check for OpenSearch UDT numeric types
363369
if (isUserDefinedType(fieldType)) {
364370
AbstractExprRelDataType<?> exprType = (AbstractExprRelDataType<?>) fieldType;

core/src/main/java/org/opensearch/sql/calcite/utils/binning/BinnableField.java

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,9 @@
99
import org.apache.calcite.rel.type.RelDataType;
1010
import org.apache.calcite.rex.RexNode;
1111
import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory;
12+
import org.opensearch.sql.exception.SemanticCheckException;
1213

13-
/**
14-
* Represents a field that supports binning operations. Supports numeric, time-based, and string
15-
* fields. Type coercion for string fields is handled automatically by Calcite's type system.
16-
*/
14+
/** Represents a field that supports binning operations. */
1715
@Getter
1816
public class BinnableField {
1917
private final RexNode fieldExpr;
@@ -23,12 +21,12 @@ public class BinnableField {
2321
private final boolean isNumeric;
2422

2523
/**
26-
* Creates a BinnableField for binning operations. Supports numeric, time-based, and string
27-
* fields. Type coercion for string fields is handled by Calcite's type system.
24+
* Creates a BinnableField. Validates that the field type is compatible with binning operations.
2825
*
2926
* @param fieldExpr The Rex expression for the field
3027
* @param fieldType The relational data type of the field
3128
* @param fieldName The name of the field (for error messages)
29+
* @throws SemanticCheckException if the field type is not supported for binning
3230
*/
3331
public BinnableField(RexNode fieldExpr, RelDataType fieldType, String fieldName) {
3432
this.fieldExpr = fieldExpr;
@@ -37,6 +35,12 @@ public BinnableField(RexNode fieldExpr, RelDataType fieldType, String fieldName)
3735

3836
this.isTimeBased = OpenSearchTypeFactory.isTimeBasedType(fieldType);
3937
this.isNumeric = OpenSearchTypeFactory.isNumericType(fieldType);
38+
39+
// Reject truly unsupported types (e.g., BOOLEAN, ARRAY, MAP)
40+
if (!isNumeric && !isTimeBased) {
41+
throw new SemanticCheckException(
42+
String.format("Cannot apply binning to field '%s': unsupported type", fieldName));
43+
}
4044
}
4145

4246
/**

core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,14 @@ public Expression implement(
7373
/** Minspan bucket calculation. */
7474
public static String calculateMinspanBucket(
7575
Number fieldValue, Number minSpanParam, Number dataRange, Number maxValue) {
76-
if (fieldValue == null || minSpanParam == null || dataRange == null || maxValue == null) {
76+
// Detect NULL from failed type coercion (e.g., CAST("abc" AS DOUBLE) returns NULL)
77+
if (dataRange == null || maxValue == null) {
78+
throw new IllegalArgumentException(
79+
"Cannot apply binning: field contains non-numeric string values. "
80+
+ "Only numeric types or string types with valid numeric values are supported.");
81+
}
82+
83+
if (fieldValue == null || minSpanParam == null) {
7784
return null;
7885
}
7986

core/src/main/java/org/opensearch/sql/expression/function/udf/binning/RangeBucketFunction.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,14 @@ public Expression implement(
7979
/** Range bucket calculation with expansion algorithm and magnitude-based width. */
8080
public static String calculateRangeBucket(
8181
Number fieldValue, Number dataMin, Number dataMax, Number startParam, Number endParam) {
82-
if (fieldValue == null || dataMin == null || dataMax == null) {
82+
// Detect NULL from failed type coercion (e.g., CAST("abc" AS DOUBLE) returns NULL)
83+
if (dataMin == null || dataMax == null) {
84+
throw new IllegalArgumentException(
85+
"Cannot apply binning: field contains non-numeric string values. "
86+
+ "Only numeric types or string types with valid numeric values are supported.");
87+
}
88+
89+
if (fieldValue == null) {
8390
return null;
8491
}
8592

core/src/main/java/org/opensearch/sql/expression/function/udf/binning/SpanBucketFunction.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,14 @@ public Expression implement(
6666

6767
/** Span bucket calculation. */
6868
public static String calculateSpanBucket(Number fieldValue, Number spanParam) {
69-
if (fieldValue == null || spanParam == null) {
69+
// Detect NULL from failed type coercion (e.g., CAST("abc" AS DOUBLE) returns NULL)
70+
if (fieldValue == null) {
71+
throw new IllegalArgumentException(
72+
"Cannot apply binning: field contains non-numeric string values. "
73+
+ "Only numeric types or string types with valid numeric values are supported.");
74+
}
75+
76+
if (spanParam == null) {
7077
return null;
7178
}
7279

core/src/main/java/org/opensearch/sql/expression/function/udf/binning/WidthBucketFunction.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,14 @@ public Expression implement(
9191
/** Width bucket calculation using nice number algorithm. */
9292
public static String calculateWidthBucket(
9393
Number fieldValue, Number numBinsParam, Number dataRange, Number maxValue) {
94-
if (fieldValue == null || numBinsParam == null || dataRange == null || maxValue == null) {
94+
// Detect NULL from failed type coercion (e.g., CAST("abc" AS DOUBLE) returns NULL)
95+
if (dataRange == null || maxValue == null) {
96+
throw new IllegalArgumentException(
97+
"Cannot apply binning: field contains non-numeric string values. "
98+
+ "Only numeric types or string types with valid numeric values are supported.");
99+
}
100+
101+
if (fieldValue == null || numBinsParam == null) {
95102
return null;
96103
}
97104

docs/user/ppl/cmd/bin.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,13 @@ bin
1616

1717
Description
1818
============
19-
| The ``bin`` command groups numeric values into buckets of equal intervals, making it useful for creating histograms and analyzing data distribution. It takes a numeric or time-based field and generates a new field with values that represent the lower bound of each bucket. String fields containing numeric values are automatically converted to numeric types through type coercion.
19+
| The ``bin`` command groups numeric values into buckets of equal intervals, making it useful for creating histograms and analyzing data distribution. It takes a numeric or time-based field and generates a new field with values that represent the lower bound of each bucket. String fields containing valid numeric values (e.g., "25", "30.5") are automatically converted to numeric types through type coercion.
2020
2121
Syntax
2222
============
2323
bin <field> [span=<interval>] [minspan=<interval>] [bins=<count>] [aligntime=(earliest | latest | <time-specifier>)] [start=<value>] [end=<value>]
2424

25-
* field: mandatory. The field to bin. Accepts numeric fields, time-based fields, or string fields containing numeric values (automatically coerced to numeric type).
25+
* field: mandatory. The field to bin. Accepts numeric fields, time-based fields, or string fields containing valid numeric values (e.g., "25", "30.5"). String fields with numeric values are automatically coerced to numeric type. Non-numeric strings will result in an error.
2626
* span: optional. The interval size for each bin. Cannot be used with bins or minspan parameters.
2727
* minspan: optional. The minimum interval size for automatic span calculation. Cannot be used with span or bins parameters.
2828
* bins: optional. The maximum number of equal-width bins to create. Cannot be used with span or minspan parameters.
@@ -534,7 +534,7 @@ PPL query::
534534
Example 20: Type coercion with string fields
535535
==============================================
536536

537-
The ``bin`` command automatically converts string fields containing numeric values to numeric types, enabling binning operations on keyword/text fields.
537+
The ``bin`` command automatically converts string fields containing valid numeric values to numeric types, enabling binning operations on keyword/text fields. Only strings that represent valid numbers (e.g., "25", "30.5", "100") are supported. Non-numeric strings will cause an error.
538538

539539
PPL query::
540540

@@ -547,5 +547,5 @@ PPL query::
547547
| 3 | 30-40 |
548548
+---------+---------+
549549

550-
In this example, even though ``age_str`` is a string field (keyword type), the bin command automatically coerces it to numeric values for binning.
550+
In this example, even though ``age_str`` is a string field (keyword type) containing numeric values like "28", "32", etc., the bin command automatically coerces these numeric strings to numeric values for binning. If ``age_str`` contained non-numeric strings like "young" or "old", the query would fail.
551551

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
package org.opensearch.sql.calcite.remote;
77

88
import static org.junit.Assert.assertTrue;
9+
import static org.junit.jupiter.api.Assertions.assertThrows;
910
import static org.opensearch.sql.legacy.TestsConstants.*;
1011
import static org.opensearch.sql.util.MatcherUtils.rows;
1112
import static org.opensearch.sql.util.MatcherUtils.schema;
@@ -1067,4 +1068,21 @@ public void testBinWithEvalCreatedDottedFieldName() throws IOException {
10671068
rows(false, "go", "opentelemetry", 16, 1, "12-14"),
10681069
rows(true, "rust", "opentelemetry", 12, 1, "14-16"));
10691070
}
1071+
1072+
@Test
1073+
public void testBinOnNonNumericStringField() {
1074+
// Test that binning on a string field with non-numeric values fails with clear error
1075+
ResponseException exception =
1076+
assertThrows(
1077+
ResponseException.class,
1078+
() -> {
1079+
executeQuery(
1080+
String.format("source=%s | bin firstname bins=3 | head 1", TEST_INDEX_ACCOUNT));
1081+
});
1082+
1083+
String errorMessage = exception.getMessage();
1084+
assertTrue(
1085+
"Error should indicate non-numeric string values",
1086+
errorMessage.contains("non-numeric string values"));
1087+
}
10701088
}

ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,13 @@
55

66
package org.opensearch.sql.ppl.calcite;
77

8+
import static org.junit.Assert.assertThrows;
9+
import static org.junit.Assert.assertTrue;
10+
811
import org.apache.calcite.rel.RelNode;
912
import org.apache.calcite.test.CalciteAssert;
1013
import org.junit.Test;
14+
import org.opensearch.sql.exception.SemanticCheckException;
1115

1216
public class CalcitePPLBinTest extends CalcitePPLAbstractTest {
1317

@@ -166,4 +170,15 @@ public void testBinWithAligntime() {
166170
+ " 3600 / 1) * 3600) `SYS_START`\n"
167171
+ "FROM `scott`.`products_temporal`");
168172
}
173+
174+
@Test
175+
public void testBinOnNonBinnableType() {
176+
// Test that binning on truly unsupported types (not numeric, time, or string) fails
177+
String ppl = "source=products_temporal | eval bool_field = true | bin bool_field bins=3";
178+
179+
SemanticCheckException exception =
180+
assertThrows(SemanticCheckException.class, () -> getRelNode(ppl));
181+
assertTrue(exception.getMessage().contains("Cannot apply binning"));
182+
assertTrue(exception.getMessage().contains("unsupported type"));
183+
}
169184
}

0 commit comments

Comments
 (0)