Skip to content

Commit d0ddb19

Browse files
committed
update support for type coercion
Signed-off-by: Kai Huang <ahkcs@amazon.com>
1 parent a93dcf8 commit d0ddb19

File tree

6 files changed

+18
-232
lines changed

6 files changed

+18
-232
lines changed

core/src/main/java/org/opensearch/sql/calcite/utils/binning/BinnableField.java

Lines changed: 4 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,10 @@
99
import org.apache.calcite.rel.type.RelDataType;
1010
import org.apache.calcite.rex.RexNode;
1111
import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory;
12-
import org.opensearch.sql.exception.SemanticCheckException;
1312

1413
/**
15-
* Represents a validated field that supports binning operations. The existence of this class
16-
* guarantees that validation has been run - the field is either numeric or time-based.
17-
*
18-
* <p>This design encodes validation in the type system, preventing downstream code from forgetting
19-
* to validate or running validation multiple times.
14+
* Represents a field that supports binning operations. Supports numeric, time-based, and string
15+
* fields. Type coercion for string fields is handled automatically by Calcite's type system.
2016
*/
2117
@Getter
2218
public class BinnableField {
@@ -27,13 +23,12 @@ public class BinnableField {
2723
private final boolean isNumeric;
2824

2925
/**
30-
* Creates a validated BinnableField. Throws SemanticCheckException if the field is neither
31-
* numeric nor time-based.
26+
* Creates a BinnableField for binning operations. Supports numeric, time-based, and string
27+
* fields. Type coercion for string fields is handled by Calcite's type system.
3228
*
3329
* @param fieldExpr The Rex expression for the field
3430
* @param fieldType The relational data type of the field
3531
* @param fieldName The name of the field (for error messages)
36-
* @throws SemanticCheckException if the field is neither numeric nor time-based
3732
*/
3833
public BinnableField(RexNode fieldExpr, RelDataType fieldType, String fieldName) {
3934
this.fieldExpr = fieldExpr;
@@ -42,15 +37,6 @@ public BinnableField(RexNode fieldExpr, RelDataType fieldType, String fieldName)
4237

4338
this.isTimeBased = OpenSearchTypeFactory.isTimeBasedType(fieldType);
4439
this.isNumeric = OpenSearchTypeFactory.isNumericType(fieldType);
45-
46-
// Validation: field must be either numeric or time-based
47-
if (!isNumeric && !isTimeBased) {
48-
throw new SemanticCheckException(
49-
String.format(
50-
"Cannot apply binning: field '%s' is non-numeric and not time-related, expected"
51-
+ " numeric or time-related type",
52-
fieldName));
53-
}
5440
}
5541

5642
/**

core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/CountBinHandler.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
package org.opensearch.sql.calcite.utils.binning.handlers;
77

88
import org.apache.calcite.rex.RexNode;
9-
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
109
import org.opensearch.sql.ast.expression.Literal;
1110
import org.opensearch.sql.ast.tree.Bin;
1211
import org.opensearch.sql.ast.tree.CountBin;
@@ -41,7 +40,9 @@ public RexNode createExpression(
4140
// Calculate data range using window functions
4241
RexNode minValue = context.relBuilder.min(fieldExpr).over().toRex();
4342
RexNode maxValue = context.relBuilder.max(fieldExpr).over().toRex();
44-
RexNode dataRange = context.relBuilder.call(SqlStdOperatorTable.MINUS, maxValue, minValue);
43+
RexNode dataRange =
44+
PPLFuncImpTable.INSTANCE.resolve(
45+
context.rexBuilder, BuiltinFunctionName.SUBTRACT, maxValue, minValue);
4546

4647
// Convert start/end parameters
4748
RexNode startValue = convertParameter(countBin.getStart(), context);

core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -711,6 +711,11 @@ void populate() {
711711
SUBTRACT,
712712
SqlStdOperatorTable.MINUS,
713713
PPLTypeChecker.wrapFamily((FamilyOperandTypeChecker) OperandTypes.NUMERIC_NUMERIC));
714+
// Add DATETIME-DATETIME variant for timestamp binning support
715+
registerOperator(
716+
SUBTRACT,
717+
SqlStdOperatorTable.MINUS,
718+
PPLTypeChecker.family(SqlTypeFamily.DATETIME, SqlTypeFamily.DATETIME));
714719
registerOperator(MULTIPLY, SqlStdOperatorTable.MULTIPLY);
715720
registerOperator(MULTIPLYFUNCTION, SqlStdOperatorTable.MULTIPLY);
716721
registerOperator(TRUNCATE, SqlStdOperatorTable.TRUNCATE);

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java

Lines changed: 0 additions & 139 deletions
Original file line numberDiff line numberDiff line change
@@ -507,145 +507,6 @@ public void testBinWithNonExistentField() {
507507
errorMessage.contains("non_existent_field") || errorMessage.contains("not found"));
508508
}
509509

510-
@Test
511-
public void testBinWithMinspanOnNonNumericField() {
512-
// Test that bin command with minspan throws clear error for non-numeric field
513-
ResponseException exception =
514-
assertThrows(
515-
ResponseException.class,
516-
() -> {
517-
executeQuery(
518-
String.format(
519-
"source=%s | bin firstname minspan=10 | head 1", TEST_INDEX_ACCOUNT));
520-
});
521-
522-
// Get the full error message
523-
String errorMessage = exception.getMessage();
524-
525-
// Verify the error message is clear and specific
526-
String expectedMessage =
527-
"Cannot apply binning: field 'firstname' is non-numeric and not time-related, expected"
528-
+ " numeric or time-related type";
529-
assertTrue(
530-
"Error message should contain: '" + expectedMessage + "'",
531-
errorMessage.contains(expectedMessage));
532-
}
533-
534-
@Test
535-
public void testBinWithSpanOnNonNumericField() {
536-
// Test that bin command with span throws clear error for non-numeric field
537-
ResponseException exception =
538-
assertThrows(
539-
ResponseException.class,
540-
() -> {
541-
executeQuery(
542-
String.format("source=%s | bin lastname span=5 | head 1", TEST_INDEX_ACCOUNT));
543-
});
544-
545-
// Get the full error message
546-
String errorMessage = exception.getMessage();
547-
548-
// Verify the error message is clear and specific
549-
String expectedMessage =
550-
"Cannot apply binning: field 'lastname' is non-numeric and not time-related, expected"
551-
+ " numeric or time-related type";
552-
assertTrue(
553-
"Error message should contain: '" + expectedMessage + "'",
554-
errorMessage.contains(expectedMessage));
555-
}
556-
557-
@Test
558-
public void testBinWithBinsOnNonNumericField() {
559-
// Test that bin command with bins throws clear error for non-numeric field
560-
ResponseException exception =
561-
assertThrows(
562-
ResponseException.class,
563-
() -> {
564-
executeQuery(
565-
String.format("source=%s | bin state bins=10 | head 1", TEST_INDEX_ACCOUNT));
566-
});
567-
568-
// Get the full error message
569-
String errorMessage = exception.getMessage();
570-
571-
// Verify the error message is clear and specific
572-
String expectedMessage =
573-
"Cannot apply binning: field 'state' is non-numeric and not time-related, expected numeric"
574-
+ " or time-related type";
575-
assertTrue(
576-
"Error message should contain: '" + expectedMessage + "'",
577-
errorMessage.contains(expectedMessage));
578-
}
579-
580-
@Test
581-
public void testBinWithStartEndOnNonNumericField() {
582-
// Test that bin command with start/end throws clear error for non-numeric field
583-
ResponseException exception =
584-
assertThrows(
585-
ResponseException.class,
586-
() -> {
587-
executeQuery(
588-
String.format(
589-
"source=%s | bin city start=0 end=100 | head 1", TEST_INDEX_ACCOUNT));
590-
});
591-
592-
// Get the full error message
593-
String errorMessage = exception.getMessage();
594-
595-
// Verify the error message is clear and specific
596-
String expectedMessage =
597-
"Cannot apply binning: field 'city' is non-numeric and not time-related, expected numeric"
598-
+ " or time-related type";
599-
assertTrue(
600-
"Error message should contain: '" + expectedMessage + "'",
601-
errorMessage.contains(expectedMessage));
602-
}
603-
604-
@Test
605-
public void testBinDefaultOnNonNumericField() {
606-
// Test that default bin (no parameters) throws clear error for non-numeric field
607-
ResponseException exception =
608-
assertThrows(
609-
ResponseException.class,
610-
() -> {
611-
executeQuery(String.format("source=%s | bin email | head 1", TEST_INDEX_ACCOUNT));
612-
});
613-
614-
// Get the full error message
615-
String errorMessage = exception.getMessage();
616-
617-
// Verify the error message is clear and specific
618-
String expectedMessage =
619-
"Cannot apply binning: field 'email' is non-numeric and not time-related, expected numeric"
620-
+ " or time-related type";
621-
assertTrue(
622-
"Error message should contain: '" + expectedMessage + "'",
623-
errorMessage.contains(expectedMessage));
624-
}
625-
626-
@Test
627-
public void testBinLogSpanOnNonNumericField() {
628-
// Test that bin command with log span throws clear error for non-numeric field
629-
ResponseException exception =
630-
assertThrows(
631-
ResponseException.class,
632-
() -> {
633-
executeQuery(
634-
String.format("source=%s | bin gender span=log10 | head 1", TEST_INDEX_ACCOUNT));
635-
});
636-
637-
// Get the full error message
638-
String errorMessage = exception.getMessage();
639-
640-
// Verify the error message is clear and specific
641-
String expectedMessage =
642-
"Cannot apply binning: field 'gender' is non-numeric and not time-related, expected numeric"
643-
+ " or time-related type";
644-
assertTrue(
645-
"Error message should contain: '" + expectedMessage + "'",
646-
errorMessage.contains(expectedMessage));
647-
}
648-
649510
@Test
650511
public void testBinSpanWithStartEndNeverShrinkRange() throws IOException {
651512
JSONObject result =

integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4740.yml

Lines changed: 6 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ setup:
88
"@timestamp":
99
type: date
1010
"age":
11-
type: integer
11+
type: keyword
1212
"balance":
1313
type: double
1414
"name":
@@ -19,15 +19,15 @@ setup:
1919
refresh: true
2020
body:
2121
- '{"index":{}}'
22-
- '{"@timestamp":"2024-01-01T00:00:00.000Z","age":25,"balance":1000.0,"name":"Alice"}'
22+
- '{"@timestamp":"2024-01-01T00:00:00.000Z","age":"25","balance":1000.0,"name":"Alice"}'
2323
- '{"index":{}}'
24-
- '{"@timestamp":"2024-01-01T00:05:00.000Z","age":30,"balance":2000.0,"name":"Bob"}'
24+
- '{"@timestamp":"2024-01-01T00:05:00.000Z","age":"30","balance":2000.0,"name":"Bob"}'
2525
- '{"index":{}}'
26-
- '{"@timestamp":"2024-01-01T00:10:00.000Z","age":35,"balance":3000.0,"name":"Charlie"}'
26+
- '{"@timestamp":"2024-01-01T00:10:00.000Z","age":"35","balance":3000.0,"name":"Charlie"}'
2727
- '{"index":{}}'
28-
- '{"@timestamp":"2024-01-01T00:15:00.000Z","age":40,"balance":4000.0,"name":"David"}'
28+
- '{"@timestamp":"2024-01-01T00:15:00.000Z","age":"40","balance":4000.0,"name":"David"}'
2929
- '{"index":{}}'
30-
- '{"@timestamp":"2024-01-01T00:20:00.000Z","age":45,"balance":5000.0,"name":"Eve"}'
30+
- '{"@timestamp":"2024-01-01T00:20:00.000Z","age":"45","balance":5000.0,"name":"Eve"}'
3131

3232
---
3333
"bin with numeric field using WIDTH_BUCKET - issue 4740":
@@ -47,24 +47,6 @@ setup:
4747
- match: { "schema": [ { "name": "count()", "type": "bigint" }, { "name": "age", "type": "string" } ] }
4848
- match: { "datarows": [ [ 1, "20-30" ], [ 2, "30-40" ], [ 2, "40-50" ] ] }
4949

50-
---
51-
"bin with timestamp field using WIDTH_BUCKET - issue 4740":
52-
- skip:
53-
features:
54-
- headers
55-
- allowed_warnings
56-
- do:
57-
allowed_warnings:
58-
- 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled'
59-
headers:
60-
Content-Type: 'application/json'
61-
ppl:
62-
body:
63-
query: source=test_binning_4740 | bin @timestamp bins=3 | stats count() by @timestamp | sort @timestamp
64-
65-
- match: { "schema": [ { "name": "count()", "type": "bigint" }, { "name": "@timestamp", "type": "timestamp" } ] }
66-
- match: { "datarows": [ [ 2, "2024-01-01 00:00:00" ], [ 2, "2024-01-01 00:10:00" ], [ 1, "2024-01-01 00:20:00" ] ] }
67-
6850
---
6951
"bin with numeric span using SPAN_BUCKET - issue 4740":
7052
- skip:
@@ -83,24 +65,6 @@ setup:
8365
- match: { "schema": [ { "name": "count()", "type": "bigint" }, { "name": "age", "type": "string" } ] }
8466
- match: { "datarows": [ [ 1, "20-30" ], [ 2, "30-40" ], [ 2, "40-50" ] ] }
8567

86-
---
87-
"bin with timestamp span using SPAN_BUCKET - issue 4740":
88-
- skip:
89-
features:
90-
- headers
91-
- allowed_warnings
92-
- do:
93-
allowed_warnings:
94-
- 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled'
95-
headers:
96-
Content-Type: 'application/json'
97-
ppl:
98-
body:
99-
query: source=test_binning_4740 | bin @timestamp span=10m | stats count() by @timestamp | sort @timestamp
100-
101-
- match: { "schema": [ { "name": "count()", "type": "bigint" }, { "name": "@timestamp", "type": "timestamp" } ] }
102-
- match: { "datarows": [ [ 2, "2024-01-01 00:00:00" ], [ 2, "2024-01-01 00:10:00" ], [ 1, "2024-01-01 00:20:00" ] ] }
103-
10468
---
10569
"bin with minspan using MINSPAN_BUCKET - issue 4740":
10670
- skip:

ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
import org.apache.calcite.rel.RelNode;
99
import org.apache.calcite.test.CalciteAssert;
1010
import org.junit.Test;
11-
import org.opensearch.sql.exception.SemanticCheckException;
1211

1312
public class CalcitePPLBinTest extends CalcitePPLAbstractTest {
1413

@@ -167,34 +166,4 @@ public void testBinWithAligntime() {
167166
+ " 3600 / 1) * 3600) `SYS_START`\n"
168167
+ "FROM `scott`.`products_temporal`");
169168
}
170-
171-
@Test(expected = SemanticCheckException.class)
172-
public void testBinWithMinspanOnNonNumericField() {
173-
String ppl = "source=EMP | bin ENAME minspan=10";
174-
getRelNode(ppl); // Should throw SemanticCheckException
175-
}
176-
177-
@Test(expected = SemanticCheckException.class)
178-
public void testBinWithSpanOnNonNumericField() {
179-
String ppl = "source=EMP | bin JOB span=5";
180-
getRelNode(ppl); // Should throw SemanticCheckException
181-
}
182-
183-
@Test(expected = SemanticCheckException.class)
184-
public void testBinWithBinsOnNonNumericField() {
185-
String ppl = "source=EMP | bin ENAME bins=10";
186-
getRelNode(ppl); // Should throw SemanticCheckException
187-
}
188-
189-
@Test(expected = SemanticCheckException.class)
190-
public void testBinWithStartEndOnNonNumericField() {
191-
String ppl = "source=EMP | bin JOB start=1 end=10";
192-
getRelNode(ppl); // Should throw SemanticCheckException
193-
}
194-
195-
@Test(expected = SemanticCheckException.class)
196-
public void testBinDefaultOnNonNumericField() {
197-
String ppl = "source=EMP | bin ENAME";
198-
getRelNode(ppl); // Should throw SemanticCheckException
199-
}
200169
}

0 commit comments

Comments
 (0)