Skip to content

Commit 57a99e4

Browse files
leoyvensalamb
andauthored
Coerce and simplify FixedSizeBinary equality to literal binary (#15726)
* coerce FixedSizeBinary to Binary * simplify FixedSizeBytes equality to literal * fix clippy * remove redundant ExprSimplifier case * Add explain test to make sure unwrapping is working correctly --------- Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
1 parent a4d494c commit 57a99e4

File tree

4 files changed

+100
-1
lines changed

4 files changed

+100
-1
lines changed

datafusion/expr-common/src/type_coercion/binary.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1297,6 +1297,10 @@ fn binary_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType>
12971297
Some(LargeBinary)
12981298
}
12991299
(Binary, Utf8) | (Utf8, Binary) => Some(Binary),
1300+
1301+
// Cast FixedSizeBinary to Binary
1302+
(FixedSizeBinary(_), Binary) | (Binary, FixedSizeBinary(_)) => Some(Binary),
1303+
13001304
_ => None,
13011305
}
13021306
}

datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3310,6 +3310,15 @@ mod tests {
33103310
simplifier.simplify(expr)
33113311
}
33123312

3313+
fn coerce(expr: Expr) -> Expr {
3314+
let schema = expr_test_schema();
3315+
let execution_props = ExecutionProps::new();
3316+
let simplifier = ExprSimplifier::new(
3317+
SimplifyContext::new(&execution_props).with_schema(Arc::clone(&schema)),
3318+
);
3319+
simplifier.coerce(expr, schema.as_ref()).unwrap()
3320+
}
3321+
33133322
fn simplify(expr: Expr) -> Expr {
33143323
try_simplify(expr).unwrap()
33153324
}
@@ -3352,6 +3361,7 @@ mod tests {
33523361
Field::new("c2_non_null", DataType::Boolean, false),
33533362
Field::new("c3_non_null", DataType::Int64, false),
33543363
Field::new("c4_non_null", DataType::UInt32, false),
3364+
Field::new("c5", DataType::FixedSizeBinary(3), true),
33553365
]
33563366
.into(),
33573367
HashMap::new(),
@@ -4475,6 +4485,34 @@ mod tests {
44754485
}
44764486
}
44774487

4488+
#[test]
4489+
fn simplify_fixed_size_binary_eq_lit() {
4490+
let bytes = [1u8, 2, 3].as_slice();
4491+
4492+
// The expression starts simple.
4493+
let expr = col("c5").eq(lit(bytes));
4494+
4495+
// The type coercer introduces a cast.
4496+
let coerced = coerce(expr.clone());
4497+
let schema = expr_test_schema();
4498+
assert_eq!(
4499+
coerced,
4500+
col("c5")
4501+
.cast_to(&DataType::Binary, schema.as_ref())
4502+
.unwrap()
4503+
.eq(lit(bytes))
4504+
);
4505+
4506+
// The simplifier removes the cast.
4507+
assert_eq!(
4508+
simplify(coerced),
4509+
col("c5").eq(Expr::Literal(ScalarValue::FixedSizeBinary(
4510+
3,
4511+
Some(bytes.to_vec()),
4512+
)))
4513+
);
4514+
}
4515+
44784516
fn if_not_null(expr: Expr, then: bool) -> Expr {
44794517
Expr::Case(Case {
44804518
expr: Some(expr.is_not_null().into()),

datafusion/optimizer/src/simplify_expressions/unwrap_cast.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ fn is_supported_type(data_type: &DataType) -> bool {
197197
is_supported_numeric_type(data_type)
198198
|| is_supported_string_type(data_type)
199199
|| is_supported_dictionary_type(data_type)
200+
|| is_supported_binary_type(data_type)
200201
}
201202

202203
/// Returns true if unwrap_cast_in_comparison support this numeric type
@@ -230,6 +231,10 @@ fn is_supported_dictionary_type(data_type: &DataType) -> bool {
230231
DataType::Dictionary(_, inner) if is_supported_type(inner))
231232
}
232233

234+
fn is_supported_binary_type(data_type: &DataType) -> bool {
235+
matches!(data_type, DataType::Binary | DataType::FixedSizeBinary(_))
236+
}
237+
233238
///// Tries to move a cast from an expression (such as column) to the literal other side of a comparison operator./
234239
///
235240
/// Specifically, rewrites
@@ -292,6 +297,7 @@ pub(super) fn try_cast_literal_to_type(
292297
try_cast_numeric_literal(lit_value, target_type)
293298
.or_else(|| try_cast_string_literal(lit_value, target_type))
294299
.or_else(|| try_cast_dictionary(lit_value, target_type))
300+
.or_else(|| try_cast_binary(lit_value, target_type))
295301
}
296302

297303
/// Convert a numeric value from one numeric data type to another
@@ -501,6 +507,20 @@ fn cast_between_timestamp(from: &DataType, to: &DataType, value: i128) -> Option
501507
}
502508
}
503509

510+
fn try_cast_binary(
511+
lit_value: &ScalarValue,
512+
target_type: &DataType,
513+
) -> Option<ScalarValue> {
514+
match (lit_value, target_type) {
515+
(ScalarValue::Binary(Some(v)), DataType::FixedSizeBinary(n))
516+
if v.len() == *n as usize =>
517+
{
518+
Some(ScalarValue::FixedSizeBinary(*n, Some(v.clone())))
519+
}
520+
_ => None,
521+
}
522+
}
523+
504524
#[cfg(test)]
505525
mod tests {
506526
use super::*;
@@ -1450,4 +1470,13 @@ mod tests {
14501470
)
14511471
}
14521472
}
1473+
1474+
#[test]
1475+
fn try_cast_to_fixed_size_binary() {
1476+
expect_cast(
1477+
ScalarValue::Binary(Some(vec![1, 2, 3])),
1478+
DataType::FixedSizeBinary(3),
1479+
ExpectedCast::Value(ScalarValue::FixedSizeBinary(3, Some(vec![1, 2, 3]))),
1480+
)
1481+
}
14531482
}

datafusion/sqllogictest/test_files/binary.slt

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,8 +147,36 @@ query error DataFusion error: Error during planning: Cannot infer common argumen
147147
SELECT column1, column1 = arrow_cast(X'0102', 'FixedSizeBinary(2)') FROM t
148148

149149
# Comparison to different sized Binary
150-
query error DataFusion error: Error during planning: Cannot infer common argument type for comparison operation FixedSizeBinary\(3\) = Binary
150+
query ?B
151151
SELECT column1, column1 = X'0102' FROM t
152+
----
153+
000102 false
154+
003102 false
155+
NULL NULL
156+
ff0102 false
157+
000102 false
158+
159+
query ?B
160+
SELECT column1, column1 = X'000102' FROM t
161+
----
162+
000102 true
163+
003102 false
164+
NULL NULL
165+
ff0102 false
166+
000102 true
167+
168+
# Plan should not have a cast of the column (should have casted the literal
169+
# to FixedSizeBinary as that is much faster)
170+
171+
query TT
172+
explain SELECT column1, column1 = X'000102' FROM t
173+
----
174+
logical_plan
175+
01)Projection: t.column1, t.column1 = FixedSizeBinary(3, "0,1,2") AS t.column1 = Binary("0,1,2")
176+
02)--TableScan: t projection=[column1]
177+
physical_plan
178+
01)ProjectionExec: expr=[column1@0 as column1, column1@0 = 000102 as t.column1 = Binary("0,1,2")]
179+
02)--DataSourceExec: partitions=1, partition_sizes=[1]
152180

153181
statement ok
154182
drop table t_source

0 commit comments

Comments
 (0)