Skip to content

Commit bcadbf2

Browse files
authored
feat(cubesql): Filter push down for date_part(?upper) AND date_part(?lower) (#10070)
Allows filters of the form `DATE_PART('year', ?column) = ?year AND DATE_PART('quarter', ?column) = ?quarter` to be pushed down as a single inDateRange filter, so such queries can now be accelerated with pre-aggregations. This re-uses the code we already have for Tableau, but without requiring `Trunc` as a wrapper. It also adds the ability to merge a date range with `quarter`.
1 parent bc8ff68 commit bcadbf2

File tree

2 files changed

+170
-0
lines changed

2 files changed

+170
-0
lines changed

rust/cubesql/cubesql/src/compile/mod.rs

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9462,6 +9462,85 @@ ORDER BY "source"."str0" ASC
94629462
)
94639463
}
94649464

9465+
#[tokio::test]
9466+
async fn test_filter_extract_by_year_and_quarter() {
9467+
init_testing_logger();
9468+
9469+
async fn assert_quarter_result(quarter: i32, start_date: &str, end_date: &str) {
9470+
let query_plan = convert_select_to_query_plan(
9471+
format!(r#"
9472+
SELECT COUNT(*) AS "count",
9473+
EXTRACT(YEAR FROM "KibanaSampleDataEcommerce"."order_date") AS "yr:completedAt:ok"
9474+
FROM "public"."KibanaSampleDataEcommerce" "KibanaSampleDataEcommerce"
9475+
WHERE EXTRACT(YEAR FROM "KibanaSampleDataEcommerce"."order_date") = 2019
9476+
AND EXTRACT(QUARTER FROM "KibanaSampleDataEcommerce"."order_date") = {}
9477+
GROUP BY 2
9478+
"#, quarter),
9479+
DatabaseProtocol::PostgreSQL,
9480+
).await;
9481+
9482+
assert_eq!(
9483+
query_plan.as_logical_plan().find_cube_scan().request,
9484+
V1LoadRequestQuery {
9485+
measures: Some(vec!["KibanaSampleDataEcommerce.count".to_string()]),
9486+
dimensions: Some(vec![]),
9487+
segments: Some(vec![]),
9488+
time_dimensions: Some(vec![V1LoadRequestQueryTimeDimension {
9489+
dimension: "KibanaSampleDataEcommerce.order_date".to_string(),
9490+
granularity: Some("year".to_string()),
9491+
date_range: Some(json!(vec![start_date, end_date])),
9492+
},]),
9493+
order: Some(vec![]),
9494+
..Default::default()
9495+
}
9496+
)
9497+
}
9498+
9499+
assert_quarter_result(1, "2019-01-01", "2019-03-31").await;
9500+
assert_quarter_result(2, "2019-04-01", "2019-06-30").await;
9501+
assert_quarter_result(3, "2019-07-01", "2019-09-30").await;
9502+
assert_quarter_result(4, "2019-10-01", "2019-12-31").await;
9503+
}
9504+
9505+
#[tokio::test]
9506+
async fn test_filter_extract_by_year_and_month() {
9507+
init_testing_logger();
9508+
9509+
let logical_plan = convert_select_to_query_plan(
9510+
r#"
9511+
SELECT
9512+
COUNT(*) AS "count",
9513+
EXTRACT(YEAR FROM "KibanaSampleDataEcommerce"."order_date") AS "yr:completedAt:ok"
9514+
FROM "public"."KibanaSampleDataEcommerce" "KibanaSampleDataEcommerce"
9515+
WHERE EXTRACT(YEAR FROM "KibanaSampleDataEcommerce"."order_date") = 2019 AND EXTRACT(MONTH FROM "KibanaSampleDataEcommerce"."order_date") = 2
9516+
GROUP BY 2
9517+
;"#
9518+
.to_string(),
9519+
DatabaseProtocol::PostgreSQL,
9520+
)
9521+
.await
9522+
.as_logical_plan();
9523+
9524+
assert_eq!(
9525+
logical_plan.find_cube_scan().request,
9526+
V1LoadRequestQuery {
9527+
measures: Some(vec!["KibanaSampleDataEcommerce.count".to_string()]),
9528+
dimensions: Some(vec![]),
9529+
segments: Some(vec![]),
9530+
time_dimensions: Some(vec![V1LoadRequestQueryTimeDimension {
9531+
dimension: "KibanaSampleDataEcommerce.order_date".to_string(),
9532+
granularity: Some("year".to_string()),
9533+
date_range: Some(json!(vec![
9534+
"2019-02-01".to_string(),
9535+
"2019-02-28".to_string(),
9536+
])),
9537+
},]),
9538+
order: Some(vec![]),
9539+
..Default::default()
9540+
}
9541+
)
9542+
}
9543+
94659544
#[tokio::test]
94669545
async fn test_tableau_filter_extract_by_year() {
94679546
init_testing_logger();

rust/cubesql/cubesql/src/compile/rewrite/rules/filters.rs

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1726,6 +1726,44 @@ impl RewriteRules for FilterRules {
17261726
"?filter_aliases",
17271727
),
17281728
),
1729+
// EXTRACT(YEAR FROM "KibanaSampleDataEcommerce"."order_date") = 2019
1730+
// AND EXTRACT(MONTH FROM "KibanaSampleDataEcommerce"."order_date") = 3
1731+
transforming_rewrite(
1732+
"extract-date-range-and-gran-equals",
1733+
filter_op(
1734+
filter_op_filters(
1735+
filter_member("?member", "FilterMemberOp:inDateRange", "?values"),
1736+
filter_replacer(
1737+
binary_expr(
1738+
self.fun_expr(
1739+
"DatePart",
1740+
vec![literal_expr("?granularity"), column_expr("?column")],
1741+
),
1742+
"=",
1743+
literal_expr("?value"),
1744+
),
1745+
"?alias_to_cube",
1746+
"?members",
1747+
"?filter_aliases",
1748+
),
1749+
),
1750+
"FilterOpOp:and",
1751+
),
1752+
filter_member("?member", "FilterMemberOp:inDateRange", "?new_values"),
1753+
self.transform_filter_extract_date_range_and_trunc_gran_equals(
1754+
"?member",
1755+
"?values",
1756+
"?granularity",
1757+
"?column",
1758+
"?value",
1759+
"?alias_to_cube",
1760+
"?members",
1761+
"?filter_aliases",
1762+
"?new_values",
1763+
),
1764+
),
1765+
// TODO: Introduce rule to unwrap TRUNC(EXTRACT(?granularity FROM ?column_expr))
1766+
//
17291767
// TRUNC(EXTRACT(YEAR FROM "KibanaSampleDataEcommerce"."order_date")) = 2019
17301768
// AND TRUNC(EXTRACT(MONTH FROM "KibanaSampleDataEcommerce"."order_date")) = 3
17311769
transforming_rewrite(
@@ -1765,6 +1803,7 @@ impl RewriteRules for FilterRules {
17651803
"?new_values",
17661804
),
17671805
),
1806+
// TODO: Introduce new rule to unwrap TRUNC(EXTRACT(?granularity FROM ?column_expr)) -> EXTRACT(?granularity FROM ?column_expr)
17681807
// When the filter set above is paired with other filters, it needs to be
17691808
// regrouped for the above rewrite rule to match
17701809
rewrite(
@@ -1829,6 +1868,7 @@ impl RewriteRules for FilterRules {
18291868
"FilterOpOp:and",
18301869
),
18311870
),
1871+
// TODO: Introduce new rule to unwrap TRUNC(EXTRACT(?granularity FROM ?column_expr)) -> EXTRACT(?granularity FROM ?column_expr)
18321872
// The filter set above may be inverted, let's account for that as well
18331873
rewrite(
18341874
"extract-date-range-and-trunc-reverse",
@@ -1877,6 +1917,7 @@ impl RewriteRules for FilterRules {
18771917
"FilterOpOp:and",
18781918
),
18791919
),
1920+
// TODO: Introduce new rule to unwrap TRUNC(EXTRACT(?granularity FROM ?column_expr)) -> EXTRACT(?granularity FROM ?column_expr)
18801921
rewrite(
18811922
"extract-date-range-and-trunc-reverse-nested",
18821923
filter_op(
@@ -3991,6 +4032,7 @@ impl FilterRules {
39914032
if start_date_year != end_date.year() {
39924033
return false;
39934034
}
4035+
39944036
// Month value must be valid
39954037
if !(1..=12).contains(&value) {
39964038
return false;
@@ -4015,8 +4057,57 @@ impl FilterRules {
40154057
return false;
40164058
}
40174059

4060+
// Preserves existing constraints, for example:
4061+
// inDateRange: order_date >= '2019-02-15' AND order_date < '2019-03-10'
4062+
// Month filter: EXTRACT(MONTH FROM order_date) = 2 (February)
4063+
let new_start_date = max(new_start_date, start_date);
4064+
let new_end_date = min(new_end_date, end_date);
4065+
4066+
vec![
4067+
new_start_date.format("%Y-%m-%d").to_string(),
4068+
new_end_date.format("%Y-%m-%d").to_string(),
4069+
]
4070+
}
4071+
"quarter" | "qtr" => {
4072+
// Check that the range only covers one year
4073+
let start_date_year = start_date.year();
4074+
if start_date_year != end_date.year() {
4075+
return false;
4076+
}
4077+
4078+
// Quarter value must be valid (1-4)
4079+
if !(1..=4).contains(&value) {
4080+
return false;
4081+
}
4082+
4083+
let quarter_start_month = (value - 1) * 3 + 1;
4084+
4085+
// Obtain the new range
4086+
let Some(new_start_date) =
4087+
NaiveDate::from_ymd_opt(start_date_year, quarter_start_month as u32, 1)
4088+
else {
4089+
return false;
4090+
};
4091+
4092+
let Some(new_end_date) = new_start_date
4093+
.checked_add_months(Months::new(3))
4094+
.and_then(|date| date.checked_sub_days(Days::new(1)))
4095+
else {
4096+
return false;
4097+
};
4098+
4099+
// Paranoid check: if the resulting range is outside of the original range, we can't merge
4100+
// the filters
4101+
if new_start_date > end_date || new_end_date < start_date {
4102+
return false;
4103+
}
4104+
4105+
// Preserves existing constraints, for example:
4106+
// inDateRange: order_date >= '2019-04-15' AND order_date < '2019-12-31'
4107+
// Quarter filter: EXTRACT(QUARTER FROM order_date) = 2
40184108
let new_start_date = max(new_start_date, start_date);
40194109
let new_end_date = min(new_end_date, end_date);
4110+
40204111
vec![
40214112
new_start_date.format("%Y-%m-%d").to_string(),
40224113
new_end_date.format("%Y-%m-%d").to_string(),

0 commit comments

Comments
 (0)