Skip to content

Commit 4a2838c

Browse files
committed
feat(cubesql): Remove bottom-up extraction completely
1 parent b5ddd11 commit 4a2838c

File tree

6 files changed

+21
-182
lines changed

6 files changed

+21
-182
lines changed

.github/workflows/rust-cubesql.yml

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,7 @@ jobs:
6262
# We use host instead of cross container, because it's much faster
6363
runs-on: ubuntu-24.04
6464
timeout-minutes: 60
65-
name: Unit (Rewrite Engine) (CUBESQL_TOP_DOWN_EXTRACTOR=${{ matrix.top-down-extractor }})
66-
strategy:
67-
matrix:
68-
top-down-extractor: ['true', 'false']
69-
fail-fast: false
65+
name: Unit (Rewrite Engine)
7066

7167
steps:
7268
- name: Checkout
@@ -94,7 +90,6 @@ jobs:
9490
CUBESQL_TESTING_CUBE_TOKEN: ${{ secrets.CUBESQL_TESTING_CUBE_TOKEN }}
9591
CUBESQL_TESTING_CUBE_URL: ${{ secrets.CUBESQL_TESTING_CUBE_URL }}
9692
CUBESQL_SQL_PUSH_DOWN: true
97-
CUBESQL_TOP_DOWN_EXTRACTOR: ${{ matrix.top-down-extractor }}
9893
CUBESQL_REWRITE_CACHE: true
9994
CUBESQL_REWRITE_TIMEOUT: 60
10095
run: |

rust/cubesql/cubesql/src/compile/mod.rs

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -13962,11 +13962,7 @@ ORDER BY "source"."str0" ASC
1396213962
// CAST(CAST(ta_1.order_date AS Date32) - CAST(CAST(Utf8("1970-01-01") AS Date32) AS Date32) + Int64(3) AS Decimal(38, 10))
1396313963
if Rewriter::sql_push_down_enabled() {
1396413964
let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql;
13965-
if Rewriter::top_down_extractor_enabled() {
13966-
assert!(sql.contains("LIMIT 1000"));
13967-
} else {
13968-
assert!(sql.contains("\"limit\": 1000"));
13969-
}
13965+
assert!(sql.contains("LIMIT 1000"));
1397013966
assert!(sql.contains("% 7"));
1397113967

1397213968
let physical_plan = query_plan.as_physical_plan().await.unwrap();
@@ -16175,18 +16171,10 @@ LIMIT {{ limit }}{% endif %}"#.to_string(),
1617516171
time_dimensions: Some(vec![V1LoadRequestQueryTimeDimension {
1617616172
dimension: "KibanaSampleDataEcommerce.order_date".to_string(),
1617716173
granularity: Some("month".to_string()),
16178-
date_range: if Rewriter::top_down_extractor_enabled() {
16179-
Some(json!(vec![
16180-
"2019-01-01T00:00:00.000Z".to_string(),
16181-
"2019-01-31T23:59:59.999Z".to_string()
16182-
]))
16183-
} else {
16184-
// Non-optimal variant with top down extractor disabled
16185-
Some(json!(vec![
16186-
"2019-01-01 00:00:00.000".to_string(),
16187-
"2019-01-31 23:59:59.999".to_string()
16188-
]))
16189-
}
16174+
date_range: Some(json!(vec![
16175+
"2019-01-01T00:00:00.000Z".to_string(),
16176+
"2019-01-31T23:59:59.999Z".to_string()
16177+
]))
1619016178
}]),
1619116179
order: Some(vec![]),
1619216180
..Default::default()

rust/cubesql/cubesql/src/compile/query_engine.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,6 @@ pub trait QueryEngine {
226226
state.auth_context().unwrap(),
227227
qtrace,
228228
span_id.clone(),
229-
self.config_ref().top_down_extractor(),
230229
)
231230
.await
232231
.map_err(|e| match e.cause {

rust/cubesql/cubesql/src/compile/rewrite/cost.rs

Lines changed: 5 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use crate::{
88
},
99
transport::{MetaContext, V1CubeMetaDimensionExt},
1010
};
11-
use egg::{Analysis, CostFunction, EGraph, Id, Language, RecExpr};
11+
use egg::{Analysis, EGraph, Id, Language, RecExpr};
1212
use indexmap::IndexSet;
1313

1414
#[derive(Debug)]
@@ -25,7 +25,7 @@ impl BestCubePlan {
2525
}
2626
}
2727

28-
pub fn initial_cost(&self, enode: &LogicalPlanLanguage, top_down: bool) -> CubePlanCost {
28+
pub fn initial_cost(&self, enode: &LogicalPlanLanguage) -> CubePlanCost {
2929
let table_scans = match enode {
3030
LogicalPlanLanguage::TableScan(_) => 1,
3131
_ => 0,
@@ -52,8 +52,7 @@ impl BestCubePlan {
5252
};
5353

5454
let non_pushed_down_limit_sort = match enode {
55-
LogicalPlanLanguage::Limit(_) if !top_down => 1,
56-
LogicalPlanLanguage::Sort(_) if top_down => 1,
55+
LogicalPlanLanguage::Sort(_) => 1,
5756
_ => 0,
5857
};
5958

@@ -248,7 +247,6 @@ impl BestCubePlan {
248247

249248
#[derive(Clone, Copy)]
250249
pub struct CubePlanCostOptions {
251-
top_down: bool,
252250
penalize_post_processing: bool,
253251
}
254252

@@ -312,73 +310,13 @@ pub enum CubePlanState {
312310
Wrapper,
313311
}
314312

315-
impl CubePlanState {
316-
pub fn add_child(&self, other: &Self) -> Self {
317-
match (self, other) {
318-
(CubePlanState::Wrapper, _) => CubePlanState::Wrapper,
319-
(_, CubePlanState::Wrapped) => CubePlanState::Wrapped,
320-
(CubePlanState::Wrapped, _) => CubePlanState::Wrapped,
321-
(CubePlanState::Unwrapped(a), _) => CubePlanState::Unwrapped(*a),
322-
}
323-
}
324-
}
325-
326313
#[derive(Debug, Clone, Eq, Hash, PartialEq)]
327314
pub enum SortState {
328315
None,
329316
Current,
330317
DirectChild,
331318
}
332319

333-
impl SortState {
334-
pub fn add_child(&self, other: &Self) -> Self {
335-
match (self, other) {
336-
(Self::Current, _) => Self::Current,
337-
(_, Self::Current) | (Self::DirectChild, _) => Self::DirectChild,
338-
_ => Self::None,
339-
}
340-
}
341-
}
342-
343-
#[derive(Debug, Clone, Eq, PartialEq)]
344-
pub struct CubePlanCostAndState {
345-
pub cost: CubePlanCost,
346-
pub state: CubePlanState,
347-
pub sort_state: SortState,
348-
}
349-
350-
impl PartialOrd for CubePlanCostAndState {
351-
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
352-
Some(self.cost.cmp(&other.cost))
353-
}
354-
}
355-
356-
impl Ord for CubePlanCostAndState {
357-
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
358-
self.cost.cmp(&other.cost)
359-
}
360-
}
361-
362-
impl CubePlanCostAndState {
363-
pub fn add_child(&self, other: &Self) -> Self {
364-
Self {
365-
cost: self.cost.add_child(&other.cost),
366-
state: self.state.add_child(&other.state),
367-
sort_state: self.sort_state.add_child(&other.sort_state),
368-
}
369-
}
370-
371-
pub fn finalize(&self, enode: &LogicalPlanLanguage, options: CubePlanCostOptions) -> Self {
372-
Self {
373-
cost: self
374-
.cost
375-
.finalize(&self.state, &self.sort_state, enode, options),
376-
state: self.state.clone(),
377-
sort_state: self.sort_state.clone(),
378-
}
379-
}
380-
}
381-
382320
impl CubePlanCost {
383321
pub fn add_child(&self, other: &Self) -> Self {
384322
Self {
@@ -469,7 +407,7 @@ impl CubePlanCost {
469407
},
470408
non_pushed_down_limit_sort: match sort_state {
471409
SortState::DirectChild => self.non_pushed_down_limit_sort,
472-
SortState::Current if options.top_down => self.non_pushed_down_limit_sort,
410+
SortState::Current => self.non_pushed_down_limit_sort,
473411
_ => 0,
474412
},
475413
// Don't track state here: we want representations that have fewer wrappers with zero members _in total_
@@ -520,60 +458,6 @@ impl CubePlanCost {
520458
}
521459
}
522460

523-
impl CostFunction<LogicalPlanLanguage> for BestCubePlan {
524-
type Cost = CubePlanCostAndState;
525-
fn cost<C>(&mut self, enode: &LogicalPlanLanguage, mut costs: C) -> Self::Cost
526-
where
527-
C: FnMut(Id) -> Self::Cost,
528-
{
529-
let ast_size_outside_wrapper = match enode {
530-
LogicalPlanLanguage::Aggregate(_) => 1,
531-
LogicalPlanLanguage::Projection(_) => 1,
532-
LogicalPlanLanguage::Limit(_) => 1,
533-
LogicalPlanLanguage::Sort(_) => 1,
534-
LogicalPlanLanguage::Filter(_) => 1,
535-
LogicalPlanLanguage::Join(_) => 1,
536-
LogicalPlanLanguage::CrossJoin(_) => 1,
537-
LogicalPlanLanguage::Union(_) => 1,
538-
LogicalPlanLanguage::Window(_) => 1,
539-
LogicalPlanLanguage::Subquery(_) => 1,
540-
LogicalPlanLanguage::Distinct(_) => 1,
541-
_ => 0,
542-
};
543-
544-
let cost = self.initial_cost(enode, false);
545-
let initial_cost = CubePlanCostAndState {
546-
cost,
547-
state: match enode {
548-
LogicalPlanLanguage::CubeScanWrapped(CubeScanWrapped(true)) => {
549-
CubePlanState::Wrapped
550-
}
551-
LogicalPlanLanguage::CubeScanWrapper(_) => CubePlanState::Wrapper,
552-
_ => CubePlanState::Unwrapped(ast_size_outside_wrapper),
553-
},
554-
sort_state: match enode {
555-
LogicalPlanLanguage::Sort(_) => SortState::Current,
556-
_ => SortState::None,
557-
},
558-
};
559-
let res = enode
560-
.children()
561-
.iter()
562-
.fold(initial_cost.clone(), |cost, id| {
563-
let child = costs(*id);
564-
cost.add_child(&child)
565-
})
566-
.finalize(
567-
enode,
568-
CubePlanCostOptions {
569-
top_down: false,
570-
penalize_post_processing: self.penalize_post_processing,
571-
},
572-
);
573-
res
574-
}
575-
}
576-
577461
pub trait TopDownCost: Clone + Debug + PartialOrd {
578462
fn add(&self, other: &Self) -> Self;
579463
}
@@ -902,7 +786,7 @@ impl TopDownState<LogicalPlanLanguage> for CubePlanTopDownState {
902786

903787
impl TopDownCostFunction<LogicalPlanLanguage, CubePlanTopDownState, CubePlanCost> for BestCubePlan {
904788
fn cost(&self, node: &LogicalPlanLanguage) -> CubePlanCost {
905-
self.initial_cost(node, true)
789+
self.initial_cost(node)
906790
}
907791

908792
fn finalize(
@@ -917,7 +801,6 @@ impl TopDownCostFunction<LogicalPlanLanguage, CubePlanTopDownState, CubePlanCost
917801
&state.limit,
918802
node,
919803
CubePlanCostOptions {
920-
top_down: true,
921804
penalize_post_processing: self.penalize_post_processing,
922805
},
923806
)

rust/cubesql/cubesql/src/compile/rewrite/rewriter.rs

Lines changed: 10 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ use crate::{
2323
use datafusion::{
2424
logical_plan::LogicalPlan, physical_plan::planner::DefaultPhysicalPlanner, scalar::ScalarValue,
2525
};
26-
use egg::{EGraph, Extractor, Id, IterationData, Language, Rewrite, Runner, StopReason};
26+
use egg::{EGraph, Id, IterationData, Language, Rewrite, Runner, StopReason};
2727
use serde::{Deserialize, Serialize};
2828
use std::{
2929
collections::{HashMap, HashSet},
@@ -330,7 +330,6 @@ impl Rewriter {
330330
auth_context: AuthContextRef,
331331
qtrace: &mut Option<Qtrace>,
332332
span_id: Option<Arc<SpanId>>,
333-
top_down_extractor: bool,
334333
) -> Result<LogicalPlan, CubeError> {
335334
let cube_context = self.cube_context.clone();
336335
let egraph = self.graph.clone();
@@ -361,26 +360,16 @@ impl Rewriter {
361360
Self::run_rewrites(&cube_context, egraph, rules, "final")?;
362361

363362
// TODO maybe check replacers and penalized_ast_size_outside_wrapper right after extraction?
364-
let best = if top_down_extractor {
365-
let mut extractor = TopDownExtractor::new(
366-
&runner.egraph,
367-
BestCubePlan::new(cube_context.meta.clone(), penalize_post_processing),
368-
CubePlanTopDownState::new(),
369-
);
370-
let Some((best_cost, best)) = extractor.find_best(root) else {
371-
return Err(CubeError::internal("Unable to find best plan".to_string()));
372-
};
373-
log::debug!("Best cost: {:#?}", best_cost);
374-
best
375-
} else {
376-
let extractor = Extractor::new(
377-
&runner.egraph,
378-
BestCubePlan::new(cube_context.meta.clone(), penalize_post_processing),
379-
);
380-
let (best_cost, best) = extractor.find_best(root);
381-
log::debug!("Best cost: {:#?}", best_cost);
382-
best
363+
let mut extractor = TopDownExtractor::new(
364+
&runner.egraph,
365+
BestCubePlan::new(cube_context.meta.clone(), penalize_post_processing),
366+
CubePlanTopDownState::new(),
367+
);
368+
let Some((best_cost, best)) = extractor.find_best(root) else {
369+
return Err(CubeError::internal("Unable to find best plan".to_string()));
383370
};
371+
log::debug!("Best cost: {:#?}", best_cost);
372+
384373
let qtrace_best_graph = if Qtrace::is_enabled() {
385374
best.as_ref().to_vec()
386375
} else {
@@ -474,12 +463,6 @@ impl Rewriter {
474463
.unwrap_or(true)
475464
}
476465

477-
pub fn top_down_extractor_enabled() -> bool {
478-
env::var("CUBESQL_TOP_DOWN_EXTRACTOR")
479-
.map(|v| v.to_lowercase() != "false")
480-
.unwrap_or(true)
481-
}
482-
483466
pub fn rewrite_rules(
484467
meta_context: Arc<MetaContext>,
485468
config_obj: Arc<dyn ConfigObj>,

rust/cubesql/cubesql/src/config/mod.rs

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,6 @@ pub trait ConfigObj: DIService + Debug {
115115
fn max_sessions(&self) -> usize;
116116

117117
fn no_implicit_order(&self) -> bool;
118-
119-
fn top_down_extractor(&self) -> bool;
120118
}
121119

122120
#[derive(Debug, Clone)]
@@ -137,7 +135,6 @@ pub struct ConfigObjImpl {
137135
pub non_streaming_query_max_row_limit: i32,
138136
pub max_sessions: usize,
139137
pub no_implicit_order: bool,
140-
pub top_down_extractor: bool,
141138
}
142139

143140
impl ConfigObjImpl {
@@ -175,7 +172,6 @@ impl ConfigObjImpl {
175172
non_streaming_query_max_row_limit: env_parse("CUBEJS_DB_QUERY_LIMIT", 50000),
176173
max_sessions: env_parse("CUBEJS_MAX_SESSIONS", 1024),
177174
no_implicit_order: env_parse("CUBESQL_SQL_NO_IMPLICIT_ORDER", true),
178-
top_down_extractor: env_parse("CUBESQL_TOP_DOWN_EXTRACTOR", true),
179175
}
180176
}
181177
}
@@ -242,10 +238,6 @@ impl ConfigObj for ConfigObjImpl {
242238
fn max_sessions(&self) -> usize {
243239
self.max_sessions
244240
}
245-
246-
fn top_down_extractor(&self) -> bool {
247-
self.top_down_extractor
248-
}
249241
}
250242

251243
impl Config {
@@ -278,7 +270,6 @@ impl Config {
278270
non_streaming_query_max_row_limit: 50000,
279271
max_sessions: 1024,
280272
no_implicit_order: true,
281-
top_down_extractor: true,
282273
}),
283274
}
284275
}

0 commit comments

Comments
 (0)