From 5d2e6dc058c9bd0e0b5100c05d093fb42703566d Mon Sep 17 00:00:00 2001 From: yokofly Date: Mon, 4 Dec 2023 03:10:40 -0800 Subject: [PATCH 1/4] add a new visit pass again. --- src/Interpreters/RequiredSourceColumnsVisitor.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Interpreters/RequiredSourceColumnsVisitor.cpp b/src/Interpreters/RequiredSourceColumnsVisitor.cpp index f86adc46cd9..7232f618362 100644 --- a/src/Interpreters/RequiredSourceColumnsVisitor.cpp +++ b/src/Interpreters/RequiredSourceColumnsVisitor.cpp @@ -144,6 +144,9 @@ void RequiredSourceColumnsMatcher::visit(const ASTSelectQuery & select, const AS /// revisit select_expression_list (with children) when all the aliases are set Visitor(data).visit(select.select()); + + /// revisit select_expression_list again (see issue https://github.com/timeplus-io/proton/issues/356 ) + Visitor(data).visit(select.select()); } void RequiredSourceColumnsMatcher::visit(const ASTIdentifier & node, const ASTPtr &, Data & data) From 85330799347182d77318e6e99f66ab68cfaf64b3 Mon Sep 17 00:00:00 2001 From: yokofly Date: Mon, 4 Dec 2023 18:24:09 -0800 Subject: [PATCH 2/4] add smoke test --- .../test_stream_smoke/0099_fixed_issues.json | 34 ++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/tests/stream/test_stream_smoke/0099_fixed_issues.json b/tests/stream/test_stream_smoke/0099_fixed_issues.json index 54711c352c0..2733335e0bc 100644 --- a/tests/stream/test_stream_smoke/0099_fixed_issues.json +++ b/tests/stream/test_stream_smoke/0099_fixed_issues.json @@ -597,6 +597,38 @@ [3, 2] ]} ] - } + }, + { + "id": 21, + "tags": ["parser", "tree-rewriter"], + "name": "#356", + "description": "Fix parser opt bug. below 2 select query only change the order.", + "steps":[ + { + "statements": [ + {"client":"python", "query_type": "table", "query": "drop stream if exists cte1"}, + {"client":"python", "query_type": "table", "query": "drop stream if exists cte2"}, + {"client":"python", "query_type": "table", "exist": "cte1", "wait":1, "query": "create stream if not exists cte1(`Id` int32, `OrderQty` float, `LastQty` float);"}, + {"client":"python", "query_type": "table", "exist": "cte2", "wait":1, "query": "create stream if not exists cte2(`Id` int32, `OrderQty` float, `LastQty` float);"}, + {"client":"python", "query_type": "stream", "depends_on_stream":"cte1", "query_id":"9921_1", "query_end_timer":"3", "query":" SELECT least(cte1.LastQty, cte2.LastQty) AS OrderQty, cte1.LastQty + cte1.OrderQty AS LastQty FROM cte1 INNER JOIN cte2 ON cte1.Id = cte2.Id;"}, + {"client":"python", "query_type": "stream", "depends_on_stream":"cte1", "query_id":"9921_2", "query_end_timer":"3", "query":" SELECT cte1.LastQty + cte1.OrderQty AS LastQty, least(cte1.LastQty, cte2.LastQty) AS OrderQty FROM cte1 INNER JOIN cte2 ON cte1.Id = cte2.Id;"}, + {"client":"python", "query_type": "table", "depends_on_stream": "cte1", "wait":1, "query": "insert into cte1(Id, OrderQty, LastQty) values(1, 10, 2)(2, 15, 3)(3,20,4)"}, + {"client":"python", "query_type": "table", "depends_on_stream": "cte2", "wait":1, "query": "insert into cte2(Id, OrderQty, LastQty) values(1, 10, 2)(2, 15, 3)(3,20,4)"} + ] + } + ], + "expected_results": [ + {"query_id":"9921_1", "expected_results":[ + [2.0, 12.0], + [3.0, 18.0], + [4.0, 24.0] + ]}, + {"query_id":"9921_2", "expected_results":[ + [12.0, 2.0], + [18.0, 3.0], + [24.0, 4.0] + ]} + ] + } ] } \ No newline at end of file From 37d2682e6039f876881d50129d478b3acdd2beb2 Mon Sep 17 00:00:00 2001 From: yokofly Date: Wed, 6 Dec 2023 20:57:53 -0800 Subject: [PATCH 3/4] add a check before the re-scan the sql filed shall be small enough, the plain manual loop perf is enough compared with `std::ranges::all_of` but more read-able? https://quick-bench.com/q/azeLdyzDIqFnBYNokyg4K1HB6os --- src/Interpreters/RequiredSourceColumnsData.h | 15 +++++++++++++++ src/Interpreters/RequiredSourceColumnsVisitor.cpp | 5 ++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/RequiredSourceColumnsData.h b/src/Interpreters/RequiredSourceColumnsData.h index 3334cea5565..cdc7fc02046 100644 --- a/src/Interpreters/RequiredSourceColumnsData.h +++ b/src/Interpreters/RequiredSourceColumnsData.h @@ -51,6 +51,21 @@ struct RequiredSourceColumnsData NameSet requiredColumns() const; size_t nameInclusion(const String & name) const; + + /// proton: starts + bool needsScanAgain() const + { + if (complex_aliases.empty()) + return false; + + for (const auto & alias : complex_aliases) + { + if (required_names.contains(alias)) + return true; + } + return false; + } + /// proton: ends }; std::ostream & operator << (std::ostream & os, const RequiredSourceColumnsData & cols); diff --git a/src/Interpreters/RequiredSourceColumnsVisitor.cpp b/src/Interpreters/RequiredSourceColumnsVisitor.cpp index 7232f618362..cff0e1da2a5 100644 --- a/src/Interpreters/RequiredSourceColumnsVisitor.cpp +++ b/src/Interpreters/RequiredSourceColumnsVisitor.cpp @@ -145,8 +145,11 @@ void RequiredSourceColumnsMatcher::visit(const ASTSelectQuery & select, const AS /// revisit select_expression_list (with children) when all the aliases are set Visitor(data).visit(select.select()); + /// proton: starts /// revisit select_expression_list again (see issue https://github.com/timeplus-io/proton/issues/356 ) - Visitor(data).visit(select.select()); + if (data.needsScanAgain()) + Visitor(data).visit(select.select()); + /// proton: ends } void RequiredSourceColumnsMatcher::visit(const ASTIdentifier & node, const ASTPtr &, Data & data) From 4b7e197a569c7973f5be6d340cbf3f5587adad52 Mon Sep 17 00:00:00 2001 From: yokofly Date: Fri, 8 Dec 2023 16:52:32 -0800 Subject: [PATCH 4/4] specify further about need scan check condition ``` // src/Interpreters/RequiredSourceColumnsData.cpp:10 bool RequiredSourceColumnsData::addColumnAliasIfAny(const IAST & ast) { if (required_names.contains(alias)) masked_columns.insert(alias); complex_aliases.insert(alias); return true; } ``` --- src/Interpreters/RequiredSourceColumnsData.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/RequiredSourceColumnsData.h b/src/Interpreters/RequiredSourceColumnsData.h index cdc7fc02046..94c3294c8b7 100644 --- a/src/Interpreters/RequiredSourceColumnsData.h +++ b/src/Interpreters/RequiredSourceColumnsData.h @@ -60,7 +60,7 @@ struct RequiredSourceColumnsData for (const auto & alias : complex_aliases) { - if (required_names.contains(alias)) + if (!masked_columns.contains(alias) && required_names.contains(alias)) return true; } return false;