Skip to content

Commit 0389734

Browse files
authored
chore(optimizer): add tpcds unit test for optimizer (#17605)
* add tpcds unit test for optimizer, easy to show and debug * change the tpcds optimizer to file * change snow plan from json to tree * add q1.yaml * fix yaml lint error * fix format lint
1 parent 3d378ee commit 0389734

34 files changed

+1541
-1
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,7 @@ serde_stacker = { version = "0.1" }
461461
serde_test = "1.0"
462462
serde_urlencoded = "0.7.1"
463463
serde_with = { version = "3.8.1" }
464+
serde_yaml = { version = "0.9.34" }
464465
serfig = "0.1.0"
465466
sha1 = "0.10.5"
466467
sha2 = "0.10.8"

src/query/catalog/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,6 @@ pub mod table_args;
3131
pub mod table_context;
3232
pub mod table_function;
3333
pub mod table_with_options;
34+
35+
pub use statistics::BasicColumnStatistics;
36+
pub use table::TableStatistics;

src/query/service/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,8 @@ mysql_async = { workspace = true }
190190
p256 = { workspace = true }
191191
pretty_assertions = { workspace = true }
192192
reqwest = { workspace = true }
193+
serde_json.workspace = true
194+
serde_yaml = { workspace = true }
193195
temp-env = { workspace = true }
194196
tempfile = { workspace = true }
195197
tower = { workspace = true }
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# TPC-DS Optimizer Test Data
2+
3+
This directory contains test data for the TPC-DS optimizer tests. The data is organized as follows:
4+
5+
## Directory Structure
6+
7+
```
8+
data
9+
├── tables/ # SQL table definitions
10+
└── yaml/ # YAML test case definitions
11+
```
12+
13+
## YAML Test Case Format
14+
15+
Each test case is defined in a YAML file with the following structure:
16+
17+
```yaml
18+
name: "Q3" # Test case name
19+
description: "Test description" # Optional description
20+
21+
sql: | # SQL query to test
22+
SELECT ...
23+
24+
table_statistics: # Table statistics
25+
table_name:
26+
num_rows: 1000
27+
data_size: 102400
28+
data_size_compressed: 51200
29+
index_size: 20480
30+
number_of_blocks: 10
31+
number_of_segments: 2
32+
33+
column_statistics: # Column statistics
34+
table_name.column_name:
35+
min: 1990 # Min value (can be number or string)
36+
max: 2000 # Max value (can be number or string)
37+
ndv: 10 # Number of distinct values
38+
null_count: 0 # Number of null values
39+
40+
raw_plan: | # Expected raw plan
41+
...
42+
43+
optimized_plan: | # Expected optimized plan
44+
...
45+
46+
snow_plan: | # Optional expected Snowflake plan
47+
...
48+
```
49+
50+
## Table Definitions
51+
52+
Table definitions are stored in SQL files in the `tables` directory. Each file contains a `CREATE TABLE` statement for a specific table used in the tests.
53+
54+
## Adding New Tests
55+
56+
To add a new test case:
57+
58+
1. Create a new YAML file in the `yaml` directory with the test case definition.
59+
2. If the test uses new tables, add the table definitions to the `tables` directory.
60+
3. The test runner will automatically discover and run all test cases in the `yaml` directory.
61+
62+
## Updating Existing Tests
63+
64+
If the expected output of a test changes (e.g., due to optimizer improvements):
65+
66+
1. Run the test to see the actual output.
67+
2. Update the `raw_plan`, `optimized_plan`, or `snow_plan` field in the YAML file to match the actual output.
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
CREATE OR REPLACE TABLE call_center
2+
(
3+
cc_call_center_sk integer ,
4+
cc_call_center_id char(16) ,
5+
cc_rec_start_date date null,
6+
cc_rec_end_date date null,
7+
cc_closed_date_sk integer null,
8+
cc_open_date_sk integer null,
9+
cc_name varchar(50) null,
10+
cc_class varchar(50) null,
11+
cc_employees integer null,
12+
cc_sq_ft integer null,
13+
cc_hours char(20) null,
14+
cc_manager varchar(40) null,
15+
cc_mkt_id integer null,
16+
cc_mkt_class char(50) null,
17+
cc_mkt_desc varchar(100) null,
18+
cc_market_manager varchar(40) null,
19+
cc_division integer null,
20+
cc_division_name varchar(50) null,
21+
cc_company integer null,
22+
cc_company_name char(50) null,
23+
cc_street_number char(10) null,
24+
cc_street_name varchar(60) null,
25+
cc_street_type char(15) null,
26+
cc_suite_number char(10) null,
27+
cc_city varchar(60) null,
28+
cc_county varchar(30) null,
29+
cc_state char(2) null,
30+
cc_zip char(10) null,
31+
cc_country varchar(20) null,
32+
cc_gmt_offset decimal(5,2) null,
33+
cc_tax_percentage decimal(5,2) null
34+
);
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
CREATE OR REPLACE TABLE catalog_page
2+
(
3+
cp_catalog_page_sk integer ,
4+
cp_catalog_page_id char(16) ,
5+
cp_start_date_sk integer null,
6+
cp_end_date_sk integer null,
7+
cp_department varchar(50) null,
8+
cp_catalog_number integer null,
9+
cp_catalog_page_number integer null,
10+
cp_description varchar(100) null,
11+
cp_type varchar(100) null
12+
);
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
CREATE OR REPLACE TABLE catalog_returns
2+
(
3+
cr_returned_date_sk integer null,
4+
cr_returned_time_sk integer null,
5+
cr_item_sk integer ,
6+
cr_refunded_customer_sk integer null,
7+
cr_refunded_cdemo_sk integer null,
8+
cr_refunded_hdemo_sk integer null,
9+
cr_refunded_addr_sk integer null,
10+
cr_returning_customer_sk integer null,
11+
cr_returning_cdemo_sk integer null,
12+
cr_returning_hdemo_sk integer null,
13+
cr_returning_addr_sk integer null,
14+
cr_call_center_sk integer null,
15+
cr_catalog_page_sk integer null,
16+
cr_ship_mode_sk integer null,
17+
cr_warehouse_sk integer null,
18+
cr_reason_sk integer null,
19+
cr_order_number integer ,
20+
cr_return_quantity integer null,
21+
cr_return_amount decimal(7,2) null,
22+
cr_return_tax decimal(7,2) null,
23+
cr_return_amt_inc_tax decimal(7,2) null,
24+
cr_fee decimal(7,2) null,
25+
cr_return_ship_cost decimal(7,2) null,
26+
cr_refunded_cash decimal(7,2) null,
27+
cr_reversed_charge decimal(7,2) null,
28+
cr_store_credit decimal(7,2) null,
29+
cr_net_loss decimal(7,2) null
30+
);
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
CREATE OR REPLACE TABLE customer
2+
(
3+
c_customer_sk integer ,
4+
c_customer_id char(16) ,
5+
c_current_cdemo_sk integer null,
6+
c_current_hdemo_sk integer null,
7+
c_current_addr_sk integer null,
8+
c_first_shipto_date_sk integer null,
9+
c_first_sales_date_sk integer null,
10+
c_salutation char(10) null,
11+
c_first_name char(20) null,
12+
c_last_name char(30) null,
13+
c_preferred_cust_flag char(1) null,
14+
c_birth_day integer null,
15+
c_birth_month integer null,
16+
c_birth_year integer null,
17+
c_birth_country varchar(20) null,
18+
c_login char(13) null,
19+
c_email_address char(50) null,
20+
c_last_review_date_sk integer null
21+
);
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
CREATE OR REPLACE TABLE customer_address
2+
(
3+
ca_address_sk integer ,
4+
ca_address_id char(16) ,
5+
ca_street_number char(10) null,
6+
ca_street_name varchar(60) null,
7+
ca_street_type char(15) null,
8+
ca_suite_number char(10) null,
9+
ca_city varchar(60) null,
10+
ca_county varchar(30) null,
11+
ca_state char(2) null,
12+
ca_zip char(10) null,
13+
ca_country varchar(20) null,
14+
ca_gmt_offset decimal(5,2) null,
15+
ca_location_type char(20) null
16+
);

0 commit comments

Comments
 (0)