1
- #include " SourceColumnsDescription.h"
1
+ #include < Storages/Streaming/ SourceColumnsDescription.h>
2
2
3
3
#include < Core/Block.h>
4
4
#include < NativeLog/Record/Record.h>
5
5
#include < Storages/StorageSnapshot.h>
6
6
#include < base/ClockUtils.h>
7
7
#include < Common/ProtonCommon.h>
8
8
9
+ #include < numeric>
10
+
9
11
namespace DB
10
12
{
11
13
SourceColumnsDescription::PhysicalColumnPositions &
@@ -30,21 +32,39 @@ void SourceColumnsDescription::PhysicalColumnPositions::clear()
30
32
subcolumns.clear ();
31
33
}
32
34
33
/// Convenience constructor that derives its inputs from a storage snapshot and delegates to the
/// (columns_to_read, schema, all_extended_columns, enable_partial_read) constructor:
///  - columns to read: `required_column_names` resolved through `storage_snapshot->getColumnsByNames`
///    with the `All` options plus subcolumns, virtuals and extended objects;
///  - schema: the sample block of `storage_snapshot->getMetadataForQuery()`;
///  - all extended columns: `storage_snapshot->getColumns` with the `All` options plus extended objects;
///  - `enable_partial_read` is forwarded unchanged.
/// The constructor body itself is empty; all work happens in the delegated constructor.
- SourceColumnsDescription::SourceColumnsDescription (const Names & required_column_names, StorageSnapshotPtr storage_snapshot)
35
+ SourceColumnsDescription::SourceColumnsDescription (
36
+ const Names & required_column_names, StorageSnapshotPtr storage_snapshot, bool enable_partial_read)
34
37
: SourceColumnsDescription(
35
- storage_snapshot->getColumnsByNames (GetColumnsOptions(GetColumnsOptions::All).withSubcolumns().withVirtuals().withExtendedObjects(), required_column_names),
38
+ storage_snapshot->getColumnsByNames (
39
+ GetColumnsOptions (GetColumnsOptions::All).withSubcolumns().withVirtuals().withExtendedObjects(), required_column_names),
36
40
storage_snapshot->getMetadataForQuery()->getSampleBlock(),
37
- storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects()))
41
+ storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects()),
42
+ enable_partial_read)
38
43
{
39
44
}
40
45
41
- SourceColumnsDescription::SourceColumnsDescription (const NamesAndTypesList & columns_to_read, const Block & schema, const NamesAndTypesList & all_extended_columns)
46
+ SourceColumnsDescription::SourceColumnsDescription (
47
+ const NamesAndTypesList & columns_to_read,
48
+ const Block & schema,
49
+ const NamesAndTypesList & all_extended_columns,
50
+ bool enable_partial_read)
42
51
{
43
52
// / FIXME: when multiple versions of the schema exist, the header and the schema may be mismatched
44
53
auto column_size = columns_to_read.size ();
45
54
55
+ if (enable_partial_read)
56
+ {
57
+ // / Just read required partial physical columns
58
+ physical_column_positions_to_read.positions .reserve (column_size);
59
+ }
60
+ else
61
+ {
62
+ // / Read full physical columns
63
+ physical_column_positions_to_read.positions .resize (schema.columns ());
64
+ std::iota (physical_column_positions_to_read.positions .begin (), physical_column_positions_to_read.positions .end (), 0 );
65
+ }
66
+
46
67
positions.reserve (column_size);
47
- physical_column_positions_to_read.positions .reserve (column_size);
48
68
subcolumns_to_read.reserve (column_size);
49
69
50
70
std::vector<uint16_t > read_all_subcolumns_positions;
@@ -112,45 +132,48 @@ SourceColumnsDescription::SourceColumnsDescription(const NamesAndTypesList & col
112
132
auto pos_in_schema = schema.getPositionByName (name_in_storage);
113
133
const auto & column_in_storage = schema.getByName (name_in_storage);
114
134
115
- // / Calculate main column pos
116
- size_t physical_pos_in_schema_to_read = 0 ;
117
- // / We don't need to read duplicate physical columns from schema
118
- auto physical_pos_iter = std::find (
119
- physical_column_positions_to_read.positions .begin (), physical_column_positions_to_read.positions .end (), pos_in_schema);
120
- if (physical_pos_iter == physical_column_positions_to_read.positions .end ())
135
+ size_t physical_pos_in_schema_to_read = pos_in_schema;
136
+ // / For partial reads, re-calculate the position within the partially read schema
137
+ if (enable_partial_read)
121
138
{
122
- physical_pos_in_schema_to_read = physical_column_positions_to_read.positions .size ();
123
- physical_column_positions_to_read.positions .emplace_back (pos_in_schema);
139
+ // / We don't need to read duplicate physical columns from schema
140
+ auto physical_pos_iter = std::find (
141
+ physical_column_positions_to_read.positions .begin (), physical_column_positions_to_read.positions .end (), pos_in_schema);
142
+ if (physical_pos_iter == physical_column_positions_to_read.positions .end ())
143
+ {
144
+ physical_pos_in_schema_to_read = physical_column_positions_to_read.positions .size ();
145
+ physical_column_positions_to_read.positions .emplace_back (pos_in_schema);
146
+ }
147
+ else
148
+ physical_pos_in_schema_to_read = physical_pos_iter - physical_column_positions_to_read.positions .begin ();
149
+ }
124
150
125
- // / json, array(json), tuple(..., json, ...)
126
- if (column_in_storage.type ->hasDynamicSubcolumns ())
151
+ // / json, array(json), tuple(..., json, ...)
152
+ if (column_in_storage.type ->hasDynamicSubcolumns ())
153
+ {
154
+ // / We want to read the parent json column only once when multiple subcolumns of the same json are required
155
+ // / like `select json.a, json.b from stream`
156
+ auto find_iter = std::find_if (
157
+ physical_object_columns_to_read.begin (),
158
+ physical_object_columns_to_read.end (),
159
+ [&name_in_storage](const auto & col_name_type) { return col_name_type.name == name_in_storage; });
160
+
161
+ if (find_iter == physical_object_columns_to_read.end ())
127
162
{
128
- // / We like to read parent json column once if multiple subcolumns of the same json are required
129
- // / like `select json.a, json.b from stream`
130
- auto find_iter = std::find_if (
131
- physical_object_columns_to_read.begin (),
132
- physical_object_columns_to_read.end (),
133
- [&column](const auto & col_name_type) { return col_name_type.name == column.name ; });
134
-
135
- if (find_iter == physical_object_columns_to_read.end ())
163
+ if (column.isSubcolumn ())
136
164
{
137
- if (column.isSubcolumn ())
138
- {
139
- // / When reading a subcolumn of a json like `select json.a from stream`, we will need read the parent `json` column
140
- auto name_and_type = all_extended_columns.tryGetByName (name_in_storage);
141
- assert (name_and_type);
142
- physical_object_columns_to_read.emplace_back (std::move (*name_and_type));
143
- }
144
- else
145
- {
146
- // / This column is parent json column, like `select json from stream`, use the name and type directly
147
- physical_object_columns_to_read.emplace_back (column);
148
- }
165
+ // / When reading a subcolumn of a json like `select json.a from stream`, we need to read the parent `json` column
166
+ auto name_and_type = all_extended_columns.tryGetByName (name_in_storage);
167
+ assert (name_and_type);
168
+ physical_object_columns_to_read.emplace_back (std::move (*name_and_type));
169
+ }
170
+ else
171
+ {
172
+ // / This column is parent json column, like `select json from stream`, use the name and type directly
173
+ physical_object_columns_to_read.emplace_back (column);
149
174
}
150
175
}
151
176
}
152
- else
153
- physical_pos_in_schema_to_read = physical_pos_iter - physical_column_positions_to_read.positions .begin ();
154
177
155
178
// / For a subcolumn, which depends on the main column
156
179
if (column.isSubcolumn ())
@@ -181,7 +204,7 @@ SourceColumnsDescription::SourceColumnsDescription(const NamesAndTypesList & col
181
204
physical_column_positions_to_read.subcolumns .erase (pos);
182
205
183
206
// / When clients read virtual columns only, add `_tp_time` so that we still know how many rows there are
184
- if (physical_column_positions_to_read.positions .empty ())
207
+ if (enable_partial_read && physical_column_positions_to_read.positions .empty ())
185
208
physical_column_positions_to_read.positions .emplace_back (schema.getPositionByName (ProtonConsts::RESERVED_EVENT_TIME));
186
209
}
187
210
}
0 commit comments