@@ -22,6 +22,7 @@ use parquet::arrow::ArrowWriter;
22
22
use parquet:: basic:: Encoding ;
23
23
use parquet:: file:: properties:: EnabledStatistics ;
24
24
use parquet:: file:: properties:: WriterProperties ;
25
+ use parquet:: file:: properties:: WriterVersion ;
25
26
use parquet:: format:: FileMetaData ;
26
27
27
28
/// Serialize data blocks to parquet format.
@@ -30,17 +31,41 @@ pub fn blocks_to_parquet(
30
31
blocks : Vec < DataBlock > ,
31
32
write_buffer : & mut Vec < u8 > ,
32
33
compression : TableCompression ,
34
+ enable_encoding : bool ,
33
35
) -> Result < FileMetaData > {
34
36
assert ! ( !blocks. is_empty( ) ) ;
35
- let props = WriterProperties :: builder ( )
37
+ let builder = WriterProperties :: builder ( )
36
38
. set_compression ( compression. into ( ) )
37
39
// use `usize::MAX` to effectively limit the number of row groups to 1
38
40
. set_max_row_group_size ( usize:: MAX )
39
- . set_encoding ( Encoding :: PLAIN )
40
- . set_dictionary_enabled ( false )
41
41
. set_statistics_enabled ( EnabledStatistics :: None )
42
- . set_bloom_filter_enabled ( false )
43
- . build ( ) ;
42
+ . set_bloom_filter_enabled ( false ) ;
43
+
44
+ let builder = if enable_encoding {
45
+ // Enable dictionary encoding and fallback encodings.
46
+ //
47
+ // Memo for quick lookup:
48
+ // The fallback encoding "strategy" used by parquet-54.2.1 is:
49
+ //
50
+ // ~~~
51
+ // (Type::BOOLEAN, WriterVersion::PARQUET_2_0) => Encoding::RLE,
52
+ // (Type::INT32, WriterVersion::PARQUET_2_0) => Encoding::DELTA_BINARY_PACKED,
53
+ // (Type::INT64, WriterVersion::PARQUET_2_0) => Encoding::DELTA_BINARY_PACKED,
54
+ // (Type::BYTE_ARRAY, WriterVersion::PARQUET_2_0) => Encoding::DELTA_BYTE_ARRAY,
55
+ // (Type::FIXED_LEN_BYTE_ARRAY, WriterVersion::PARQUET_2_0) => Encoding::DELTA_BYTE_ARRAY,
56
+ // _ => Encoding::PLAIN,
57
+ // ~~~
58
+ //
59
+ builder
60
+ . set_writer_version ( WriterVersion :: PARQUET_2_0 )
61
+ . set_dictionary_enabled ( true )
62
+ } else {
63
+ builder
64
+ . set_dictionary_enabled ( false )
65
+ . set_encoding ( Encoding :: PLAIN )
66
+ } ;
67
+
68
+ let props = builder. build ( ) ;
44
69
let batches = blocks
45
70
. into_iter ( )
46
71
. map ( |block| block. to_record_batch ( table_schema) )
0 commit comments