@@ -16,8 +16,8 @@ use std::collections::HashSet;
16
16
use std:: sync:: Arc ;
17
17
18
18
use chrono:: DateTime ;
19
- use chrono:: Days ;
20
19
use chrono:: Duration ;
20
+ use chrono:: TimeDelta ;
21
21
use chrono:: Utc ;
22
22
use databend_common_base:: base:: uuid:: Uuid ;
23
23
use databend_common_catalog:: table:: Table ;
@@ -32,6 +32,7 @@ use databend_common_storages_fuse::io::MetaReaders;
32
32
use databend_common_storages_fuse:: io:: SegmentsIO ;
33
33
use databend_common_storages_fuse:: io:: TableMetaLocationGenerator ;
34
34
use databend_common_storages_fuse:: FuseTable ;
35
+ use databend_common_storages_fuse:: RetentionPolicy ;
35
36
use databend_storages_common_cache:: CacheAccessor ;
36
37
use databend_storages_common_cache:: CacheManager ;
37
38
use databend_storages_common_cache:: LoadParams ;
@@ -89,57 +90,91 @@ pub async fn do_vacuum2(
89
90
let fuse_table = FuseTable :: try_from_table ( table) ?;
90
91
let start = std:: time:: Instant :: now ( ) ;
91
92
92
- let retention_period_in_days = if fuse_table. is_transient ( ) {
93
- 0
94
- } else {
95
- ctx. get_settings ( ) . get_data_retention_time_in_days ( ) ?
96
- } ;
93
+ let retention_policy = fuse_table. get_data_retention_policy ( ctx. as_ref ( ) ) ?;
97
94
98
- let is_vacuum_all = retention_period_in_days == 0 ;
95
+ // Indicates whether to use the current table snapshot as gc root,
96
+ // true means vacuum all the historical snapshots.
97
+ let mut is_vacuum_all = false ;
98
+ let mut respect_flash_back_with_lvt = None ;
99
99
100
- let Some ( lvt) = set_lvt ( fuse_table, ctx. as_ref ( ) , retention_period_in_days) . await ? else {
101
- return Ok ( vec ! [ ] ) ;
102
- } ;
100
+ let snapshots_before_lvt = match retention_policy {
101
+ RetentionPolicy :: ByTimePeriod ( delta_duration) => {
102
+ info ! ( "using by ByTimePeriod policy {:?}" , delta_duration) ;
103
+ let retention_period = if fuse_table. is_transient ( ) {
104
+ // For transient table, keep no history data
105
+ TimeDelta :: zero ( )
106
+ } else {
107
+ delta_duration
108
+ } ;
103
109
104
- ctx. set_status_info ( & format ! (
105
- "set lvt for table {} takes {:?}, lvt: {:?}" ,
106
- fuse_table. get_table_info( ) . desc,
107
- start. elapsed( ) ,
108
- lvt
109
- ) ) ;
110
+ is_vacuum_all = retention_period. is_zero ( ) ;
110
111
111
- let start = std:: time:: Instant :: now ( ) ;
112
- let snapshots_before_lvt = if is_vacuum_all {
113
- list_until_prefix (
114
- fuse_table,
115
- fuse_table
116
- . meta_location_generator ( )
117
- . snapshot_location_prefix ( ) ,
118
- fuse_table. snapshot_loc ( ) . unwrap ( ) . as_str ( ) ,
119
- true ,
120
- None ,
121
- )
122
- . await ?
123
- } else {
124
- list_until_timestamp (
125
- fuse_table,
126
- fuse_table
127
- . meta_location_generator ( )
128
- . snapshot_location_prefix ( ) ,
129
- lvt,
130
- true ,
131
- None ,
132
- )
133
- . await ?
112
+ let Some ( lvt) = set_lvt ( fuse_table, ctx. as_ref ( ) , retention_period) . await ? else {
113
+ return Ok ( vec ! [ ] ) ;
114
+ } ;
115
+
116
+ if respect_flash_back {
117
+ respect_flash_back_with_lvt = Some ( lvt) ;
118
+ }
119
+
120
+ ctx. set_status_info ( & format ! (
121
+ "set lvt for table {} takes {:?}, lvt: {:?}" ,
122
+ fuse_table. get_table_info( ) . desc,
123
+ start. elapsed( ) ,
124
+ lvt
125
+ ) ) ;
126
+
127
+ let snapshots_before_lvt =
128
+ collect_gc_candidate_by_retention_period ( fuse_table, lvt, is_vacuum_all) . await ?;
129
+ snapshots_before_lvt
130
+ }
131
+ RetentionPolicy :: ByNumOfSnapshotsToKeep ( num_snapshots_to_keep) => {
132
+ info ! (
133
+ "using by ByNumOfSnapshotsToKeep policy {:?}" ,
134
+ num_snapshots_to_keep
135
+ ) ;
136
+ // List the snapshot order by timestamp asc, till the current snapshot(inclusively).
137
+ let need_one_more = true ;
138
+ let mut snapshots = list_until_prefix (
139
+ fuse_table,
140
+ fuse_table
141
+ . meta_location_generator ( )
142
+ . snapshot_location_prefix ( ) ,
143
+ fuse_table. snapshot_loc ( ) . unwrap ( ) . as_str ( ) ,
144
+ need_one_more,
145
+ None ,
146
+ )
147
+ . await ?;
148
+
149
+ let len = snapshots. len ( ) ;
150
+ if len <= num_snapshots_to_keep {
151
+ // Only the current snapshot is there, done
152
+ return Ok ( vec ! [ ] ) ;
153
+ }
154
+ if num_snapshots_to_keep == 1 {
155
+ // Expecting only one snapshot left, which means that we can use the current snapshot
156
+ // as gc root, this flag will be propagated to the select_gc_root func later.
157
+ is_vacuum_all = true ;
158
+ }
159
+
160
+ // When selecting the GC root later, the last snapshot in `snapshots` is a candidate,
161
+ // but its commit status is uncertain, its previous snapshot is typically used as the GC root, except in the is_vacuum_all case.
162
+ //
163
+ // Therefore, during snapshot truncation, we keep 2 extra snapshots; see `select_gc_root` for details.
164
+ let num_candidates = len - num_snapshots_to_keep + 2 ;
165
+ snapshots. truncate ( num_candidates) ;
166
+ snapshots
167
+ }
134
168
} ;
135
169
136
170
let elapsed = start. elapsed ( ) ;
137
171
ctx. set_status_info ( & format ! (
138
- "list snapshots before lvt for table {} takes {:?}, snapshots_dir: {:?}, lvt : {:?}, snapshots: {:?}" ,
172
+ "list snapshots for table {} takes {:?}, snapshots_dir: {:?}, snapshots: {:?}" ,
139
173
fuse_table. get_table_info( ) . desc,
140
174
elapsed,
141
- fuse_table. meta_location_generator( ) . snapshot_location_prefix( ) ,
142
- lvt,
175
+ fuse_table
176
+ . meta_location_generator( )
177
+ . snapshot_location_prefix( ) ,
143
178
slice_summary( & snapshots_before_lvt)
144
179
) ) ;
145
180
@@ -148,9 +183,8 @@ pub async fn do_vacuum2(
148
183
fuse_table,
149
184
& snapshots_before_lvt,
150
185
is_vacuum_all,
151
- respect_flash_back ,
186
+ respect_flash_back_with_lvt ,
152
187
ctx. clone ( ) . get_abort_checker ( ) ,
153
- lvt,
154
188
)
155
189
. await ?
156
190
else {
@@ -341,13 +375,45 @@ pub async fn do_vacuum2(
341
375
Ok ( files_to_gc)
342
376
}
343
377
378
+ async fn collect_gc_candidate_by_retention_period (
379
+ fuse_table : & FuseTable ,
380
+ lvt : DateTime < Utc > ,
381
+ is_vacuum_all : bool ,
382
+ ) -> Result < Vec < Entry > > {
383
+ let snapshots_before_lvt = if is_vacuum_all {
384
+ list_until_prefix (
385
+ fuse_table,
386
+ fuse_table
387
+ . meta_location_generator ( )
388
+ . snapshot_location_prefix ( ) ,
389
+ fuse_table. snapshot_loc ( ) . unwrap ( ) . as_str ( ) ,
390
+ true ,
391
+ None ,
392
+ )
393
+ . await ?
394
+ } else {
395
+ list_until_timestamp (
396
+ fuse_table,
397
+ fuse_table
398
+ . meta_location_generator ( )
399
+ . snapshot_location_prefix ( ) ,
400
+ lvt,
401
+ true ,
402
+ None ,
403
+ )
404
+ . await ?
405
+ } ;
406
+
407
+ Ok ( snapshots_before_lvt)
408
+ }
409
+
344
410
/// Try set lvt as min(latest_snapshot.timestamp, now - retention_time).
345
411
///
346
412
/// Returns `None` when vacuuming should stop without reporting an error to the user.
347
413
async fn set_lvt (
348
414
fuse_table : & FuseTable ,
349
415
ctx : & dyn TableContext ,
350
- retention : u64 ,
416
+ retention_period : TimeDelta ,
351
417
) -> Result < Option < DateTime < Utc > > > {
352
418
let Some ( latest_snapshot) = fuse_table. read_table_snapshot ( ) . await ? else {
353
419
info ! (
@@ -366,7 +432,7 @@ async fn set_lvt(
366
432
let cat = ctx. get_default_catalog ( ) ?;
367
433
// safe to unwrap, as we have checked the version is v5
368
434
let latest_ts = latest_snapshot. timestamp . unwrap ( ) ;
369
- let lvt_point_candidate = std:: cmp:: min ( Utc :: now ( ) - Days :: new ( retention ) , latest_ts) ;
435
+ let lvt_point_candidate = std:: cmp:: min ( Utc :: now ( ) - retention_period , latest_ts) ;
370
436
371
437
let lvt_point = cat
372
438
. set_table_lvt (
@@ -391,6 +457,7 @@ async fn list_until_prefix(
391
457
gc_root_meta_ts : Option < DateTime < Utc > > ,
392
458
) -> Result < Vec < Entry > > {
393
459
info ! ( "list until prefix: {}" , until) ;
460
+ eprintln ! ( "list until prefix inside: {}" , until) ;
394
461
let dal = fuse_table. get_operator_ref ( ) ;
395
462
396
463
match dal. info ( ) . scheme ( ) {
@@ -457,8 +524,10 @@ async fn fs_list_until_prefix(
457
524
let mut res = Vec :: new ( ) ;
458
525
for entry in entries {
459
526
if entry. path ( ) >= until {
527
+ eprintln ! ( "entry path: {} >= until: {}" , entry. path( ) , until) ;
460
528
info ! ( "entry path: {} >= until: {}" , entry. path( ) , until) ;
461
529
if need_one_more {
530
+ eprintln ! ( "kept" ) ;
462
531
res. push ( entry) ;
463
532
}
464
533
break ;
@@ -538,14 +607,13 @@ async fn select_gc_root(
538
607
fuse_table : & FuseTable ,
539
608
snapshots_before_lvt : & [ Entry ] ,
540
609
is_vacuum_all : bool ,
541
- respect_flash_back : bool ,
610
+ respect_flash_back : Option < DateTime < Utc > > ,
542
611
abort_checker : AbortChecker ,
543
- lvt : DateTime < Utc > ,
544
612
) -> Result < Option < ( Arc < TableSnapshot > , Vec < String > , DateTime < Utc > ) > > {
545
613
let gc_root_path = if is_vacuum_all {
546
614
// safe to unwrap, or we should have stopped vacuuming in set_lvt()
547
615
fuse_table. snapshot_loc ( ) . unwrap ( )
548
- } else if respect_flash_back {
616
+ } else if let Some ( lvt ) = respect_flash_back {
549
617
let latest_location = fuse_table. snapshot_loc ( ) . unwrap ( ) ;
550
618
let gc_root = fuse_table
551
619
. find ( latest_location, abort_checker, |snapshot| {
@@ -580,6 +648,8 @@ async fn select_gc_root(
580
648
gc_root_path
581
649
} ;
582
650
651
+ eprintln ! ( "gc root path {}" , gc_root_path) ;
652
+
583
653
let dal = fuse_table. get_operator_ref ( ) ;
584
654
let gc_root = read_snapshot_from_location ( fuse_table, & gc_root_path) . await ;
585
655
@@ -637,6 +707,8 @@ async fn select_gc_root(
637
707
} ) ?;
638
708
let snapshots_to_gc = gc_candidates[ ..gc_root_idx] . to_vec ( ) ;
639
709
710
+ eprintln ! ( "snapshots to gc {:?}" , snapshots_to_gc) ;
711
+
640
712
Ok ( Some ( ( gc_root, snapshots_to_gc, gc_root_meta_ts) ) )
641
713
}
642
714
Err ( e) => {
0 commit comments