20
20
#include " util/fibers/fibers.h"
21
21
#include " util/fibers/stacktrace.h"
22
22
23
- ABSL_FLAG (bool , enable_heartbeat_eviction, true ,
24
- " Enable eviction during heartbeat when memory is under pressure." );
25
-
26
23
ABSL_FLAG (uint32_t , max_eviction_per_heartbeat, 100 ,
27
24
" The maximum number of key-value pairs that will be deleted in each eviction "
28
25
" when heartbeat based eviction is triggered under memory pressure." );
@@ -484,8 +481,7 @@ OpResult<DbSlice::PrimeItAndExp> DbSlice::FindInternal(const Context& cntx, std:
484
481
if (caching_mode_ && IsValid (res.it )) {
485
482
if (!change_cb_.empty ()) {
486
483
auto bump_cb = [&](PrimeTable::bucket_iterator bit) {
487
- DVLOG (2 ) << " Running callbacks for key " << key << " in dbid " << cntx.db_index ;
488
- CallChangeCallbacks (cntx.db_index , bit);
484
+ CallChangeCallbacks (cntx.db_index , key, bit);
489
485
};
490
486
db.prime .CVCUponBump (change_cb_.back ().first , res.it , bump_cb);
491
487
}
@@ -565,8 +561,7 @@ OpResult<DbSlice::AddOrFindResult> DbSlice::AddOrFindInternal(const Context& cnt
565
561
CHECK (status == OpStatus::KEY_NOTFOUND || status == OpStatus::OUT_OF_MEMORY) << status;
566
562
567
563
// It's a new entry.
568
- DVLOG (2 ) << " Running callbacks for key " << key << " in dbid " << cntx.db_index ;
569
- CallChangeCallbacks (cntx.db_index , key);
564
+ CallChangeCallbacks (cntx.db_index , key, {key});
570
565
571
566
// In case we are loading from rdb file or replicating we want to disable conservative memory
572
567
// checks (inside PrimeEvictionPolicy::CanGrow) and reject insertions only after we pass max
@@ -598,8 +593,8 @@ OpResult<DbSlice::AddOrFindResult> DbSlice::AddOrFindInternal(const Context& cnt
598
593
CompactObj co_key{key};
599
594
PrimeIterator it;
600
595
601
- // I try/catch just for sake of having a convenient place to set a breakpoint.
602
- size_t table_before = db. prime . mem_usage ();
596
+ size_t table_before = db. table_memory ();
597
+
603
598
try {
604
599
it = db.prime .InsertNew (std::move (co_key), PrimeValue{}, evp);
605
600
} catch (bad_alloc& e) {
@@ -608,19 +603,20 @@ OpResult<DbSlice::AddOrFindResult> DbSlice::AddOrFindInternal(const Context& cnt
608
603
return OpStatus::OUT_OF_MEMORY;
609
604
}
610
605
611
- table_memory_ += (db.prime .mem_usage () - table_before);
612
606
size_t evicted_obj_bytes = 0 ;
613
-
614
- // We may still reach the state when our memory usage is above the limit even if we
615
- // do not add new segments. For example, we have half full segments
616
- // and we add new objects or update the existing ones and our memory usage grows.
617
- if (evp. mem_budget () < 0 ) {
618
- // TODO(roman): EvictObjects is too aggressive and it's messing with cache hit-rate .
619
- // The regular eviction policy does a decent job though it may cross the passed limit
620
- // a little bit. I do not consider it as a serious bug.
621
- // evicted_obj_bytes = EvictObjects(-evp.mem_budget() , it, &db );
607
+ if (evp. mem_budget () < 0 && apply_memory_limit) {
608
+ // We may reach the state when our memory usage is above the limit even if we
609
+ // do not add new segments. For example, we have half full segments
610
+ // and we add new objects or update the existing ones and our memory usage grows.
611
+ // We do not require a single operation to unload the whole negative debt.
612
+ // Instead, we create a positive, converging force that should help with freeing enough memory.
613
+ // Free at least 256 bytes or ~3% (1/32) of the total debt, whichever is larger.
614
+ size_t evict_goal = std::max< size_t >( 256 , (-evp. mem_budget ()) / 32 );
615
+ evicted_obj_bytes = FreeMemWithEvictionStep (cntx. db_index , it. segment_id (), evict_goal );
622
616
}
623
617
618
+ table_memory_ += (db.table_memory () - table_before);
619
+
624
620
db.stats .inline_keys += it->first .IsInline ();
625
621
AccountObjectMemory (key, it->first .ObjType (), it->first .MallocUsed (), &db); // Account for key
626
622
@@ -709,7 +705,7 @@ void DbSlice::FlushSlotsFb(const cluster::SlotSet& slot_ids) {
709
705
string_view key = get<string_view>(req.change );
710
706
table->CVCUponInsert (
711
707
next_version, key,
712
- [this , db_index, next_version, iterate_bucket](PrimeTable::bucket_iterator it) {
708
+ [db_index, next_version, iterate_bucket](PrimeTable::bucket_iterator it) {
713
709
DCHECK_LT (it.GetVersion (), next_version);
714
710
iterate_bucket (db_index, it);
715
711
});
@@ -762,7 +758,7 @@ void DbSlice::FlushDbIndexes(const std::vector<DbIndex>& indexes) {
762
758
}
763
759
764
760
CHECK (fetched_items_.empty ());
765
- auto cb = [this , indexes, flush_db_arr = std::move (flush_db_arr)]() mutable {
761
+ auto cb = [indexes, flush_db_arr = std::move (flush_db_arr)]() mutable {
766
762
flush_db_arr.clear ();
767
763
ServerState::tlocal ()->DecommitMemory (ServerState::kDataHeap | ServerState::kBackingHeap |
768
764
ServerState::kGlibcmalloc );
@@ -1023,9 +1019,7 @@ bool DbSlice::CheckLock(IntentLock::Mode mode, DbIndex dbid, uint64_t fp) const
1023
1019
}
1024
1020
1025
1021
void DbSlice::PreUpdate (DbIndex db_ind, Iterator it, std::string_view key) {
1026
- DVLOG (2 ) << " Running callbacks in dbid " << db_ind;
1027
- CallChangeCallbacks (db_ind, ChangeReq{it.GetInnerIt ()});
1028
-
1022
+ CallChangeCallbacks (db_ind, key, ChangeReq{it.GetInnerIt ()});
1029
1023
it.GetInnerIt ().SetVersion (NextVersion ());
1030
1024
}
1031
1025
@@ -1217,25 +1211,22 @@ int32_t DbSlice::GetNextSegmentForEviction(int32_t segment_id, DbIndex db_ind) c
1217
1211
db_arr_[db_ind]->prime .GetSegmentCount ();
1218
1212
}
1219
1213
1220
- void DbSlice::FreeMemWithEvictionStep (DbIndex db_ind, size_t increase_goal_bytes) {
1214
+ size_t DbSlice::FreeMemWithEvictionStep (DbIndex db_ind, size_t starting_segment_id,
1215
+ size_t increase_goal_bytes) {
1221
1216
DCHECK (!owner_->IsReplica ());
1222
- if ((!caching_mode_) || !expire_allowed_ || ! GetFlag (FLAGS_enable_heartbeat_eviction) )
1223
- return ;
1217
+ if ((!caching_mode_) || !expire_allowed_)
1218
+ return 0 ;
1224
1219
1225
1220
auto max_eviction_per_hb = GetFlag (FLAGS_max_eviction_per_heartbeat);
1226
1221
auto max_segment_to_consider = GetFlag (FLAGS_max_segment_to_consider);
1227
1222
1228
1223
auto time_start = absl::GetCurrentTimeNanos ();
1229
1224
auto & db_table = db_arr_[db_ind];
1230
- int32_t num_segments = db_table->prime .GetSegmentCount ();
1231
- int32_t num_buckets = PrimeTable::Segment_t::kTotalBuckets ;
1232
- int32_t num_slots = PrimeTable::Segment_t::kSlotNum ;
1225
+ constexpr int32_t num_buckets = PrimeTable::Segment_t::kTotalBuckets ;
1226
+ constexpr int32_t num_slots = PrimeTable::Segment_t::kSlotNum ;
1233
1227
1234
- size_t used_memory_after;
1235
- size_t evicted = 0 ;
1228
+ size_t evicted_items = 0 , evicted_bytes = 0 ;
1236
1229
string tmp;
1237
- int32_t starting_segment_id = rand () % num_segments;
1238
- size_t used_memory_before = owner_->UsedMemory ();
1239
1230
1240
1231
bool record_keys = owner_->journal () != nullptr || expired_keys_events_recording_;
1241
1232
vector<string> keys_to_journal;
@@ -1257,7 +1248,7 @@ void DbSlice::FreeMemWithEvictionStep(DbIndex db_ind, size_t increase_goal_bytes
1257
1248
continue ;
1258
1249
1259
1250
auto evict_it = db_table->prime .GetIterator (segment_id, bucket_id, slot_id);
1260
- if (evict_it->first .IsSticky ())
1251
+ if (evict_it->first .IsSticky () || !evict_it-> second . HasAllocated () )
1261
1252
continue ;
1262
1253
1263
1254
// check if the key is locked by looking up transaction table.
@@ -1269,13 +1260,12 @@ void DbSlice::FreeMemWithEvictionStep(DbIndex db_ind, size_t increase_goal_bytes
1269
1260
if (record_keys)
1270
1261
keys_to_journal.emplace_back (key);
1271
1262
1263
+ evicted_bytes += evict_it->second .MallocUsed ();
1264
+ ++evicted_items;
1272
1265
PerformDeletion (Iterator (evict_it, StringOrView::FromView (key)), db_table.get ());
1273
- ++evicted;
1274
1266
1275
- used_memory_after = owner_->UsedMemory ();
1276
1267
// returns when whichever condition is met first
1277
- if ((evicted == max_eviction_per_hb) ||
1278
- (used_memory_before - used_memory_after >= increase_goal_bytes))
1268
+ if ((evicted_items == max_eviction_per_hb) || (evicted_bytes >= increase_goal_bytes))
1279
1269
goto finish;
1280
1270
}
1281
1271
}
@@ -1294,12 +1284,12 @@ void DbSlice::FreeMemWithEvictionStep(DbIndex db_ind, size_t increase_goal_bytes
1294
1284
}
1295
1285
1296
1286
auto time_finish = absl::GetCurrentTimeNanos ();
1297
- events_.evicted_keys += evicted;
1298
- DVLOG (2 ) << " Memory usage before eviction: " << used_memory_before;
1299
- DVLOG (2 ) << " Memory usage after eviction: " << used_memory_after;
1300
- DVLOG (2 ) << " Number of keys evicted / max eviction per hb: " << evicted << " /"
1287
+ events_.evicted_keys += evicted_items;
1288
+ DVLOG (2 ) << " Evicted: " << evicted_bytes;
1289
+ DVLOG (2 ) << " Number of keys evicted / max eviction per hb: " << evicted_items << " /"
1301
1290
<< max_eviction_per_hb;
1302
1291
DVLOG (2 ) << " Eviction time (us): " << (time_finish - time_start) / 1000 ;
1292
+ return evicted_bytes;
1303
1293
}
1304
1294
1305
1295
void DbSlice::CreateDb (DbIndex db_ind) {
@@ -1310,93 +1300,6 @@ void DbSlice::CreateDb(DbIndex db_ind) {
1310
1300
}
1311
1301
}
1312
1302
1313
- // "it" is the iterator that we just added/updated and it should not be deleted.
1314
- // "table" is the instance where we should delete the objects from.
1315
- size_t DbSlice::EvictObjects (size_t memory_to_free, Iterator it, DbTable* table) {
1316
- if (owner_->IsReplica ()) {
1317
- return 0 ;
1318
- }
1319
- PrimeTable::Segment_t* segment = table->prime .GetSegment (it.GetInnerIt ().segment_id ());
1320
- DCHECK (segment);
1321
-
1322
- constexpr unsigned kNumStashBuckets = PrimeTable::Segment_t::kStashBucketNum ;
1323
- constexpr unsigned kNumRegularBuckets = PrimeTable::Segment_t::kBucketNum ;
1324
-
1325
- PrimeTable::bucket_iterator it2 (it.GetInnerIt ());
1326
- unsigned evicted = 0 ;
1327
- bool evict_succeeded = false ;
1328
-
1329
- EngineShard* shard = owner_;
1330
- size_t used_memory_start = shard->UsedMemory ();
1331
-
1332
- auto freed_memory_fun = [&] {
1333
- size_t current = shard->UsedMemory ();
1334
- return current < used_memory_start ? used_memory_start - current : 0 ;
1335
- };
1336
-
1337
- for (unsigned i = 0 ; !evict_succeeded && i < kNumStashBuckets ; ++i) {
1338
- unsigned stash_bid = i + kNumRegularBuckets ;
1339
- const auto & bucket = segment->GetBucket (stash_bid);
1340
- if (bucket.IsEmpty ())
1341
- continue ;
1342
-
1343
- for (int slot_id = PrimeTable::Segment_t::kSlotNum - 1 ; slot_id >= 0 ; --slot_id) {
1344
- if (!bucket.IsBusy (slot_id))
1345
- continue ;
1346
-
1347
- auto evict_it = table->prime .GetIterator (it.GetInnerIt ().segment_id (), stash_bid, slot_id);
1348
- // skip the iterator that we must keep or the sticky items.
1349
- if (evict_it == it.GetInnerIt () || evict_it->first .IsSticky ())
1350
- continue ;
1351
-
1352
- PerformDeletion (evict_it, table);
1353
- ++evicted;
1354
-
1355
- if (freed_memory_fun () > memory_to_free) {
1356
- evict_succeeded = true ;
1357
- break ;
1358
- }
1359
- }
1360
- }
1361
-
1362
- if (evicted) {
1363
- DVLOG (1 ) << " Evicted " << evicted << " stashed items, freed " << freed_memory_fun () << " bytes" ;
1364
- }
1365
-
1366
- // Try normal buckets now. We iterate from largest slot to smallest across the whole segment.
1367
- for (int slot_id = PrimeTable::Segment_t::kSlotNum - 1 ; !evict_succeeded && slot_id >= 0 ;
1368
- --slot_id) {
1369
- for (unsigned i = 0 ; i < kNumRegularBuckets ; ++i) {
1370
- unsigned bid = (it.GetInnerIt ().bucket_id () + i) % kNumRegularBuckets ;
1371
- const auto & bucket = segment->GetBucket (bid);
1372
- if (!bucket.IsBusy (slot_id))
1373
- continue ;
1374
-
1375
- auto evict_it = table->prime .GetIterator (it.GetInnerIt ().segment_id (), bid, slot_id);
1376
- if (evict_it == it.GetInnerIt () || evict_it->first .IsSticky ())
1377
- continue ;
1378
-
1379
- PerformDeletion (evict_it, table);
1380
- ++evicted;
1381
-
1382
- if (freed_memory_fun () > memory_to_free) {
1383
- evict_succeeded = true ;
1384
- break ;
1385
- }
1386
- }
1387
- }
1388
-
1389
- if (evicted) {
1390
- DVLOG (1 ) << " Evicted total: " << evicted << " items, freed " << freed_memory_fun () << " bytes "
1391
- << " success: " << evict_succeeded;
1392
-
1393
- events_.evicted_keys += evicted;
1394
- events_.hard_evictions += evicted;
1395
- }
1396
-
1397
- return freed_memory_fun ();
1398
- };
1399
-
1400
1303
void DbSlice::RegisterWatchedKey (DbIndex db_indx, std::string_view key,
1401
1304
ConnectionState::ExecInfo* exec_info) {
1402
1305
db_arr_[db_indx]->watched_keys [key].push_back (exec_info);
@@ -1566,7 +1469,11 @@ void DbSlice::OnCbFinish() {
1566
1469
fetched_items_.clear ();
1567
1470
}
1568
1471
1569
- void DbSlice::CallChangeCallbacks (DbIndex id, const ChangeReq& cr) const {
1472
+ void DbSlice::CallChangeCallbacks (DbIndex id, std::string_view key, const ChangeReq& cr) const {
1473
+ if (change_cb_.empty ())
1474
+ return ;
1475
+
1476
+ DVLOG (2 ) << " Running callbacks for key " << key << " in dbid " << id;
1570
1477
FetchedItemsRestorer fetched_restorer (&fetched_items_);
1571
1478
std::unique_lock<LocalBlockingCounter> lk (block_counter_);
1572
1479
0 commit comments