2020#include " util/fibers/fibers.h"
2121#include " util/fibers/stacktrace.h"
2222
23- ABSL_FLAG (bool , enable_heartbeat_eviction, true ,
24- " Enable eviction during heartbeat when memory is under pressure." );
25-
2623ABSL_FLAG (uint32_t , max_eviction_per_heartbeat, 100 ,
2724 " The maximum number of key-value pairs that will be deleted in each eviction "
2825 " when heartbeat based eviction is triggered under memory pressure." );
@@ -484,8 +481,7 @@ OpResult<DbSlice::PrimeItAndExp> DbSlice::FindInternal(const Context& cntx, std:
484481 if (caching_mode_ && IsValid (res.it )) {
485482 if (!change_cb_.empty ()) {
486483 auto bump_cb = [&](PrimeTable::bucket_iterator bit) {
487- DVLOG (2 ) << " Running callbacks for key " << key << " in dbid " << cntx.db_index ;
488- CallChangeCallbacks (cntx.db_index , bit);
484+ CallChangeCallbacks (cntx.db_index , key, bit);
489485 };
490486 db.prime .CVCUponBump (change_cb_.back ().first , res.it , bump_cb);
491487 }
@@ -565,8 +561,7 @@ OpResult<DbSlice::AddOrFindResult> DbSlice::AddOrFindInternal(const Context& cnt
565561 CHECK (status == OpStatus::KEY_NOTFOUND || status == OpStatus::OUT_OF_MEMORY) << status;
566562
567563 // It's a new entry.
568- DVLOG (2 ) << " Running callbacks for key " << key << " in dbid " << cntx.db_index ;
569- CallChangeCallbacks (cntx.db_index , key);
564+ CallChangeCallbacks (cntx.db_index , key, {key});
570565
571566 // In case we are loading from rdb file or replicating we want to disable conservative memory
572567 // checks (inside PrimeEvictionPolicy::CanGrow) and reject insertions only after we pass max
@@ -598,8 +593,8 @@ OpResult<DbSlice::AddOrFindResult> DbSlice::AddOrFindInternal(const Context& cnt
598593 CompactObj co_key{key};
599594 PrimeIterator it;
600595
601- // I try/catch just for sake of having a convenient place to set a breakpoint.
602- size_t table_before = db. prime . mem_usage ();
596+ size_t table_before = db. table_memory ();
597+
603598 try {
604599 it = db.prime .InsertNew (std::move (co_key), PrimeValue{}, evp);
605600 } catch (bad_alloc& e) {
@@ -608,19 +603,20 @@ OpResult<DbSlice::AddOrFindResult> DbSlice::AddOrFindInternal(const Context& cnt
608603 return OpStatus::OUT_OF_MEMORY;
609604 }
610605
611- table_memory_ += (db.prime .mem_usage () - table_before);
612606 size_t evicted_obj_bytes = 0 ;
613-
614- // We may still reach the state when our memory usage is above the limit even if we
615- // do not add new segments. For example, we have half full segments
616- // and we add new objects or update the existing ones and our memory usage grows.
617- if (evp. mem_budget () < 0 ) {
618- // TODO(roman): EvictObjects is too aggressive and it's messing with cache hit-rate .
619- // The regular eviction policy does a decent job though it may cross the passed limit
620- // a little bit. I do not consider it as a serious bug.
621- // evicted_obj_bytes = EvictObjects(-evp.mem_budget() , it, &db );
607+ if (evp. mem_budget () < 0 && apply_memory_limit) {
608+ // We may reach the state when our memory usage is below the limit even if we
609+ // do not add new segments. For example, we have half full segments
610+ // and we add new objects or update the existing ones and our memory usage grows.
611+ // We do not require for a single operation to unload the whole negative debt.
612+ // Instead, we create a positive, converging force that should help with freeing enough memory .
613+ // Free at least 256 bytes or 3% of the total debt.
614+ size_t evict_goal = std::max< size_t >( 256 , (-evp. mem_budget ()) / 32 );
615+ evicted_obj_bytes = FreeMemWithEvictionStep (cntx. db_index , it. segment_id (), evict_goal );
622616 }
623617
618+ table_memory_ += (db.table_memory () - table_before);
619+
624620 db.stats .inline_keys += it->first .IsInline ();
625621 AccountObjectMemory (key, it->first .ObjType (), it->first .MallocUsed (), &db); // Account for key
626622
@@ -709,7 +705,7 @@ void DbSlice::FlushSlotsFb(const cluster::SlotSet& slot_ids) {
709705 string_view key = get<string_view>(req.change );
710706 table->CVCUponInsert (
711707 next_version, key,
712- [this , db_index, next_version, iterate_bucket](PrimeTable::bucket_iterator it) {
708+ [db_index, next_version, iterate_bucket](PrimeTable::bucket_iterator it) {
713709 DCHECK_LT (it.GetVersion (), next_version);
714710 iterate_bucket (db_index, it);
715711 });
@@ -762,7 +758,7 @@ void DbSlice::FlushDbIndexes(const std::vector<DbIndex>& indexes) {
762758 }
763759
764760 CHECK (fetched_items_.empty ());
765- auto cb = [this , indexes, flush_db_arr = std::move (flush_db_arr)]() mutable {
761+ auto cb = [indexes, flush_db_arr = std::move (flush_db_arr)]() mutable {
766762 flush_db_arr.clear ();
767763 ServerState::tlocal ()->DecommitMemory (ServerState::kDataHeap | ServerState::kBackingHeap |
768764 ServerState::kGlibcmalloc );
@@ -1023,9 +1019,7 @@ bool DbSlice::CheckLock(IntentLock::Mode mode, DbIndex dbid, uint64_t fp) const
10231019}
10241020
10251021void DbSlice::PreUpdate (DbIndex db_ind, Iterator it, std::string_view key) {
1026- DVLOG (2 ) << " Running callbacks in dbid " << db_ind;
1027- CallChangeCallbacks (db_ind, ChangeReq{it.GetInnerIt ()});
1028-
1022+ CallChangeCallbacks (db_ind, key, ChangeReq{it.GetInnerIt ()});
10291023 it.GetInnerIt ().SetVersion (NextVersion ());
10301024}
10311025
@@ -1217,25 +1211,22 @@ int32_t DbSlice::GetNextSegmentForEviction(int32_t segment_id, DbIndex db_ind) c
12171211 db_arr_[db_ind]->prime .GetSegmentCount ();
12181212}
12191213
1220- void DbSlice::FreeMemWithEvictionStep (DbIndex db_ind, size_t increase_goal_bytes) {
1214+ size_t DbSlice::FreeMemWithEvictionStep (DbIndex db_ind, size_t starting_segment_id,
1215+ size_t increase_goal_bytes) {
12211216 DCHECK (!owner_->IsReplica ());
1222- if ((!caching_mode_) || !expire_allowed_ || ! GetFlag (FLAGS_enable_heartbeat_eviction) )
1223- return ;
1217+ if ((!caching_mode_) || !expire_allowed_)
1218+ return 0 ;
12241219
12251220 auto max_eviction_per_hb = GetFlag (FLAGS_max_eviction_per_heartbeat);
12261221 auto max_segment_to_consider = GetFlag (FLAGS_max_segment_to_consider);
12271222
12281223 auto time_start = absl::GetCurrentTimeNanos ();
12291224 auto & db_table = db_arr_[db_ind];
1230- int32_t num_segments = db_table->prime .GetSegmentCount ();
1231- int32_t num_buckets = PrimeTable::Segment_t::kTotalBuckets ;
1232- int32_t num_slots = PrimeTable::Segment_t::kSlotNum ;
1225+ constexpr int32_t num_buckets = PrimeTable::Segment_t::kTotalBuckets ;
1226+ constexpr int32_t num_slots = PrimeTable::Segment_t::kSlotNum ;
12331227
1234- size_t used_memory_after;
1235- size_t evicted = 0 ;
1228+ size_t evicted_items = 0 , evicted_bytes = 0 ;
12361229 string tmp;
1237- int32_t starting_segment_id = rand () % num_segments;
1238- size_t used_memory_before = owner_->UsedMemory ();
12391230
12401231 bool record_keys = owner_->journal () != nullptr || expired_keys_events_recording_;
12411232 vector<string> keys_to_journal;
@@ -1257,7 +1248,7 @@ void DbSlice::FreeMemWithEvictionStep(DbIndex db_ind, size_t increase_goal_bytes
12571248 continue ;
12581249
12591250 auto evict_it = db_table->prime .GetIterator (segment_id, bucket_id, slot_id);
1260- if (evict_it->first .IsSticky ())
1251+ if (evict_it->first .IsSticky () || !evict_it-> second . HasAllocated () )
12611252 continue ;
12621253
12631254 // check if the key is locked by looking up transaction table.
@@ -1269,13 +1260,12 @@ void DbSlice::FreeMemWithEvictionStep(DbIndex db_ind, size_t increase_goal_bytes
12691260 if (record_keys)
12701261 keys_to_journal.emplace_back (key);
12711262
1263+ evicted_bytes += evict_it->second .MallocUsed ();
1264+ ++evicted_items;
12721265 PerformDeletion (Iterator (evict_it, StringOrView::FromView (key)), db_table.get ());
1273- ++evicted;
12741266
1275- used_memory_after = owner_->UsedMemory ();
12761267 // returns when whichever condition is met first
1277- if ((evicted == max_eviction_per_hb) ||
1278- (used_memory_before - used_memory_after >= increase_goal_bytes))
1268+ if ((evicted_items == max_eviction_per_hb) || (evicted_bytes >= increase_goal_bytes))
12791269 goto finish;
12801270 }
12811271 }
@@ -1294,12 +1284,12 @@ void DbSlice::FreeMemWithEvictionStep(DbIndex db_ind, size_t increase_goal_bytes
12941284 }
12951285
12961286 auto time_finish = absl::GetCurrentTimeNanos ();
1297- events_.evicted_keys += evicted;
1298- DVLOG (2 ) << " Memory usage before eviction: " << used_memory_before;
1299- DVLOG (2 ) << " Memory usage after eviction: " << used_memory_after;
1300- DVLOG (2 ) << " Number of keys evicted / max eviction per hb: " << evicted << " /"
1287+ events_.evicted_keys += evicted_items;
1288+ DVLOG (2 ) << " Evicted: " << evicted_bytes;
1289+ DVLOG (2 ) << " Number of keys evicted / max eviction per hb: " << evicted_items << " /"
13011290 << max_eviction_per_hb;
13021291 DVLOG (2 ) << " Eviction time (us): " << (time_finish - time_start) / 1000 ;
1292+ return evicted_bytes;
13031293}
13041294
13051295void DbSlice::CreateDb (DbIndex db_ind) {
@@ -1310,93 +1300,6 @@ void DbSlice::CreateDb(DbIndex db_ind) {
13101300 }
13111301}
13121302
1313- // "it" is the iterator that we just added/updated and it should not be deleted.
1314- // "table" is the instance where we should delete the objects from.
1315- size_t DbSlice::EvictObjects (size_t memory_to_free, Iterator it, DbTable* table) {
1316- if (owner_->IsReplica ()) {
1317- return 0 ;
1318- }
1319- PrimeTable::Segment_t* segment = table->prime .GetSegment (it.GetInnerIt ().segment_id ());
1320- DCHECK (segment);
1321-
1322- constexpr unsigned kNumStashBuckets = PrimeTable::Segment_t::kStashBucketNum ;
1323- constexpr unsigned kNumRegularBuckets = PrimeTable::Segment_t::kBucketNum ;
1324-
1325- PrimeTable::bucket_iterator it2 (it.GetInnerIt ());
1326- unsigned evicted = 0 ;
1327- bool evict_succeeded = false ;
1328-
1329- EngineShard* shard = owner_;
1330- size_t used_memory_start = shard->UsedMemory ();
1331-
1332- auto freed_memory_fun = [&] {
1333- size_t current = shard->UsedMemory ();
1334- return current < used_memory_start ? used_memory_start - current : 0 ;
1335- };
1336-
1337- for (unsigned i = 0 ; !evict_succeeded && i < kNumStashBuckets ; ++i) {
1338- unsigned stash_bid = i + kNumRegularBuckets ;
1339- const auto & bucket = segment->GetBucket (stash_bid);
1340- if (bucket.IsEmpty ())
1341- continue ;
1342-
1343- for (int slot_id = PrimeTable::Segment_t::kSlotNum - 1 ; slot_id >= 0 ; --slot_id) {
1344- if (!bucket.IsBusy (slot_id))
1345- continue ;
1346-
1347- auto evict_it = table->prime .GetIterator (it.GetInnerIt ().segment_id (), stash_bid, slot_id);
1348- // skip the iterator that we must keep or the sticky items.
1349- if (evict_it == it.GetInnerIt () || evict_it->first .IsSticky ())
1350- continue ;
1351-
1352- PerformDeletion (evict_it, table);
1353- ++evicted;
1354-
1355- if (freed_memory_fun () > memory_to_free) {
1356- evict_succeeded = true ;
1357- break ;
1358- }
1359- }
1360- }
1361-
1362- if (evicted) {
1363- DVLOG (1 ) << " Evicted " << evicted << " stashed items, freed " << freed_memory_fun () << " bytes" ;
1364- }
1365-
1366- // Try normal buckets now. We iterate from largest slot to smallest across the whole segment.
1367- for (int slot_id = PrimeTable::Segment_t::kSlotNum - 1 ; !evict_succeeded && slot_id >= 0 ;
1368- --slot_id) {
1369- for (unsigned i = 0 ; i < kNumRegularBuckets ; ++i) {
1370- unsigned bid = (it.GetInnerIt ().bucket_id () + i) % kNumRegularBuckets ;
1371- const auto & bucket = segment->GetBucket (bid);
1372- if (!bucket.IsBusy (slot_id))
1373- continue ;
1374-
1375- auto evict_it = table->prime .GetIterator (it.GetInnerIt ().segment_id (), bid, slot_id);
1376- if (evict_it == it.GetInnerIt () || evict_it->first .IsSticky ())
1377- continue ;
1378-
1379- PerformDeletion (evict_it, table);
1380- ++evicted;
1381-
1382- if (freed_memory_fun () > memory_to_free) {
1383- evict_succeeded = true ;
1384- break ;
1385- }
1386- }
1387- }
1388-
1389- if (evicted) {
1390- DVLOG (1 ) << " Evicted total: " << evicted << " items, freed " << freed_memory_fun () << " bytes "
1391- << " success: " << evict_succeeded;
1392-
1393- events_.evicted_keys += evicted;
1394- events_.hard_evictions += evicted;
1395- }
1396-
1397- return freed_memory_fun ();
1398- };
1399-
14001303void DbSlice::RegisterWatchedKey (DbIndex db_indx, std::string_view key,
14011304 ConnectionState::ExecInfo* exec_info) {
14021305 db_arr_[db_indx]->watched_keys [key].push_back (exec_info);
@@ -1566,7 +1469,11 @@ void DbSlice::OnCbFinish() {
15661469 fetched_items_.clear ();
15671470}
15681471
1569- void DbSlice::CallChangeCallbacks (DbIndex id, const ChangeReq& cr) const {
1472+ void DbSlice::CallChangeCallbacks (DbIndex id, std::string_view key, const ChangeReq& cr) const {
1473+ if (change_cb_.empty ())
1474+ return ;
1475+
1476+ DVLOG (2 ) << " Running callbacks for key " << key << " in dbid " << id;
15701477 FetchedItemsRestorer fetched_restorer (&fetched_items_);
15711478 std::unique_lock<LocalBlockingCounter> lk (block_counter_);
15721479
0 commit comments