Skip to content

Commit d1673e6

Browse files
committed
fix: do not check-fail in OpRestore (#4332)
fix: do not check-fail OpRestore In some rare cases we reach inconsistent state inside OpRestore where a key already exists, though it should not. In that case log the error instead of crashing the server. In addition, we update the existing entry to the latest restored value. Signed-off-by: Roman Gershman <roman@dragonflydb.io>
1 parent ad3e037 commit d1673e6

File tree

2 files changed

+52
-33
lines changed

2 files changed

+52
-33
lines changed

src/server/db_slice.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,10 @@ class DbSlice {
281281
Iterator it;
282282
ExpIterator exp_it;
283283
AutoUpdater post_updater;
284+
285+
bool IsValid() const {
286+
return !it.is_done();
287+
}
284288
};
285289

286290
ItAndUpdater FindMutable(const Context& cntx, std::string_view key);

src/server/generic_family.cc

Lines changed: 48 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -120,24 +120,28 @@ class RestoreArgs {
120120
: expiration_(expiration), abs_time_(abs_time), replace_(replace) {
121121
}
122122

123-
constexpr bool Replace() const {
123+
bool Replace() const {
124124
return replace_;
125125
}
126126

127-
constexpr bool Sticky() const {
127+
bool Sticky() const {
128128
return sticky_;
129129
}
130130

131+
void SetSticky(bool sticky) {
132+
sticky_ = sticky;
133+
}
134+
131135
uint64_t ExpirationTime() const {
132136
DCHECK_GE(expiration_, 0);
133137
return expiration_;
134138
}
135139

136-
[[nodiscard]] constexpr bool Expired() const {
140+
bool Expired() const {
137141
return expiration_ < 0;
138142
}
139143

140-
[[nodiscard]] constexpr bool HasExpiration() const {
144+
bool HasExpiration() const {
141145
return expiration_ != NO_EXPIRATION;
142146
}
143147

@@ -152,9 +156,12 @@ class RdbRestoreValue : protected RdbLoaderBase {
152156
rdb_version_ = rdb_version;
153157
}
154158

155-
std::optional<DbSlice::ItAndUpdater> Add(std::string_view payload, std::string_view key,
156-
DbSlice& db_slice, const DbContext& cntx,
157-
const RestoreArgs& args, EngineShard* shard);
159+
// Returns default ItAndUpdater if Add failed.
160+
// In case a valid ItAndUpdater is returned, then second is true in case a new key is added,
161+
// false if the existing key is updated (should not happen unless we have a bug).
162+
pair<DbSlice::ItAndUpdater, bool> Add(string_view key, string_view payload, const DbContext& cntx,
163+
const RestoreArgs& args, DbSlice* db_slice,
164+
EngineShard* shard);
158165

159166
private:
160167
std::optional<OpaqueObj> Parse(io::Source* source);
@@ -185,18 +192,17 @@ std::optional<RdbLoaderBase::OpaqueObj> RdbRestoreValue::Parse(io::Source* sourc
185192
return std::optional<OpaqueObj>(std::move(obj));
186193
}
187194

188-
std::optional<DbSlice::ItAndUpdater> RdbRestoreValue::Add(std::string_view data,
189-
std::string_view key, DbSlice& db_slice,
190-
const DbContext& cntx,
191-
const RestoreArgs& args,
192-
EngineShard* shard) {
195+
pair<DbSlice::ItAndUpdater, bool> RdbRestoreValue::Add(string_view key, string_view data,
196+
const DbContext& cntx,
197+
const RestoreArgs& args, DbSlice* db_slice,
198+
EngineShard* shard) {
193199
InMemSource data_src(data);
194200
PrimeValue pv;
195201
bool first_parse = true;
196202
do {
197203
auto opaque_res = Parse(&data_src);
198204
if (!opaque_res) {
199-
return std::nullopt;
205+
return {};
200206
}
201207

202208
LoadConfig config;
@@ -212,16 +218,18 @@ std::optional<DbSlice::ItAndUpdater> RdbRestoreValue::Add(std::string_view data,
212218
if (auto ec = FromOpaque(*opaque_res, config, &pv); ec) {
213219
// we failed - report and exit
214220
LOG(WARNING) << "error while trying to read data: " << ec;
215-
return std::nullopt;
221+
return {};
216222
}
217223
} while (pending_read_.remaining > 0);
218224

219-
if (auto res = db_slice.AddNew(cntx, key, std::move(pv), args.ExpirationTime()); res) {
225+
if (auto res = db_slice->AddOrUpdate(cntx, key, std::move(pv), args.ExpirationTime()); res) {
220226
res->it->first.SetSticky(args.Sticky());
221227
shard->search_indices()->AddDoc(key, cntx, res->it->second);
222-
return std::move(res.value());
228+
return {DbSlice::ItAndUpdater{
229+
.it = res->it, .exp_it = res->exp_it, .post_updater = std::move(res->post_updater)},
230+
res->is_new};
223231
}
224-
return std::nullopt;
232+
return {};
225233
}
226234

227235
[[nodiscard]] bool RestoreArgs::UpdateExpiration(int64_t now_msec) {
@@ -467,14 +475,14 @@ OpStatus Renamer::DeserializeDest(Transaction* t, EngineShard* shard) {
467475
return OpStatus::OK;
468476
}
469477

470-
RdbRestoreValue loader(serialized_value_.version.value());
471-
auto restored_dest_it = loader.Add(serialized_value_.value, dest_key_, db_slice, op_args.db_cntx,
472-
restore_args, shard);
478+
restore_args.SetSticky(serialized_value_.sticky);
473479

474-
if (restored_dest_it) {
475-
auto& dest_it = restored_dest_it->it;
476-
dest_it->first.SetSticky(serialized_value_.sticky);
480+
RdbRestoreValue loader(serialized_value_.version.value());
481+
auto [restored_dest_it, is_new] = loader.Add(dest_key_, serialized_value_.value, op_args.db_cntx,
482+
restore_args, &db_slice, shard);
477483

484+
if (restored_dest_it.IsValid()) {
485+
LOG_IF(DFATAL, !is_new) << "Unexpected override for key " << dest_key_ << " " << dest_found_;
478486
auto bc = op_args.db_cntx.ns->GetBlockingController(op_args.shard->shard_id());
479487
if (bc) {
480488
bc->AwakeWatched(t->GetDbIndex(), dest_key_);
@@ -527,27 +535,28 @@ OpResult<std::string> OpDump(const OpArgs& op_args, string_view key) {
527535
return OpStatus::KEY_NOTFOUND;
528536
}
529537

530-
OpResult<bool> OnRestore(const OpArgs& op_args, std::string_view key, std::string_view payload,
538+
OpResult<bool> OpRestore(const OpArgs& op_args, std::string_view key, std::string_view payload,
531539
RestoreArgs restore_args, RdbVersion rdb_version) {
532540
if (!restore_args.UpdateExpiration(op_args.db_cntx.time_now_ms)) {
533541
return OpStatus::OUT_OF_RANGE;
534542
}
535543

536544
auto& db_slice = op_args.GetDbSlice();
545+
bool found_prev = false;
546+
537547
// The redis impl (see cluster.c function restoreCommand), remove the old key if
538548
// the replace option is set, so lets do the same here
539549
{
540550
auto res = db_slice.FindMutable(op_args.db_cntx, key);
541-
if (restore_args.Replace()) {
542-
if (IsValid(res.it)) {
551+
if (IsValid(res.it)) {
552+
found_prev = true;
553+
if (restore_args.Replace()) {
543554
VLOG(1) << "restore command is running with replace, found old key '" << key
544555
<< "' and removing it";
545556
res.post_updater.Run();
546557
CHECK(db_slice.Del(op_args.db_cntx, res.it));
547-
}
548-
} else {
549-
// we are not allowed to replace it, so make sure it doesn't exist
550-
if (IsValid(res.it)) {
558+
} else {
559+
// we are not allowed to replace it.
551560
return OpStatus::KEY_EXISTS;
552561
}
553562
}
@@ -559,8 +568,14 @@ OpResult<bool> OnRestore(const OpArgs& op_args, std::string_view key, std::strin
559568
}
560569

561570
RdbRestoreValue loader(rdb_version);
562-
auto res = loader.Add(payload, key, db_slice, op_args.db_cntx, restore_args, op_args.shard);
563-
return res.has_value();
571+
auto [res_it, is_new] =
572+
loader.Add(key, payload, op_args.db_cntx, restore_args, &db_slice, op_args.shard);
573+
LOG_IF(DFATAL, res_it.IsValid() && !is_new)
574+
<< "Unexpected override for key " << key << ", found previous " << found_prev
575+
<< " override: " << restore_args.Replace()
576+
<< ", type: " << ObjTypeToString(res_it.it->second.ObjType());
577+
578+
return res_it.IsValid();
564579
}
565580

566581
bool ScanCb(const OpArgs& op_args, PrimeIterator prime_it, const ScanOpts& opts, string* scratch,
@@ -1473,7 +1488,7 @@ void GenericFamily::Restore(CmdArgList args, Transaction* tx, SinkReplyBuilder*
14731488
}
14741489

14751490
auto cb = [&](Transaction* t, EngineShard* shard) {
1476-
return OnRestore(t->GetOpArgs(shard), key, serialized_value, restore_args.value(),
1491+
return OpRestore(t->GetOpArgs(shard), key, serialized_value, restore_args.value(),
14771492
rdb_version.value());
14781493
};
14791494

0 commit comments

Comments
 (0)