Skip to content

Commit 4f91030

Browse files
adiholdenromange
authored andcommitted
fix(server): deadlock with replicaof inside multi (#4685)
* fix server: fix deadlock with replicaof inside multi Signed-off-by: adi_holden <adi@dragonflydb.io>
1 parent cf47d68 commit 4f91030

File tree

2 files changed

+39
-1
lines changed

2 files changed

+39
-1
lines changed

src/server/main_service.cc

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -650,6 +650,7 @@ Transaction::MultiMode DeduceExecMode(ExecScriptUse state,
650650
const ScriptMgr& script_mgr) {
651651
// Check if script most LIKELY has global eval transactions
652652
bool contains_global = false;
653+
bool contains_admin_cmd = false;
653654
Transaction::MultiMode multi_mode = Transaction::LOCK_AHEAD;
654655

655656
if (state == ExecScriptUse::SCRIPT_RUN) {
@@ -670,6 +671,7 @@ Transaction::MultiMode DeduceExecMode(ExecScriptUse state,
670671
transactional |= scmd.Cid()->IsTransactional();
671672
}
672673
contains_global |= scmd.Cid()->opt_mask() & CO::GLOBAL_TRANS;
674+
contains_admin_cmd |= scmd.Cid()->opt_mask() & CO::ADMIN;
673675

674676
// We can't run no-key-transactional commands in lock-ahead mode currently,
675677
// because it means we have to schedule on all shards
@@ -685,9 +687,13 @@ Transaction::MultiMode DeduceExecMode(ExecScriptUse state,
685687
if (!transactional && exec_info.watched_keys.empty())
686688
return Transaction::NOT_DETERMINED;
687689

690+
if (contains_admin_cmd) {
691+
multi_mode = Transaction::NON_ATOMIC;
692+
}
688693
// Atomic modes fall back to GLOBAL if they contain global commands.
689-
if (contains_global && multi_mode == Transaction::LOCK_AHEAD)
694+
else if (contains_global && multi_mode == Transaction::LOCK_AHEAD) {
690695
multi_mode = Transaction::GLOBAL;
696+
}
691697

692698
return multi_mode;
693699
}

tests/dragonfly/replication_test.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2799,3 +2799,35 @@ async def test_preempt_in_atomic_section_of_heartbeat(df_factory: DflyInstanceFa
27992799
await wait_for_replicas_state(*c_replicas)
28002800

28012801
await fill_task
2802+
2803+
2804+
@dfly_args({"proactor_threads": 2})
2805+
async def test_replicaof_inside_multi(df_factory):
2806+
master = df_factory.create()
2807+
replica = df_factory.create()
2808+
df_factory.start_all([master, replica])
2809+
2810+
async def replicate_inside_multi():
2811+
try:
2812+
c_master = master.client()
2813+
p = c_master.pipeline(transaction=True)
2814+
for i in range(5):
2815+
p.execute_command("dbsize")
2816+
p.execute_command(f"replicaof localhost {replica.port}")
2817+
await p.execute()
2818+
return True
2819+
except redis.exceptions.ResponseError:
2820+
return False
2821+
2822+
MULTI_COMMANDS_TO_ISSUE = 30
2823+
replication_commands = [
2824+
asyncio.create_task(replicate_inside_multi()) for _ in range(MULTI_COMMANDS_TO_ISSUE)
2825+
]
2826+
2827+
num_successes = 0
2828+
for result in asyncio.as_completed(replication_commands, timeout=80):
2829+
num_successes += await result
2830+
2831+
logging.info(f"succeses: {num_successes}")
2832+
assert MULTI_COMMANDS_TO_ISSUE > num_successes, "At least one REPLICAOF must be cancelled"
2833+
assert num_successes > 0, "At least one REPLICAOF must success"

0 commit comments

Comments
 (0)