fix: a rare deadlock with blmove and multi-db (#4568)

romange · romange · commit 863ff762d570 · 2025-02-09T12:49:08.000+02:00
The bug requires lots of conditions in order to reproduce: (1) blocking operations on multiple databases; (2) use of Lua scripts that wake blocking transactions. The bug was discovered due to a deadlock in BLMOVE but could also manifest with other commands that would "disappear", causing local starvation effects on the connections sending them. With BLMOVE it causes a global deadlock in the transaction queue in Dragonfly. The fix is actually deleting a few lines of code introduced by #3260 from 6 months ago, so it is actually a long-lived regression. Signed-off-by: Roman Gershman <roman@dragonflydb.io>
diff --git a/src/server/engine_shard.cc b/src/server/engine_shard.cc
@@ -578,17 +578,11 @@ void EngineShard::PollExecution(const char* context, Transaction* trans) {
       trans = nullptr;
 
     if ((is_self && disarmed) || continuation_trans_->DisarmInShard(sid)) {
-      auto bc = continuation_trans_->GetNamespace().GetBlockingController(shard_id_);
       if (bool keep = run(continuation_trans_, false); !keep) {
         // if this holds, we can remove this check altogether.
         DCHECK(continuation_trans_ == nullptr);
         continuation_trans_ = nullptr;
       }
-      if (bc && bc->HasAwakedTransaction()) {
-        // Break if there are any awakened transactions, as we must give way to them
-        // before continuing to handle regular transactions from the queue.
-        return;
-      }
     }
   }
 
@@ -630,7 +624,6 @@ void EngineShard::PollExecution(const char* context, Transaction* trans) {
 
   // If we disarmed, but didn't find ourselves in the loop, run now.
   if (trans && disarmed) {
-    DCHECK(trans != head);
     DCHECK(trans_mask & (Transaction::OUT_OF_ORDER | Transaction::SUSPENDED_Q));
 
     bool is_ooo = trans_mask & Transaction::OUT_OF_ORDER;
diff --git a/src/server/list_family_test.cc b/src/server/list_family_test.cc
@@ -1080,5 +1080,255 @@ TEST_F(ListFamilyTest, ContendExpire) {
   }
 }
 
+TEST_F(ListFamilyTest, LMPopInvalidSyntax) {
+  // Not enough arguments
+  auto resp = Run({"lmpop", "1", "a"});
+  EXPECT_THAT(resp, ErrArg("wrong number of arguments"));
+
+  // Zero keys
+  resp = Run({"lmpop", "0", "LEFT", "COUNT", "1"});
+  EXPECT_THAT(resp, ErrArg("syntax error"));
+
+  // Number of keys is not uint
+  resp = Run({"lmpop", "aa", "a", "LEFT"});
+  EXPECT_THAT(resp, ErrArg("value is not an integer or out of range"));
+
+  // Missing LEFT/RIGHT
+  resp = Run({"lmpop", "1", "a", "COUNT", "1"});
+  EXPECT_THAT(resp, ErrArg("syntax error"));
+
+  // Wrong number of keys
+  resp = Run({"lmpop", "1", "a", "b", "LEFT"});
+  EXPECT_THAT(resp, ErrArg("syntax error"));
+
+  // COUNT without number
+  resp = Run({"lmpop", "1", "a", "LEFT", "COUNT"});
+  EXPECT_THAT(resp, ErrArg("syntax error"));
+
+  // COUNT is not uint
+  resp = Run({"lmpop", "1", "a", "LEFT", "COUNT", "boo"});
+  EXPECT_THAT(resp, ErrArg("value is not an integer or out of range"));
+
+  // Too many arguments
+  resp = Run({"lmpop", "1", "c", "LEFT", "COUNT", "2", "foo"});
+  EXPECT_THAT(resp, ErrArg("syntax error"));
+}
+
+TEST_F(ListFamilyTest, LMPop) {
+  // All lists are empty
+  auto resp = Run({"lmpop", "1", "e", "LEFT"});
+  EXPECT_THAT(resp, ArgType(RespExpr::NIL));
+
+  // LEFT operation
+  resp = Run({"lpush", "a", "a1", "a2"});
+  EXPECT_THAT(resp, IntArg(2));
+
+  resp = Run({"lmpop", "1", "a", "LEFT"});
+  EXPECT_THAT(resp, RespArray(ElementsAre("a", RespArray(ElementsAre("a2")))));
+
+  // RIGHT operation
+  resp = Run({"lpush", "b", "b1", "b2"});
+  EXPECT_THAT(resp, IntArg(2));
+
+  resp = Run({"lmpop", "1", "b", "RIGHT"});
+  EXPECT_THAT(resp, RespArray(ElementsAre("b", RespArray(ElementsAre("b1")))));
+
+  // COUNT > 1
+  resp = Run({"lpush", "c", "c1", "c2"});
+  EXPECT_THAT(resp, IntArg(2));
+
+  resp = Run({"lmpop", "1", "c", "RIGHT", "COUNT", "2"});
+  EXPECT_THAT(resp, RespArray(ElementsAre("c", RespArray(ElementsAre("c1", "c2")))));
+
+  resp = Run({"llen", "c"});
+  EXPECT_THAT(resp, IntArg(0));
+
+  // COUNT > number of elements in list
+  resp = Run({"lpush", "d", "d1", "d2"});
+  EXPECT_THAT(resp, IntArg(2));
+
+  resp = Run({"lmpop", "1", "d", "RIGHT", "COUNT", "3"});
+  EXPECT_THAT(resp, RespArray(ElementsAre("d", RespArray(ElementsAre("d1", "d2")))));
+
+  resp = Run({"llen", "d"});
+  EXPECT_THAT(resp, IntArg(0));
+
+  // First non-empty list is not the first list
+  resp = Run({"lpush", "x", "x1"});
+  EXPECT_THAT(resp, IntArg(1));
+
+  resp = Run({"lpush", "y", "y1"});
+  EXPECT_THAT(resp, IntArg(1));
+
+  resp = Run({"lmpop", "3", "empty", "x", "y", "RIGHT"});
+  EXPECT_THAT(resp, RespArray(ElementsAre("x", RespArray(ElementsAre("x1")))));
+
+  resp = Run({"llen", "x"});
+  EXPECT_THAT(resp, IntArg(0));
+}
+
+TEST_F(ListFamilyTest, LMPopMultipleElements) {
+  // Test removing multiple elements from left end
+  Run({"rpush", "list1", "a", "b", "c", "d", "e"});
+  auto resp = Run({"lmpop", "1", "list1", "LEFT", "COUNT", "3"});
+  EXPECT_THAT(resp, RespArray(ElementsAre("list1", RespArray(ElementsAre("a", "b", "c")))));
+
+  resp = Run({"lrange", "list1", "0", "-1"});
+  EXPECT_THAT(resp.GetVec(), ElementsAre("d", "e"));
+
+  // Test removing multiple elements from right end
+  Run({"rpush", "list2", "v", "w", "x", "y", "z"});
+  resp = Run({"lmpop", "1", "list2", "RIGHT", "COUNT", "2"});
+  EXPECT_THAT(resp, RespArray(ElementsAre("list2", RespArray(ElementsAre("z", "y")))));
+
+  resp = Run({"lrange", "list2", "0", "-1"});
+  EXPECT_THAT(resp.GetVec(), ElementsAre("v", "w", "x"));
+}
+
+TEST_F(ListFamilyTest, LMPopMultipleLists) {
+  // Test finding first non-empty list
+  Run({"rpush", "list1", "a", "b"});
+  Run({"rpush", "list2", "c", "d"});
+  Run({"rpush", "list3", "e", "f"});
+
+  // Pop from first non-empty list
+  auto resp = Run({"lmpop", "3", "list1", "list2", "list3", "LEFT"});
+  EXPECT_THAT(resp, RespArray(ElementsAre("list1", RespArray(ElementsAre("a")))));
+
+  // Pop from second list after first becomes empty
+  Run({"lmpop", "1", "list1", "LEFT"});  // Empty list1
+  resp = Run({"lmpop", "3", "list1", "list2", "list3", "RIGHT", "COUNT", "2"});
+  EXPECT_THAT(resp, RespArray(ElementsAre("list2", RespArray(ElementsAre("d", "c")))));
+
+  // Verify third list remains untouched
+  resp = Run({"lrange", "list3", "0", "-1"});
+  EXPECT_THAT(resp.GetVec(), ElementsAre("e", "f"));
+}
+
+TEST_F(ListFamilyTest, LMPopEdgeCases) {
+  // Test with empty list
+  Run({"rpush", "empty_list", "a"});
+  Run({"lpop", "empty_list"});
+  auto resp = Run({"lmpop", "1", "empty_list", "LEFT"});
+  EXPECT_THAT(resp, ArgType(RespExpr::NIL));
+
+  // Test with non-existent list
+  resp = Run({"lmpop", "1", "nonexistent", "LEFT"});
+  EXPECT_THAT(resp, ArgType(RespExpr::NIL));
+
+  // Test with wrong type key
+  Run({"set", "string_key", "value"});
+  resp = Run({"lmpop", "1", "string_key", "LEFT"});
+  EXPECT_THAT(resp, ErrArg("WRONGTYPE Operation against a key holding the wrong kind of value"));
+
+  // Test without COUNT parameter - should return 1 element by default
+  Run({"rpush", "list", "a", "b"});
+  resp = Run({"lmpop", "1", "list", "LEFT"});
+  EXPECT_THAT(resp,
+              RespArray(ElementsAre(
+                  "list", RespArray(ElementsAre("a")))));  // Should return 1 element by default
+
+  // Test with COUNT = 0 - expected reply is the key with an empty element array, not an error
+  resp = Run({"lmpop", "1", "list", "LEFT", "COUNT", "0"});
+  EXPECT_THAT(resp, RespArray(ElementsAre("list", RespArray(ElementsAre()))));
+
+  // Test with negative COUNT - not rejected; the remaining element "b" is expected to be popped
+  resp = Run({"lmpop", "1", "list", "LEFT", "COUNT", "-1"});
+  EXPECT_THAT(resp, RespArray(ElementsAre("list", RespArray(ElementsAre("b")))));
+}
+
+TEST_F(ListFamilyTest, LMPopDocExample) {
+  // Try to pop from non-existing lists
+  auto resp = Run({"LMPOP", "2", "non1", "non2", "LEFT", "COUNT", "10"});
+  EXPECT_THAT(resp, ArgType(RespExpr::NIL));
+
+  // Create first list and test basic pop
+  resp = Run({"LPUSH", "mylist", "one", "two", "three", "four", "five"});
+  EXPECT_THAT(resp, IntArg(5));
+
+  resp = Run({"LMPOP", "1", "mylist", "LEFT"});
+  EXPECT_THAT(resp, RespArray(ElementsAre("mylist", RespArray(ElementsAre("five")))));
+
+  resp = Run({"LRANGE", "mylist", "0", "-1"});
+  EXPECT_THAT(resp.GetVec(), ElementsAre("four", "three", "two", "one"));
+
+  // Test RIGHT pop with COUNT
+  resp = Run({"LMPOP", "1", "mylist", "RIGHT", "COUNT", "10"});
+  EXPECT_THAT(resp, RespArray(ElementsAre("mylist",
+                                          RespArray(ElementsAre("one", "two", "three", "four")))));
+
+  // Create two lists and test multi-key pop
+  resp = Run({"LPUSH", "mylist", "one", "two", "three", "four", "five"});
+  EXPECT_THAT(resp, IntArg(5));
+
+  resp = Run({"LPUSH", "mylist2", "a", "b", "c", "d", "e"});
+  EXPECT_THAT(resp, IntArg(5));
+
+  resp = Run({"LMPOP", "2", "mylist", "mylist2", "RIGHT", "COUNT", "3"});
+  EXPECT_THAT(resp,
+              RespArray(ElementsAre("mylist", RespArray(ElementsAre("one", "two", "three")))));
+
+  resp = Run({"LRANGE", "mylist", "0", "-1"});
+  EXPECT_THAT(resp.GetVec(), ElementsAre("five", "four"));
+
+  resp = Run({"LMPOP", "2", "mylist", "mylist2", "RIGHT", "COUNT", "5"});
+  EXPECT_THAT(resp, RespArray(ElementsAre("mylist", RespArray(ElementsAre("four", "five")))));
+
+  resp = Run({"LMPOP", "2", "mylist", "mylist2", "RIGHT", "COUNT", "10"});
+  EXPECT_THAT(resp,
+              RespArray(ElementsAre("mylist2", RespArray(ElementsAre("a", "b", "c", "d", "e")))));
+
+  // Verify both lists are now empty
+  resp = Run({"EXISTS", "mylist", "mylist2"});
+  EXPECT_THAT(resp, IntArg(0));
+}
+
+TEST_F(ListFamilyTest, LMPopWrongType) {
+  // Setup: create a list and a hash
+  Run({"lpush", "l1", "e1"});
+  Run({"hset", "foo", "k1", "v1"});
+
+  // Test: first key is wrong type
+  auto resp = Run({"lmpop", "2", "foo", "l1", "left"});
+  EXPECT_THAT(resp, ErrArg("WRONGTYPE Operation against a key holding the wrong kind of value"));
+
+  // Test: second key is wrong type but first doesn't exist
+  resp = Run({"lmpop", "2", "nonexistent", "foo", "left"});
+  EXPECT_THAT(resp, ErrArg("WRONGTYPE Operation against a key holding the wrong kind of value"));
+
+  // Test: second key is wrong type but first is a valid list
+  resp = Run({"lmpop", "2", "l1", "foo", "left"});
+  EXPECT_THAT(resp, RespArray(ElementsAre("l1", RespArray(ElementsAre("e1")))));
+}
+
+// Reproduce a flow that triggered a wrong DCHECK in the transaction flow.
+TEST_F(ListFamilyTest, AwakeMulti) {
+  auto f1 = pp_->at(1)->LaunchFiber(Launch::dispatch, [&] {
+    for (unsigned i = 0; i < 100; ++i) {
+      Run("CONSUMER", {"blmove", "src", "dest", "LEFT", "LEFT", "0"});
+    };
+  });
+  auto f2 = pp_->at(1)->LaunchFiber([&] {
+    for (unsigned i = 0; i < 100; ++i) {
+      Run("PROD", {"lpush", "src", "a"});
+      ThisFiber::SleepFor(50us);
+    };
+  });
+
+  auto f3 = pp_->at(2)->LaunchFiber([&] {
+    for (unsigned i = 0; i < 100; ++i) {
+      Run({"multi"});
+      for (unsigned j = 0; j < 8; ++j) {
+        Run({"get", StrCat("key", j)});
+      };
+      Run({"exec"});
+    };
+  });
+
+  f1.Join();
+  f2.Join();
+  f3.Join();
+}
+
 #pragma GCC diagnostic pop
 }  // namespace dfly
diff --git a/tests/dragonfly/generic_test.py b/tests/dragonfly/generic_test.py
@@ -1,12 +1,11 @@
-import os
 import logging
 import pytest
 import redis
 import asyncio
 from redis import asyncio as aioredis
 
 from . import dfly_multi_test_args, dfly_args
-from .instance import DflyStartException
+from .instance import DflyInstance, DflyStartException
 from .utility import batch_fill_data, gen_test_data, EnvironCntx
 from .seeder import StaticSeeder
 
@@ -81,6 +80,91 @@ async def task2(k, n):
     )
 
 
+@dfly_args({"proactor_threads": 2, "num_shards": 2})
+async def test_blocking_multiple_dbs(async_client: aioredis.Redis, df_server: DflyInstance):
+    active = True
+
+    # A task to trigger the flow that eventually loses a transaction.
+    # blmove is used to trigger a global deadlock, but we could use any
+    # command - the effect would be a local deadlock of that connection.
+    async def blmove_task_loose(num):
+        async def run(id):
+            c = df_server.client()
+            await c.lpush(f"key{id}", "val")
+            while active:
+                await c.blmove(f"key{id}", f"key{id}", 0, "LEFT", "LEFT")
+                await asyncio.sleep(0.01)
+
+        tasks = []
+        for i in range(num):
+            tasks.append(run(i))
+
+        await asyncio.gather(*tasks)
+
+    # A task that creates continuation_trans_ by constantly timing out on
+    # an empty list. We could probably use any 2-hop operation like rename.
+    async def task_blocking(num):
+        async def block(id):
+            c = df_server.client()
+            while active:
+                await c.blmove(f"{{{id}}}from", f"{{{id}}}to", 0.1, "LEFT", "LEFT")
+
+        tasks = []
+        for i in range(num):
+            tasks.append(block(i))
+        await asyncio.gather(*tasks)
+
+    # produce is constantly waking up consumers. It is used to trigger the
+    # flow that creates wake-ups on a different database in the
+    # middle of a continuation transaction.
+    async def tasks_produce(num, iters):
+        LPUSH_SCRIPT = """
+            redis.call('LPUSH', KEYS[1], "val")
+        """
+
+        async def produce(id):
+            c = df_server.client(db=1)  # important to be on a different db
+            for i in range(iters):
+                # Must be a lua script and not multi-exec for some reason.
+                await c.eval(LPUSH_SCRIPT, 1, f"list{{{id}}}")
+
+        tasks = []
+        for i in range(num):
+            task = asyncio.create_task(produce(i))
+            tasks.append(task)
+
+        await asyncio.gather(*tasks)
+        logging.info("Finished producing")
+
+    # works with producer to constantly block and wake up
+    async def tasks_consume(num, iters):
+        async def drain(id, iters):
+            client = df_server.client(db=1)
+            for _ in range(iters):
+                await client.blmove(f"list{{{id}}}", f"sink{{{id}}}", 0, "LEFT", "LEFT")
+
+        tasks = []
+        for i in range(num):
+            task = asyncio.create_task(drain(i, iters))
+            tasks.append(task)
+
+        await asyncio.gather(*tasks)
+        logging.info("Finished consuming")
+
+    num_keys = 32
+    num_iters = 200
+    async_task1 = asyncio.create_task(blmove_task_loose(num_keys))
+    async_task2 = asyncio.create_task(task_blocking(num_keys))
+    logging.info("Starting tasks")
+    await asyncio.gather(
+        tasks_consume(num_keys, num_iters),
+        tasks_produce(num_keys, num_iters),
+    )
+    logging.info("Finishing tasks")
+    active = False
+    await asyncio.gather(async_task1, async_task2)
+
+
 async def test_arg_from_environ_overwritten_by_cli(df_factory):
     with EnvironCntx(DFLY_port="6378"):
         with df_factory.create(port=6377):