From 6de3fa3f1d98e77a7b37efcceece72e1dbdd87c9 Mon Sep 17 00:00:00 2001 From: Elias Rohrer Date: Tue, 13 May 2025 13:58:17 +0200 Subject: [PATCH 1/3] Hold runtime lock during shutdown To make sure no odd behavior is emerging when `stop`ing and `start`ing in quick succession, we now keep the runtime write lock until we're done shutting down. --- src/lib.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 7859a092e..52c9bc502 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -643,7 +643,9 @@ impl Node { /// /// After this returns most API methods will return [`Error::NotRunning`]. pub fn stop(&self) -> Result<(), Error> { - let runtime = self.runtime.write().unwrap().take().ok_or(Error::NotRunning)?; + let mut runtime_lock = self.runtime.write().unwrap(); + let runtime = runtime_lock.take().ok_or(Error::NotRunning)?; + #[cfg(tokio_unstable)] let metrics_runtime = Arc::clone(&runtime); From 5a521c15da91de50ce913375d93bbfbfa2581265 Mon Sep 17 00:00:00 2001 From: Elias Rohrer Date: Thu, 15 May 2025 12:17:27 +0200 Subject: [PATCH 2/3] f comment --- src/lib.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 52c9bc502..b1626d19d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -643,6 +643,9 @@ impl Node { /// /// After this returns most API methods will return [`Error::NotRunning`]. pub fn stop(&self) -> Result<(), Error> { + // We hold the write lock for the duration of this method to avoid any conflicts with + // inflight tasks when users would potentially concurrently try restart the node before + // `stop` returned. let mut runtime_lock = self.runtime.write().unwrap(); let runtime = runtime_lock.take().ok_or(Error::NotRunning)?; From d76160d6614dd1e1af2e827c125b3ce9befab322 Mon Sep 17 00:00:00 2001 From: Elias Rohrer Date: Tue, 13 May 2025 14:04:46 +0200 Subject: [PATCH 3/3] Reduce syncing and shutdown timeouts considerably Previously, we had to configure enormous syncing timeouts as the BDK wallet syncing would hold a central mutex that could lead to large parts of event handling and syncing locking up. Here, we drop the configured timeouts considerably across the board, since such huge values are hopefully not required anymore. --- src/chain/electrum.rs | 2 +- src/config.rs | 7 +++++-- src/lib.rs | 10 ++++------ 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/chain/electrum.rs b/src/chain/electrum.rs index 6e62d9c08..9882e652b 100644 --- a/src/chain/electrum.rs +++ b/src/chain/electrum.rs @@ -40,7 +40,7 @@ use std::time::{Duration, Instant}; const BDK_ELECTRUM_CLIENT_BATCH_SIZE: usize = 5; const ELECTRUM_CLIENT_NUM_RETRIES: u8 = 3; -const ELECTRUM_CLIENT_TIMEOUT_SECS: u8 = 20; +const ELECTRUM_CLIENT_TIMEOUT_SECS: u8 = 10; pub(crate) struct ElectrumRuntimeClient { electrum_client: Arc, diff --git a/src/config.rs b/src/config.rs index 4a39c1b56..544c6d3bb 100644 --- a/src/config.rs +++ b/src/config.rs @@ -65,10 +65,13 @@ pub(crate) const NODE_ANN_BCAST_INTERVAL: Duration = Duration::from_secs(60 * 60 pub(crate) const WALLET_SYNC_INTERVAL_MINIMUM_SECS: u64 = 10; // The timeout after which we abort a wallet syncing operation. -pub(crate) const BDK_WALLET_SYNC_TIMEOUT_SECS: u64 = 90; +pub(crate) const BDK_WALLET_SYNC_TIMEOUT_SECS: u64 = 20; // The timeout after which we abort a wallet syncing operation. -pub(crate) const LDK_WALLET_SYNC_TIMEOUT_SECS: u64 = 30; +pub(crate) const LDK_WALLET_SYNC_TIMEOUT_SECS: u64 = 10; + +// The timeout after which we give up waiting on LDK's event handler to exit on shutdown. +pub(crate) const LDK_EVENT_HANDLER_SHUTDOWN_TIMEOUT_SECS: u64 = 30; // The timeout after which we abort a fee rate cache update operation. pub(crate) const FEE_RATE_CACHE_UPDATE_TIMEOUT_SECS: u64 = 5; diff --git a/src/lib.rs b/src/lib.rs index b1626d19d..21f669175 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -126,8 +126,9 @@ pub use builder::NodeBuilder as Builder; use chain::ChainSource; use config::{ - default_user_config, may_announce_channel, ChannelConfig, Config, NODE_ANN_BCAST_INTERVAL, - PEER_RECONNECTION_INTERVAL, RGS_SYNC_INTERVAL, + default_user_config, may_announce_channel, ChannelConfig, Config, + LDK_EVENT_HANDLER_SHUTDOWN_TIMEOUT_SECS, NODE_ANN_BCAST_INTERVAL, PEER_RECONNECTION_INTERVAL, + RGS_SYNC_INTERVAL, }; use connection::ConnectionManager; use event::{EventHandler, EventQueue}; @@ -677,13 +678,10 @@ impl Node { let event_handling_stopped_logger = Arc::clone(&self.logger); let mut event_handling_stopped_receiver = self.event_handling_stopped_sender.subscribe(); - // FIXME: For now, we wait up to 100 secs (BDK_WALLET_SYNC_TIMEOUT_SECS + 10) to allow - // event handling to exit gracefully even if it was blocked on the BDK wallet syncing. We - // should drop this considerably post upgrading to BDK 1.0. let timeout_res = tokio::task::block_in_place(move || { runtime.block_on(async { tokio::time::timeout( - Duration::from_secs(100), + Duration::from_secs(LDK_EVENT_HANDLER_SHUTDOWN_TIMEOUT_SECS), event_handling_stopped_receiver.changed(), ) .await