Skip to content

Commit 81b0841

Browse files
committed
Defer monitor update completions after funding spend
When no_further_updates_allowed() is true and the persister returns Completed, ChainMonitor now overrides the return to InProgress and queues the update_id in deferred_monitor_update_completions. These are resolved in release_pending_monitor_events, so ChannelManager sees them complete together with the force-close MonitorEvents. This eliminates phantom InProgress entries that would never complete: previously, a rejected pre-close update (e.g. commitment_signed arriving after funding spend) returned InProgress with no completion path, blocking MonitorUpdateCompletionActions (PaymentClaimed, PaymentForwarded) indefinitely. A subsequent post-close update returning Completed would then violate the in-order completion invariant. AI tools were used in preparing this commit.
1 parent 674f3a8 commit 81b0841

3 files changed

Lines changed: 211 additions & 47 deletions

File tree

lightning/src/chain/chainmonitor.rs

Lines changed: 60 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,11 @@ struct MonitorHolder<ChannelSigner: EcdsaChannelSigner> {
227227
/// [`ChannelMonitorUpdate`] which was already applied. While this isn't an issue for the
228228
/// LDK-provided update-based [`Persist`], it is somewhat surprising for users so we avoid it.
229229
pending_monitor_updates: Mutex<Vec<u64>>,
230+
/// Monitor update IDs for which the persister returned `Completed` but we overrode the return
231+
/// to `InProgress` because `no_further_updates_allowed()` was true. These are resolved in
232+
/// [`ChainMonitor::release_pending_monitor_events`] so that `ChannelManager` sees them
233+
/// complete together with the force-close `MonitorEvent`s.
234+
deferred_monitor_update_completions: Mutex<Vec<u64>>,
230235
}
231236

232237
impl<ChannelSigner: EcdsaChannelSigner> MonitorHolder<ChannelSigner> {
@@ -1054,7 +1059,11 @@ where
10541059
if let Some(ref chain_source) = self.chain_source {
10551060
monitor.load_outputs_to_watch(chain_source, &self.logger);
10561061
}
1057-
entry.insert(MonitorHolder { monitor, pending_monitor_updates: Mutex::new(Vec::new()) });
1062+
entry.insert(MonitorHolder {
1063+
monitor,
1064+
pending_monitor_updates: Mutex::new(Vec::new()),
1065+
deferred_monitor_update_completions: Mutex::new(Vec::new()),
1066+
});
10581067

10591068
Ok(ChannelMonitorUpdateStatus::Completed)
10601069
}
@@ -1305,6 +1314,7 @@ where
13051314
entry.insert(MonitorHolder {
13061315
monitor,
13071316
pending_monitor_updates: Mutex::new(pending_monitor_updates),
1317+
deferred_monitor_update_completions: Mutex::new(Vec::new()),
13081318
});
13091319
Ok(persist_res)
13101320
}
@@ -1414,7 +1424,26 @@ where
14141424
}
14151425
}
14161426

1417-
if update_res.is_err() {
1427+
if monitor.no_further_updates_allowed()
1428+
&& persist_res == ChannelMonitorUpdateStatus::Completed
1429+
{
1430+
// The channel is post-close (funding spend seen, lockdown, or holder
1431+
// tx signed). Return InProgress so ChannelManager freezes the
1432+
// channel until the force-close MonitorEvents are processed. We
1433+
// track this update_id in deferred_monitor_update_completions so it
1434+
// gets resolved during release_pending_monitor_events, together with
1435+
// those MonitorEvents.
1436+
pending_monitor_updates.push(update_id);
1437+
monitor_state
1438+
.deferred_monitor_update_completions
1439+
.lock()
1440+
.unwrap()
1441+
.push(update_id);
1442+
log_debug!(
1443+
logger,
1444+
"Deferring completion of ChannelMonitorUpdate id {:?} (channel is post-close)",
1445+
update_id,
1446+
);
14181447
ChannelMonitorUpdateStatus::InProgress
14191448
} else {
14201449
persist_res
@@ -1429,8 +1458,36 @@ where
14291458
for (channel_id, update_id) in self.persister.get_and_clear_completed_updates() {
14301459
let _ = self.channel_monitor_updated(channel_id, update_id);
14311460
}
1461+
// Resolve any deferred monitor update completions. These are updates where the
1462+
// persister returned Completed but we overrode the return to InProgress because
1463+
// no_further_updates_allowed() was true. We resolve them here so that ChannelManager
1464+
// sees them complete together with the force-close MonitorEvents.
1465+
let monitors = self.monitors.read().unwrap();
1466+
for monitor_state in monitors.values() {
1467+
let deferred =
1468+
monitor_state.deferred_monitor_update_completions.lock().unwrap().split_off(0);
1469+
for update_id in deferred {
1470+
let mut pending = monitor_state.pending_monitor_updates.lock().unwrap();
1471+
pending.retain(|id| *id != update_id);
1472+
let monitor_is_pending_updates = monitor_state.has_pending_updates(&pending);
1473+
if !monitor_is_pending_updates {
1474+
let funding_txo = monitor_state.monitor.get_funding_txo();
1475+
let channel_id = monitor_state.monitor.channel_id();
1476+
self.pending_monitor_events.lock().unwrap().push((
1477+
funding_txo,
1478+
channel_id,
1479+
vec![MonitorEvent::Completed {
1480+
funding_txo,
1481+
channel_id,
1482+
monitor_update_id: monitor_state.monitor.get_latest_update_id(),
1483+
}],
1484+
monitor_state.monitor.get_counterparty_node_id(),
1485+
));
1486+
}
1487+
}
1488+
}
14321489
let mut pending_monitor_events = self.pending_monitor_events.lock().unwrap().split_off(0);
1433-
for monitor_state in self.monitors.read().unwrap().values() {
1490+
for monitor_state in monitors.values() {
14341491
let monitor_events = monitor_state.monitor.get_and_clear_pending_monitor_events();
14351492
if monitor_events.len() > 0 {
14361493
let monitor_funding_txo = monitor_state.monitor.get_funding_txo();

lightning/src/chain/channelmonitor.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6782,6 +6782,7 @@ mod tests {
67826782
};
67836783
use crate::ln::channelmanager::{HTLCSource, PaymentId};
67846784
use crate::ln::functional_test_utils::*;
6785+
use crate::ln::msgs::{BaseMessageHandler, MessageSendEvent};
67856786
use crate::ln::outbound_payment::RecipientOnionFields;
67866787
use crate::ln::script::ShutdownScript;
67876788
use crate::ln::types::ChannelId;
@@ -6890,7 +6891,16 @@ mod tests {
68906891
check_spends!(htlc_txn[0], broadcast_tx);
68916892
check_spends!(htlc_txn[1], broadcast_tx);
68926893

6893-
check_closed_broadcast(&nodes[1], 1, true);
6894+
// When get_and_clear_pending_msg_events triggers release_pending_monitor_events, the
6895+
// deferred completion from the rejected update resolves first, unfreezing the channel and
6896+
// generating a commitment update message. Then the force-close MonitorEvents are processed,
6897+
// generating BroadcastChannelUpdate + HandleError. So we expect 3 messages total.
6898+
let msg_events = nodes[1].node.get_and_clear_pending_msg_events();
6899+
assert_eq!(msg_events.len(), 3);
6900+
assert!(msg_events.iter().any(|e| matches!(e, MessageSendEvent::UpdateHTLCs { .. })));
6901+
assert!(msg_events.iter().any(|e| matches!(e, MessageSendEvent::BroadcastChannelUpdate { .. })));
6902+
assert!(msg_events.iter().any(|e| matches!(e, MessageSendEvent::HandleError { .. })));
6903+
68946904
check_closed_event(&nodes[1], 1, ClosureReason::CommitmentTxConfirmed, &[nodes[0].node.get_our_node_id()], 100000);
68956905
check_added_monitors(&nodes[1], 1);
68966906
}

lightning/src/ln/chanmon_update_fail_tests.rs

Lines changed: 140 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -3904,11 +3904,61 @@ fn do_test_durable_preimages_on_closed_channel(
39043904
}
39053905
if !close_chans_before_reload {
39063906
check_closed_broadcast(&nodes[1], 1, false);
3907-
let reason = ClosureReason::CommitmentTxConfirmed;
3908-
check_closed_event(&nodes[1], 1, reason, &[node_a_id], 100000);
3907+
if hold_post_reload_mon_update {
3908+
// When the persister returns InProgress, background events are replayed
3909+
// during check_closed_event but stay pending (InProgress). Only the
3910+
// ChannelClosed event fires.
3911+
check_closed_event(
3912+
&nodes[1],
3913+
1,
3914+
ClosureReason::CommitmentTxConfirmed,
3915+
&[node_a_id],
3916+
100000,
3917+
);
3918+
} else {
3919+
// When the persister returns Completed, check_closed_event triggers
3920+
// process_background_events (replaying the A-B preimage and force-close
3921+
// updates) AND process_pending_monitor_events (resolving the deferred
3922+
// completions). This fires both ChannelClosed and PaymentForwarded.
3923+
let evs = nodes[1].node.get_and_clear_pending_events();
3924+
assert_eq!(evs.len(), 2, "{:?}", evs);
3925+
let mut found_close = false;
3926+
let mut found_forward = false;
3927+
for ev in &evs {
3928+
match ev {
3929+
Event::ChannelClosed {
3930+
reason: ClosureReason::CommitmentTxConfirmed, ..
3931+
} => {
3932+
found_close = true;
3933+
},
3934+
Event::PaymentForwarded {
3935+
claim_from_onchain_tx, next_user_channel_id, ..
3936+
} => {
3937+
assert!(!claim_from_onchain_tx);
3938+
assert!(next_user_channel_id.is_some());
3939+
found_forward = true;
3940+
},
3941+
_ => panic!("Unexpected event: {:?}", ev),
3942+
}
3943+
}
3944+
assert!(found_close && found_forward);
3945+
// The background events replayed mons_added updates to chain_monitor.
3946+
check_added_monitors(&nodes[1], mons_added);
3947+
}
39093948
}
39103949
nodes[1].node.timer_tick_occurred();
3911-
check_added_monitors(&nodes[1], mons_added);
3950+
if hold_post_reload_mon_update {
3951+
check_added_monitors(&nodes[1], mons_added);
3952+
} else if close_chans_before_reload {
3953+
// For close_chans_before_reload with hold=false, the background events are
3954+
// replayed during timer_tick (first process_background_events call).
3955+
check_added_monitors(&nodes[1], mons_added);
3956+
} else {
3957+
// For !close_chans_before_reload with hold=false, the background events were
3958+
// already replayed during check_closed_event above. timer_tick is a no-op for
3959+
// monitor updates.
3960+
check_added_monitors(&nodes[1], 0);
3961+
}
39123962

39133963
// Finally, check that B created a payment preimage transaction and close out the payment.
39143964
let bs_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
@@ -3923,44 +3973,64 @@ fn do_test_durable_preimages_on_closed_channel(
39233973
check_closed_broadcast(&nodes[0], 1, false);
39243974
expect_payment_sent(&nodes[0], payment_preimage, None, true, true);
39253975

3976+
if close_chans_before_reload && !hold_post_reload_mon_update {
3977+
// For close_chans_before_reload with hold=false, the deferred completions
3978+
// haven't been processed yet. Trigger process_pending_monitor_events now.
3979+
let _ = nodes[1].node.get_and_clear_pending_msg_events();
3980+
check_added_monitors(&nodes[1], 0);
3981+
}
3982+
39263983
if !close_chans_before_reload || close_only_a {
39273984
// Make sure the B<->C channel is still alive and well by sending a payment over it.
39283985
let mut reconnect_args = ReconnectArgs::new(&nodes[1], &nodes[2]);
39293986
reconnect_args.pending_responding_commitment_signed.1 = true;
3930-
// The B<->C `ChannelMonitorUpdate` shouldn't be allowed to complete, which is the
3931-
// equivalent to the responding `commitment_signed` being a duplicate for node B, thus we
3932-
// need to set the `pending_responding_commitment_signed_dup` flag.
3933-
reconnect_args.pending_responding_commitment_signed_dup_monitor.1 = true;
3987+
if hold_post_reload_mon_update {
3988+
// When the A-B update is still InProgress, B-C monitor updates are blocked,
3989+
// so the responding commitment_signed is a duplicate that generates no update.
3990+
reconnect_args.pending_responding_commitment_signed_dup_monitor.1 = true;
3991+
}
39343992
reconnect_args.pending_raa.1 = true;
39353993

39363994
reconnect_nodes(reconnect_args);
39373995
}
39383996

3939-
// Once the blocked `ChannelMonitorUpdate` *finally* completes, the pending
3940-
// `PaymentForwarded` event will finally be released.
3941-
let (_, ab_update_id) = nodes[1].chain_monitor.get_latest_mon_update_id(chan_id_ab);
3942-
nodes[1].chain_monitor.chain_monitor.force_channel_monitor_updated(chan_id_ab, ab_update_id);
3997+
if hold_post_reload_mon_update {
3998+
// When the persister returned InProgress, we need to manually complete the
3999+
// A-B monitor update to unblock the PaymentForwarded completion action.
4000+
let (_, ab_update_id) = nodes[1].chain_monitor.get_latest_mon_update_id(chan_id_ab);
4001+
nodes[1]
4002+
.chain_monitor
4003+
.chain_monitor
4004+
.force_channel_monitor_updated(chan_id_ab, ab_update_id);
4005+
}
39434006

39444007
// If the A<->B channel was closed before we reload, we'll replay the claim against it on
39454008
// reload, causing the `PaymentForwarded` event to get replayed.
39464009
let evs = nodes[1].node.get_and_clear_pending_events();
3947-
assert_eq!(evs.len(), if close_chans_before_reload { 2 } else { 1 });
3948-
for ev in evs {
3949-
if let Event::PaymentForwarded { claim_from_onchain_tx, next_user_channel_id, .. } = ev {
3950-
if !claim_from_onchain_tx {
3951-
// If the outbound channel is still open, the `next_user_channel_id` should be available.
3952-
// This was previously broken.
3953-
assert!(next_user_channel_id.is_some())
4010+
if !close_chans_before_reload && !hold_post_reload_mon_update {
4011+
// PaymentForwarded already fired during check_closed_event above.
4012+
assert!(evs.is_empty(), "{:?}", evs);
4013+
} else {
4014+
assert_eq!(evs.len(), if close_chans_before_reload { 2 } else { 1 }, "{:?}", evs);
4015+
for ev in evs {
4016+
if let Event::PaymentForwarded { claim_from_onchain_tx, next_user_channel_id, .. } = ev
4017+
{
4018+
if !claim_from_onchain_tx {
4019+
// If the outbound channel is still open, the `next_user_channel_id` should
4020+
// be available. This was previously broken.
4021+
assert!(next_user_channel_id.is_some())
4022+
}
4023+
} else {
4024+
panic!("Unexpected event: {:?}", ev);
39544025
}
3955-
} else {
3956-
panic!();
39574026
}
39584027
}
39594028

39604029
if !close_chans_before_reload || close_only_a {
3961-
// Once we call `process_pending_events` the final `ChannelMonitor` for the B<->C channel
3962-
// will fly, removing the payment preimage from it.
3963-
check_added_monitors(&nodes[1], 1);
4030+
if hold_post_reload_mon_update {
4031+
// The B-C monitor update from the completion action fires now.
4032+
check_added_monitors(&nodes[1], 1);
4033+
}
39644034
assert!(nodes[1].node.get_and_clear_pending_events().is_empty());
39654035
send_payment(&nodes[1], &[&nodes[2]], 100_000);
39664036
}
@@ -5178,15 +5248,17 @@ fn test_mpp_claim_to_holding_cell() {
51785248
}
51795249

51805250
#[test]
5181-
#[should_panic(expected = "Watch::update_channel returned Completed while prior updates are still InProgress")]
5182-
fn test_monitor_update_fail_after_funding_spend() {
5183-
// When a counterparty commitment transaction confirms (funding spend), the
5184-
// ChannelMonitor sets funding_spend_seen. If a commitment_signed from the
5185-
// counterparty is then processed (a race between chain events and message
5186-
// processing), update_monitor returns Err because no_further_updates_allowed()
5187-
// is true. ChainMonitor overrides the result to InProgress, permanently
5188-
// freezing the channel. A subsequent preimage claim returning Completed then
5189-
// triggers the per-channel assertion.
5251+
fn test_monitor_update_after_funding_spend() {
5252+
// Test that monitor updates still work after a funding spend is detected by the
5253+
// ChainMonitor but before ChannelManager has processed the corresponding block.
5254+
//
5255+
// When the counterparty commitment transaction confirms (funding spend), the
5256+
// ChannelMonitor sets funding_spend_seen and no_further_updates_allowed() returns
5257+
// true. ChainMonitor overrides all subsequent update_channel results to InProgress
5258+
// to freeze the channel. These overridden updates complete via deferred completions
5259+
// in the next release_pending_monitor_events cycle, together with the force-close
5260+
// MonitorEvent, so that MonitorUpdateCompletionActions (like PaymentClaimed) can
5261+
// still fire.
51905262
let chanmon_cfgs = create_chanmon_cfgs(2);
51915263
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
51925264
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
@@ -5197,7 +5269,7 @@ fn test_monitor_update_fail_after_funding_spend() {
51975269
let (_, _, chan_id, _) = create_announced_chan_between_nodes(&nodes, 0, 1);
51985270

51995271
// Route payment 1 fully so B can claim it later.
5200-
let (payment_preimage_1, _payment_hash_1, ..) =
5272+
let (payment_preimage_1, payment_hash_1, ..) =
52015273
route_payment(&nodes[0], &[&nodes[1]], 1_000_000);
52025274

52035275
// Get A's commitment tx (this is the "counterparty" commitment from B's perspective).
@@ -5206,12 +5278,14 @@ fn test_monitor_update_fail_after_funding_spend() {
52065278

52075279
// Confirm A's commitment tx on B's chain_monitor ONLY (not on B's ChannelManager).
52085280
// This sets funding_spend_seen in the monitor, making no_further_updates_allowed() true.
5281+
// We also update the best block on the chain_monitor so the broadcaster height is
5282+
// consistent when claiming HTLCs.
52095283
let (block_hash, height) = nodes[1].best_block_info();
52105284
let block = create_dummy_block(block_hash, height + 1, vec![as_commitment_tx[0].clone()]);
52115285
let txdata: Vec<_> = block.txdata.iter().enumerate().collect();
5212-
nodes[1].chain_monitor.chain_monitor.transactions_confirmed(
5213-
&block.header, &txdata, height + 1,
5214-
);
5286+
nodes[1].chain_monitor.chain_monitor.transactions_confirmed(&block.header, &txdata, height + 1);
5287+
nodes[1].chain_monitor.chain_monitor.best_block_updated(&block.header, height + 1);
5288+
nodes[1].blocks.lock().unwrap().push((block, height + 1));
52155289

52165290
// Send payment 2 from A to B.
52175291
let (route, payment_hash_2, _, payment_secret_2) =
@@ -5233,15 +5307,38 @@ fn test_monitor_update_fail_after_funding_spend() {
52335307

52345308
nodes[1].node.handle_update_add_htlc(node_a_id, &payment_event.msgs[0]);
52355309

5236-
// B processes commitment_signed. The monitor's update_monitor succeeds on the
5237-
// update steps, but returns Err at the end because no_further_updates_allowed()
5238-
// is true (funding_spend_seen). ChainMonitor overrides the result to InProgress.
5310+
// B processes commitment_signed. The monitor applies the update but returns Err
5311+
// because no_further_updates_allowed() is true. ChainMonitor overrides to InProgress,
5312+
// freezing the channel.
52395313
nodes[1].node.handle_commitment_signed(node_a_id, &payment_event.commitment_msg[0]);
52405314
check_added_monitors(&nodes[1], 1);
5241-
assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
52425315

5243-
// B claims payment 1. The PaymentPreimage monitor update returns Completed
5244-
// (update_monitor succeeds for preimage, and persister returns Completed),
5245-
// but the prior InProgress from the commitment_signed is still pending.
5316+
// B claims payment 1. This triggers process_pending_monitor_events internally, which
5317+
// processes the deferred completions and the force-close MonitorEvent together.
5318+
// ChainMonitor also overrides the preimage update to InProgress (channel is post-close),
5319+
// so no Completed-while-InProgress assertion fires.
52465320
nodes[1].node.claim_funds(payment_preimage_1);
5321+
check_added_monitors(&nodes[1], 1);
5322+
5323+
// Processing events resolves the deferred completions and force-close together.
5324+
// We get both a PaymentClaimed and a ChannelClosed event.
5325+
let events = nodes[1].node.get_and_clear_pending_events();
5326+
assert_eq!(events.len(), 2);
5327+
match &events[0] {
5328+
Event::PaymentClaimed { payment_hash, amount_msat, .. } => {
5329+
assert_eq!(payment_hash_1, *payment_hash);
5330+
assert_eq!(1_000_000, *amount_msat);
5331+
},
5332+
_ => panic!("Unexpected event: {:?}", events[0]),
5333+
}
5334+
match &events[1] {
5335+
Event::ChannelClosed { reason: ClosureReason::CommitmentTxConfirmed, .. } => {},
5336+
_ => panic!("Unexpected event: {:?}", events[1]),
5337+
}
5338+
5339+
// The deferred completions unfreeze the channel, then the force-close MonitorEvent
5340+
// closes it. This generates a ChannelForceClosed monitor update and pending messages
5341+
// (revoke_and_ack, commitment update, error, channel update). Clear them.
5342+
check_added_monitors(&nodes[1], 1);
5343+
nodes[1].node.get_and_clear_pending_msg_events();
52475344
}

0 commit comments

Comments
 (0)