Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions lightning/src/chain/chainmonitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1279,15 +1279,17 @@ where
/// [`ChannelManager`]: crate::ln::channelmanager::ChannelManager
pub fn flush(&self, count: usize, logger: &L) {
let _guard = self.flush_lock.lock().unwrap();
if count > 0 {
log_info!(logger, "Flushing up to {} monitor operations", count);
if count == 0 {
return;
}
log_info!(logger, "Flushing up to {} monitor operations", count);
for _ in 0..count {
let mut queue = self.pending_ops.lock().unwrap();
let op = match queue.pop_front() {
Some(op) => op,
None => {
debug_assert!(false, "flush count exceeded queue length");
log_error!(logger, "flush count exceeded queue length");
return;
},
Comment on lines +1290 to +1294
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In release builds, this silently returns without logging when count exceeds the queue length. This can happen if flush is called concurrently (e.g., from TestChainMonitor::release_pending_monitor_events auto-flush racing with the background processor's explicit flush). A log_error! before the return would make such races observable in production, rather than silently under-flushing:

Suggested change
None => {
debug_assert!(false, "flush count exceeded queue length");
return;
},
None => {
debug_assert!(false, "flush count exceeded queue length");
log_error!(logger, "flush count exceeded queue length");
return;
},

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added.

};
Expand Down Expand Up @@ -1341,6 +1343,10 @@ where
},
}
}

// A flushed monitor update may have generated new events, so assume we have
// some and wake the event processor.
self.event_notifier.notify();
}
}

Expand Down
187 changes: 182 additions & 5 deletions lightning/src/chain/channelmonitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1378,8 +1378,8 @@ pub(crate) struct ChannelMonitorImpl<Signer: EcdsaChannelSigner> {
/// In-memory only HTLC ids used to track upstream HTLCs that have been failed backwards due to
/// a downstream channel force-close remaining unconfirmed by the time the upstream timeout
/// expires. This is used to tell us we already generated an event to fail this HTLC back
/// during a previous block scan.
failed_back_htlc_ids: HashSet<SentHTLCId>,
/// during a previous block scan. Not serialized.
pub(crate) failed_back_htlc_ids: HashSet<SentHTLCId>,

// The auxiliary HTLC data associated with a holder commitment transaction. This includes
// non-dust HTLC sources, along with dust HTLCs and their sources. Note that this assumes any
Expand Down Expand Up @@ -4299,6 +4299,55 @@ impl<Signer: EcdsaChannelSigner> ChannelMonitorImpl<Signer> {

self.latest_update_id = updates.update_id;

// If a counterparty commitment update was applied while the funding output has already
// been spent on-chain, fail back the outbound HTLCs from the update. This handles the
// race where a monitor update is dispatched before the channel force-closes but only
// applied after the commitment transaction confirms.
for update in updates.updates.iter() {
match update {
ChannelMonitorUpdateStep::LatestCounterpartyCommitmentTXInfo {
htlc_outputs, ..
} => {
// Only outbound HTLCs have a source; inbound ones are `None`
// and skipped by the `filter_map`.
self.fail_htlcs_from_update_after_funding_spend(
htlc_outputs.iter().filter_map(|(htlc, source)| {
source.as_ref().map(|s| (&**s, htlc.payment_hash, htlc.amount_msat))
}),
logger,
);
},
ChannelMonitorUpdateStep::LatestCounterpartyCommitment {
commitment_txs, htlc_data,
} => {
// On a counterparty commitment, `offered=false` means offered by
// us (outbound). `nondust_htlc_sources` contains sources only for
// these outbound nondust HTLCs, matching the filter order.
debug_assert_eq!(
commitment_txs[0].nondust_htlcs().iter()
.filter(|htlc| !htlc.offered).count(),
htlc_data.nondust_htlc_sources.len(),
);
let nondust = commitment_txs[0]
.nondust_htlcs()
.iter()
.filter(|htlc| !htlc.offered)
.zip(htlc_data.nondust_htlc_sources.iter())
.map(|(htlc, source)| (source, htlc.payment_hash, htlc.amount_msat));
// Only outbound dust HTLCs have a source; inbound ones are `None`
// and skipped by the `filter_map`.
let dust = htlc_data.dust_htlcs.iter().filter_map(|(htlc, source)| {
source.as_ref().map(|s| (s, htlc.payment_hash, htlc.amount_msat))
});
self.fail_htlcs_from_update_after_funding_spend(
nondust.chain(dust),
logger,
);
},
_ => {},
}
}

// Refuse updates after we've detected a spend onchain (or if the channel was otherwise
// closed), but only if the update isn't the kind of update we expect to see after channel
// closure.
Expand Down Expand Up @@ -4345,6 +4394,121 @@ impl<Signer: EcdsaChannelSigner> ChannelMonitorImpl<Signer> {
self.funding_spend_seen || self.lockdown_from_offchain || self.holder_tx_signed
}

/// Given outbound HTLCs from a counterparty commitment update, checks if the funding output
/// has been spent on-chain. If so, creates `OnchainEvent::HTLCUpdate` entries to fail back
/// HTLCs that weren't already known to the monitor.
///
/// This handles the race where a `ChannelMonitorUpdate` with a new counterparty commitment
/// is dispatched (e.g., via deferred writes) before the channel force-closes, but only
/// applied to the in-memory monitor after the commitment transaction has already confirmed.
///
/// Only truly new HTLCs (not present in any previously-known commitment) need to be failed
/// here. HTLCs that were already tracked by the monitor will be handled by the existing
/// `fail_unbroadcast_htlcs` logic when the spending transaction confirms.
fn fail_htlcs_from_update_after_funding_spend<'a, L: Logger>(
&mut self, htlcs: impl Iterator<Item = (&'a HTLCSource, PaymentHash, u64)>,
logger: &WithContext<L>,
) {
let pending_spend_entry = self
.onchain_events_awaiting_threshold_conf
.iter()
.find(|event| matches!(event.event, OnchainEvent::FundingSpendConfirmation { .. }))
.map(|entry| (entry.txid, entry.transaction.clone(), entry.height, entry.block_hash));
if self.funding_spend_confirmed.is_none() && pending_spend_entry.is_none() {
return;
}

// Check HTLC sources against all previously-known commitments to find truly new
// ones. After the update has been applied, `prev_counterparty_commitment_txid` holds
// what was `current` before this update, so it represents the already-known
// counterparty state. HTLCs already present in any of these will be handled by
// `fail_unbroadcast_htlcs` when the spending transaction confirms.
let is_source_known = |source: &HTLCSource| {
if let Some(ref txid) = self.funding.prev_counterparty_commitment_txid {
if let Some(htlc_list) = self.funding.counterparty_claimable_outpoints.get(txid) {
if htlc_list.iter().any(|(_, s)| s.as_ref().map(|s| s.as_ref()) == Some(source))
{
return true;
}
}
}
Comment on lines +4426 to +4434
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor: is_source_known checks self.funding.prev_counterparty_commitment_txid but not self.pending_funding[*].prev_counterparty_commitment_txid. In a splice scenario where the confirmed funding spend is for a pending funding output, an HTLC that was already known in that pending funding's previous counterparty commitment would be incorrectly treated as "new" and failed. Worth a TODO or a check against pending funding scopes as well?

// Note that we don't care about the case where a counterparty sent us a fresh local commitment transaction
// post-closure (with the `ChannelManager` still operating the channel). First of all we only care about
// resolving outbound HTLCs, which fundamentally have to be initiated by us. However we also don't mind
// looking at the current holder commitment transaction's HTLCs as any fresh outbound HTLCs will have to
// first come in a locally-initiated update to the counterparty's commitment transaction which we can, by
// refusing to apply the update, prevent the counterparty from ever seeing (as no messages can be sent until
// the monitor is updated). Thus, the HTLCs we care about can never appear in the holder commitment
// transaction.
Comment on lines +4435 to +4442
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: The comment concludes "the HTLCs we care about can never appear in the holder commitment transaction" — yet lines 4443-4451 proceed to check them anyway. Could you rephrase to say this is a defensive/belt-and-suspenders check? Currently it reads like the check below shouldn't be needed at all.

if holder_commitment_htlcs!(self, CURRENT_WITH_SOURCES).any(|(_, s)| s == Some(source))
{
return true;
}
if let Some(mut iter) = holder_commitment_htlcs!(self, PREV_WITH_SOURCES) {
if iter.any(|(_, s)| s == Some(source)) {
return true;
}
}
false
};
Comment on lines +4421 to +4453
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The is_source_known closure only checks prev_counterparty_commitment_txid (what was current before this update), not all entries in counterparty_claimable_outpoints. The comment at line 4413 explains the reasoning, and the holder commitment fallback covers additional cases.

Just want to confirm a subtle assumption: this correctness relies on the fact that the monitor always holds the immediately-prior counterparty commitment in prev after the update is applied (line 3502). If a single ChannelMonitorUpdate ever contained two LatestCounterpartyCommitment steps, the second step's iteration here would check prev (which is now the first step's commitment, not the original pre-batch state), and HTLCs from the first step would appear "known" even if they were truly new. The code is correct as long as each ChannelMonitorUpdate contains at most one counterparty commitment step — is that guaranteed at the ChannelManager level?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Analysis shows it isn't possible to have two LatestCounterpartyCommitment in the same update. Interesting find though.

for (source, payment_hash, amount_msat) in htlcs {
if is_source_known(source) {
continue;
}
if self.counterparty_fulfilled_htlcs.get(&SentHTLCId::from_source(source)).is_some() {
continue;
}
let htlc_value_satoshis = Some(amount_msat / 1000);
let logger = WithContext::from(logger, None, None, Some(payment_hash));
// Defensively mark the HTLC as failed back so the expiry-based failure
// path in `block_connected` doesn't generate a duplicate `HTLCUpdate`
// event for the same source.
self.failed_back_htlc_ids.insert(SentHTLCId::from_source(source));
if let Some(confirmed_txid) = self.funding_spend_confirmed {
// Funding spend already confirmed past ANTI_REORG_DELAY: resolve immediately.
log_trace!(
logger,
"Failing HTLC from late counterparty commitment update immediately \
(funding spend already confirmed)"
);
self.pending_monitor_events.push(MonitorEvent::HTLCEvent(HTLCUpdate {
payment_hash,
payment_preimage: None,
source: source.clone(),
htlc_value_satoshis,
}));
self.htlcs_resolved_on_chain.push(IrrevocablyResolvedHTLC {
commitment_tx_output_idx: None,
resolving_txid: Some(confirmed_txid),
resolving_tx: None,
payment_preimage: None,
});
} else {
// Funding spend still awaiting ANTI_REORG_DELAY: queue the failure.
let (txid, transaction, height, block_hash) = pending_spend_entry.clone().unwrap();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit/perf: pending_spend_entry.clone().unwrap() is called inside the per-HTLC loop, cloning the Option<Transaction> each iteration. For a commitment with many HTLCs, this does N clones of potentially large transaction data. Consider destructuring once before the loop:

Suggested change
let (txid, transaction, height, block_hash) = pending_spend_entry.clone().unwrap();
let pending_data = pending_spend_entry.clone();
for (source, payment_hash, amount_msat) in htlcs {
if is_source_known(source) {
continue;
}
if self.counterparty_fulfilled_htlcs.get(&SentHTLCId::from_source(source)).is_some() {
continue;
}
let htlc_value_satoshis = Some(amount_msat / 1000);
let logger = WithContext::from(logger, None, None, Some(payment_hash));
// Defensively mark the HTLC as failed back so the expiry-based failure
// path in `block_connected` doesn't generate a duplicate `HTLCUpdate`
// event for the same source.
self.failed_back_htlc_ids.insert(SentHTLCId::from_source(source));
if let Some(confirmed_txid) = self.funding_spend_confirmed {

Or alternatively, extract the pending_data tuple once and clone only transaction per entry (Txid and BlockHash are Copy).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure it's worth it — the transaction is the large part of the clone either way.

let entry = OnchainEventEntry {
txid,
transaction,
height,
block_hash,
event: OnchainEvent::HTLCUpdate {
source: source.clone(),
payment_hash,
htlc_value_satoshis,
commitment_tx_output_idx: None,
},
};
log_trace!(
logger,
"Failing HTLC from late counterparty commitment update, \
waiting for confirmation (at height {})",
entry.confirmation_threshold()
);
self.onchain_events_awaiting_threshold_conf.push(entry);
}
}
}

fn get_latest_update_id(&self) -> u64 {
self.latest_update_id
}
Expand Down Expand Up @@ -6834,7 +6998,7 @@ mod tests {
use bitcoin::{Sequence, Witness};

use crate::chain::chaininterface::LowerBoundedFeeEstimator;
use crate::events::ClosureReason;
use crate::events::{ClosureReason, Event};

use super::ChannelMonitorUpdateStep;
use crate::chain::channelmonitor::{ChannelMonitor, WithChannelMonitor};
Expand Down Expand Up @@ -6957,8 +7121,21 @@ mod tests {
check_spends!(htlc_txn[1], broadcast_tx);

check_closed_broadcast(&nodes[1], 1, true);
check_closed_event(&nodes[1], 1, ClosureReason::CommitmentTxConfirmed, &[nodes[0].node.get_our_node_id()], 100000);
check_added_monitors(&nodes[1], 1);
if !use_local_txn {
// When the counterparty commitment confirms, FundingSpendConfirmation matures
// immediately (no CSV delay), so funding_spend_confirmed is set. The new payment's
// commitment update then triggers immediate HTLC failure, generating payment events
// alongside the channel close event.
Comment on lines +7125 to +7128
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"FundingSpendConfirmation matures immediately (no CSV delay)" is misleading — FundingSpendConfirmation still requires ANTI_REORG_DELAY blocks to mature (the on_local_output_csv only adds additional delay beyond ANTI_REORG_DELAY). It appears that the payment failure events here actually come from the ChannelManager's force-close logic (failing the in-flight HTLC when CommitmentTxConfirmed is processed), not from the monitor's immediate HTLC failure path. Consider clarifying the comment.

let events = nodes[1].node.get_and_clear_pending_events();
assert_eq!(events.len(), 3);
assert!(events.iter().any(|e| matches!(e, Event::PaymentPathFailed { .. })));
assert!(events.iter().any(|e| matches!(e, Event::PaymentFailed { .. })));
assert!(events.iter().any(|e| matches!(e, Event::ChannelClosed { .. })));
check_added_monitors(&nodes[1], 2);
} else {
check_closed_event(&nodes[1], 1, ClosureReason::CommitmentTxConfirmed, &[nodes[0].node.get_our_node_id()], 100000);
check_added_monitors(&nodes[1], 1);
}
}

#[test]
Expand Down
Loading
Loading