diff --git a/e2e-tests/src/lib.rs b/e2e-tests/src/lib.rs
index 8b34fd2..1eda51e 100644
--- a/e2e-tests/src/lib.rs
+++ b/e2e-tests/src/lib.rs
@@ -140,6 +140,9 @@ max_client_to_self_delay = 1024
 min_payment_size_msat = 0
 max_payment_size_msat = 1000000000
 client_trusts_lsp = true
+
+[metrics]
+enabled = true
 "#,
         storage_dir = storage_dir.display(),
     );
diff --git a/e2e-tests/tests/e2e.rs b/e2e-tests/tests/e2e.rs
index 7744a0d..f7bc1df 100644
--- a/e2e-tests/tests/e2e.rs
+++ b/e2e-tests/tests/e2e.rs
@@ -634,3 +634,63 @@ async fn test_forwarded_payment_event() {
 
     node_c.stop().unwrap();
 }
+
+/// Exercises the Prometheus metrics endpoint end to end: a freshly started server must report
+/// zero for every metric, and a successful outbound payment must eventually show up in
+/// `ldk_server_total_successful_payments_count`.
+#[tokio::test]
+async fn test_metrics_endpoint() {
+    let bitcoind = TestBitcoind::new();
+
+    // Test with metrics enabled
+    let server_a = LdkServerHandle::start(&bitcoind).await;
+    let server_b = LdkServerHandle::start(&bitcoind).await;
+
+    let client = server_a.client();
+    let metrics_result = client.get_metrics().await;
+
+    assert!(metrics_result.is_ok(), "Expected metrics to succeed when enabled");
+    let metrics = metrics_result.unwrap();
+
+    // Verify initial state
+    assert!(metrics.contains("ldk_server_total_peers_count 0"));
+    assert!(metrics.contains("ldk_server_total_payments_count 0"));
+    assert!(metrics.contains("ldk_server_total_successful_payments_count 0"));
+    assert!(metrics.contains("ldk_server_total_pending_payments_count 0"));
+    assert!(metrics.contains("ldk_server_total_failed_payments_count 0"));
+    assert!(metrics.contains("ldk_server_total_channels_count 0"));
+    assert!(metrics.contains("ldk_server_total_public_channels_count 0"));
+    assert!(metrics.contains("ldk_server_total_private_channels_count 0"));
+    assert!(metrics.contains("ldk_server_total_onchain_balance_sats 0"));
+    assert!(metrics.contains("ldk_server_spendable_onchain_balance_sats 0"));
+    assert!(metrics.contains("ldk_server_total_anchor_channels_reserve_sats 0"));
+    assert!(metrics.contains("ldk_server_total_lightning_balance_sats 0"));
+
+    // Set up channel and make a payment to trigger metric update
+    setup_funded_channel(&bitcoind, &server_a, &server_b, 100_000).await;
+
+    let invoice_resp = server_b
+        .client()
+        .bolt11_receive(Bolt11ReceiveRequest {
+            amount_msat: Some(10_000_000),
+            description: Some(Bolt11InvoiceDescription {
+                kind: Some(bolt11_invoice_description::Kind::Direct("metrics test".to_string())),
+            }),
+            expiry_secs: 3600,
+        })
+        .await
+        .unwrap();
+
+    run_cli(&server_a, &["bolt11-send", &invoice_resp.invoice]);
+
+    // Wait to receive the PaymentSuccessful event and update metrics.
+    // The endpoint is polled every 500 ms and the test panics after 30 s without an update.
+    let timeout = Duration::from_secs(30);
+    let start = std::time::Instant::now();
+    loop {
+        let metrics = client.get_metrics().await.unwrap();
+        if metrics.contains("ldk_server_total_successful_payments_count 1") {
+            break;
+        }
+        if start.elapsed() > timeout {
+            panic!("Timed out waiting for payment metrics to update");
+        }
+        tokio::time::sleep(Duration::from_millis(500)).await;
+    }
+}
diff --git a/ldk-server-client/src/client.rs b/ldk-server-client/src/client.rs
index 427274f..bbad46e 100644
--- a/ldk-server-client/src/client.rs
+++ b/ldk-server-client/src/client.rs
@@ -32,13 +32,15 @@ use ldk_server_protos::api::{
 use ldk_server_protos::endpoints::{
     BOLT11_RECEIVE_PATH, BOLT11_SEND_PATH, BOLT12_RECEIVE_PATH, BOLT12_SEND_PATH,
     CLOSE_CHANNEL_PATH, CONNECT_PEER_PATH, DISCONNECT_PEER_PATH, EXPORT_PATHFINDING_SCORES_PATH,
-    FORCE_CLOSE_CHANNEL_PATH, GET_BALANCES_PATH, GET_NODE_INFO_PATH, GET_PAYMENT_DETAILS_PATH,
-    GRAPH_GET_CHANNEL_PATH, GRAPH_GET_NODE_PATH, GRAPH_LIST_CHANNELS_PATH, GRAPH_LIST_NODES_PATH,
-    LIST_CHANNELS_PATH, LIST_FORWARDED_PAYMENTS_PATH, LIST_PAYMENTS_PATH, LIST_PEERS_PATH,
-    ONCHAIN_RECEIVE_PATH, ONCHAIN_SEND_PATH, OPEN_CHANNEL_PATH, SIGN_MESSAGE_PATH, SPLICE_IN_PATH,
-    SPLICE_OUT_PATH, SPONTANEOUS_SEND_PATH, UPDATE_CHANNEL_CONFIG_PATH, VERIFY_SIGNATURE_PATH,
+    FORCE_CLOSE_CHANNEL_PATH, GET_BALANCES_PATH, GET_METRICS_PATH, GET_NODE_INFO_PATH,
+    GET_PAYMENT_DETAILS_PATH, GRAPH_GET_CHANNEL_PATH, GRAPH_GET_NODE_PATH,
+    GRAPH_LIST_CHANNELS_PATH, GRAPH_LIST_NODES_PATH, LIST_CHANNELS_PATH,
+    LIST_FORWARDED_PAYMENTS_PATH, LIST_PAYMENTS_PATH, LIST_PEERS_PATH, ONCHAIN_RECEIVE_PATH,
+    ONCHAIN_SEND_PATH, OPEN_CHANNEL_PATH, SIGN_MESSAGE_PATH, SPLICE_IN_PATH, SPLICE_OUT_PATH,
+    SPONTANEOUS_SEND_PATH, UPDATE_CHANNEL_CONFIG_PATH, VERIFY_SIGNATURE_PATH,
 };
 use ldk_server_protos::error::{ErrorCode, ErrorResponse};
+use prost::bytes::Bytes;
 use prost::Message;
 use reqwest::header::CONTENT_TYPE;
 use reqwest::{Certificate, Client};
@@ -62,6 +64,11 @@ pub struct LdkServerClient {
     api_key: String,
 }
 
+/// HTTP method to use for a request issued through `LdkServerClient::make_request`.
+enum RequestType {
+    Get,
+    Post,
+}
+
 impl LdkServerClient {
     /// Constructs a [`LdkServerClient`] using `base_url` as the ldk-server endpoint.
     ///
@@ -107,6 +114,18 @@ impl LdkServerClient {
         self.post_request(&request, &url).await
     }
 
+    /// Retrieve the node metrics in Prometheus format.
+    ///
+    /// Issues an unauthenticated `GET` to the [`GET_METRICS_PATH`] endpoint and returns the
+    /// response body as text. Fails with `InternalError` if the body is not valid UTF-8;
+    /// errors from the underlying request are propagated unchanged.
+    pub async fn get_metrics(&self) -> Result {
+        let url = format!("https://{}/{GET_METRICS_PATH}", self.base_url);
+        let payload = self.make_request(&url, RequestType::Get, None, false).await?;
+        String::from_utf8(payload.to_vec()).map_err(|e| {
+            LdkServerError::new(
+                InternalError,
+                format!("Failed to decode metrics response as string: {}", e),
+            )
+        })
+    }
+
     /// Retrieves an overview of all known balances.
     /// For API contract/usage, refer to docs for [`GetBalancesRequest`] and [`GetBalancesResponse`].
     pub async fn get_balances(
@@ -363,18 +382,37 @@ impl LdkServerClient {
         &self, request: &Rq, url: &str,
     ) -> Result {
         let request_body = request.encode_to_vec();
-        let auth_header = self.compute_auth_header(&request_body);
-        let response_raw = self
-            .client
-            .post(url)
-            .header(CONTENT_TYPE, APPLICATION_OCTET_STREAM)
-            .header("X-Auth", auth_header)
-            .body(request_body)
-            .send()
-            .await
-            .map_err(|e| {
-                LdkServerError::new(InternalError, format!("HTTP request failed: {}", e))
-            })?;
+        let payload = self.make_request(url, RequestType::Post, Some(request_body), true).await?;
+        Rs::decode(&payload[..]).map_err(|e| {
+            LdkServerError::new(InternalError, format!("Failed to decode success response: {}", e))
+        })
+    }
+
+    /// Sends a single HTTP request and yields the raw response body on a success status.
+    ///
+    /// `body`, when present, is sent with an octet-stream content type. When `authenticated`
+    /// is set, the `X-Auth` header is computed over the body bytes, or over an empty slice
+    /// when there is no body. A transport failure is reported as `InternalError`.
+    async fn make_request(
+        &self, url: &str, request_type: RequestType, body: Option>, authenticated: bool,
+    ) -> Result {
+        let builder = match request_type {
+            RequestType::Get => self.client.get(url),
+            RequestType::Post => self.client.post(url),
+        };
+
+        let builder = if authenticated {
+            let body_for_auth = body.as_deref().unwrap_or(&[]);
+            let auth_header = self.compute_auth_header(body_for_auth);
+            builder.header("X-Auth", auth_header)
+        } else {
+            builder
+        };
+
+        let builder = if let Some(body_content) = body {
+            builder.header(CONTENT_TYPE, APPLICATION_OCTET_STREAM).body(body_content)
+        } else {
+            builder
+        };
+
+        let response_raw = builder.send().await.map_err(|e| {
+            LdkServerError::new(InternalError, format!("HTTP request failed: {}", e))
+        })?;
 
         let status = response_raw.status();
         let payload = response_raw.bytes().await.map_err(|e| {
@@ -382,12 +420,7 @@ impl LdkServerClient {
         })?;
 
         if status.is_success() {
-            Ok(Rs::decode(&payload[..]).map_err(|e| {
-                LdkServerError::new(
-                    InternalError,
-                    format!("Failed to decode success response: {}", e),
-                )
-            })?)
+            Ok(payload)
         } else {
             let error_response = ErrorResponse::decode(&payload[..]).map_err(|e| {
                 LdkServerError::new(
diff --git a/ldk-server-protos/src/endpoints.rs b/ldk-server-protos/src/endpoints.rs
index d47f5c5..f737278 100644
--- a/ldk-server-protos/src/endpoints.rs
+++ b/ldk-server-protos/src/endpoints.rs
@@ -36,3 +36,4 @@ pub const GRAPH_LIST_CHANNELS_PATH: &str = "GraphListChannels";
 pub const GRAPH_GET_CHANNEL_PATH: &str = "GraphGetChannel";
 pub const GRAPH_LIST_NODES_PATH: &str = "GraphListNodes";
 pub const GRAPH_GET_NODE_PATH: &str = "GraphGetNode";
+pub const GET_METRICS_PATH: &str = "metrics";
diff --git a/ldk-server/ldk-server-config.toml b/ldk-server/ldk-server-config.toml
index 5fc8a92..3686a2a 100644
--- a/ldk-server/ldk-server-config.toml
+++ b/ldk-server/ldk-server-config.toml
@@ -79,3 +79,7 @@ client_trusts_lsp = false
 ## A token we may require to be sent by the clients.
 ## If set, only requests matching this token will be accepted. (uncomment and set if required)
 # require_token = ""
+
+# Metrics settings
+# WARNING: when enabled, the metrics endpoint is served without authentication.
+[metrics]
+enabled = false
diff --git a/ldk-server/src/main.rs b/ldk-server/src/main.rs
index e6bce80..0a308ce 100644
--- a/ldk-server/src/main.rs
+++ b/ldk-server/src/main.rs
@@ -50,6 +50,7 @@ use crate::io::persist::{
 use crate::service::NodeService;
 use crate::util::config::{load_config, ArgsConfig, ChainSource};
 use crate::util::logger::ServerLogger;
+use crate::util::metrics::{Metrics, BUILD_METRICS_INTERVAL};
 use crate::util::proto_adapter::{forwarded_payment_to_proto, payment_to_proto};
 use crate::util::systemd;
 use crate::util::tls::get_or_generate_tls_config;
@@ -265,6 +266,27 @@ fn main() {
             }
         };
         let event_node = Arc::clone(&node);
+
+        let metrics: Option> = if config_file.metrics_enabled {
+            let metrics_node = Arc::clone(&node);
+            let mut interval = tokio::time::interval(BUILD_METRICS_INTERVAL);
+            let metrics = Arc::new(Metrics::new());
+            let metrics_bg = Arc::clone(&metrics);
+
+            // Initialize metrics that are event-driven to ensure they start with correct values from persistence
+            metrics.initialize_payment_metrics(&metrics_node);
+
+            // Refresh the pollable metrics in the background, once per interval tick.
+            runtime.spawn(async move {
+                loop {
+                    interval.tick().await;
+                    metrics_bg.update_all_pollable_metrics(&metrics_node);
+                }
+            });
+            Some(metrics)
+        } else {
+            None
+        };
+
         let rest_svc_listener = TcpListener::bind(config_file.rest_service_addr)
             .await
             .expect("Failed to bind listening port");
@@ -331,6 +353,10 @@ fn main() {
                                 &event_node,
                                 Arc::clone(&event_publisher),
                                 Arc::clone(&paginated_store)).await;
+
+                            if let Some(metrics) = &metrics {
+                                metrics.update_payments_count(true);
+                            }
                         },
                         Event::PaymentFailed {payment_id, ..} => {
                             let payment_id = payment_id.expect("PaymentId expected for ldk-server >=0.1");
@@ -342,6 +368,10 @@ fn main() {
                                 &event_node,
                                 Arc::clone(&event_publisher),
                                 Arc::clone(&paginated_store)).await;
+
+                            if let Some(metrics) = &metrics {
+                                metrics.update_payments_count(false);
+                            }
                         },
                         Event::PaymentClaimable {payment_id, ..} => {
                             if let Some(payment_details) = event_node.payment(&payment_id) {
@@ -426,7 +456,7 @@ fn main() {
                     res = rest_svc_listener.accept() => {
                         match res {
                             Ok((stream, _)) => {
-                                let node_service = NodeService::new(Arc::clone(&node), Arc::clone(&paginated_store), api_key.clone());
+                                let node_service = NodeService::new(Arc::clone(&node), Arc::clone(&paginated_store), api_key.clone(), metrics.clone());
                                 let acceptor = tls_acceptor.clone();
                                 runtime.spawn(async move {
                                     match acceptor.accept(stream).await {
diff --git a/ldk-server/src/service.rs b/ldk-server/src/service.rs
index 3a43b63..607bd6b 100644
--- a/ldk-server/src/service.rs
+++ b/ldk-server/src/service.rs
@@ -21,11 +21,12 @@ use ldk_node::Node;
 use ldk_server_protos::endpoints::{
     BOLT11_RECEIVE_PATH, BOLT11_SEND_PATH, BOLT12_RECEIVE_PATH, BOLT12_SEND_PATH,
     CLOSE_CHANNEL_PATH, CONNECT_PEER_PATH, DISCONNECT_PEER_PATH, EXPORT_PATHFINDING_SCORES_PATH,
-    FORCE_CLOSE_CHANNEL_PATH, GET_BALANCES_PATH, GET_NODE_INFO_PATH, GET_PAYMENT_DETAILS_PATH,
-    GRAPH_GET_CHANNEL_PATH, GRAPH_GET_NODE_PATH, GRAPH_LIST_CHANNELS_PATH, GRAPH_LIST_NODES_PATH,
-    LIST_CHANNELS_PATH, LIST_FORWARDED_PAYMENTS_PATH, LIST_PAYMENTS_PATH, LIST_PEERS_PATH,
-    ONCHAIN_RECEIVE_PATH, ONCHAIN_SEND_PATH, OPEN_CHANNEL_PATH, SIGN_MESSAGE_PATH, SPLICE_IN_PATH,
-    SPLICE_OUT_PATH, SPONTANEOUS_SEND_PATH, UPDATE_CHANNEL_CONFIG_PATH, VERIFY_SIGNATURE_PATH,
+    FORCE_CLOSE_CHANNEL_PATH, GET_BALANCES_PATH, GET_METRICS_PATH, GET_NODE_INFO_PATH,
+    GET_PAYMENT_DETAILS_PATH, GRAPH_GET_CHANNEL_PATH, GRAPH_GET_NODE_PATH,
+    GRAPH_LIST_CHANNELS_PATH, GRAPH_LIST_NODES_PATH, LIST_CHANNELS_PATH,
+    LIST_FORWARDED_PAYMENTS_PATH, LIST_PAYMENTS_PATH, LIST_PEERS_PATH, ONCHAIN_RECEIVE_PATH,
+    ONCHAIN_SEND_PATH, OPEN_CHANNEL_PATH, SIGN_MESSAGE_PATH, SPLICE_IN_PATH, SPLICE_OUT_PATH,
+    SPONTANEOUS_SEND_PATH, UPDATE_CHANNEL_CONFIG_PATH, VERIFY_SIGNATURE_PATH,
 };
 use prost::Message;
 
@@ -59,6 +60,7 @@ use crate::api::spontaneous_send::handle_spontaneous_send_request;
 use crate::api::update_channel_config::handle_update_channel_config_request;
 use crate::api::verify_signature::handle_verify_signature_request;
 use crate::io::persist::paginated_kv_store::PaginatedKVStore;
+use crate::util::metrics::Metrics;
 use crate::util::proto_adapter::to_error_response;
 
 // Maximum request body size: 10 MB
@@ -70,13 +72,15 @@ pub struct NodeService {
     node: Arc,
     paginated_kv_store: Arc,
     api_key: String,
+    // `None` when metrics are disabled; the metrics path then answers 404.
+    metrics: Option>,
 }
 
 impl NodeService {
     pub(crate) fn new(
         node: Arc, paginated_kv_store: Arc, api_key: String,
+        metrics: Option>,
     ) -> Self {
-        Self { node, paginated_kv_store, api_key }
+        Self { node, paginated_kv_store, api_key, metrics }
     }
 }
 
@@ -160,6 +164,29 @@ impl Service> for NodeService {
     type Future = Pin> + Send>>;
 
     fn call(&self, req: Request) -> Self::Future {
+        // Handle metrics endpoint separately to bypass auth and return plain text.
+        // The `len() > 1` check keeps the `[1..]` slice below, which drops the leading `/`,
+        // in bounds.
+        if req.method() == hyper::Method::GET
+            && req.uri().path().len() > 1
+            && &req.uri().path()[1..] == GET_METRICS_PATH
+        {
+            if let Some(metrics) = &self.metrics {
+                let metrics = Arc::clone(metrics);
+                return Box::pin(async move {
+                    Ok(Response::builder()
+                        .header("Content-Type", "text/plain")
+                        .body(Full::new(Bytes::from(metrics.gather_metrics())))
+                        .unwrap())
+                });
+            } else {
+                return Box::pin(async move {
+                    Ok(Response::builder()
+                        .status(StatusCode::NOT_FOUND)
+                        .body(Full::new(Bytes::from("Not Found")))
+                        .unwrap())
+                });
+            }
+        }
+
         // Extract auth params from headers (validation happens after body is read)
         let auth_params = match extract_auth_params(&req) {
             Ok(params) => params,
diff --git a/ldk-server/src/util/config.rs b/ldk-server/src/util/config.rs
index dbc452a..5b635ec 100644
--- a/ldk-server/src/util/config.rs
+++ b/ldk-server/src/util/config.rs
@@ -53,6 +53,7 @@ pub struct Config {
     pub log_level: LevelFilter,
     pub log_file_path: Option,
     pub pathfinding_scores_source_url: Option,
+    /// Whether the metrics endpoint is enabled; defaults to `false` when unset.
+    pub metrics_enabled: bool,
 }
 
 #[derive(Debug, Clone, PartialEq, Eq)]
@@ -91,6 +92,7 @@ struct ConfigBuilder {
     log_level: Option,
     log_file_path: Option,
     pathfinding_scores_source_url: Option,
+    metrics_enabled: Option,
 }
 
 impl ConfigBuilder {
@@ -150,6 +152,10 @@ impl ConfigBuilder {
                 hosts: tls.hosts.unwrap_or_default(),
             });
         }
+
+        // A TOML `enabled` value overrides any earlier one; an absent key keeps it.
+        if let Some(metrics) = toml.metrics {
+            self.metrics_enabled = metrics.enabled.or(self.metrics_enabled);
+        }
     }
 
     fn merge_args(&mut self, args: &ArgsConfig) {
@@ -192,6 +198,10 @@ impl ConfigBuilder {
         if let Some(pathfinding_scores_source_url) = &args.pathfinding_scores_source_url {
             self.pathfinding_scores_source_url = Some(pathfinding_scores_source_url.clone());
         }
+
+        // The flag can only switch metrics on; when it is not set the current value is kept.
+        if args.metrics_enabled {
+            self.metrics_enabled = Some(true);
+        }
     }
 
     fn build(self) -> io::Result {
@@ -343,6 +353,8 @@ impl ConfigBuilder {
 
         let pathfinding_scores_source_url = self.pathfinding_scores_source_url;
 
+        let metrics_enabled = self.metrics_enabled.unwrap_or(false);
+
         Ok(Config {
             network,
             listening_addrs,
@@ -359,6 +371,7 @@ impl ConfigBuilder {
             log_level,
             log_file_path: self.log_file_path,
             pathfinding_scores_source_url,
+            metrics_enabled,
         })
     }
 }
@@ -375,6 +388,7 @@ pub struct TomlConfig {
     liquidity: Option,
     log: Option,
     tls: Option,
+    metrics: Option,
 }
 
 #[derive(Deserialize, Serialize)]
@@ -434,6 +448,11 @@ struct TomlTlsConfig {
     hosts: Option>,
 }
 
+/// The `[metrics]` section of the TOML configuration file.
+#[derive(Deserialize, Serialize)]
+struct MetricsTomlConfig {
+    enabled: Option,
+}
+
 #[derive(Deserialize, Serialize)]
 struct LiquidityConfig {
     lsps2_service: Option,
@@ -563,6 +582,13 @@ pub struct ArgsConfig {
         help = "The external scores source that is merged into the local scoring system to improve routing."
     )]
     pathfinding_scores_source_url: Option,
+
+    #[arg(
+        long,
+        env = "LDK_SERVER_METRICS_ENABLED",
+        help = "The option to enable the metrics endpoint. WARNING: This endpoint is unauthenticated."
+    )]
+    metrics_enabled: bool,
 }
 
 pub fn load_config(args: &ArgsConfig) -> io::Result {
@@ -690,6 +716,7 @@ mod tests {
             storage_dir_path: Some(String::from("/tmp_cli")),
             node_alias: Some(String::from("LDK Server CLI")),
             pathfinding_scores_source_url: Some(String::from("https://example.com/")),
+            metrics_enabled: false,
         }
     }
 
@@ -706,6 +733,7 @@ mod tests {
             bitcoind_rpc_password: None,
             storage_dir_path: None,
             pathfinding_scores_source_url: None,
+            metrics_enabled: false,
         }
     }
 
@@ -774,6 +802,7 @@ mod tests {
             log_level: LevelFilter::Trace,
             log_file_path: Some("/var/log/ldk-server.log".to_string()),
             pathfinding_scores_source_url: None,
+            metrics_enabled: false,
         };
 
         assert_eq!(config.listening_addrs, expected.listening_addrs);
@@ -791,6 +820,7 @@ mod tests {
         assert_eq!(config.log_level, expected.log_level);
         assert_eq!(config.log_file_path, expected.log_file_path);
         assert_eq!(config.pathfinding_scores_source_url, expected.pathfinding_scores_source_url);
+        assert_eq!(config.metrics_enabled, expected.metrics_enabled);
 
         // Test case where only electrum is set
 
@@ -1084,6 +1114,7 @@ mod tests {
             log_level: LevelFilter::Trace,
             log_file_path: Some("/var/log/ldk-server.log".to_string()),
             pathfinding_scores_source_url: Some("https://example.com/".to_string()),
+            metrics_enabled: false,
         };
 
         assert_eq!(config.listening_addrs, expected.listening_addrs);
@@ -1097,6 +1128,7 @@ mod tests {
         assert_eq!(config.rabbitmq_exchange_name, expected.rabbitmq_exchange_name);
         assert!(config.lsps2_service_config.is_none());
         assert_eq!(config.pathfinding_scores_source_url, expected.pathfinding_scores_source_url);
+        assert_eq!(config.metrics_enabled, expected.metrics_enabled);
     }
 
     #[test]
@@ -1188,6 +1220,7 @@ mod tests {
             log_level: LevelFilter::Trace,
             log_file_path: Some("/var/log/ldk-server.log".to_string()),
             pathfinding_scores_source_url: Some("https://example.com/".to_string()),
+            metrics_enabled: false,
         };
 
         assert_eq!(config.listening_addrs, expected.listening_addrs);
@@ -1202,6 +1235,7 @@ mod tests {
         #[cfg(feature = "experimental-lsps2-support")]
         assert_eq!(config.lsps2_service_config.is_some(), expected.lsps2_service_config.is_some());
         assert_eq!(config.pathfinding_scores_source_url, expected.pathfinding_scores_source_url);
+        assert_eq!(config.metrics_enabled, expected.metrics_enabled);
     }
 
     #[test]
@@ -1223,4 +1257,47 @@ mod tests {
         let err = result.unwrap_err();
         assert_eq!(err.kind(), io::ErrorKind::InvalidInput);
     }
+
+    // Loading a config file whose `[metrics]` section sets `enabled = true` must yield
+    // `metrics_enabled == true`, with no CLI flag involved.
+    #[test]
+    fn test_metrics_enabled_config() {
+        let storage_path = std::env::temp_dir();
+        let config_file_name = "test_metrics_enabled.toml";
+
+        let toml_config = r#"
+            [node]
+            network = "regtest"
+            rest_service_address = "127.0.0.1:3002"
+
+            [bitcoind]
+            rpc_address = "127.0.0.1:8332"
+            rpc_user = "user"
+            rpc_password = "password"
+
+            [metrics]
+            enabled = true
+
+            [rabbitmq]
+            connection_string = "rabbitmq_connection_string"
+            exchange_name = "rabbitmq_exchange_name"
+
+            [liquidity.lsps2_service]
+            advertise_service = false
+            channel_opening_fee_ppm = 1000 # 0.1% fee
+            channel_over_provisioning_ppm = 500000 # 50% extra capacity
+            min_channel_opening_fee_msat = 10000000 # 10,000 satoshis
+            min_channel_lifetime = 4320 # ~30 days
+            max_client_to_self_delay = 1440 # ~10 days
+            min_payment_size_msat = 10000000 # 10,000 satoshis
+            max_payment_size_msat = 25000000000 # 0.25 BTC
+            client_trusts_lsp = true
+            "#;
+
+        fs::write(storage_path.join(config_file_name), toml_config).unwrap();
+        let mut args_config = empty_args_config();
+        args_config.config_file =
+            Some(storage_path.join(config_file_name).to_string_lossy().to_string());
+
+        let config = load_config(&args_config).unwrap();
+        assert!(config.metrics_enabled);
+    }
 }
diff --git a/ldk-server/src/util/metrics.rs b/ldk-server/src/util/metrics.rs
new file mode 100644
index 0000000..3fabe20
--- /dev/null
+++ b/ldk-server/src/util/metrics.rs
@@ -0,0 +1,343 @@
+// This file is Copyright its original authors, visible in version control
+// history.
+//
+// This file is licensed under the Apache License, Version 2.0 or the MIT license
+// , at your option.
+// You may not use this file except in accordance with one or both of these
+// licenses.
+
+//! This module provides metrics for monitoring the LDK Server node in a Prometheus-compatible format.
+//!
+//! The `Metrics` struct holds atomic counters and gauges for various aspects of the node's
+//! operation, such as peer connections, channels and payments statuses, and balances.
+//!
+//! The metrics are updated through two main mechanisms:
+//! 1. **Periodic Polling**: The `update_all_pollable_metrics` function is called at a regular
+//!    interval (`BUILD_METRICS_INTERVAL`) to perform a full recount of metrics like peer count,
+//!    channels count, and balances.
+//! 2. **Event-Driven Updates**: For metrics that can change frequently and where a full recount
+//!    would be inefficient (e.g., total_successful_payments_count), a hybrid approach is used.
+//!    - `initialize_payment_metrics` is called once at startup to get the accurate persisted state.
+//!    - `update_payments_count` is called incrementally whenever a relevant event (like
+//!      `PaymentSuccessful` or `PaymentFailed`) occurs.
+//!
+//! 
+//! The `gather_metrics` function collects all current metric values and formats them into the
+//! plain-text format that Prometheus scrapers expect. This output is exposed via an
+//! unauthenticated `/metrics` HTTP endpoint on the rest service address.
+
+use std::sync::atomic::{AtomicI64, AtomicU64, Ordering};
+use std::time::Duration;
+
+use ldk_node::payment::PaymentStatus;
+use ldk_node::Node;
+
+/// Period between two consecutive refreshes of the pollable metrics.
+pub const BUILD_METRICS_INTERVAL: Duration = Duration::from_secs(60);
+
+/// Holds all the metrics that are tracked for LDK Server.
+///
+/// These metrics are exposed in a Prometheus-compatible format. The values are stored
+/// in atomic types to allow for safe concurrent access.
+pub struct Metrics {
+    /// Number of peers returned by the node's peer list at the last poll.
+    pub total_peers_count: AtomicI64,
+    /// Number of payments returned by the node's payment list.
+    pub total_payments_count: AtomicI64,
+    /// Payments with status `Succeeded`; seeded at startup, then incremented per event.
+    pub total_successful_payments_count: AtomicI64,
+    /// Payments with status `Pending`; recounted on every poll.
+    pub total_pending_payments_count: AtomicI64,
+    /// Payments with status `Failed`; seeded at startup, then incremented per event.
+    pub total_failed_payments_count: AtomicI64,
+    /// Number of channels returned by the node's channel list at the last poll.
+    pub total_channels_count: AtomicI64,
+    /// Channels whose `is_announced` flag is set.
+    pub total_public_channels_count: AtomicI64,
+    /// Channels whose `is_announced` flag is not set.
+    pub total_private_channels_count: AtomicI64,
+    /// Mirrors `total_onchain_balance_sats` of the node's balance report.
+    pub total_onchain_balance_sats: AtomicU64,
+    /// Mirrors `spendable_onchain_balance_sats` of the node's balance report.
+    pub spendable_onchain_balance_sats: AtomicU64,
+    /// Mirrors `total_anchor_channels_reserve_sats` of the node's balance report.
+    pub total_anchor_channels_reserve_sats: AtomicU64,
+    /// Mirrors `total_lightning_balance_sats` of the node's balance report.
+    pub total_lightning_balance_sats: AtomicU64,
+}
+
+impl Default for Metrics {
+    /// Equivalent to [`Metrics::new`]: every metric starts at zero.
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl Metrics {
+    /// Creates a metrics holder with every value set to zero.
+    pub fn new() -> Self {
+        Self {
+            total_peers_count: AtomicI64::new(0),
+            total_payments_count: AtomicI64::new(0),
+            total_successful_payments_count: AtomicI64::new(0),
+            total_pending_payments_count: AtomicI64::new(0),
+            total_failed_payments_count: AtomicI64::new(0),
+            total_channels_count: AtomicI64::new(0),
+            total_public_channels_count: AtomicI64::new(0),
+            total_private_channels_count: AtomicI64::new(0),
+            total_onchain_balance_sats: AtomicU64::new(0),
+            spendable_onchain_balance_sats: AtomicU64::new(0),
+            total_anchor_channels_reserve_sats: AtomicU64::new(0),
+            total_lightning_balance_sats: AtomicU64::new(0),
+        }
+    }
+
+    /// Overwrites the peer count with the current length of the node's peer list.
+    fn update_peer_count(&self, node: &Node) {
+        let total_peers_count = node.list_peers().len() as i64;
+        self.total_peers_count.store(total_peers_count, Ordering::Relaxed);
+    }
+
+    /// Records the outcome of one finished payment.
+    ///
+    /// Increments the successful counter when `is_successful` is true and the failed counter
+    /// otherwise. The total and pending counts are not touched here; they are refreshed by
+    /// [`Metrics::update_all_pollable_metrics`].
+    pub fn update_payments_count(&self, is_successful: bool) {
+        if is_successful {
+            self.total_successful_payments_count.fetch_add(1, Ordering::Relaxed);
+        } else {
+            self.total_failed_payments_count.fetch_add(1, Ordering::Relaxed);
+        }
+    }
+
+    /// Seeds the payment metrics from the node's payment list.
+    ///
+    /// Counts the payments per status and stores the successful, failed and pending counts
+    /// together with their sum as the total, so that all four values are consistent with each
+    /// other before the first poll runs.
+    pub fn initialize_payment_metrics(&self, node: &Node) {
+        let mut successful_payments_count = 0;
+        let mut failed_payments_count = 0;
+        let mut pending_payments_count = 0;
+
+        for payment_details in node.list_payments() {
+            match payment_details.status {
+                PaymentStatus::Succeeded => successful_payments_count += 1,
+                PaymentStatus::Failed => failed_payments_count += 1,
+                PaymentStatus::Pending => pending_payments_count += 1,
+            }
+        }
+
+        // The match above is exhaustive, so every listed payment landed in exactly one bucket
+        // and the sum equals the length of the list.
+        let payments_count =
+            successful_payments_count + failed_payments_count + pending_payments_count;
+
+        self.total_payments_count.store(payments_count, Ordering::Relaxed);
+        self.total_successful_payments_count.store(successful_payments_count, Ordering::Relaxed);
+        self.total_failed_payments_count.store(failed_payments_count, Ordering::Relaxed);
+        self.total_pending_payments_count.store(pending_payments_count, Ordering::Relaxed);
+    }
+
+    /// Copies the four balance figures of the node's balance report into the metrics.
+    fn update_all_balances(&self, node: &Node) {
+        let all_balances = node.list_balances();
+        self.total_onchain_balance_sats
+            .store(all_balances.total_onchain_balance_sats, Ordering::Relaxed);
+
+        self.spendable_onchain_balance_sats
+            .store(all_balances.spendable_onchain_balance_sats, Ordering::Relaxed);
+
+        self.total_anchor_channels_reserve_sats
+            .store(all_balances.total_anchor_channels_reserve_sats, Ordering::Relaxed);
+
+        self.total_lightning_balance_sats
+            .store(all_balances.total_lightning_balance_sats, Ordering::Relaxed);
+    }
+
+    /// Recounts every metric that is derived by polling the node.
+    ///
+    /// Refreshes the total and pending payment counts, the channel counts, the peer count and
+    /// the balances. The successful and failed payment counts are left alone because they are
+    /// maintained by [`Metrics::update_payments_count`].
+    pub fn update_all_pollable_metrics(&self, node: &Node) {
+        let all_payments = node.list_payments();
+        let all_channels = node.list_channels();
+
+        let payments_count = all_payments.len() as i64;
+        self.total_payments_count.store(payments_count, Ordering::Relaxed);
+
+        let pending_payments_count = all_payments
+            .iter()
+            .filter(|payment_details| payment_details.status == PaymentStatus::Pending)
+            .count() as i64;
+        self.total_pending_payments_count.store(pending_payments_count, Ordering::Relaxed);
+
+        let channels_count = all_channels.len() as i64;
+        self.total_channels_count.store(channels_count, Ordering::Relaxed);
+
+        let public_channels_count =
+            all_channels.iter().filter(|channel_details| channel_details.is_announced).count()
+                as i64;
+        self.total_public_channels_count.store(public_channels_count, Ordering::Relaxed);
+
+        // A channel is either announced or not, so the private count is the remainder; this
+        // avoids walking the channel list a second time.
+        let private_channels_count = channels_count - public_channels_count;
+        self.total_private_channels_count.store(private_channels_count, Ordering::Relaxed);
+
+        self.update_peer_count(node);
+        self.update_all_balances(node);
+    }
+
+    /// Gathers all metrics and formats them into the Prometheus text-based format.
+    ///
+    /// This function is called by the `/metrics` endpoint to provide the current state
+    /// of all tracked metrics to a Prometheus scraper. The format is a series of lines,
+    /// each containing a metric name, and its value, preceded by
+    /// HELP and TYPE lines as per the Prometheus exposition format specification.
+    pub fn gather_metrics(&self) -> String {
+        // Twelve metrics at three short lines each; reserve once instead of growing the
+        // buffer repeatedly on every scrape.
+        let mut buffer = String::with_capacity(2048);
+
+        fn format_metric(
+            buffer: &mut String, name: &str, help: &str, metric_type: &str,
+            value: impl std::fmt::Display,
+        ) {
+            use std::fmt::Write;
+            // Writing into a `String` cannot fail, so the results are deliberately ignored.
+            let _ = writeln!(buffer, "# HELP {} {}", name, help);
+            let _ = writeln!(buffer, "# TYPE {} {}", name, metric_type);
+            let _ = writeln!(buffer, "{} {}", name, value);
+        }
+
+        format_metric(
+            &mut buffer,
+            "ldk_server_total_peers_count",
+            "Total number of peers",
+            "gauge",
+            self.total_peers_count.load(Ordering::Relaxed),
+        );
+
+        // NOTE(review): exported as `counter`, but the value is overwritten from the length of
+        // the payment list on every poll. If payments can ever be removed from that list this
+        // should be a `gauge` -- confirm before changing the exported type.
+        format_metric(
+            &mut buffer,
+            "ldk_server_total_payments_count",
+            "Total number of payments",
+            "counter",
+            self.total_payments_count.load(Ordering::Relaxed),
+        );
+
+        format_metric(
+            &mut buffer,
+            "ldk_server_total_pending_payments_count",
+            "Total number of pending payments",
+            "gauge",
+            self.total_pending_payments_count.load(Ordering::Relaxed),
+        );
+
+        format_metric(
+            &mut buffer,
+            "ldk_server_total_successful_payments_count",
+            "Total number of successful payments",
+            "counter",
+            self.total_successful_payments_count.load(Ordering::Relaxed),
+        );
+
+        format_metric(
+            &mut buffer,
+            "ldk_server_total_failed_payments_count",
+            "Total number of failed payments",
+            "counter",
+            self.total_failed_payments_count.load(Ordering::Relaxed),
+        );
+
+        format_metric(
+            &mut buffer,
+            "ldk_server_total_channels_count",
+            "Total number of channels",
+            "gauge",
+            self.total_channels_count.load(Ordering::Relaxed),
+        );
+
+        format_metric(
+            &mut buffer,
+            "ldk_server_total_public_channels_count",
+            "Total number of public channels",
+            "gauge",
+            self.total_public_channels_count.load(Ordering::Relaxed),
+        );
+
+        format_metric(
+            &mut buffer,
+            "ldk_server_total_private_channels_count",
+            "Total number of private channels",
+            "gauge",
+            self.total_private_channels_count.load(Ordering::Relaxed),
+        );
+
+        format_metric(
+            &mut buffer,
+            "ldk_server_total_onchain_balance_sats",
+            "Total onchain balance in sats",
+            "gauge",
+            self.total_onchain_balance_sats.load(Ordering::Relaxed),
+        );
+
+        format_metric(
+            &mut buffer,
+            "ldk_server_spendable_onchain_balance_sats",
+            "Spendable onchain balance in sats",
+            "gauge",
+            self.spendable_onchain_balance_sats.load(Ordering::Relaxed),
+        );
+
+        format_metric(
+            &mut buffer,
+            "ldk_server_total_anchor_channels_reserve_sats",
+            "Total anchor channels reserve in sats",
+            "gauge",
+            self.total_anchor_channels_reserve_sats.load(Ordering::Relaxed),
+        );
+
+        format_metric(
+            &mut buffer,
+            "ldk_server_total_lightning_balance_sats",
+            "Total lightning balance in sats",
+            "gauge",
+            self.total_lightning_balance_sats.load(Ordering::Relaxed),
+        );
+
+        buffer
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_initial_metrics_values() {
+        let metrics = Metrics::new();
+        let result = metrics.gather_metrics();
+
+        // Check that all metrics are present and zero
+        assert!(result.contains("ldk_server_total_peers_count 0"));
+        assert!(result.contains("ldk_server_total_payments_count 0"));
+        assert!(result.contains("ldk_server_total_successful_payments_count 0"));
+        assert!(result.contains("ldk_server_total_pending_payments_count 0"));
+        assert!(result.contains("ldk_server_total_failed_payments_count 0"));
+        assert!(result.contains("ldk_server_total_channels_count 0"));
+        assert!(result.contains("ldk_server_total_public_channels_count 0"));
+        assert!(result.contains("ldk_server_total_private_channels_count 0"));
+        assert!(result.contains("ldk_server_total_onchain_balance_sats 0"));
+        assert!(result.contains("ldk_server_spendable_onchain_balance_sats 0"));
+        assert!(result.contains("ldk_server_total_anchor_channels_reserve_sats 0"));
+        assert!(result.contains("ldk_server_total_lightning_balance_sats 0"));
+    }
+
+    #[test]
+    fn test_default_matches_new() {
+        // `Default` must produce exactly the same exposition output as `new`.
+        assert_eq!(Metrics::default().gather_metrics(), Metrics::new().gather_metrics());
+    }
+
+    #[test]
+    fn test_update_payments_count() {
+        let metrics = Metrics::new();
+
+        metrics.total_successful_payments_count.store(10, Ordering::Relaxed);
+        metrics.total_failed_payments_count.store(5, Ordering::Relaxed);
+
+        metrics.update_payments_count(true);
+        metrics.update_payments_count(false);
+
+        assert_eq!(metrics.total_successful_payments_count.load(Ordering::Relaxed), 11);
+        assert_eq!(metrics.total_failed_payments_count.load(Ordering::Relaxed), 6);
+    }
+
+    #[test]
+    fn test_metrics_update_and_gather() {
+        let metrics = Metrics::new();
+
+        // Manually update metrics to simulate node activity
+        metrics.total_peers_count.store(5, Ordering::Relaxed);
+        metrics.total_payments_count.store(10, Ordering::Relaxed);
+        metrics.total_pending_payments_count.store(1, Ordering::Relaxed);
+        metrics.total_successful_payments_count.store(8, Ordering::Relaxed);
+        metrics.total_failed_payments_count.store(2, Ordering::Relaxed);
+        metrics.total_channels_count.store(3, Ordering::Relaxed);
+        metrics.total_public_channels_count.store(1, Ordering::Relaxed);
+        metrics.total_private_channels_count.store(2, Ordering::Relaxed);
+        metrics.total_onchain_balance_sats.store(100_000, Ordering::Relaxed);
+        metrics.spendable_onchain_balance_sats.store(50_000, Ordering::Relaxed);
+        metrics.total_anchor_channels_reserve_sats.store(1_000, Ordering::Relaxed);
+        metrics.total_lightning_balance_sats.store(250_000, Ordering::Relaxed);
+
+        let result = metrics.gather_metrics();
+
+        // Check that output contains updated values and correct Prometheus format
+        assert!(result.contains("# HELP ldk_server_total_peers_count Total number of peers"));
+        assert!(result.contains("# TYPE ldk_server_total_peers_count gauge"));
+        assert!(result.contains("ldk_server_total_peers_count 5"));
+
+        assert!(result.contains("ldk_server_total_payments_count 10"));
+        assert!(result.contains("ldk_server_total_pending_payments_count 1"));
+        assert!(result.contains("ldk_server_total_successful_payments_count 8"));
+        assert!(result.contains("ldk_server_total_failed_payments_count 2"));
+        assert!(result.contains("ldk_server_total_channels_count 3"));
+        assert!(result.contains("ldk_server_total_public_channels_count 1"));
+        assert!(result.contains("ldk_server_total_private_channels_count 2"));
+        assert!(result.contains("ldk_server_total_onchain_balance_sats 100000"));
+        assert!(result.contains("ldk_server_spendable_onchain_balance_sats 50000"));
+        assert!(result.contains("ldk_server_total_anchor_channels_reserve_sats 1000"));
+        assert!(result.contains("ldk_server_total_lightning_balance_sats 250000"));
+    }
+}
diff --git a/ldk-server/src/util/mod.rs b/ldk-server/src/util/mod.rs
index 5d74de4..a57dbd0 100644
--- a/ldk-server/src/util/mod.rs
+++ b/ldk-server/src/util/mod.rs
@@ -9,6 +9,7 @@
 
 pub(crate) mod config;
 pub(crate) mod logger;
+pub(crate) mod metrics;
 pub(crate) mod proto_adapter;
 pub(crate) mod systemd;
 pub(crate) mod tls;