From 7dc72d5cba8728c9b01c4720ebd31c89ac308bbc Mon Sep 17 00:00:00 2001 From: peg Date: Thu, 25 Jun 2026 10:27:00 +0200 Subject: [PATCH 1/3] Pccs - sync cache get should do network fetch --- Cargo.lock | 2 + crates/pccs/Cargo.toml | 2 + crates/pccs/README.md | 5 +- crates/pccs/src/lib.rs | 168 ++++++++++++++++++++++++----------------- 4 files changed, 104 insertions(+), 73 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e368a0c..b460822 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3110,6 +3110,7 @@ version = "0.0.1" dependencies = [ "anyhow", "dcap-qvl 0.5.2", + "futures-executor", "hex", "mock-tdx", "rcgen 0.14.7", @@ -3122,6 +3123,7 @@ dependencies = [ "tokio", "tracing", "tracing-subscriber", + "ureq", "x509-parser 0.18.1", ] diff --git a/crates/pccs/Cargo.toml b/crates/pccs/Cargo.toml index 4a2e7d0..4cc61cc 100644 --- a/crates/pccs/Cargo.toml +++ b/crates/pccs/Cargo.toml @@ -18,6 +18,8 @@ hex = "0.4.3" anyhow = "1.0.100" reqwest = { workspace = true } x509-parser = "0.18.0" +ureq = "2.12.1" +futures-executor = "0.3.31" [dev-dependencies] rcgen = "0.14.5" diff --git a/crates/pccs/README.md b/crates/pccs/README.md index e1a3cfd..f71dcd9 100644 --- a/crates/pccs/README.md +++ b/crates/pccs/README.md @@ -24,5 +24,6 @@ For Intel's terminology and architecture, see the Intel documentation for the This crate expects to be used from within a Tokio runtime. The above applies even when calling synchronous-looking APIs such as -`get_collateral_sync()` because cache miss repair, proactive refresh, and -startup pre-warm are all driven by Tokio background tasks. +`get_collateral_sync()` because proactive refresh and startup pre-warm are +driven by Tokio background tasks. Cache misses in `get_collateral_sync()` +are fetched synchronously with a `ureq` transport. diff --git a/crates/pccs/src/lib.rs b/crates/pccs/src/lib.rs index ea63b05..0e3383b 100644 --- a/crates/pccs/src/lib.rs +++ b/crates/pccs/src/lib.rs @@ -1,5 +1,6 @@ use std::{ - collections::{HashMap, HashSet}, + collections::{BTreeMap, HashMap}, + io::Read, sync::{ Arc, RwLock, @@ -9,7 +10,13 @@ use std::{ time::{SystemTime, UNIX_EPOCH}, }; -use dcap_qvl::{QuoteCollateralV3, collateral::CollateralClient, tcb_info::TcbInfo}; +use dcap_qvl::{ + QuoteCollateralV3, + collateral::CollateralClient, + configs::DefaultConfig, + http::{HttpClient, HttpResponse}, + tcb_info::TcbInfo, +}; use thiserror::Error; use time::{OffsetDateTime, format_description::well_known::Rfc3339}; use tokio::{ @@ -37,8 +44,6 @@ pub struct Pccs { url: String, /// The internal cache cache: Arc>>, - /// Dedupes one-shot background refreshes for cache misses - pending_refreshes: Arc>>, /// The state of the initial pre-warm fetch prewarm_stats: Arc, /// Completion signal for startup pre-warm, shared across all clones @@ -84,7 +89,6 @@ impl Pccs { Self { url, cache: RwLock::new(HashMap::new()).into(), - pending_refreshes: RwLock::new(HashSet::new()).into(), prewarm_stats: Arc::new(PrewarmStats::default()), prewarm_outcome_tx: None, } @@ -151,15 +155,14 @@ impl Pccs { upsert_cache_entry(&mut cache, cache_key.clone(), collateral.clone(), next_update); } - self.ensure_refresh_task(&cache_key).await; + self.ensure_refresh_task(&cache_key); Ok((collateral, true)) } /// A synchronous method to get collateral from the cache. /// /// If the requested collateral is not present in the cache, this will - /// return an error rather than waiting to fetch it. But it does - /// begin fetching it in a background task. + /// fetch it synchronously and cache the result. /// /// If the collateral is out of date, this will log a warning and return /// it anyway on a best-effort basis. @@ -183,19 +186,30 @@ impl Pccs { ); drop(cache); - // Start a background task to renew - let pccs = self.clone(); - tokio::spawn(async move { - pccs.ensure_refresh_task(&cache_key).await; - }); + self.ensure_refresh_task(&cache_key); return Ok(collateral); } Ok(entry.collateral.clone()) } else { drop(cache); - self.spawn_background_refresh_for_cache_miss(cache_key.clone()); - Err(PccsError::NoCollateralForFmspc(format!("{cache_key:?}"))) + let collateral = fetch_collateral_sync(&self.url, fmspc.clone(), ca)?; + let next_update = extract_next_update(&collateral, now)?; + + { + let mut cache = self.cache.write().map_err(|_| PccsError::CachePoisoned)?; + if let Some(existing) = cache.get(&cache_key) && + now < existing.next_update + { + return Ok(existing.collateral.clone()); + } + + upsert_cache_entry(&mut cache, cache_key.clone(), collateral.clone(), next_update); + } + + self.ensure_refresh_task(&cache_key); + + Ok(collateral) } } @@ -215,13 +229,13 @@ impl Pccs { let mut cache = self.cache.write().map_err(|_| PccsError::CachePoisoned)?; upsert_cache_entry(&mut cache, cache_key.clone(), collateral.clone(), next_update); } - self.ensure_refresh_task(&cache_key).await; + self.ensure_refresh_task(&cache_key); Ok(collateral) } /// Starts a background refresh loop for a cache key when no task is /// active - async fn ensure_refresh_task(&self, cache_key: &PccsInput) { + fn ensure_refresh_task(&self, cache_key: &PccsInput) { let Ok(mut cache) = self.cache.write() else { tracing::warn!("PCCS cache lock poisoned, cannot ensure refresh task"); return; @@ -241,46 +255,6 @@ impl Pccs { })); } - /// Starts a one-shot background fetch to populate a missing cache entry - fn spawn_background_refresh_for_cache_miss(&self, cache_key: PccsInput) { - { - let Ok(mut pending_refreshes) = self.pending_refreshes.write() else { - tracing::warn!("PCCS pending-refresh lock poisoned, cannot start sync refresh"); - return; - }; - if !pending_refreshes.insert(cache_key.clone()) { - return; - } - } - - let pccs = self.clone(); - tokio::spawn(async move { - let result = pccs - .refresh_collateral( - cache_key.fmspc.clone(), - ca_as_static(&cache_key.ca).expect("unsupported CA in pending refresh"), - ) - .await; - - if let Err(err) = result { - tracing::warn!( - fmspc = cache_key.fmspc, - ca = cache_key.ca, - error = %err, - "Sync-triggered PCCS cache repair failed" - ); - } - - // Always clear the dedupe marker so a later sync miss can - // retry if this repair attempt failed. - if let Ok(mut pending_refreshes) = pccs.pending_refreshes.write() { - pending_refreshes.remove(&cache_key); - } else { - tracing::warn!("PCCS pending-refresh lock poisoned during cleanup"); - } - }); - } - /// Pre-provisions TDX collateral for discovered FMSPC values to reduce /// hot-path fetches async fn startup_prewarm_all_tdx(&self) -> PrewarmOutcome { @@ -430,6 +404,54 @@ async fn fetch_collateral( .map_err(Into::into) } +/// Fetches collateral using dcap-qvl's collateral client with a blocking +/// ureq transport. +fn fetch_collateral_sync( + url: &str, + fmspc: String, + ca: &'static str, +) -> Result { + futures_executor::block_on( + CollateralClient::::new(UreqHttp::new(), url) + .fetch_for_fmspc_without_pck_chain(&fmspc, ca, false), + ) + .map_err(Into::into) +} + +#[derive(Clone)] +struct UreqHttp { + agent: ureq::Agent, +} + +impl UreqHttp { + fn new() -> Self { + Self { agent: ureq::AgentBuilder::new().timeout(Duration::from_secs(15)).build() } + } + + fn response_to_http_response(response: ureq::Response) -> anyhow::Result { + let status = response.status(); + let headers = response + .headers_names() + .into_iter() + .filter_map(|name| response.header(&name).map(|value| (name, value.to_string()))) + .collect::>(); + let mut body = Vec::new(); + response.into_reader().read_to_end(&mut body)?; + Ok(HttpResponse { status, headers, body }) + } +} + +impl HttpClient for UreqHttp { + async fn get(&self, url: &str) -> anyhow::Result { + let response = match self.agent.get(url).call() { + Ok(response) => response, + Err(ureq::Error::Status(_, response)) => response, + Err(err) => return Err(err.into()), + }; + Self::response_to_http_response(response) + } +} + /// Extracts the earliest next update timestamp from collateral metadata /// /// This returns the soonest timestamp from either: @@ -717,8 +739,6 @@ pub enum PccsError { TimeStampExceedsI64, #[error("PCCS cache lock poisoned")] CachePoisoned, - #[error("No collateral in cache for FMSPC {0}")] - NoCollateralForFmspc(String), } #[cfg(test)] @@ -898,7 +918,7 @@ mod tests { } #[tokio::test] - async fn test_get_collateral_sync_repairs_cache_miss_in_background() { + async fn test_get_collateral_sync_fetches_and_caches_on_miss() { let fmspc = mock_tdx_fmspc(); let mock = spawn_mock_pcs_server(MockPcsConfig { include_fmspcs_listing: false, @@ -913,18 +933,24 @@ mod tests { let pccs = Pccs::new_without_prewarm(Some(mock.base_url.clone())); let now = unix_now().unwrap() as u64; - let err = pccs.get_collateral_sync(fmspc.clone(), "processor", now); - assert!(matches!(err, Err(PccsError::NoCollateralForFmspc(_)))); - - for _ in 0..50 { - if pccs.get_collateral_sync(fmspc.clone(), "processor", now).is_ok() { - break; - } - tokio::time::sleep(Duration::from_millis(20)).await; - } + let pccs_for_sync = pccs.clone(); + let fmspc_for_sync = fmspc.clone(); + let collateral = tokio::task::spawn_blocking(move || { + pccs_for_sync.get_collateral_sync(fmspc_for_sync, "processor", now) + }) + .await + .unwrap(); + assert!(collateral.is_ok(), "expected sync miss fetch to populate cache: {collateral:?}"); + assert_eq!(mock.tcb_call_count(), 1); + assert_eq!(mock.qe_call_count(), 1); - let collateral = pccs.get_collateral_sync(fmspc, "processor", now); - assert!(collateral.is_ok(), "expected sync miss repair to populate cache"); + let pccs_for_sync = pccs.clone(); + let cached = tokio::task::spawn_blocking(move || { + pccs_for_sync.get_collateral_sync(fmspc, "processor", now) + }) + .await + .unwrap(); + assert!(cached.is_ok(), "expected cached collateral on second sync hit"); assert_eq!(mock.tcb_call_count(), 1); assert_eq!(mock.qe_call_count(), 1); } From 46beafd60d7bdfb37cad74cd61dce44f19fe2de3 Mon Sep 17 00:00:00 2001 From: peg Date: Thu, 25 Jun 2026 10:44:04 +0200 Subject: [PATCH 2/3] Reduce timeout and add a comment explaining why a short timeout is needed --- crates/pccs/src/lib.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/crates/pccs/src/lib.rs b/crates/pccs/src/lib.rs index 0e3383b..38ef6ec 100644 --- a/crates/pccs/src/lib.rs +++ b/crates/pccs/src/lib.rs @@ -33,6 +33,10 @@ pub const PCS_URL: &str = "https://api.trustedservices.intel.com"; const REFRESH_MARGIN_SECS: i64 = 300; /// How long to wait before retrying when failing to fetch collateral const REFRESH_RETRY_SECS: u64 = 60; +/// Keep this short: sync collateral fetches can run inside rustls verifier +/// callbacks during TLS handshakes, where blocking stalls handshake +/// progress. +const SYNC_COLLATERAL_FETCH_TIMEOUT_SECS: u64 = 2; /// How many collateral fetches to perform concurrently during initial /// pre-warm const STARTUP_PREWARM_CONCURRENCY: usize = 8; @@ -425,7 +429,11 @@ struct UreqHttp { impl UreqHttp { fn new() -> Self { - Self { agent: ureq::AgentBuilder::new().timeout(Duration::from_secs(15)).build() } + Self { + agent: ureq::AgentBuilder::new() + .timeout(Duration::from_secs(SYNC_COLLATERAL_FETCH_TIMEOUT_SECS)) + .build(), + } } fn response_to_http_response(response: ureq::Response) -> anyhow::Result { From 53284a3b63a8ca447c1c96e2eb01e66464e42595 Mon Sep 17 00:00:00 2001 From: peg Date: Thu, 25 Jun 2026 10:47:36 +0200 Subject: [PATCH 3/3] Rm test which is no longer testing desired behavior --- crates/attested-tls/src/lib.rs | 72 ---------------------------------- 1 file changed, 72 deletions(-) diff --git a/crates/attested-tls/src/lib.rs b/crates/attested-tls/src/lib.rs index 1495611..06f5809 100644 --- a/crates/attested-tls/src/lib.rs +++ b/crates/attested-tls/src/lib.rs @@ -1714,78 +1714,6 @@ mod tests { .unwrap(); } - #[tokio::test(flavor = "multi_thread")] - async fn sync_verifier_cache_miss_fails_then_succeeds_after_background_fetch() { - let provider: Arc = aws_lc_rs::default_provider().into(); - let key_pair = KeyPair::generate_for(&PKCS_ECDSA_P256_SHA256).unwrap(); - let resolver = AttestedCertificateResolver::build( - "foo", - AttestationGenerator::new(AttestationType::DcapTdx, None).unwrap(), - ) - .with_crypto_provider(provider.clone()) - .with_key_pair(&key_pair) - .with_certificate_validity(Duration::from_secs(4)) - .finish() - .unwrap(); - let cert = resolver.state.certificate.read().unwrap().first().unwrap().clone(); - - // Mock PCS is set up to not list the FMSPCs, meaning the pre-warm - // wont fetch anything - let mock_pcs = spawn_mock_pcs_server(MockPcsConfig { - include_fmspcs_listing: false, - ..MockPcsConfig::default() - }) - .await - .unwrap(); - - let verifier = AttestedCertificateVerifier::build(AttestationVerifier::mock_with_pccs( - mock_pcs.base_url.clone(), - )) - .with_crypto_provider(provider) - .finish() - .unwrap(); - - let first_result = verify_server_cert_direct( - &verifier, - &cert, - &ServerName::try_from("foo").unwrap(), - UnixTime::now(), - ); - - // Initially verification fails because the PCCS doesn't have the - // collateral associated with the quote - assert_eq!( - first_result.unwrap_err(), - Error::InvalidCertificate(CertificateError::ApplicationVerificationFailure) - ); - - // Now we wait a moment for the PCCS to fetch it in the background - for _ in 0..50 { - if verify_server_cert_direct( - &verifier, - &cert, - &ServerName::try_from("foo").unwrap(), - UnixTime::now(), - ) - .is_ok() - { - break; - } - tokio::time::sleep(Duration::from_millis(20)).await; - } - - // Now verification succeeds - verify_server_cert_direct( - &verifier, - &cert, - &ServerName::try_from("foo").unwrap(), - UnixTime::now(), - ) - .unwrap(); - assert_eq!(mock_pcs.tcb_call_count(), 1); - assert_eq!(mock_pcs.qe_call_count(), 1); - } - /// Helper to create a private certificate authority fn test_ca() -> CaCert { let key = KeyPair::generate_for(&PKCS_ECDSA_P256_SHA256).unwrap();