From dea21405e42de5ce83ba89162e862f96b617bd56 Mon Sep 17 00:00:00 2001 From: Roderick van Domburg Date: Tue, 9 Jun 2026 23:30:04 +0200 Subject: [PATCH 1/7] fix(wasapi): resolve racy timestamp callback padding --- CHANGELOG.md | 4 ++++ src/host/wasapi/device.rs | 27 ++++++++++++++++++++------ src/host/wasapi/stream.rs | 41 ++++++++++++++++++++++----------------- 3 files changed, 48 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e842468fe..ff3da38e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **WASAPI**: The `windows` and `windows-core` dependencies are now both pinned to 0.62. +### Fixed + +- **WASAPI**: Fix output `playback` timestamps occasionally stepping backwards. + ## [0.18.1] - 2026-06-07 ### Fixed diff --git a/src/host/wasapi/device.rs b/src/host/wasapi/device.rs index 3333cd45e..d63c9d186 100644 --- a/src/host/wasapi/device.rs +++ b/src/host/wasapi/device.rs @@ -19,6 +19,7 @@ impl From for DeviceDirection { } } use std::{ + cell::Cell, ffi::OsString, fmt, hash::Hash, @@ -927,7 +928,7 @@ impl Device { // `run()` method and added to the `RunContext`. let client_flow = AudioClientFlow::Capture { capture_client }; - let audio_clock = get_audio_clock(&audio_client)?; + let (audio_clock, clock_frequency) = get_audio_clock(&audio_client)?; let stream_latency = { let hns = audio_client @@ -939,6 +940,8 @@ impl Device { Ok(StreamInner { audio_client, audio_clock, + clock_frequency, + frames_written: Cell::new(0), client_flow, event, playing: false, @@ -1040,7 +1043,7 @@ impl Device { // `run()` method and added to the `RunContext`. let client_flow = AudioClientFlow::Render { render_client }; - let audio_clock = get_audio_clock(&audio_client)?; + let (audio_clock, clock_frequency) = get_audio_clock(&audio_client)?; let stream_latency = { let hns = audio_client @@ -1052,6 +1055,8 @@ impl Device { Ok(StreamInner { audio_client, audio_clock, + clock_frequency, + frames_written: Cell::new(0), client_flow, event, playing: false, @@ -1340,11 +1345,21 @@ pub fn default_output_device() -> Option { current_default_endpoint(Audio::eRender).map(|_| Device::default_output()) } -/// Get the audio clock used to produce `StreamInstant`s. -unsafe fn get_audio_clock(audio_client: &Audio::IAudioClient) -> Result { - audio_client +/// Get the audio clock used to produce `StreamInstant`s, together with its (constant) frequency. +unsafe fn get_audio_clock( + audio_client: &Audio::IAudioClient, +) -> Result<(Audio::IAudioClock, u64), Error> { + let audio_clock = audio_client .GetService::() - .context("Failed to get audio clock") + .context("Failed to get audio clock")?; + let clock_frequency = audio_clock + .GetFrequency() + .context("Failed to get audio clock frequency")?; + debug_assert_ne!( + clock_frequency, 0, + "IAudioClock::GetFrequency returned zero" + ); + Ok((audio_clock, clock_frequency)) } // Sample rate range supported by the Media Foundation Resampler MFT used by AUTOCONVERTPCM. diff --git a/src/host/wasapi/stream.rs b/src/host/wasapi/stream.rs index 817cf3786..ec7fafb93 100644 --- a/src/host/wasapi/stream.rs +++ b/src/host/wasapi/stream.rs @@ -1,4 +1,5 @@ use std::{ + cell::Cell, mem, ops::ControlFlow, ptr, @@ -18,10 +19,7 @@ use windows::Win32::{ }; use crate::{ - host::{ - emit_error, equilibrium::fill_equilibrium, frames_to_duration, latch::Latch, - ErrorCallbackArc, - }, + host::{emit_error, equilibrium::fill_equilibrium, latch::Latch, ErrorCallbackArc}, traits::StreamTrait, Data, Error, ErrorKind, FrameCount, InputCallbackInfo, InputStreamTimestamp, OutputCallbackInfo, OutputStreamTimestamp, ResultExt, SampleFormat, SampleRate, StreamConfig, @@ -290,6 +288,10 @@ pub enum AudioClientFlow { pub struct StreamInner { pub audio_client: Audio::IAudioClient, pub audio_clock: Audio::IAudioClock, + // Cached (constant) frequency of `audio_clock`. + pub clock_frequency: u64, + // Running total of frames submitted to the render buffer. + pub frames_written: Cell, pub client_flow: AudioClientFlow, // Event that is signalled by WASAPI whenever audio data must be written. pub event: Foundation::HANDLE, @@ -809,23 +811,26 @@ fn process_output( let len = byte_count / stream.sample_format.sample_size(); let mut data = Data::from_parts(data, len, stream.sample_format); let sample_rate = stream.config.sample_rate; - let timestamp = output_timestamp(stream, frames_available, sample_rate)?; + let timestamp = output_timestamp(stream, sample_rate)?; let info = OutputCallbackInfo { timestamp }; data_callback(&mut data, &info); render_client .ReleaseBuffer(frames_available, 0) .map_err(Error::from)?; + + stream + .frames_written + .set(stream.frames_written.get() + frames_available as u64); } Ok(()) } -/// Use the stream's `IAudioClock` to produce the current stream instant. -/// -/// Uses the QPC position produced via the `GetPosition` method. +/// Atomically reads the stream's `IAudioClock`, returning the callback [`StreamInstant`] +/// together with the device position (how far playback has progressed. #[inline] -fn stream_instant(stream: &StreamInner) -> Result { +fn clock_position(stream: &StreamInner) -> Result<(StreamInstant, u64), Error> { let mut position: u64 = 0; let mut qpc_position: u64 = 0; unsafe { @@ -840,7 +845,7 @@ fn stream_instant(stream: &StreamInner) -> Result { (nanos / 1_000_000_000) as u64, (nanos % 1_000_000_000) as u32, ); - Ok(instant) + Ok((instant, position)) } /// Produce the input stream timestamp. @@ -859,26 +864,26 @@ fn input_timestamp( (nanos / 1_000_000_000) as u64, (nanos % 1_000_000_000) as u32, ); - let callback = stream_instant(stream)?; + let (callback, _position) = clock_position(stream)?; Ok(InputStreamTimestamp { capture, callback }) } /// Produce the output stream timestamp. /// -/// `frames_available` is the number of frames available for writing as reported by subtracting the -/// result of `GetCurrentPadding` from the maximum buffer size. -/// /// `sample_rate` is the rate at which audio frames are processed by the device. #[inline] fn output_timestamp( stream: &StreamInner, - frames_available: FrameCount, sample_rate: SampleRate, ) -> Result { - let callback = stream_instant(stream)?; + let (callback, position) = clock_position(stream)?; // `padding` is the number of frames already queued in the endpoint buffer ahead of the // frames we are about to write. Those frames must drain before ours are heard. - let padding = stream.max_frames_in_buffer - frames_available; - let playback = callback + (frames_to_duration(padding, sample_rate) + stream.stream_latency); + let consumed_nanos = position as u128 * 1_000_000_000 / stream.clock_frequency as u128; + let written_nanos = stream.frames_written.get() as u128 * 1_000_000_000 / sample_rate as u128; + // The difference is the (small) buffer fill, so it fits in `u64` nanoseconds. + let buffered = Duration::from_nanos(written_nanos.saturating_sub(consumed_nanos) as u64); + + let playback = callback + (buffered + stream.stream_latency); Ok(OutputStreamTimestamp { callback, playback }) } From 0456c981f16e18d1ba14537af4246c06a4fbabc0 Mon Sep 17 00:00:00 2001 From: Roderick van Domburg Date: Tue, 9 Jun 2026 23:31:56 +0200 Subject: [PATCH 2/7] style(wasapi): reorder imports and stray conversion --- src/host/wasapi/device.rs | 41 ++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/src/host/wasapi/device.rs b/src/host/wasapi/device.rs index d63c9d186..18ae7060a 100644 --- a/src/host/wasapi/device.rs +++ b/src/host/wasapi/device.rs @@ -1,23 +1,3 @@ -use crate::{ - error::ResultExt, - host::{com::ComString, ErrorCallbackArc}, - BufferSize, Data, DeviceDescription, DeviceDescriptionBuilder, DeviceDirection, DeviceId, - DeviceType, Error, ErrorKind, FrameCount, InputCallbackInfo, InterfaceType, OutputCallbackInfo, - SampleFormat, SampleRate, StreamConfig, SupportedBufferSize, SupportedStreamConfig, - SupportedStreamConfigRange, COMMON_SAMPLE_RATES, -}; - -impl From for DeviceDirection { - fn from(data_flow: Audio::EDataFlow) -> Self { - if data_flow == Audio::eCapture { - DeviceDirection::Input - } else if data_flow == Audio::eRender { - DeviceDirection::Output - } else { - DeviceDirection::Unknown - } - } -} use std::{ cell::Cell, ffi::OsString, @@ -30,6 +10,15 @@ use std::{ time::Duration, }; +use crate::{ + error::ResultExt, + host::{com::ComString, ErrorCallbackArc}, + BufferSize, Data, DeviceDescription, DeviceDescriptionBuilder, DeviceDirection, DeviceId, + DeviceType, Error, ErrorKind, FrameCount, InputCallbackInfo, InterfaceType, OutputCallbackInfo, + SampleFormat, SampleRate, StreamConfig, SupportedBufferSize, SupportedStreamConfig, + SupportedStreamConfigRange, COMMON_SAMPLE_RATES, +}; + use windows::{ core::{Interface, GUID}, Win32::{ @@ -1345,6 +1334,18 @@ pub fn default_output_device() -> Option { current_default_endpoint(Audio::eRender).map(|_| Device::default_output()) } +impl From for DeviceDirection { + fn from(data_flow: Audio::EDataFlow) -> Self { + if data_flow == Audio::eCapture { + DeviceDirection::Input + } else if data_flow == Audio::eRender { + DeviceDirection::Output + } else { + DeviceDirection::Unknown + } + } +} + /// Get the audio clock used to produce `StreamInstant`s, together with its (constant) frequency. unsafe fn get_audio_clock( audio_client: &Audio::IAudioClient, From e88186f3387be3701eeca90ef8586758ce874829 Mon Sep 17 00:00:00 2001 From: Roderick van Domburg Date: Tue, 9 Jun 2026 23:45:34 +0200 Subject: [PATCH 3/7] fix: address review points --- src/host/wasapi/stream.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/host/wasapi/stream.rs b/src/host/wasapi/stream.rs index ec7fafb93..19e11ba5d 100644 --- a/src/host/wasapi/stream.rs +++ b/src/host/wasapi/stream.rs @@ -828,7 +828,7 @@ fn process_output( } /// Atomically reads the stream's `IAudioClock`, returning the callback [`StreamInstant`] -/// together with the device position (how far playback has progressed. +/// together with the device position. #[inline] fn clock_position(stream: &StreamInner) -> Result<(StreamInstant, u64), Error> { let mut position: u64 = 0; @@ -881,8 +881,9 @@ fn output_timestamp( // frames we are about to write. Those frames must drain before ours are heard. let consumed_nanos = position as u128 * 1_000_000_000 / stream.clock_frequency as u128; let written_nanos = stream.frames_written.get() as u128 * 1_000_000_000 / sample_rate as u128; - // The difference is the (small) buffer fill, so it fits in `u64` nanoseconds. - let buffered = Duration::from_nanos(written_nanos.saturating_sub(consumed_nanos) as u64); + let buffered_nanos = + u64::try_from(written_nanos.saturating_sub(consumed_nanos)).unwrap_or(u64::MAX); + let buffered = Duration::from_nanos(buffered_nanos); let playback = callback + (buffered + stream.stream_latency); Ok(OutputStreamTimestamp { callback, playback }) From 582d5e7474bc1414f10154180a07c03f3faeb68e Mon Sep 17 00:00:00 2001 From: Roderick van Domburg Date: Thu, 11 Jun 2026 21:14:32 +0200 Subject: [PATCH 4/7] refactor(wasapi): simplify frequency and frames_written handling --- src/host/wasapi/device.rs | 30 ++++++---------------------- src/host/wasapi/stream.rs | 42 ++++++++++++++++++++++++++++----------- 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/src/host/wasapi/device.rs b/src/host/wasapi/device.rs index 18ae7060a..f00c7d6cb 100644 --- a/src/host/wasapi/device.rs +++ b/src/host/wasapi/device.rs @@ -1,5 +1,4 @@ use std::{ - cell::Cell, ffi::OsString, fmt, hash::Hash, @@ -917,7 +916,9 @@ impl Device { // `run()` method and added to the `RunContext`. let client_flow = AudioClientFlow::Capture { capture_client }; - let (audio_clock, clock_frequency) = get_audio_clock(&audio_client)?; + let audio_clock = audio_client + .GetService::() + .context("Failed to get audio clock")?; let stream_latency = { let hns = audio_client @@ -929,8 +930,6 @@ impl Device { Ok(StreamInner { audio_client, audio_clock, - clock_frequency, - frames_written: Cell::new(0), client_flow, event, playing: false, @@ -1032,7 +1031,9 @@ impl Device { // `run()` method and added to the `RunContext`. let client_flow = AudioClientFlow::Render { render_client }; - let (audio_clock, clock_frequency) = get_audio_clock(&audio_client)?; + let audio_clock = audio_client + .GetService::() + .context("Failed to get audio clock")?; let stream_latency = { let hns = audio_client @@ -1044,8 +1045,6 @@ impl Device { Ok(StreamInner { audio_client, audio_clock, - clock_frequency, - frames_written: Cell::new(0), client_flow, event, playing: false, @@ -1346,23 +1345,6 @@ impl From for DeviceDirection { } } -/// Get the audio clock used to produce `StreamInstant`s, together with its (constant) frequency. -unsafe fn get_audio_clock( - audio_client: &Audio::IAudioClient, -) -> Result<(Audio::IAudioClock, u64), Error> { - let audio_clock = audio_client - .GetService::() - .context("Failed to get audio clock")?; - let clock_frequency = audio_clock - .GetFrequency() - .context("Failed to get audio clock frequency")?; - debug_assert_ne!( - clock_frequency, 0, - "IAudioClock::GetFrequency returned zero" - ); - Ok((audio_clock, clock_frequency)) -} - // Sample rate range supported by the Media Foundation Resampler MFT used by AUTOCONVERTPCM. const OUTPUT_MIN_SAMPLE_RATE: SampleRate = 8_000; const OUTPUT_MAX_SAMPLE_RATE: SampleRate = 384_000; diff --git a/src/host/wasapi/stream.rs b/src/host/wasapi/stream.rs index 19e11ba5d..0614c93f2 100644 --- a/src/host/wasapi/stream.rs +++ b/src/host/wasapi/stream.rs @@ -1,5 +1,4 @@ use std::{ - cell::Cell, mem, ops::ControlFlow, ptr, @@ -288,10 +287,6 @@ pub enum AudioClientFlow { pub struct StreamInner { pub audio_client: Audio::IAudioClient, pub audio_clock: Audio::IAudioClock, - // Cached (constant) frequency of `audio_clock`. - pub clock_frequency: u64, - // Running total of frames submitted to the render buffer. - pub frames_written: Cell, pub client_flow: AudioClientFlow, // Event that is signalled by WASAPI whenever audio data must be written. pub event: Foundation::HANDLE, @@ -653,6 +648,21 @@ fn run_output( emit_error(error_callback, err); } + // The clock frequency is constant for the stream's lifetime. + let clock_frequency = match unsafe { run_ctxt.stream.audio_clock.GetFrequency() } + .context("Failed to get audio clock frequency") + { + Ok(frequency) => { + debug_assert_ne!(frequency, 0, "IAudioClock::GetFrequency returned zero"); + frequency + } + Err(err) => { + emit_error(error_callback, err); + return; + } + }; + let mut frames_written: u64 = 0; + loop { match process_commands_and_await_signal(&mut run_ctxt, error_callback) { ControlFlow::Break(()) => break, @@ -663,7 +673,13 @@ fn run_output( AudioClientFlow::Render { ref render_client } => render_client.clone(), _ => unreachable!(), }; - if let Err(err) = process_output(&run_ctxt.stream, render_client, data_callback) { + if let Err(err) = process_output( + &run_ctxt.stream, + render_client, + data_callback, + clock_frequency, + &mut frames_written, + ) { emit_error(error_callback, err); break; } @@ -789,6 +805,8 @@ fn process_output( stream: &StreamInner, render_client: Audio::IAudioRenderClient, data_callback: &mut dyn FnMut(&mut Data, &OutputCallbackInfo), + clock_frequency: u64, + frames_written: &mut u64, ) -> Result<(), Error> { // The number of frames available for writing. let frames_available = match get_available_frames(stream)? { @@ -811,7 +829,7 @@ fn process_output( let len = byte_count / stream.sample_format.sample_size(); let mut data = Data::from_parts(data, len, stream.sample_format); let sample_rate = stream.config.sample_rate; - let timestamp = output_timestamp(stream, sample_rate)?; + let timestamp = output_timestamp(stream, sample_rate, clock_frequency, *frames_written)?; let info = OutputCallbackInfo { timestamp }; data_callback(&mut data, &info); @@ -819,9 +837,7 @@ fn process_output( .ReleaseBuffer(frames_available, 0) .map_err(Error::from)?; - stream - .frames_written - .set(stream.frames_written.get() + frames_available as u64); + *frames_written += frames_available as u64; } Ok(()) @@ -875,12 +891,14 @@ fn input_timestamp( fn output_timestamp( stream: &StreamInner, sample_rate: SampleRate, + clock_frequency: u64, + frames_written: u64, ) -> Result { let (callback, position) = clock_position(stream)?; // `padding` is the number of frames already queued in the endpoint buffer ahead of the // frames we are about to write. Those frames must drain before ours are heard. - let consumed_nanos = position as u128 * 1_000_000_000 / stream.clock_frequency as u128; - let written_nanos = stream.frames_written.get() as u128 * 1_000_000_000 / sample_rate as u128; + let consumed_nanos = position as u128 * 1_000_000_000 / clock_frequency as u128; + let written_nanos = frames_written as u128 * 1_000_000_000 / sample_rate as u128; let buffered_nanos = u64::try_from(written_nanos.saturating_sub(consumed_nanos)).unwrap_or(u64::MAX); let buffered = Duration::from_nanos(buffered_nanos); From 3dd3db74c4b8ba7c222a24bdb347c4ba2a4e6939 Mon Sep 17 00:00:00 2001 From: Roderick van Domburg Date: Thu, 11 Jun 2026 21:28:37 +0200 Subject: [PATCH 5/7] fix: reject zero clock frequency --- src/host/wasapi/stream.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/host/wasapi/stream.rs b/src/host/wasapi/stream.rs index 0614c93f2..88625ce26 100644 --- a/src/host/wasapi/stream.rs +++ b/src/host/wasapi/stream.rs @@ -652,10 +652,17 @@ fn run_output( let clock_frequency = match unsafe { run_ctxt.stream.audio_clock.GetFrequency() } .context("Failed to get audio clock frequency") { - Ok(frequency) => { - debug_assert_ne!(frequency, 0, "IAudioClock::GetFrequency returned zero"); - frequency + Ok(0) => { + emit_error( + error_callback, + Error::with_message( + ErrorKind::BackendError, + "IAudioClock::GetFrequency returned zero", + ), + ); + return; } + Ok(frequency) => frequency, Err(err) => { emit_error(error_callback, err); return; From 16d78f76a142e56e8a383962cf2691d1b12d56a2 Mon Sep 17 00:00:00 2001 From: Roderick van Domburg Date: Sun, 14 Jun 2026 12:15:35 +0200 Subject: [PATCH 6/7] refactor(wasapi): error conversion and documentation cleanups --- src/host/wasapi/stream.rs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/host/wasapi/stream.rs b/src/host/wasapi/stream.rs index 88625ce26..014eb347b 100644 --- a/src/host/wasapi/stream.rs +++ b/src/host/wasapi/stream.rs @@ -822,9 +822,7 @@ fn process_output( }; unsafe { - let buffer = render_client - .GetBuffer(frames_available) - .map_err(Error::from)?; + let buffer = render_client.GetBuffer(frames_available)?; debug_assert!(!buffer.is_null()); @@ -840,9 +838,7 @@ fn process_output( let info = OutputCallbackInfo { timestamp }; data_callback(&mut data, &info); - render_client - .ReleaseBuffer(frames_available, 0) - .map_err(Error::from)?; + render_client.ReleaseBuffer(frames_available, 0)?; *frames_written += frames_available as u64; } @@ -850,8 +846,8 @@ fn process_output( Ok(()) } -/// Atomically reads the stream's `IAudioClock`, returning the callback [`StreamInstant`] -/// together with the device position. +/// Reads the stream's `IAudioClock` in a single `GetPosition` call, returning the callback +/// [`StreamInstant`] together with the device position from that same snapshot. #[inline] fn clock_position(stream: &StreamInner) -> Result<(StreamInstant, u64), Error> { let mut position: u64 = 0; @@ -894,6 +890,12 @@ fn input_timestamp( /// Produce the output stream timestamp. /// /// `sample_rate` is the rate at which audio frames are processed by the device. +/// +/// `clock_frequency` is the device clock's constant tick rate, used to convert the reported clock +/// position into a played-out duration. +/// +/// `frames_written` is the running total of frames submitted to the render buffer so far, used to +/// derive how much audio is buffered ahead of the device position. #[inline] fn output_timestamp( stream: &StreamInner, From 57892cf1910ae76ba2fc2d2aaea05fdfe5e6400c Mon Sep 17 00:00:00 2001 From: Roderick van Domburg Date: Sun, 14 Jun 2026 12:32:41 +0200 Subject: [PATCH 7/7] fix: address review point --- src/host/wasapi/stream.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/host/wasapi/stream.rs b/src/host/wasapi/stream.rs index 014eb347b..4da238001 100644 --- a/src/host/wasapi/stream.rs +++ b/src/host/wasapi/stream.rs @@ -904,8 +904,8 @@ fn output_timestamp( frames_written: u64, ) -> Result { let (callback, position) = clock_position(stream)?; - // `padding` is the number of frames already queued in the endpoint buffer ahead of the - // frames we are about to write. Those frames must drain before ours are heard. + // `buffered` is the amount of audio we've already submitted that has not yet been consumed by + // the device at this instant; it determines when the next written frame will be heard. let consumed_nanos = position as u128 * 1_000_000_000 / clock_frequency as u128; let written_nanos = frames_written as u128 * 1_000_000_000 / sample_rate as u128; let buffered_nanos =