diff --git a/CHANGELOG.md b/CHANGELOG.md index e842468fe..ff3da38e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **WASAPI**: The `windows` and `windows-core` dependencies are now both pinned to 0.62. +### Fixed + +- **WASAPI**: Fix output `playback` timestamps occasionally stepping backwards. + ## [0.18.1] - 2026-06-07 ### Fixed diff --git a/src/host/wasapi/device.rs b/src/host/wasapi/device.rs index 3333cd45e..f00c7d6cb 100644 --- a/src/host/wasapi/device.rs +++ b/src/host/wasapi/device.rs @@ -1,23 +1,3 @@ -use crate::{ - error::ResultExt, - host::{com::ComString, ErrorCallbackArc}, - BufferSize, Data, DeviceDescription, DeviceDescriptionBuilder, DeviceDirection, DeviceId, - DeviceType, Error, ErrorKind, FrameCount, InputCallbackInfo, InterfaceType, OutputCallbackInfo, - SampleFormat, SampleRate, StreamConfig, SupportedBufferSize, SupportedStreamConfig, - SupportedStreamConfigRange, COMMON_SAMPLE_RATES, -}; - -impl From<Audio::EDataFlow> for DeviceDirection { - fn from(data_flow: Audio::EDataFlow) -> Self { - if data_flow == Audio::eCapture { - DeviceDirection::Input - } else if data_flow == Audio::eRender { - DeviceDirection::Output - } else { - DeviceDirection::Unknown - } - } -} use std::{ ffi::OsString, fmt, @@ -29,6 +9,15 @@ use std::{ time::Duration, }; +use crate::{ + error::ResultExt, + host::{com::ComString, ErrorCallbackArc}, + BufferSize, Data, DeviceDescription, DeviceDescriptionBuilder, DeviceDirection, DeviceId, + DeviceType, Error, ErrorKind, FrameCount, InputCallbackInfo, InterfaceType, OutputCallbackInfo, + SampleFormat, SampleRate, StreamConfig, SupportedBufferSize, SupportedStreamConfig, + SupportedStreamConfigRange, COMMON_SAMPLE_RATES, +}; + use windows::{ core::{Interface, GUID}, Win32::{ @@ -927,7 +916,9 @@ impl Device { // `run()` method and added to the `RunContext`. 
let client_flow = AudioClientFlow::Capture { capture_client }; - let audio_clock = get_audio_clock(&audio_client)?; + let audio_clock = audio_client + .GetService::<Audio::IAudioClock>() + .context("Failed to get audio clock")?; let stream_latency = { let hns = audio_client @@ -1040,7 +1031,9 @@ impl Device { // `run()` method and added to the `RunContext`. let client_flow = AudioClientFlow::Render { render_client }; - let audio_clock = get_audio_clock(&audio_client)?; + let audio_clock = audio_client + .GetService::<Audio::IAudioClock>() + .context("Failed to get audio clock")?; let stream_latency = { let hns = audio_client @@ -1340,11 +1333,16 @@ pub fn default_output_device() -> Option<Device> { current_default_endpoint(Audio::eRender).map(|_| Device::default_output()) } -/// Get the audio clock used to produce `StreamInstant`s. -unsafe fn get_audio_clock(audio_client: &Audio::IAudioClient) -> Result<Audio::IAudioClock, Error> { - audio_client - .GetService::<Audio::IAudioClock>() - .context("Failed to get audio clock") +impl From<Audio::EDataFlow> for DeviceDirection { + fn from(data_flow: Audio::EDataFlow) -> Self { + if data_flow == Audio::eCapture { + DeviceDirection::Input + } else if data_flow == Audio::eRender { + DeviceDirection::Output + } else { + DeviceDirection::Unknown + } + } } // Sample rate range supported by the Media Foundation Resampler MFT used by AUTOCONVERTPCM. 
diff --git a/src/host/wasapi/stream.rs b/src/host/wasapi/stream.rs index 817cf3786..4da238001 100644 --- a/src/host/wasapi/stream.rs +++ b/src/host/wasapi/stream.rs @@ -18,10 +18,7 @@ use windows::Win32::{ }; use crate::{ - host::{ - emit_error, equilibrium::fill_equilibrium, frames_to_duration, latch::Latch, - ErrorCallbackArc, - }, + host::{emit_error, equilibrium::fill_equilibrium, latch::Latch, ErrorCallbackArc}, traits::StreamTrait, Data, Error, ErrorKind, FrameCount, InputCallbackInfo, InputStreamTimestamp, OutputCallbackInfo, OutputStreamTimestamp, ResultExt, SampleFormat, SampleRate, StreamConfig, @@ -651,6 +648,28 @@ fn run_output( emit_error(error_callback, err); } + // The clock frequency is constant for the stream's lifetime. + let clock_frequency = match unsafe { run_ctxt.stream.audio_clock.GetFrequency() } + .context("Failed to get audio clock frequency") + { + Ok(0) => { + emit_error( + error_callback, + Error::with_message( + ErrorKind::BackendError, + "IAudioClock::GetFrequency returned zero", + ), + ); + return; + } + Ok(frequency) => frequency, + Err(err) => { + emit_error(error_callback, err); + return; + } + }; + let mut frames_written: u64 = 0; + loop { match process_commands_and_await_signal(&mut run_ctxt, error_callback) { ControlFlow::Break(()) => break, @@ -661,7 +680,13 @@ fn run_output( AudioClientFlow::Render { ref render_client } => render_client.clone(), _ => unreachable!(), }; - if let Err(err) = process_output(&run_ctxt.stream, render_client, data_callback) { + if let Err(err) = process_output( + &run_ctxt.stream, + render_client, + data_callback, + clock_frequency, + &mut frames_written, + ) { emit_error(error_callback, err); break; } @@ -787,6 +812,8 @@ fn process_output( stream: &StreamInner, render_client: Audio::IAudioRenderClient, data_callback: &mut dyn FnMut(&mut Data, &OutputCallbackInfo), + clock_frequency: u64, + frames_written: &mut u64, ) -> Result<(), Error> { // The number of frames available for writing. 
let frames_available = match get_available_frames(stream)? { @@ -795,9 +822,7 @@ fn process_output( }; unsafe { - let buffer = render_client - .GetBuffer(frames_available) - .map_err(Error::from)?; + let buffer = render_client.GetBuffer(frames_available)?; debug_assert!(!buffer.is_null()); @@ -809,23 +834,22 @@ fn process_output( let len = byte_count / stream.sample_format.sample_size(); let mut data = Data::from_parts(data, len, stream.sample_format); let sample_rate = stream.config.sample_rate; - let timestamp = output_timestamp(stream, frames_available, sample_rate)?; + let timestamp = output_timestamp(stream, sample_rate, clock_frequency, *frames_written)?; let info = OutputCallbackInfo { timestamp }; data_callback(&mut data, &info); - render_client - .ReleaseBuffer(frames_available, 0) - .map_err(Error::from)?; + render_client.ReleaseBuffer(frames_available, 0)?; + + *frames_written += frames_available as u64; } Ok(()) } -/// Use the stream's `IAudioClock` to produce the current stream instant. -/// -/// Uses the QPC position produced via the `GetPosition` method. +/// Reads the stream's `IAudioClock` in a single `GetPosition` call, returning the callback +/// [`StreamInstant`] together with the device position from that same snapshot. #[inline] -fn stream_instant(stream: &StreamInner) -> Result<StreamInstant, Error> { +fn clock_position(stream: &StreamInner) -> Result<(StreamInstant, u64), Error> { let mut position: u64 = 0; let mut qpc_position: u64 = 0; unsafe { @@ -840,7 +864,7 @@ fn stream_instant(stream: &StreamInner) -> Result<StreamInstant, Error> { (nanos / 1_000_000_000) as u64, (nanos % 1_000_000_000) as u32, ); - Ok(instant) + Ok((instant, position)) } /// Produce the input stream timestamp. @@ -859,26 +883,35 @@ fn input_timestamp( (nanos / 1_000_000_000) as u64, (nanos % 1_000_000_000) as u32, ); - let callback = stream_instant(stream)?; + let (callback, _position) = clock_position(stream)?; Ok(InputStreamTimestamp { capture, callback }) } /// Produce the output stream timestamp. 
/// -/// `frames_available` is the number of frames available for writing as reported by subtracting the -/// result of `GetCurrentPadding` from the maximum buffer size. -/// /// `sample_rate` is the rate at which audio frames are processed by the device. +/// +/// `clock_frequency` is the device clock's constant tick rate, used to convert the reported clock +/// position into a played-out duration. +/// +/// `frames_written` is the running total of frames submitted to the render buffer so far, used to +/// derive how much audio is buffered ahead of the device position. #[inline] fn output_timestamp( stream: &StreamInner, - frames_available: FrameCount, sample_rate: SampleRate, + clock_frequency: u64, + frames_written: u64, ) -> Result<OutputStreamTimestamp, Error> { - let callback = stream_instant(stream)?; - // `padding` is the number of frames already queued in the endpoint buffer ahead of the - // frames we are about to write. Those frames must drain before ours are heard. - let padding = stream.max_frames_in_buffer - frames_available; - let playback = callback + (frames_to_duration(padding, sample_rate) + stream.stream_latency); + let (callback, position) = clock_position(stream)?; + // `buffered` is the amount of audio we've already submitted that has not yet been consumed by + // the device at this instant; it determines when the next written frame will be heard. + let consumed_nanos = position as u128 * 1_000_000_000 / clock_frequency as u128; + let written_nanos = frames_written as u128 * 1_000_000_000 / sample_rate as u128; + let buffered_nanos = + u64::try_from(written_nanos.saturating_sub(consumed_nanos)).unwrap_or(u64::MAX); + let buffered = Duration::from_nanos(buffered_nanos); + + let playback = callback + (buffered + stream.stream_latency); Ok(OutputStreamTimestamp { callback, playback }) }