diff --git a/CHANGELOG.md b/CHANGELOG.md
index e842468fe..ff3da38e0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - **WASAPI**: The `windows` and `windows-core` dependencies are now both pinned to 0.62.
 
+### Fixed
+
+- **WASAPI**: Fix output `playback` timestamps occasionally stepping backwards.
+
 ## [0.18.1] - 2026-06-07
 
 ### Fixed
diff --git a/src/host/wasapi/device.rs b/src/host/wasapi/device.rs
index 3333cd45e..f00c7d6cb 100644
--- a/src/host/wasapi/device.rs
+++ b/src/host/wasapi/device.rs
@@ -1,23 +1,3 @@
-use crate::{
-    error::ResultExt,
-    host::{com::ComString, ErrorCallbackArc},
-    BufferSize, Data, DeviceDescription, DeviceDescriptionBuilder, DeviceDirection, DeviceId,
-    DeviceType, Error, ErrorKind, FrameCount, InputCallbackInfo, InterfaceType, OutputCallbackInfo,
-    SampleFormat, SampleRate, StreamConfig, SupportedBufferSize, SupportedStreamConfig,
-    SupportedStreamConfigRange, COMMON_SAMPLE_RATES,
-};
-
-impl From<Audio::EDataFlow> for DeviceDirection {
-    fn from(data_flow: Audio::EDataFlow) -> Self {
-        if data_flow == Audio::eCapture {
-            DeviceDirection::Input
-        } else if data_flow == Audio::eRender {
-            DeviceDirection::Output
-        } else {
-            DeviceDirection::Unknown
-        }
-    }
-}
 use std::{
     ffi::OsString,
     fmt,
@@ -29,6 +9,15 @@ use std::{
     time::Duration,
 };
 
+use crate::{
+    error::ResultExt,
+    host::{com::ComString, ErrorCallbackArc},
+    BufferSize, Data, DeviceDescription, DeviceDescriptionBuilder, DeviceDirection, DeviceId,
+    DeviceType, Error, ErrorKind, FrameCount, InputCallbackInfo, InterfaceType, OutputCallbackInfo,
+    SampleFormat, SampleRate, StreamConfig, SupportedBufferSize, SupportedStreamConfig,
+    SupportedStreamConfigRange, COMMON_SAMPLE_RATES,
+};
+
 use windows::{
     core::{Interface, GUID},
     Win32::{
@@ -927,7 +916,9 @@ impl Device {
             // `run()` method and added to the `RunContext`.
             let client_flow = AudioClientFlow::Capture { capture_client };
 
-            let audio_clock = get_audio_clock(&audio_client)?;
+            let audio_clock = audio_client
+                .GetService::<Audio::IAudioClock>()
+                .context("Failed to get audio clock")?;
 
             let stream_latency = {
                 let hns = audio_client
@@ -1040,7 +1031,9 @@ impl Device {
             // `run()` method and added to the `RunContext`.
             let client_flow = AudioClientFlow::Render { render_client };
 
-            let audio_clock = get_audio_clock(&audio_client)?;
+            let audio_clock = audio_client
+                .GetService::<Audio::IAudioClock>()
+                .context("Failed to get audio clock")?;
 
             let stream_latency = {
                 let hns = audio_client
@@ -1340,11 +1333,16 @@ pub fn default_output_device() -> Option<Device> {
     current_default_endpoint(Audio::eRender).map(|_| Device::default_output())
 }
 
-/// Get the audio clock used to produce `StreamInstant`s.
-unsafe fn get_audio_clock(audio_client: &Audio::IAudioClient) -> Result<Audio::IAudioClock, Error> {
-    audio_client
-        .GetService::<Audio::IAudioClock>()
-        .context("Failed to get audio clock")
+impl From<Audio::EDataFlow> for DeviceDirection {
+    fn from(data_flow: Audio::EDataFlow) -> Self {
+        if data_flow == Audio::eCapture {
+            DeviceDirection::Input
+        } else if data_flow == Audio::eRender {
+            DeviceDirection::Output
+        } else {
+            DeviceDirection::Unknown
+        }
+    }
 }
 
 // Sample rate range supported by the Media Foundation Resampler MFT used by AUTOCONVERTPCM.
diff --git a/src/host/wasapi/stream.rs b/src/host/wasapi/stream.rs
index 817cf3786..4da238001 100644
--- a/src/host/wasapi/stream.rs
+++ b/src/host/wasapi/stream.rs
@@ -18,10 +18,7 @@ use windows::Win32::{
 };
 
 use crate::{
-    host::{
-        emit_error, equilibrium::fill_equilibrium, frames_to_duration, latch::Latch,
-        ErrorCallbackArc,
-    },
+    host::{emit_error, equilibrium::fill_equilibrium, latch::Latch, ErrorCallbackArc},
     traits::StreamTrait,
     Data, Error, ErrorKind, FrameCount, InputCallbackInfo, InputStreamTimestamp,
     OutputCallbackInfo, OutputStreamTimestamp, ResultExt, SampleFormat, SampleRate, StreamConfig,
@@ -651,6 +648,28 @@ fn run_output(
         emit_error(error_callback, err);
     }
 
+    // The clock frequency is constant for the stream's lifetime.
+    let clock_frequency = match unsafe { run_ctxt.stream.audio_clock.GetFrequency() }
+        .context("Failed to get audio clock frequency")
+    {
+        Ok(0) => {
+            emit_error(
+                error_callback,
+                Error::with_message(
+                    ErrorKind::BackendError,
+                    "IAudioClock::GetFrequency returned zero",
+                ),
+            );
+            return;
+        }
+        Ok(frequency) => frequency,
+        Err(err) => {
+            emit_error(error_callback, err);
+            return;
+        }
+    };
+    let mut frames_written: u64 = 0;
+
     loop {
         match process_commands_and_await_signal(&mut run_ctxt, error_callback) {
             ControlFlow::Break(()) => break,
@@ -661,7 +680,13 @@ fn run_output(
             AudioClientFlow::Render { ref render_client } => render_client.clone(),
             _ => unreachable!(),
         };
-        if let Err(err) = process_output(&run_ctxt.stream, render_client, data_callback) {
+        if let Err(err) = process_output(
+            &run_ctxt.stream,
+            render_client,
+            data_callback,
+            clock_frequency,
+            &mut frames_written,
+        ) {
             emit_error(error_callback, err);
             break;
         }
@@ -787,6 +812,8 @@ fn process_output(
     stream: &StreamInner,
     render_client: Audio::IAudioRenderClient,
     data_callback: &mut dyn FnMut(&mut Data, &OutputCallbackInfo),
+    clock_frequency: u64,
+    frames_written: &mut u64,
 ) -> Result<(), Error> {
     // The number of frames available for writing.
     let frames_available = match get_available_frames(stream)? {
@@ -795,9 +822,7 @@ fn process_output(
     };
 
     unsafe {
-        let buffer = render_client
-            .GetBuffer(frames_available)
-            .map_err(Error::from)?;
+        let buffer = render_client.GetBuffer(frames_available)?;
 
         debug_assert!(!buffer.is_null());
 
@@ -809,23 +834,22 @@ fn process_output(
         let len = byte_count / stream.sample_format.sample_size();
         let mut data = Data::from_parts(data, len, stream.sample_format);
         let sample_rate = stream.config.sample_rate;
-        let timestamp = output_timestamp(stream, frames_available, sample_rate)?;
+        let timestamp = output_timestamp(stream, sample_rate, clock_frequency, *frames_written)?;
         let info = OutputCallbackInfo { timestamp };
         data_callback(&mut data, &info);
 
-        render_client
-            .ReleaseBuffer(frames_available, 0)
-            .map_err(Error::from)?;
+        render_client.ReleaseBuffer(frames_available, 0)?;
+
+        *frames_written += frames_available as u64;
     }
 
     Ok(())
 }
 
-/// Use the stream's `IAudioClock` to produce the current stream instant.
-///
-/// Uses the QPC position produced via the `GetPosition` method.
+/// Reads the stream's `IAudioClock` in a single `GetPosition` call, returning the callback
+/// [`StreamInstant`] and the device position (in `GetFrequency` ticks) from that same snapshot.
 #[inline]
-fn stream_instant(stream: &StreamInner) -> Result<StreamInstant, Error> {
+fn clock_position(stream: &StreamInner) -> Result<(StreamInstant, u64), Error> {
     let mut position: u64 = 0;
     let mut qpc_position: u64 = 0;
     unsafe {
@@ -840,7 +864,7 @@ fn stream_instant(stream: &StreamInner) -> Result<StreamInstant, Error> {
         (nanos / 1_000_000_000) as u64,
         (nanos % 1_000_000_000) as u32,
     );
-    Ok(instant)
+    Ok((instant, position))
 }
 
 /// Produce the input stream timestamp.
@@ -859,26 +883,35 @@ fn input_timestamp(
         (nanos / 1_000_000_000) as u64,
         (nanos % 1_000_000_000) as u32,
     );
-    let callback = stream_instant(stream)?;
+    let (callback, _position) = clock_position(stream)?;
     Ok(InputStreamTimestamp { capture, callback })
 }
 
 /// Produce the output stream timestamp.
 ///
-/// `frames_available` is the number of frames available for writing as reported by subtracting the
-/// result of `GetCurrentPadding` from the maximum buffer size.
-///
 /// `sample_rate` is the rate at which audio frames are processed by the device.
+///
+/// `clock_frequency` is the device clock's constant tick rate, used to convert the reported clock
+/// position into a played-out duration. It must be non-zero, as it is used as a divisor.
+///
+/// `frames_written` is the running total of frames submitted to the render buffer so far, used to
+/// derive how much audio is buffered ahead of the device position.
 #[inline]
 fn output_timestamp(
     stream: &StreamInner,
-    frames_available: FrameCount,
     sample_rate: SampleRate,
+    clock_frequency: u64,
+    frames_written: u64,
 ) -> Result<OutputStreamTimestamp, Error> {
-    let callback = stream_instant(stream)?;
-    // `padding` is the number of frames already queued in the endpoint buffer ahead of the
-    // frames we are about to write. Those frames must drain before ours are heard.
-    let padding = stream.max_frames_in_buffer - frames_available;
-    let playback = callback + (frames_to_duration(padding, sample_rate) + stream.stream_latency);
+    let (callback, position) = clock_position(stream)?;
+    // `buffered` is the audio we've submitted that the device has not yet consumed at this
+    // instant (clamped to zero if the clock runs ahead); the next written frame plays after it.
+    let consumed_nanos = position as u128 * 1_000_000_000 / clock_frequency as u128;
+    let written_nanos = frames_written as u128 * 1_000_000_000 / sample_rate as u128;
+    let buffered_nanos =
+        u64::try_from(written_nanos.saturating_sub(consumed_nanos)).unwrap_or(u64::MAX);
+    let buffered = Duration::from_nanos(buffered_nanos);
+
+    let playback = callback + (buffered + stream.stream_latency);
     Ok(OutputStreamTimestamp { callback, playback })
 }