diff --git a/apps/desktop/src/routes/editor/PerformanceOverlay.tsx b/apps/desktop/src/routes/editor/PerformanceOverlay.tsx index 6f3307a28b..10b46ad7d6 100644 --- a/apps/desktop/src/routes/editor/PerformanceOverlay.tsx +++ b/apps/desktop/src/routes/editor/PerformanceOverlay.tsx @@ -9,6 +9,7 @@ import { } from "solid-js"; import toast from "solid-toast"; import { useEditorContext } from "./context"; +import { getFpsStats } from "~/utils/socket"; type PerformanceOverlayProps = { size: { width: number; height: number }; @@ -24,6 +25,52 @@ type FrameStats = { totalFrames: number; }; +type TransportStats = { + renderFps: number; + mbPerSec: number; + sabResizes: number; + sabFallbacks: number; + sabOversizeFallbacks: number; + sabRetryLimitFallbacks: number; + sabRetriesInFlight: number; + sabSlotSizeBytes: number; + sabSlotCount: number; + sabTotalBytes: number; + workerFramesInFlight: number; + workerInFlightBackpressureHits: number; + workerInFlightBackpressureWindowHits: number; + workerFramesInFlightPeakWindow: number; + workerFramesInFlightPeakTotal: number; + workerInFlightSupersededDrops: number; + workerInFlightSupersededDropsWindow: number; + renderedFromSharedTotal: number; + renderedFromSharedWindow: number; + renderedFromWorkerTotal: number; + renderedFromWorkerWindow: number; + queuedOutOfOrderDropsTotal: number; + queuedOutOfOrderDropsWindow: number; + directOutOfOrderDropsTotal: number; + directOutOfOrderDropsWindow: number; + directIngressOutOfOrderDropsTotal: number; + directIngressOutOfOrderDropsWindow: number; + directResponseOutOfOrderDropsTotal: number; + directResponseOutOfOrderDropsWindow: number; + strideCorrectionInFlight: number; + strideCorrectionPending: number; + strideCorrectionDispatchesTotal: number; + strideCorrectionDispatchesWindow: number; + strideCorrectionSupersededDropsTotal: number; + strideCorrectionSupersededDropsWindow: number; + strideCorrectionErrorsTotal: number; + strideCorrectionErrorsWindow: number; + 
sabTotalRetryAttempts: number; + sabTotalFramesReceived: number; + sabTotalFramesWrittenToSharedBuffer: number; + sabTotalFramesSentToWorker: number; + sabTotalWorkerFallbackBytes: number; + sabTotalSupersededDrops: number; +}; + const STATS_WINDOW_MS = 1000; const MAX_TIMESTAMPS = 120; @@ -45,6 +92,51 @@ export function PerformanceOverlay(_props: PerformanceOverlayProps) { droppedFrames: 0, totalFrames: 0, }); + const [transportStats, setTransportStats] = createSignal({ + renderFps: 0, + mbPerSec: 0, + sabResizes: 0, + sabFallbacks: 0, + sabOversizeFallbacks: 0, + sabRetryLimitFallbacks: 0, + sabRetriesInFlight: 0, + sabSlotSizeBytes: 0, + sabSlotCount: 0, + sabTotalBytes: 0, + workerFramesInFlight: 0, + workerInFlightBackpressureHits: 0, + workerInFlightBackpressureWindowHits: 0, + workerFramesInFlightPeakWindow: 0, + workerFramesInFlightPeakTotal: 0, + workerInFlightSupersededDrops: 0, + workerInFlightSupersededDropsWindow: 0, + renderedFromSharedTotal: 0, + renderedFromSharedWindow: 0, + renderedFromWorkerTotal: 0, + renderedFromWorkerWindow: 0, + queuedOutOfOrderDropsTotal: 0, + queuedOutOfOrderDropsWindow: 0, + directOutOfOrderDropsTotal: 0, + directOutOfOrderDropsWindow: 0, + directIngressOutOfOrderDropsTotal: 0, + directIngressOutOfOrderDropsWindow: 0, + directResponseOutOfOrderDropsTotal: 0, + directResponseOutOfOrderDropsWindow: 0, + strideCorrectionInFlight: 0, + strideCorrectionPending: 0, + strideCorrectionDispatchesTotal: 0, + strideCorrectionDispatchesWindow: 0, + strideCorrectionSupersededDropsTotal: 0, + strideCorrectionSupersededDropsWindow: 0, + strideCorrectionErrorsTotal: 0, + strideCorrectionErrorsWindow: 0, + sabTotalRetryAttempts: 0, + sabTotalFramesReceived: 0, + sabTotalFramesWrittenToSharedBuffer: 0, + sabTotalFramesSentToWorker: 0, + sabTotalWorkerFallbackBytes: 0, + sabTotalSupersededDrops: 0, + }); const calculateStats = (): FrameStats => { const now = performance.now(); @@ -156,26 +248,236 @@ export function PerformanceOverlay(_props: 
PerformanceOverlayProps) { }); }; + const resetTransportStats = () => { + setTransportStats({ + renderFps: 0, + mbPerSec: 0, + sabResizes: 0, + sabFallbacks: 0, + sabOversizeFallbacks: 0, + sabRetryLimitFallbacks: 0, + sabRetriesInFlight: 0, + sabSlotSizeBytes: 0, + sabSlotCount: 0, + sabTotalBytes: 0, + workerFramesInFlight: 0, + workerInFlightBackpressureHits: 0, + workerInFlightBackpressureWindowHits: 0, + workerFramesInFlightPeakWindow: 0, + workerFramesInFlightPeakTotal: 0, + workerInFlightSupersededDrops: 0, + workerInFlightSupersededDropsWindow: 0, + renderedFromSharedTotal: 0, + renderedFromSharedWindow: 0, + renderedFromWorkerTotal: 0, + renderedFromWorkerWindow: 0, + queuedOutOfOrderDropsTotal: 0, + queuedOutOfOrderDropsWindow: 0, + directOutOfOrderDropsTotal: 0, + directOutOfOrderDropsWindow: 0, + directIngressOutOfOrderDropsTotal: 0, + directIngressOutOfOrderDropsWindow: 0, + directResponseOutOfOrderDropsTotal: 0, + directResponseOutOfOrderDropsWindow: 0, + strideCorrectionInFlight: 0, + strideCorrectionPending: 0, + strideCorrectionDispatchesTotal: 0, + strideCorrectionDispatchesWindow: 0, + strideCorrectionSupersededDropsTotal: 0, + strideCorrectionSupersededDropsWindow: 0, + strideCorrectionErrorsTotal: 0, + strideCorrectionErrorsWindow: 0, + sabTotalRetryAttempts: 0, + sabTotalFramesReceived: 0, + sabTotalFramesWrittenToSharedBuffer: 0, + sabTotalFramesSentToWorker: 0, + sabTotalWorkerFallbackBytes: 0, + sabTotalSupersededDrops: 0, + }); + }; + createEffect(() => { if (!performanceMode()) { resetStats(); + resetTransportStats(); + } + }); + + createEffect(() => { + if (!performanceMode()) { + return; } + const updateTransportStats = () => { + const socketStats = getFpsStats(); + if (!socketStats) { + return; + } + setTransportStats({ + renderFps: socketStats.renderFps, + mbPerSec: socketStats.mbPerSec, + sabResizes: socketStats.sabResizes, + sabFallbacks: socketStats.sabFallbacks, + sabOversizeFallbacks: socketStats.sabOversizeFallbacks, + 
sabRetryLimitFallbacks: socketStats.sabRetryLimitFallbacks, + sabRetriesInFlight: socketStats.sabRetriesInFlight, + sabSlotSizeBytes: socketStats.sabSlotSizeBytes, + sabSlotCount: socketStats.sabSlotCount, + sabTotalBytes: socketStats.sabTotalBytes, + workerFramesInFlight: socketStats.workerFramesInFlight, + workerInFlightBackpressureHits: + socketStats.workerInFlightBackpressureHits, + workerInFlightBackpressureWindowHits: + socketStats.workerInFlightBackpressureWindowHits, + workerFramesInFlightPeakWindow: + socketStats.workerFramesInFlightPeakWindow, + workerFramesInFlightPeakTotal: + socketStats.workerFramesInFlightPeakTotal, + workerInFlightSupersededDrops: + socketStats.workerInFlightSupersededDrops, + workerInFlightSupersededDropsWindow: + socketStats.workerInFlightSupersededDropsWindow, + renderedFromSharedTotal: socketStats.renderedFromSharedTotal, + renderedFromSharedWindow: socketStats.renderedFromSharedWindow, + renderedFromWorkerTotal: socketStats.renderedFromWorkerTotal, + renderedFromWorkerWindow: socketStats.renderedFromWorkerWindow, + queuedOutOfOrderDropsTotal: socketStats.queuedOutOfOrderDropsTotal, + queuedOutOfOrderDropsWindow: socketStats.queuedOutOfOrderDropsWindow, + directOutOfOrderDropsTotal: socketStats.directOutOfOrderDropsTotal, + directOutOfOrderDropsWindow: socketStats.directOutOfOrderDropsWindow, + directIngressOutOfOrderDropsTotal: + socketStats.directIngressOutOfOrderDropsTotal, + directIngressOutOfOrderDropsWindow: + socketStats.directIngressOutOfOrderDropsWindow, + directResponseOutOfOrderDropsTotal: + socketStats.directResponseOutOfOrderDropsTotal, + directResponseOutOfOrderDropsWindow: + socketStats.directResponseOutOfOrderDropsWindow, + strideCorrectionInFlight: socketStats.strideCorrectionInFlight, + strideCorrectionPending: socketStats.strideCorrectionPending, + strideCorrectionDispatchesTotal: + socketStats.strideCorrectionDispatchesTotal, + strideCorrectionDispatchesWindow: + socketStats.strideCorrectionDispatchesWindow, + 
strideCorrectionSupersededDropsTotal: + socketStats.strideCorrectionSupersededDropsTotal, + strideCorrectionSupersededDropsWindow: + socketStats.strideCorrectionSupersededDropsWindow, + strideCorrectionErrorsTotal: socketStats.strideCorrectionErrorsTotal, + strideCorrectionErrorsWindow: socketStats.strideCorrectionErrorsWindow, + sabTotalRetryAttempts: socketStats.sabTotalRetryAttempts, + sabTotalFramesReceived: socketStats.sabTotalFramesReceived, + sabTotalFramesWrittenToSharedBuffer: + socketStats.sabTotalFramesWrittenToSharedBuffer, + sabTotalFramesSentToWorker: socketStats.sabTotalFramesSentToWorker, + sabTotalWorkerFallbackBytes: socketStats.sabTotalWorkerFallbackBytes, + sabTotalSupersededDrops: socketStats.sabTotalSupersededDrops, + }); + }; + updateTransportStats(); + const interval = setInterval(updateTransportStats, 250); + onCleanup(() => clearInterval(interval)); }); onCleanup(() => { resetStats(); + resetTransportStats(); }); const formatFps = (fps: number) => fps.toFixed(1); const formatMs = (ms: number) => ms.toFixed(2); + const formatMb = (value: number) => value.toFixed(1); + const formatSlotMb = (bytes: number) => (bytes / (1024 * 1024)).toFixed(1); + const formatPct = (value: number) => value.toFixed(1); + const totalTransportedFrames = () => + transportStats().sabTotalFramesWrittenToSharedBuffer + + transportStats().sabTotalFramesSentToWorker; + const sabFrameSharePct = () => { + const total = totalTransportedFrames(); + return total > 0 + ? (transportStats().sabTotalFramesWrittenToSharedBuffer / total) * 100 + : 0; + }; + const workerFrameSharePct = () => { + const total = totalTransportedFrames(); + return total > 0 + ? (transportStats().sabTotalFramesSentToWorker / total) * 100 + : 0; + }; + const supersededDropPct = () => + transportStats().sabTotalFramesReceived > 0 + ? 
(transportStats().sabTotalSupersededDrops / + transportStats().sabTotalFramesReceived) * + 100 + : 0; const copyStatsToClipboard = async () => { const s = stats(); + const t = transportStats(); + const totalTransported = + t.sabTotalFramesWrittenToSharedBuffer + t.sabTotalFramesSentToWorker; + const sabSharePct = + totalTransported > 0 + ? (t.sabTotalFramesWrittenToSharedBuffer / totalTransported) * 100 + : 0; + const workerSharePct = + totalTransported > 0 + ? (t.sabTotalFramesSentToWorker / totalTransported) * 100 + : 0; + const supersededPct = + t.sabTotalFramesReceived > 0 + ? (t.sabTotalSupersededDrops / t.sabTotalFramesReceived) * 100 + : 0; const statsText = [ `FPS: ${formatFps(s.fps)}`, `Frame: ${formatMs(s.avgFrameMs)}ms avg`, `Range: ${formatMs(s.minFrameMs)} - ${formatMs(s.maxFrameMs)}ms`, `Jitter: ±${formatMs(s.jitter)}ms`, + `Render FPS: ${formatFps(t.renderFps)}`, + `Transport: ${formatMb(t.mbPerSec)} MB/s`, + `SAB Slot: ${formatSlotMb(t.sabSlotSizeBytes)} MB`, + `SAB Slot Count: ${t.sabSlotCount}`, + `SAB Total: ${formatSlotMb(t.sabTotalBytes)} MB`, + `Worker Frames In Flight: ${t.workerFramesInFlight}`, + `Worker In-Flight Cap Hits: ${t.workerInFlightBackpressureHits}`, + `Worker In-Flight Cap Hits (Window): ${t.workerInFlightBackpressureWindowHits}`, + `Worker In-Flight Peak (Window): ${t.workerFramesInFlightPeakWindow}`, + `Worker In-Flight Peak (Total): ${t.workerFramesInFlightPeakTotal}`, + `Worker In-Flight Superseded Drops: ${t.workerInFlightSupersededDrops}`, + `Worker In-Flight Superseded Drops (Window): ${t.workerInFlightSupersededDropsWindow}`, + `Rendered From Shared (Total): ${t.renderedFromSharedTotal}`, + `Rendered From Shared (Window): ${t.renderedFromSharedWindow}`, + `Rendered From Worker (Total): ${t.renderedFromWorkerTotal}`, + `Rendered From Worker (Window): ${t.renderedFromWorkerWindow}`, + `Queued Out-Of-Order Drops (Total): ${t.queuedOutOfOrderDropsTotal}`, + `Queued Out-Of-Order Drops (Window): 
${t.queuedOutOfOrderDropsWindow}`, + `Direct Out-Of-Order Drops (Total): ${t.directOutOfOrderDropsTotal}`, + `Direct Out-Of-Order Drops (Window): ${t.directOutOfOrderDropsWindow}`, + `Direct Ingress Out-Of-Order Drops (Total): ${t.directIngressOutOfOrderDropsTotal}`, + `Direct Ingress Out-Of-Order Drops (Window): ${t.directIngressOutOfOrderDropsWindow}`, + `Direct Response Out-Of-Order Drops (Total): ${t.directResponseOutOfOrderDropsTotal}`, + `Direct Response Out-Of-Order Drops (Window): ${t.directResponseOutOfOrderDropsWindow}`, + `Stride Correction In Flight: ${t.strideCorrectionInFlight}`, + `Stride Correction Pending: ${t.strideCorrectionPending}`, + `Stride Correction Dispatches (Total): ${t.strideCorrectionDispatchesTotal}`, + `Stride Correction Dispatches (Window): ${t.strideCorrectionDispatchesWindow}`, + `Stride Correction Superseded Drops (Total): ${t.strideCorrectionSupersededDropsTotal}`, + `Stride Correction Superseded Drops (Window): ${t.strideCorrectionSupersededDropsWindow}`, + `Stride Correction Errors (Total): ${t.strideCorrectionErrorsTotal}`, + `Stride Correction Errors (Window): ${t.strideCorrectionErrorsWindow}`, + `SAB Retry Attempts: ${t.sabTotalRetryAttempts}`, + `SAB Frames Received: ${t.sabTotalFramesReceived}`, + `SAB Frames Written: ${t.sabTotalFramesWrittenToSharedBuffer}`, + `SAB Frames Sent to Worker: ${t.sabTotalFramesSentToWorker}`, + `SAB Fallback Transfer: ${formatSlotMb(t.sabTotalWorkerFallbackBytes)} MB`, + `SAB Superseded Drops: ${t.sabTotalSupersededDrops}`, + `SAB Frame Share: ${formatPct(sabSharePct)}%`, + `Worker Frame Share: ${formatPct(workerSharePct)}%`, + `Superseded Drop Share: ${formatPct(supersededPct)}%`, + `SAB Resizes: ${t.sabResizes}`, + `SAB Fallbacks: ${t.sabFallbacks}`, + `SAB Oversize Fallbacks: ${t.sabOversizeFallbacks}`, + `SAB Retry Limit Fallbacks: ${t.sabRetryLimitFallbacks}`, + `SAB Retries In Flight: ${t.sabRetriesInFlight}`, s.droppedFrames > 0 ? 
`Dropped: ${s.droppedFrames}/${s.totalFrames}` : null, @@ -258,6 +560,196 @@ export function PerformanceOverlay(_props: PerformanceOverlayProps) { ±{formatMs(stats().jitter)}ms +
+ Render: + + {formatFps(transportStats().renderFps)} fps + +
+
+ Transport: + + {formatMb(transportStats().mbPerSec)} MB/s + +
+
+ SAB: + + {formatSlotMb(transportStats().sabSlotSizeBytes)}MB slot + + + {" "} + / {transportStats().sabSlotCount} slots /{" "} + {formatSlotMb(transportStats().sabTotalBytes)}MB total /{" "} + {transportStats().sabResizes} resizes + +
+ 0}> +
+ SAB totals: + + {transportStats().sabTotalFramesReceived} recv + + + {" "} + / {transportStats().sabTotalFramesWrittenToSharedBuffer} sab /{" "} + {transportStats().sabTotalFramesSentToWorker} worker /{" "} + {transportStats().sabTotalSupersededDrops} superseded /{" "} + {transportStats().sabTotalRetryAttempts} retries /{" "} + {formatSlotMb(transportStats().sabTotalWorkerFallbackBytes)}MB + fallback + +
+
+ 0}> +
+ Transport split: + + {formatPct(sabFrameSharePct())}% SAB + + + {" "} + / {formatPct(workerFrameSharePct())}% worker /{" "} + {formatPct(supersededDropPct())}% superseded + +
+
+ 0}> +
+ SAB fallback {transportStats().sabFallbacks} (oversize{" "} + {transportStats().sabOversizeFallbacks}, retry-limit{" "} + {transportStats().sabRetryLimitFallbacks}) +
+
+ 0}> +
+ SAB retries in flight: {transportStats().sabRetriesInFlight} +
+
+ 0}> +
+ Worker frames in flight: {transportStats().workerFramesInFlight} +
+
+ 0}> +
+ Worker in-flight peak:{" "} + {transportStats().workerFramesInFlightPeakWindow} window /{" "} + {transportStats().workerFramesInFlightPeakTotal} total +
+
+ 0 || + transportStats().renderedFromWorkerTotal > 0 + } + > +
+ Render source: {transportStats().renderedFromSharedWindow}{" "} + shared / {transportStats().renderedFromWorkerWindow} worker + window + + {" "} + ({transportStats().renderedFromSharedTotal} shared /{" "} + {transportStats().renderedFromWorkerTotal} worker total) + +
+
+ 0}> +
+ Queued out-of-order drops:{" "} + {transportStats().queuedOutOfOrderDropsTotal} + 0}> + + {" "} + (window {transportStats().queuedOutOfOrderDropsWindow}) + + +
+
+ 0}> +
+ Direct out-of-order drops:{" "} + {transportStats().directOutOfOrderDropsTotal} + 0}> + + {" "} + (window {transportStats().directOutOfOrderDropsWindow}) + + + 0 || + transportStats().directResponseOutOfOrderDropsTotal > 0 + } + > + + {" "} + (ingress{" "} + {transportStats().directIngressOutOfOrderDropsTotal} / + response{" "} + {transportStats().directResponseOutOfOrderDropsTotal}) + + +
+
+ 0 || + transportStats().strideCorrectionPending > 0 || + transportStats().strideCorrectionDispatchesTotal > 0 || + transportStats().strideCorrectionErrorsTotal > 0 + } + > +
+ Stride correction: in-flight{" "} + {transportStats().strideCorrectionInFlight} / pending{" "} + {transportStats().strideCorrectionPending} + + {" "} + (dispatches{" "} + {transportStats().strideCorrectionDispatchesWindow} window /{" "} + {transportStats().strideCorrectionDispatchesTotal} total, + superseded{" "} + {transportStats().strideCorrectionSupersededDropsWindow}{" "} + window /{" "} + {transportStats().strideCorrectionSupersededDropsTotal} total, + errors {transportStats().strideCorrectionErrorsWindow} window + / {transportStats().strideCorrectionErrorsTotal} total) + +
+
+ 0}> +
+ Worker in-flight cap hits:{" "} + {transportStats().workerInFlightBackpressureHits} +
+
+ 0} + > +
+ Worker cap hits (window):{" "} + {transportStats().workerInFlightBackpressureWindowHits} +
+
+ 0}> +
+ Worker in-flight superseded drops:{" "} + {transportStats().workerInFlightSupersededDrops} + 0 + } + > + + {" "} + (window{" "} + {transportStats().workerInFlightSupersededDropsWindow}) + + +
+
0}>
Dropped: {stats().droppedFrames}/{stats().totalFrames} diff --git a/apps/desktop/src/utils/frame-order.test.ts b/apps/desktop/src/utils/frame-order.test.ts new file mode 100644 index 0000000000..f77ef290fb --- /dev/null +++ b/apps/desktop/src/utils/frame-order.test.ts @@ -0,0 +1,35 @@ +import { describe, expect, it } from "vitest"; +import { + frameNumberForwardDelta, + isFrameNumberNewer, + shouldDropOutOfOrderFrame, +} from "./frame-order"; + +describe("frame-order utilities", () => { + it("treats positive forward deltas as newer", () => { + expect(frameNumberForwardDelta(41, 40)).toBe(1); + expect(isFrameNumberNewer(41, 40)).toBe(true); + }); + + it("treats wraparound forward deltas as newer", () => { + expect(frameNumberForwardDelta(2, 0xffffffff)).toBe(3); + expect(isFrameNumberNewer(2, 0xffffffff)).toBe(true); + }); + + it("drops duplicate frame numbers", () => { + expect(shouldDropOutOfOrderFrame(120, 120)).toBe(true); + }); + + it("drops slightly older out-of-order frames inside stale window", () => { + expect(shouldDropOutOfOrderFrame(119, 120, 30)).toBe(true); + expect(shouldDropOutOfOrderFrame(90, 120, 30)).toBe(true); + }); + + it("keeps older frames beyond stale window as seek candidates", () => { + expect(shouldDropOutOfOrderFrame(89, 120, 30)).toBe(false); + }); + + it("keeps forward frames", () => { + expect(shouldDropOutOfOrderFrame(121, 120, 30)).toBe(false); + }); +}); diff --git a/apps/desktop/src/utils/frame-order.ts b/apps/desktop/src/utils/frame-order.ts new file mode 100644 index 0000000000..e65b53c4bd --- /dev/null +++ b/apps/desktop/src/utils/frame-order.ts @@ -0,0 +1,27 @@ +export const FRAME_ORDER_STALE_WINDOW = 30; + +export function frameNumberForwardDelta( + candidate: number, + reference: number, +): number { + return (candidate - reference) >>> 0; +} + +export function isFrameNumberNewer( + candidate: number, + reference: number, +): boolean { + const delta = frameNumberForwardDelta(candidate, reference); + return delta !== 0 && 
delta < 0x80000000; +} + +export function shouldDropOutOfOrderFrame( + candidate: number, + reference: number, + staleWindow: number = FRAME_ORDER_STALE_WINDOW, +): boolean { + if (candidate === reference) return true; + if (isFrameNumberNewer(candidate, reference)) return false; + const backwardDelta = frameNumberForwardDelta(reference, candidate); + return backwardDelta <= staleWindow; +} diff --git a/apps/desktop/src/utils/frame-transport-config.test.ts b/apps/desktop/src/utils/frame-transport-config.test.ts new file mode 100644 index 0000000000..f382da4d22 --- /dev/null +++ b/apps/desktop/src/utils/frame-transport-config.test.ts @@ -0,0 +1,30 @@ +import { describe, expect, it } from "vitest"; +import { + DEFAULT_FRAME_BUFFER_CONFIG, + FRAME_BUFFER_MAX_SLOT_SIZE, + FRAME_BUFFER_MAX_TOTAL_BYTES, + computeSharedBufferConfig, +} from "./frame-transport-config"; + +describe("frame-transport-config", () => { + it("keeps default config for small frames", () => { + const config = computeSharedBufferConfig(4 * 1024 * 1024); + expect(config.slotSize).toBe(DEFAULT_FRAME_BUFFER_CONFIG.slotSize); + expect(config.slotCount).toBe(DEFAULT_FRAME_BUFFER_CONFIG.slotCount); + }); + + it("increases slot size with aligned headroom", () => { + const config = computeSharedBufferConfig(22 * 1024 * 1024); + expect(config.slotSize).toBe(28 * 1024 * 1024); + expect(config.slotCount).toBe(4); + }); + + it("caps slot size and total memory budget", () => { + const config = computeSharedBufferConfig(80 * 1024 * 1024); + expect(config.slotSize).toBe(FRAME_BUFFER_MAX_SLOT_SIZE); + expect(config.slotCount).toBe(2); + expect(config.slotSize * config.slotCount).toBeLessThanOrEqual( + FRAME_BUFFER_MAX_TOTAL_BYTES, + ); + }); +}); diff --git a/apps/desktop/src/utils/frame-transport-config.ts b/apps/desktop/src/utils/frame-transport-config.ts new file mode 100644 index 0000000000..a48fa2bc0a --- /dev/null +++ b/apps/desktop/src/utils/frame-transport-config.ts @@ -0,0 +1,37 @@ +import type { 
SharedFrameBufferConfig } from "./shared-frame-buffer"; + +export const DEFAULT_FRAME_BUFFER_CONFIG: SharedFrameBufferConfig = { + slotCount: 6, + slotSize: 16 * 1024 * 1024, +}; + +export const FRAME_BUFFER_RESIZE_ALIGNMENT = 2 * 1024 * 1024; +export const FRAME_BUFFER_MAX_SLOT_SIZE = 64 * 1024 * 1024; +export const FRAME_BUFFER_MAX_TOTAL_BYTES = 128 * 1024 * 1024; +export const FRAME_BUFFER_MIN_SLOT_COUNT = 2; + +export function alignUp(value: number, alignment: number): number { + if (alignment <= 0) return value; + return Math.ceil(value / alignment) * alignment; +} + +export function computeSharedBufferConfig( + requiredBytes: number, + baseConfig: SharedFrameBufferConfig = DEFAULT_FRAME_BUFFER_CONFIG, +): SharedFrameBufferConfig { + const safeRequired = Math.max(requiredBytes, 0); + const withHeadroom = Math.ceil(safeRequired * 1.25); + const alignedBytes = alignUp(withHeadroom, FRAME_BUFFER_RESIZE_ALIGNMENT); + const slotSize = Math.max( + baseConfig.slotSize, + Math.min(FRAME_BUFFER_MAX_SLOT_SIZE, alignedBytes), + ); + + const maxSlotsByBudget = Math.max( + FRAME_BUFFER_MIN_SLOT_COUNT, + Math.floor(FRAME_BUFFER_MAX_TOTAL_BYTES / slotSize), + ); + const slotCount = Math.min(baseConfig.slotCount, maxSlotsByBudget); + + return { slotCount, slotSize }; +} diff --git a/apps/desktop/src/utils/frame-transport-inflight.test.ts b/apps/desktop/src/utils/frame-transport-inflight.test.ts new file mode 100644 index 0000000000..037fb5e3d0 --- /dev/null +++ b/apps/desktop/src/utils/frame-transport-inflight.test.ts @@ -0,0 +1,41 @@ +import { describe, expect, it } from "vitest"; +import { + decideWorkerInflightDispatch, + updateWorkerInflightPeaks, +} from "./frame-transport-inflight"; + +describe("frame-transport-inflight", () => { + it("dispatches when worker inflight is below limit", () => { + expect(decideWorkerInflightDispatch(1, 2, false)).toEqual({ + action: "dispatch", + nextWorkerFramesInFlight: 2, + backpressureHitsIncrement: 0, + supersededDropsIncrement: 0, + 
}); + }); + + it("returns backpressure without superseded increment when queue empty", () => { + expect(decideWorkerInflightDispatch(2, 2, false)).toEqual({ + action: "backpressure", + nextWorkerFramesInFlight: 2, + backpressureHitsIncrement: 1, + supersededDropsIncrement: 0, + }); + }); + + it("returns backpressure with superseded increment when queue occupied", () => { + expect(decideWorkerInflightDispatch(4, 2, true)).toEqual({ + action: "backpressure", + nextWorkerFramesInFlight: 4, + backpressureHitsIncrement: 1, + supersededDropsIncrement: 1, + }); + }); + + it("updates worker inflight peaks", () => { + expect(updateWorkerInflightPeaks(3, 2, 5)).toEqual({ + peakWindow: 3, + peakTotal: 5, + }); + }); +}); diff --git a/apps/desktop/src/utils/frame-transport-inflight.ts b/apps/desktop/src/utils/frame-transport-inflight.ts new file mode 100644 index 0000000000..6751150e99 --- /dev/null +++ b/apps/desktop/src/utils/frame-transport-inflight.ts @@ -0,0 +1,39 @@ +export type WorkerInflightDispatchDecision = { + action: "dispatch" | "backpressure"; + nextWorkerFramesInFlight: number; + backpressureHitsIncrement: number; + supersededDropsIncrement: number; +}; + +export function decideWorkerInflightDispatch( + workerFramesInFlight: number, + limit: number, + hasQueuedNextFrame: boolean, +): WorkerInflightDispatchDecision { + if (workerFramesInFlight >= limit) { + return { + action: "backpressure", + nextWorkerFramesInFlight: workerFramesInFlight, + backpressureHitsIncrement: 1, + supersededDropsIncrement: hasQueuedNextFrame ? 
1 : 0, + }; + } + + return { + action: "dispatch", + nextWorkerFramesInFlight: workerFramesInFlight + 1, + backpressureHitsIncrement: 0, + supersededDropsIncrement: 0, + }; +} + +export function updateWorkerInflightPeaks( + workerFramesInFlight: number, + peakWindow: number, + peakTotal: number, +) { + return { + peakWindow: Math.max(peakWindow, workerFramesInFlight), + peakTotal: Math.max(peakTotal, workerFramesInFlight), + }; +} diff --git a/apps/desktop/src/utils/frame-transport-order.test.ts b/apps/desktop/src/utils/frame-transport-order.test.ts new file mode 100644 index 0000000000..f4311dfd66 --- /dev/null +++ b/apps/desktop/src/utils/frame-transport-order.test.ts @@ -0,0 +1,67 @@ +import { describe, expect, it } from "vitest"; +import { decideFrameOrder } from "./frame-transport-order"; + +describe("decideFrameOrder", () => { + it("accepts frame when candidate is missing", () => { + const decision = decideFrameOrder(null, 120, 30); + expect(decision).toEqual({ + action: "accept", + nextLatestFrameNumber: 120, + dropsIncrement: 0, + }); + }); + + it("accepts first frame and seeds latest", () => { + const decision = decideFrameOrder(120, null, 30); + expect(decision).toEqual({ + action: "accept", + nextLatestFrameNumber: 120, + dropsIncrement: 0, + }); + }); + + it("drops short backward stale frames", () => { + const decision = decideFrameOrder(119, 120, 30); + expect(decision).toEqual({ + action: "drop", + nextLatestFrameNumber: 120, + dropsIncrement: 1, + }); + }); + + it("drops duplicate frame numbers", () => { + const decision = decideFrameOrder(120, 120, 30); + expect(decision).toEqual({ + action: "drop", + nextLatestFrameNumber: 120, + dropsIncrement: 1, + }); + }); + + it("accepts large backward jumps for seeks", () => { + const decision = decideFrameOrder(80, 120, 30); + expect(decision).toEqual({ + action: "accept", + nextLatestFrameNumber: 80, + dropsIncrement: 0, + }); + }); + + it("accepts forward progression", () => { + const decision = 
decideFrameOrder(121, 120, 30); + expect(decision).toEqual({ + action: "accept", + nextLatestFrameNumber: 121, + dropsIncrement: 0, + }); + }); + + it("accepts wraparound forward progression", () => { + const decision = decideFrameOrder(2, 0xffffffff, 30); + expect(decision).toEqual({ + action: "accept", + nextLatestFrameNumber: 2, + dropsIncrement: 0, + }); + }); +}); diff --git a/apps/desktop/src/utils/frame-transport-order.ts b/apps/desktop/src/utils/frame-transport-order.ts new file mode 100644 index 0000000000..75978d7870 --- /dev/null +++ b/apps/desktop/src/utils/frame-transport-order.ts @@ -0,0 +1,49 @@ +import { shouldDropOutOfOrderFrame } from "./frame-order"; + +export type FrameOrderDecision = { + action: "accept" | "drop"; + nextLatestFrameNumber: number | null; + dropsIncrement: number; +}; + +export function decideFrameOrder( + candidateFrameNumber: number | null, + latestFrameNumber: number | null, + staleWindow: number, +): FrameOrderDecision { + if (candidateFrameNumber === null) { + return { + action: "accept", + nextLatestFrameNumber: latestFrameNumber, + dropsIncrement: 0, + }; + } + + if (latestFrameNumber === null) { + return { + action: "accept", + nextLatestFrameNumber: candidateFrameNumber, + dropsIncrement: 0, + }; + } + + if ( + shouldDropOutOfOrderFrame( + candidateFrameNumber, + latestFrameNumber, + staleWindow, + ) + ) { + return { + action: "drop", + nextLatestFrameNumber: latestFrameNumber, + dropsIncrement: 1, + }; + } + + return { + action: "accept", + nextLatestFrameNumber: candidateFrameNumber, + dropsIncrement: 0, + }; +} diff --git a/apps/desktop/src/utils/frame-transport-retry.test.ts b/apps/desktop/src/utils/frame-transport-retry.test.ts new file mode 100644 index 0000000000..eca0186a2a --- /dev/null +++ b/apps/desktop/src/utils/frame-transport-retry.test.ts @@ -0,0 +1,28 @@ +import { describe, expect, it } from "vitest"; +import { decideSabWriteFailure } from "./frame-transport-retry"; + +describe("frame-transport-retry", () 
=> { + it("falls back immediately for oversized frames", () => { + const decision = decideSabWriteFailure(true, 0, 2); + expect(decision).toEqual({ + action: "fallback_oversize", + nextRetryCount: 0, + }); + }); + + it("retries while below retry limit", () => { + const decision = decideSabWriteFailure(false, 1, 2); + expect(decision).toEqual({ + action: "retry", + nextRetryCount: 2, + }); + }); + + it("falls back when retry limit is reached", () => { + const decision = decideSabWriteFailure(false, 2, 2); + expect(decision).toEqual({ + action: "fallback_retry_limit", + nextRetryCount: 0, + }); + }); +}); diff --git a/apps/desktop/src/utils/frame-transport-retry.ts b/apps/desktop/src/utils/frame-transport-retry.ts new file mode 100644 index 0000000000..53d5d99ccc --- /dev/null +++ b/apps/desktop/src/utils/frame-transport-retry.ts @@ -0,0 +1,20 @@ +export type SabWriteFailureDecision = + | { action: "retry"; nextRetryCount: number } + | { action: "fallback_oversize"; nextRetryCount: number } + | { action: "fallback_retry_limit"; nextRetryCount: number }; + +export function decideSabWriteFailure( + isOversized: boolean, + currentRetryCount: number, + retryLimit: number, +): SabWriteFailureDecision { + if (isOversized) { + return { action: "fallback_oversize", nextRetryCount: 0 }; + } + + if (currentRetryCount >= retryLimit) { + return { action: "fallback_retry_limit", nextRetryCount: 0 }; + } + + return { action: "retry", nextRetryCount: currentRetryCount + 1 }; +} diff --git a/apps/desktop/src/utils/frame-transport-stride.test.ts b/apps/desktop/src/utils/frame-transport-stride.test.ts new file mode 100644 index 0000000000..a7c9c2a13f --- /dev/null +++ b/apps/desktop/src/utils/frame-transport-stride.test.ts @@ -0,0 +1,37 @@ +import { describe, expect, it } from "vitest"; +import { decideStrideCorrectionDispatch } from "./frame-transport-stride"; + +describe("decideStrideCorrectionDispatch", () => { + it("dispatches immediately when no request is in flight", () => { + 
const decision = decideStrideCorrectionDispatch(false, false); + expect(decision).toEqual({ + action: "dispatch", + nextInFlight: true, + nextHasPending: false, + supersededDropsIncrement: 0, + dispatchesIncrement: 1, + }); + }); + + it("queues request when worker is in flight without pending", () => { + const decision = decideStrideCorrectionDispatch(true, false); + expect(decision).toEqual({ + action: "queue", + nextInFlight: true, + nextHasPending: true, + supersededDropsIncrement: 0, + dispatchesIncrement: 0, + }); + }); + + it("queues and supersedes older pending request", () => { + const decision = decideStrideCorrectionDispatch(true, true); + expect(decision).toEqual({ + action: "queue", + nextInFlight: true, + nextHasPending: true, + supersededDropsIncrement: 1, + dispatchesIncrement: 0, + }); + }); +}); diff --git a/apps/desktop/src/utils/frame-transport-stride.ts b/apps/desktop/src/utils/frame-transport-stride.ts new file mode 100644 index 0000000000..fdb83e3367 --- /dev/null +++ b/apps/desktop/src/utils/frame-transport-stride.ts @@ -0,0 +1,30 @@ +export type StrideCorrectionDispatchDecision = { + action: "dispatch" | "queue"; + nextInFlight: boolean; + nextHasPending: boolean; + supersededDropsIncrement: number; + dispatchesIncrement: number; +}; + +export function decideStrideCorrectionDispatch( + inFlight: boolean, + hasPending: boolean, +): StrideCorrectionDispatchDecision { + if (!inFlight) { + return { + action: "dispatch", + nextInFlight: true, + nextHasPending: hasPending, + supersededDropsIncrement: 0, + dispatchesIncrement: 1, + }; + } + + return { + action: "queue", + nextInFlight: true, + nextHasPending: true, + supersededDropsIncrement: hasPending ? 
1 : 0, + dispatchesIncrement: 0, + }; +} diff --git a/apps/desktop/src/utils/frame-worker.ts b/apps/desktop/src/utils/frame-worker.ts index 0afb995088..7941677ff7 100644 --- a/apps/desktop/src/utils/frame-worker.ts +++ b/apps/desktop/src/utils/frame-worker.ts @@ -1,10 +1,14 @@ import { type Consumer, createConsumer } from "./shared-frame-buffer"; +import { + frameNumberForwardDelta, + isFrameNumberNewer, + shouldDropOutOfOrderFrame, +} from "./frame-order"; import { disposeWebGPU, initWebGPU, isWebGPUSupported, renderFrameWebGPU, - renderNv12FrameWebGPU, type WebGPURenderer, } from "./webgpu-renderer"; @@ -45,12 +49,7 @@ interface FrameRenderedMessage { type: "frame-rendered"; width: number; height: number; -} - -interface FrameQueuedMessage { - type: "frame-queued"; - width: number; - height: number; + source: "shared" | "worker"; } interface RendererModeMessage { @@ -58,13 +57,6 @@ interface RendererModeMessage { mode: "webgpu" | "canvas2d"; } -interface DecodedFrame { - type: "decoded"; - bitmap: ImageBitmap; - width: number; - height: number; -} - interface ErrorMessage { type: "error"; message: string; @@ -76,9 +68,7 @@ interface RequestFrameMessage { export type { FrameRenderedMessage, - FrameQueuedMessage, RendererModeMessage, - DecodedFrame, ErrorMessage, ReadyMessage, RequestFrameMessage, @@ -97,38 +87,29 @@ interface FrameTiming { targetTimeNs: bigint; } +type FrameSource = "shared" | "worker"; + interface PendingFrameCanvas2D { mode: "canvas2d"; imageData: ImageData; width: number; height: number; timing: FrameTiming; + source: FrameSource; } interface PendingFrameWebGPURgba { mode: "webgpu"; - pixelFormat: "rgba"; data: Uint8ClampedArray; width: number; height: number; strideBytes: number; timing: FrameTiming; + source: FrameSource; releaseCallback?: () => void; } -interface PendingFrameWebGPUNv12 { - mode: "webgpu"; - pixelFormat: "nv12"; - data: Uint8ClampedArray; - width: number; - height: number; - yStride: number; - timing: FrameTiming; - 
releaseCallback?: () => void; -} - -type PendingFrameWebGPU = PendingFrameWebGPURgba | PendingFrameWebGPUNv12; -type PendingFrame = PendingFrameCanvas2D | PendingFrameWebGPU; +type PendingFrame = PendingFrameCanvas2D | PendingFrameWebGPURgba; let workerReady = false; let isInitializing = false; @@ -149,44 +130,19 @@ let cachedImageData: ImageData | null = null; let cachedWidth = 0; let cachedHeight = 0; -let lastRawFrameData: Uint8ClampedArray | null = null; -let lastRawFrameWidth = 0; -let lastRawFrameHeight = 0; - let consumer: Consumer | null = null; let useSharedBuffer = false; -let sharedReadBuffer: Uint8Array | null = null; -let sharedReadBufferSize = 0; -const FRAME_QUEUE_SIZE = 5; -let frameQueue: PendingFrame[] = []; +let queuedFrame: PendingFrame | null = null; let _rafId: number | null = null; let rafRunning = false; let playbackStartTime: number | null = null; let playbackStartTargetTimeNs: bigint | null = null; let lastRenderedFrameNumber = -1; +const FRAME_ORDER_SEEK_THRESHOLD = 30; -function tryPollSharedBuffer(): boolean { - if (!consumer || !useSharedBuffer) return false; - - if (renderMode !== "webgpu") { - if (!sharedReadBuffer || sharedReadBufferSize < consumer.getSlotSize()) { - sharedReadBuffer = new Uint8Array(consumer.getSlotSize()); - sharedReadBufferSize = sharedReadBuffer.byteLength; - } - - const size = consumer.readInto(sharedReadBuffer, 0); - if (size != null && size > 0) { - queueFrameFromBytes(sharedReadBuffer.subarray(0, size)); - return true; - } - } - return false; -} - -interface FrameMetadataRgba { - format: "rgba"; +interface FrameMetadata { width: number; height: number; strideBytes: number; @@ -195,63 +151,9 @@ interface FrameMetadataRgba { availableLength: number; } -interface FrameMetadataNv12 { - format: "nv12"; - width: number; - height: number; - yStride: number; - frameNumber: number; - targetTimeNs: bigint; - ySize: number; - uvSize: number; - totalSize: number; -} - -type FrameMetadata = FrameMetadataRgba | 
FrameMetadataNv12; - -const NV12_MAGIC = 0x4e563132; - function parseFrameMetadata(bytes: Uint8Array): FrameMetadata | null { if (bytes.byteLength < 24) return null; - if (bytes.byteLength >= 28) { - const formatOffset = bytes.byteOffset + bytes.byteLength - 4; - const formatView = new DataView(bytes.buffer, formatOffset, 4); - const formatFlag = formatView.getUint32(0, true); - - if (formatFlag === NV12_MAGIC) { - const metadataOffset = bytes.byteOffset + bytes.byteLength - 28; - const meta = new DataView(bytes.buffer, metadataOffset, 28); - const yStride = meta.getUint32(0, true); - const height = meta.getUint32(4, true); - const width = meta.getUint32(8, true); - const frameNumber = meta.getUint32(12, true); - const targetTimeNs = meta.getBigUint64(16, true); - - if (!width || !height) return null; - - const ySize = yStride * height; - const uvSize = width * (height / 2); - const totalSize = ySize + uvSize; - - if (bytes.byteLength - 28 < totalSize) { - return null; - } - - return { - format: "nv12", - width, - height, - yStride, - frameNumber, - targetTimeNs, - ySize, - uvSize, - totalSize, - }; - } - } - const metadataOffset = bytes.byteOffset + bytes.byteLength - 24; const meta = new DataView(bytes.buffer, metadataOffset, 24); const strideBytes = meta.getUint32(0, true); @@ -274,7 +176,6 @@ function parseFrameMetadata(bytes: Uint8Array): FrameMetadata | null { } return { - format: "rgba", width, height, strideBytes, @@ -284,62 +185,6 @@ function parseFrameMetadata(bytes: Uint8Array): FrameMetadata | null { }; } -let nv12ConversionBuffer: Uint8ClampedArray | null = null; -let nv12ConversionBufferSize = 0; - -function convertNv12ToRgba( - nv12Data: Uint8ClampedArray, - width: number, - height: number, - yStride: number, -): Uint8ClampedArray { - const rgbaSize = width * height * 4; - if (!nv12ConversionBuffer || nv12ConversionBufferSize < rgbaSize) { - nv12ConversionBuffer = new Uint8ClampedArray(rgbaSize); - nv12ConversionBufferSize = rgbaSize; - } - const 
rgba = nv12ConversionBuffer; - - const ySize = yStride * height; - const yPlane = nv12Data; - const uvPlane = nv12Data.subarray(ySize); - const uvStride = width; - - for (let row = 0; row < height; row++) { - const yRowOffset = row * yStride; - const uvRowOffset = Math.floor(row / 2) * uvStride; - const rgbaRowOffset = row * width * 4; - - for (let col = 0; col < width; col++) { - const y = yPlane[yRowOffset + col] - 16; - - const uvCol = Math.floor(col / 2) * 2; - const u = uvPlane[uvRowOffset + uvCol] - 128; - const v = uvPlane[uvRowOffset + uvCol + 1] - 128; - - const c = 298 * y; - const d = u; - const e = v; - - let r = (c + 409 * e + 128) >> 8; - let g = (c - 100 * d - 208 * e + 128) >> 8; - let b = (c + 516 * d + 128) >> 8; - - r = r < 0 ? 0 : r > 255 ? 255 : r; - g = g < 0 ? 0 : g > 255 ? 255 : g; - b = b < 0 ? 0 : b > 255 ? 255 : b; - - const rgbaOffset = rgbaRowOffset + col * 4; - rgba[rgbaOffset] = r; - rgba[rgbaOffset + 1] = g; - rgba[rgbaOffset + 2] = b; - rgba[rgbaOffset + 3] = 255; - } - } - - return rgba.subarray(0, rgbaSize); -} - function renderBorrowedWebGPU(bytes: Uint8Array, release: () => void): boolean { if ( (renderMode !== "webgpu" && renderMode !== "pending") || @@ -357,10 +202,23 @@ function renderBorrowedWebGPU(bytes: Uint8Array, release: () => void): boolean { const { width, height, frameNumber, targetTimeNs } = meta; + if ( + lastRenderedFrameNumber >= 0 && + shouldDropOutOfOrderFrame( + frameNumber, + lastRenderedFrameNumber, + FRAME_ORDER_SEEK_THRESHOLD, + ) + ) { + release(); + return false; + } + const isSeek = lastRenderedFrameNumber >= 0 && - (frameNumber < lastRenderedFrameNumber || - frameNumber > lastRenderedFrameNumber + 30); + (!isFrameNumberNewer(frameNumber, lastRenderedFrameNumber) || + frameNumberForwardDelta(frameNumber, lastRenderedFrameNumber) > + FRAME_ORDER_SEEK_THRESHOLD); if ( playbackStartTime === null || @@ -373,40 +231,19 @@ function renderBorrowedWebGPU(bytes: Uint8Array, release: () => void): boolean { 
lastRenderedFrameNumber = frameNumber; - if (meta.format === "nv12") { - const frameData = new Uint8ClampedArray( - bytes.buffer, - bytes.byteOffset, - meta.totalSize, - ); - renderNv12FrameWebGPU( - webgpuRenderer, - frameData, - width, - height, - meta.yStride, - ); - release(); - } else { - const frameData = new Uint8ClampedArray( - bytes.buffer, - bytes.byteOffset, - bytes.byteLength - 24, - ).subarray(0, meta.availableLength); - renderFrameWebGPU( - webgpuRenderer, - frameData, - width, - height, - meta.strideBytes, - ); - release(); - } + const frameData = new Uint8ClampedArray( + bytes.buffer, + bytes.byteOffset, + bytes.byteLength - 24, + ).subarray(0, meta.availableLength); + renderFrameWebGPU(webgpuRenderer, frameData, width, height, meta.strideBytes); + release(); self.postMessage({ type: "frame-rendered", width, height, + source: "shared", } satisfies FrameRenderedMessage); return true; @@ -435,62 +272,84 @@ function drainAndRenderLatestSharedWebGPU(maxDrain: number): boolean { return renderBorrowedWebGPU(latest.bytes, latest.release); } +function drainAndQueueLatestSharedFrame(maxDrain: number): boolean { + if (!consumer || !useSharedBuffer || consumer.isShutdown()) return false; + if (renderMode === "webgpu") return false; + + let latest: { bytes: Uint8Array; release: () => void } | null = null; + + for (let i = 0; i < maxDrain; i += 1) { + const borrowed = consumer.borrow(0); + if (!borrowed) break; + + if (latest) { + latest.release(); + } + latest = { bytes: borrowed.data, release: borrowed.release }; + } + + if (!latest) return false; + + queueFrameFromBytes(latest.bytes, latest.release, "shared"); + return true; +} + +function clearQueuedFrames() { + if (queuedFrame?.mode === "webgpu" && queuedFrame.releaseCallback) { + queuedFrame.releaseCallback(); + } + queuedFrame = null; +} + function queueFrameFromBytes( bytes: Uint8Array, releaseCallback?: () => void, -): void { + source: FrameSource = "worker", +): boolean { const meta = 
parseFrameMetadata(bytes); if (!meta) { releaseCallback?.(); - return; + return false; } const { width, height, frameNumber, targetTimeNs } = meta; const timing: FrameTiming = { frameNumber, targetTimeNs }; + const referenceFrameNumber = + queuedFrame?.timing.frameNumber ?? + (lastRenderedFrameNumber >= 0 ? lastRenderedFrameNumber : null); + + if ( + referenceFrameNumber !== null && + shouldDropOutOfOrderFrame( + frameNumber, + referenceFrameNumber, + FRAME_ORDER_SEEK_THRESHOLD, + ) + ) { + releaseCallback?.(); + return false; + } if (renderMode === "webgpu" || renderMode === "pending") { - for (const queued of frameQueue) { - if (queued.mode === "webgpu" && queued.releaseCallback) { - queued.releaseCallback(); - } - } - frameQueue = frameQueue.filter((f) => f.mode !== "webgpu"); + clearQueuedFrames(); - if (meta.format === "nv12") { - const frameData = new Uint8ClampedArray( - bytes.buffer, - bytes.byteOffset, - meta.totalSize, - ); - frameQueue.push({ - mode: "webgpu", - pixelFormat: "nv12", - data: frameData, - width, - height, - yStride: meta.yStride, - timing, - releaseCallback, - }); - } else { - const metadataSize = 24; - const frameData = new Uint8ClampedArray( - bytes.buffer, - bytes.byteOffset, - bytes.byteLength - metadataSize, - ); - frameQueue.push({ - mode: "webgpu", - pixelFormat: "rgba", - data: frameData.subarray(0, meta.availableLength), - width, - height, - strideBytes: meta.strideBytes, - timing, - releaseCallback, - }); - } - } else if (meta.format === "rgba") { + const metadataSize = 24; + const frameData = new Uint8ClampedArray( + bytes.buffer, + bytes.byteOffset, + bytes.byteLength - metadataSize, + ); + queuedFrame = { + mode: "webgpu", + data: frameData.subarray(0, meta.availableLength), + width, + height, + strideBytes: meta.strideBytes, + timing, + source, + releaseCallback, + }; + } else { const expectedRowBytes = width * 4; const metadataSize = 24; const frameData = new Uint8ClampedArray( @@ -519,13 +378,6 @@ function 
queueFrameFromBytes( processedFrameData = strideBuffer.subarray(0, expectedLength); } - if (!lastRawFrameData || lastRawFrameData.length < expectedLength) { - lastRawFrameData = new Uint8ClampedArray(expectedLength); - } - lastRawFrameData.set(processedFrameData); - lastRawFrameWidth = width; - lastRawFrameHeight = height; - if (!cachedImageData || cachedWidth !== width || cachedHeight !== height) { cachedImageData = new ImageData(width, height); cachedWidth = width; @@ -535,25 +387,20 @@ function queueFrameFromBytes( lastImageData = cachedImageData; releaseCallback?.(); + clearQueuedFrames(); - while (frameQueue.length >= FRAME_QUEUE_SIZE) { - frameQueue.shift(); - } - - frameQueue.push({ + queuedFrame = { mode: "canvas2d", imageData: cachedImageData, width, height, timing, - }); + source, + }; } - self.postMessage({ - type: "frame-queued", - width, - height, - } satisfies FrameQueuedMessage); + startRenderLoop(); + return true; } function renderLoop() { @@ -565,7 +412,7 @@ function renderLoop() { : offscreenCanvas !== null && offscreenCtx !== null; if (!hasRenderer) { - if (renderMode === "pending" && frameQueue.length > 0) { + if (renderMode === "pending" && queuedFrame !== null) { _rafId = requestAnimationFrame(renderLoop); return; } @@ -582,51 +429,23 @@ function renderLoop() { } } - let polled = 0; - while (polled < 4 && tryPollSharedBuffer()) { - polled++; - } - } - - let frameToRender: PendingFrame | null = null; - let frameIndex = -1; - - for (let i = 0; i < frameQueue.length; i++) { - const frame = frameQueue[i]; - if ( - frameToRender === null || - frame.timing.frameNumber > frameToRender.timing.frameNumber - ) { - frameToRender = frame; - frameIndex = i; + if (renderMode === "canvas2d") { + drainAndQueueLatestSharedFrame(4); + } else if (renderMode === "pending") { + drainAndQueueLatestSharedFrame(4); } } - if (frameToRender !== null) { - for (let i = frameQueue.length - 1; i >= 0; i--) { - if (i !== frameIndex) { - const oldFrame = frameQueue[i]; - if 
(oldFrame.mode === "webgpu" && oldFrame.releaseCallback) { - oldFrame.releaseCallback(); - } - frameQueue.splice(i, 1); - if (i < frameIndex) { - frameIndex--; - } - } - } - } - - if (frameToRender !== null && frameIndex >= 0) { - const frame = frameToRender; + const frame = queuedFrame; + if (frame) { if (frame.mode === "webgpu" && !webgpuRenderer) { if (renderMode === "pending") { _rafId = requestAnimationFrame(renderLoop); return; } if (renderMode === "canvas2d" && offscreenCanvas && offscreenCtx) { - frameQueue.splice(frameIndex, 1); + queuedFrame = null; lastRenderedFrameNumber = frame.timing.frameNumber; if ( @@ -637,34 +456,25 @@ function renderLoop() { offscreenCanvas.height = frame.height; } + const expectedRowBytes = frame.width * 4; let rgbaData: Uint8ClampedArray; - if (frame.pixelFormat === "nv12") { - rgbaData = convertNv12ToRgba( - frame.data, - frame.width, - frame.height, - frame.yStride, - ); + if (frame.strideBytes === expectedRowBytes) { + rgbaData = frame.data; } else { - const expectedRowBytes = frame.width * 4; - if (frame.strideBytes === expectedRowBytes) { - rgbaData = frame.data; - } else { - const expectedLength = expectedRowBytes * frame.height; - if (!strideBuffer || strideBufferSize < expectedLength) { - strideBuffer = new Uint8ClampedArray(expectedLength); - strideBufferSize = expectedLength; - } - for (let row = 0; row < frame.height; row += 1) { - const srcStart = row * frame.strideBytes; - const destStart = row * expectedRowBytes; - strideBuffer.set( - frame.data.subarray(srcStart, srcStart + expectedRowBytes), - destStart, - ); - } - rgbaData = strideBuffer.subarray(0, expectedLength); + const expectedLength = expectedRowBytes * frame.height; + if (!strideBuffer || strideBufferSize < expectedLength) { + strideBuffer = new Uint8ClampedArray(expectedLength); + strideBufferSize = expectedLength; } + for (let row = 0; row < frame.height; row += 1) { + const srcStart = row * frame.strideBytes; + const destStart = row * 
expectedRowBytes; + strideBuffer.set( + frame.data.subarray(srcStart, srcStart + expectedRowBytes), + destStart, + ); + } + rgbaData = strideBuffer.subarray(0, expectedLength); } if ( @@ -687,10 +497,11 @@ function renderLoop() { type: "frame-rendered", width: frame.width, height: frame.height, + source: frame.source, } satisfies FrameRenderedMessage); const shouldContinue = - frameQueue.length > 0 || + queuedFrame !== null || (useSharedBuffer && consumer && !consumer.isShutdown()); if (shouldContinue) { @@ -704,27 +515,17 @@ function renderLoop() { return; } - frameQueue.splice(frameIndex, 1); + queuedFrame = null; lastRenderedFrameNumber = frame.timing.frameNumber; if (frame.mode === "webgpu" && webgpuRenderer) { - if (frame.pixelFormat === "nv12") { - renderNv12FrameWebGPU( - webgpuRenderer, - frame.data, - frame.width, - frame.height, - frame.yStride, - ); - } else { - renderFrameWebGPU( - webgpuRenderer, - frame.data, - frame.width, - frame.height, - frame.strideBytes, - ); - } + renderFrameWebGPU( + webgpuRenderer, + frame.data, + frame.width, + frame.height, + frame.strideBytes, + ); if (frame.releaseCallback) { frame.releaseCallback(); } @@ -745,11 +546,12 @@ function renderLoop() { type: "frame-rendered", width: frame.width, height: frame.height, + source: frame.source, } satisfies FrameRenderedMessage); } const shouldContinue = - frameQueue.length > 0 || + queuedFrame !== null || (useSharedBuffer && consumer && !consumer.isShutdown()); if (shouldContinue) { @@ -789,12 +591,7 @@ function stopRenderLoop() { function cleanup() { stopRenderLoop(); - for (const frame of frameQueue) { - if (frame.mode === "webgpu" && frame.releaseCallback) { - frame.releaseCallback(); - } - } - frameQueue = []; + clearQueuedFrames(); if (webgpuRenderer) { disposeWebGPU(webgpuRenderer); webgpuRenderer = null; @@ -803,17 +600,12 @@ function cleanup() { offscreenCtx = null; consumer = null; useSharedBuffer = false; - sharedReadBuffer = null; - sharedReadBufferSize = 0; 
lastImageData = null; cachedImageData = null; cachedWidth = 0; cachedHeight = 0; strideBuffer = null; strideBufferSize = 0; - lastRawFrameData = null; - lastRawFrameWidth = 0; - lastRawFrameHeight = 0; playbackStartTime = null; playbackStartTargetTimeNs = null; lastRenderedFrameNumber = -1; @@ -882,21 +674,22 @@ async function initCanvas(canvas: OffscreenCanvas): Promise { if ( renderMode === "webgpu" && webgpuRenderer && - lastRawFrameData && - lastRawFrameWidth > 0 && - lastRawFrameHeight > 0 + lastImageData && + lastImageData.width > 0 && + lastImageData.height > 0 ) { renderFrameWebGPU( webgpuRenderer, - lastRawFrameData, - lastRawFrameWidth, - lastRawFrameHeight, - lastRawFrameWidth * 4, + lastImageData.data, + lastImageData.width, + lastImageData.height, + lastImageData.width * 4, ); self.postMessage({ type: "frame-rendered", - width: lastRawFrameWidth, - height: lastRawFrameHeight, + width: lastImageData.width, + height: lastImageData.height, + source: "worker", } satisfies FrameRenderedMessage); frameRendered = true; } else if (renderMode === "canvas2d" && lastImageData && offscreenCtx) { @@ -907,6 +700,7 @@ async function initCanvas(canvas: OffscreenCanvas): Promise { type: "frame-rendered", width: lastImageData.width, height: lastImageData.height, + source: "worker", } satisfies FrameRenderedMessage); frameRendered = true; } else if (renderMode === "canvas2d" && offscreenCtx) { @@ -929,155 +723,6 @@ async function initCanvas(canvas: OffscreenCanvas): Promise { return initializationPromise; } -type DecodeResult = FrameQueuedMessage | DecodedFrame | ErrorMessage; - -function processFrameBytesSync( - bytes: Uint8Array, - releaseCallback?: () => void, -): DecodeResult { - if (bytes.byteLength < 24) { - releaseCallback?.(); - return { - type: "error", - message: "Received frame too small to contain metadata", - }; - } - - const meta = parseFrameMetadata(bytes); - if (!meta) { - releaseCallback?.(); - return { - type: "error", - message: "Failed to parse frame 
metadata", - }; - } - - const { width, height, frameNumber, targetTimeNs } = meta; - const timing: FrameTiming = { frameNumber, targetTimeNs }; - - if (renderMode === "webgpu" || renderMode === "pending") { - while (frameQueue.length >= FRAME_QUEUE_SIZE) { - const dropped = frameQueue.shift(); - if (dropped?.mode === "webgpu" && dropped.releaseCallback) { - dropped.releaseCallback(); - } - } - - if (meta.format === "nv12") { - const frameData = new Uint8ClampedArray( - bytes.buffer, - bytes.byteOffset, - meta.totalSize, - ); - frameQueue.push({ - mode: "webgpu", - pixelFormat: "nv12", - data: frameData, - width, - height, - yStride: meta.yStride, - timing, - releaseCallback, - }); - } else { - const frameData = new Uint8ClampedArray( - bytes.buffer, - bytes.byteOffset, - bytes.byteLength - 24, - ); - frameQueue.push({ - mode: "webgpu", - pixelFormat: "rgba", - data: frameData.subarray(0, meta.availableLength), - width, - height, - strideBytes: meta.strideBytes, - timing, - releaseCallback, - }); - } - startRenderLoop(); - return { type: "frame-queued", width, height }; - } - - const expectedRowBytes = width * 4; - const expectedLength = expectedRowBytes * height; - let processedFrameData: Uint8ClampedArray; - - if (meta.format === "nv12") { - const nv12FrameData = new Uint8ClampedArray( - bytes.buffer, - bytes.byteOffset, - meta.totalSize, - ); - processedFrameData = convertNv12ToRgba( - nv12FrameData, - width, - height, - meta.yStride, - ); - } else { - const frameData = new Uint8ClampedArray( - bytes.buffer, - bytes.byteOffset, - bytes.byteLength - 24, - ); - - if (meta.strideBytes === expectedRowBytes) { - processedFrameData = frameData.subarray(0, expectedLength); - } else { - if (!strideBuffer || strideBufferSize < expectedLength) { - strideBuffer = new Uint8ClampedArray(expectedLength); - strideBufferSize = expectedLength; - } - for (let row = 0; row < height; row += 1) { - const srcStart = row * meta.strideBytes; - const destStart = row * expectedRowBytes; - 
strideBuffer.set( - frameData.subarray(srcStart, srcStart + expectedRowBytes), - destStart, - ); - } - processedFrameData = strideBuffer.subarray(0, expectedLength); - } - } - - if (!lastRawFrameData || lastRawFrameData.length < expectedLength) { - lastRawFrameData = new Uint8ClampedArray(expectedLength); - } - lastRawFrameData.set(processedFrameData); - lastRawFrameWidth = width; - lastRawFrameHeight = height; - - if (!cachedImageData || cachedWidth !== width || cachedHeight !== height) { - cachedImageData = new ImageData(width, height); - cachedWidth = width; - cachedHeight = height; - } - cachedImageData.data.set(processedFrameData); - lastImageData = cachedImageData; - - releaseCallback?.(); - - while (frameQueue.length >= FRAME_QUEUE_SIZE) { - frameQueue.shift(); - } - - frameQueue.push({ - mode: "canvas2d", - imageData: cachedImageData, - width, - height, - timing, - }); - - if (offscreenCanvas && offscreenCtx) { - startRenderLoop(); - } - - return { type: "frame-queued", width, height }; -} - self.onmessage = async (e: MessageEvent) => { if (e.data.type === "cleanup") { cleanup(); @@ -1088,20 +733,13 @@ self.onmessage = async (e: MessageEvent) => { lastRenderedFrameNumber = -1; playbackStartTime = null; playbackStartTargetTimeNs = null; - for (const frame of frameQueue) { - if (frame.mode === "webgpu" && frame.releaseCallback) { - frame.releaseCallback(); - } - } - frameQueue = []; + clearQueuedFrames(); return; } if (e.data.type === "init-shared-buffer") { consumer = createConsumer(e.data.buffer); useSharedBuffer = true; - sharedReadBuffer = null; - sharedReadBufferSize = 0; if (workerReady) { startRenderLoop(); @@ -1143,10 +781,16 @@ self.onmessage = async (e: MessageEvent) => { } if (e.data.type === "frame") { - const result = processFrameBytesSync(new Uint8Array(e.data.buffer)); - if (result.type === "frame-queued") { - self.postMessage(result); - } else if (result.type === "error") { + const queued = queueFrameFromBytes( + new Uint8Array(e.data.buffer), 
+ undefined, + "worker", + ); + if (!queued) { + const result: ErrorMessage = { + type: "error", + message: "Failed to parse frame metadata", + }; self.postMessage(result); } } diff --git a/apps/desktop/src/utils/shared-frame-buffer.test.ts b/apps/desktop/src/utils/shared-frame-buffer.test.ts new file mode 100644 index 0000000000..718057b1f8 --- /dev/null +++ b/apps/desktop/src/utils/shared-frame-buffer.test.ts @@ -0,0 +1,164 @@ +import { describe, expect, it } from "vitest"; +import { + createConsumer, + createProducer, + createSharedFrameBuffer, + frameAge, +} from "./shared-frame-buffer"; + +function makeFrame(...bytes: number[]): ArrayBuffer { + return new Uint8Array(bytes).buffer; +} + +function readFirstByte(frame: ArrayBuffer | null): number | null { + if (!frame) return null; + const view = new Uint8Array(frame); + if (view.byteLength === 0) return null; + return view[0]; +} + +describe("shared-frame-buffer", () => { + it("computes frame age across u32 wrap", () => { + expect(frameAge(1, 0xffffffff)).toBe(2); + expect(frameAge(0xffffffff, 0xfffffffe)).toBe(1); + expect(frameAge(100, 100)).toBe(0); + }); + + it("claims alternate writable slot when write index slot is busy", () => { + const init = createSharedFrameBuffer({ slotCount: 3, slotSize: 64 }); + const producer = createProducer(init); + const consumer = createConsumer(init.buffer); + + expect(producer.write(makeFrame(1))).toBe(true); + expect(producer.write(makeFrame(2))).toBe(true); + + const held = consumer.borrow(0); + expect(held).not.toBeNull(); + expect(held && held.data[0]).toBe(1); + + const next = consumer.read(0); + expect(readFirstByte(next)).toBe(2); + + expect(producer.write(makeFrame(3))).toBe(true); + expect(producer.write(makeFrame(4))).toBe(true); + + held?.release(); + + const firstRemaining = consumer.read(0); + const secondRemaining = consumer.read(0); + const remaining = [ + readFirstByte(firstRemaining), + readFirstByte(secondRemaining), + ] + .filter((value): value is number => 
value !== null) + .sort((a, b) => a - b); + expect(remaining).toEqual([3, 4]); + }); + + it("reads ready slot beyond current read index", () => { + const init = createSharedFrameBuffer({ slotCount: 3, slotSize: 64 }); + const producer = createProducer(init); + const consumer = createConsumer(init.buffer); + + expect(producer.write(makeFrame(10))).toBe(true); + expect(producer.write(makeFrame(11))).toBe(true); + + const held = consumer.borrow(0); + expect(held).not.toBeNull(); + expect(held && held.data[0]).toBe(10); + + const bypassRead = consumer.read(0); + expect(readFirstByte(bypassRead)).toBe(11); + + held?.release(); + }); + + it("readInto consumes sparse-ready slot and returns size", () => { + const init = createSharedFrameBuffer({ slotCount: 3, slotSize: 64 }); + const producer = createProducer(init); + const consumer = createConsumer(init.buffer); + + expect(producer.write(makeFrame(21, 22, 23))).toBe(true); + expect(producer.write(makeFrame(31, 32))).toBe(true); + + const held = consumer.borrow(0); + expect(held?.data[0]).toBe(21); + + const target = new Uint8Array(64); + const bytesRead = consumer.readInto(target, 0); + expect(bytesRead).toBe(2); + expect(Array.from(target.subarray(0, 2))).toEqual([31, 32]); + + held?.release(); + }); + + it("overwrites ready slot when ring is full", () => { + const init = createSharedFrameBuffer({ slotCount: 2, slotSize: 64 }); + const producer = createProducer(init); + const consumer = createConsumer(init.buffer); + + expect(producer.write(makeFrame(1))).toBe(true); + expect(producer.write(makeFrame(2))).toBe(true); + expect(producer.write(makeFrame(3))).toBe(true); + + const first = readFirstByte(consumer.read(0)); + const second = readFirstByte(consumer.read(0)); + const values = [first, second] + .filter((value): value is number => value !== null) + .sort((a, b) => a - b); + + expect(values).toEqual([2, 3]); + }); + + it("does not overwrite reading slots when full", () => { + const init = createSharedFrameBuffer({ 
slotCount: 2, slotSize: 64 }); + const producer = createProducer(init); + const consumer = createConsumer(init.buffer); + + expect(producer.write(makeFrame(1))).toBe(true); + expect(producer.write(makeFrame(2))).toBe(true); + + const heldFirst = consumer.borrow(0); + expect(heldFirst?.data[0]).toBe(1); + + expect(producer.write(makeFrame(3))).toBe(true); + expect(readFirstByte(consumer.read(0))).toBe(3); + + const heldSecond = consumer.borrow(0); + expect(heldSecond).toBeNull(); + + heldFirst?.release(); + expect(readFirstByte(consumer.read(0))).toBeNull(); + }); + + it("prefers replacing oldest ready slot under full pressure", () => { + const init = createSharedFrameBuffer({ slotCount: 4, slotSize: 64 }); + const producer = createProducer(init); + const consumer = createConsumer(init.buffer); + + expect(producer.write(makeFrame(1))).toBe(true); + expect(producer.write(makeFrame(2))).toBe(true); + expect(producer.write(makeFrame(3))).toBe(true); + expect(producer.write(makeFrame(4))).toBe(true); + + const held = consumer.borrow(0); + expect(held?.data[0]).toBe(1); + + expect(producer.write(makeFrame(5))).toBe(true); + held?.release(); + + expect(producer.write(makeFrame(6))).toBe(true); + expect(producer.write(makeFrame(7))).toBe(true); + + const values = [ + readFirstByte(consumer.read(0)), + readFirstByte(consumer.read(0)), + readFirstByte(consumer.read(0)), + readFirstByte(consumer.read(0)), + ] + .filter((value): value is number => value !== null) + .sort((a, b) => a - b); + + expect(values).toEqual([4, 5, 6, 7]); + }); +}); diff --git a/apps/desktop/src/utils/shared-frame-buffer.ts b/apps/desktop/src/utils/shared-frame-buffer.ts index 24a4637242..7c2e82eae3 100644 --- a/apps/desktop/src/utils/shared-frame-buffer.ts +++ b/apps/desktop/src/utils/shared-frame-buffer.ts @@ -34,6 +34,15 @@ export interface SharedFrameBufferInit { config: SharedFrameBufferConfig; } +export function frameAge( + currentFrameNumber: number, + candidateFrameNumber: number, +): number { 
+ const current = currentFrameNumber >>> 0; + const candidate = candidateFrameNumber >>> 0; + return (current - candidate) >>> 0; +} + export function isSharedArrayBufferSupported(): boolean { try { return ( @@ -112,7 +121,7 @@ export function createProducer(init: SharedFrameBufferInit): Producer { const metadataView = new Int32Array(buffer); const metadataOffset = controlView[CONTROL_METADATA_OFFSET]; const dataOffset = controlView[CONTROL_DATA_OFFSET]; - let frameCounter = 0; + let frameCounter = 0 >>> 0; return { write(frameData: ArrayBuffer): boolean { @@ -130,33 +139,92 @@ export function createProducer(init: SharedFrameBufferInit): Producer { return false; } - const writeIdx = Atomics.load(controlView, CONTROL_WRITE_INDEX); - const slotMetaIdx = (metadataOffset + writeIdx * METADATA_ENTRY_SIZE) / 4; + const initialWriteIdx = Atomics.load(controlView, CONTROL_WRITE_INDEX); + let writeIdx = -1; + let slotMetaIdx = -1; - const currentState = Atomics.load( - metadataView, - slotMetaIdx + META_SLOT_STATE, - ); - if (currentState !== SLOT_STATE.EMPTY) { - return false; - } + for (let probe = 0; probe < config.slotCount; probe++) { + const candidateIdx = (initialWriteIdx + probe) % config.slotCount; + const candidateMetaIdx = + (metadataOffset + candidateIdx * METADATA_ENTRY_SIZE) / 4; - const exchanged = Atomics.compareExchange( - metadataView, - slotMetaIdx + META_SLOT_STATE, - SLOT_STATE.EMPTY, - SLOT_STATE.WRITING, - ); - if (exchanged !== SLOT_STATE.EMPTY) { - return false; - } + const currentState = Atomics.load( + metadataView, + candidateMetaIdx + META_SLOT_STATE, + ); + if (currentState !== SLOT_STATE.EMPTY) { + continue; + } - if (writeIdx < 0 || writeIdx >= config.slotCount) { - Atomics.store( + const exchanged = Atomics.compareExchange( metadataView, - slotMetaIdx + META_SLOT_STATE, + candidateMetaIdx + META_SLOT_STATE, SLOT_STATE.EMPTY, + SLOT_STATE.WRITING, ); + if (exchanged === SLOT_STATE.EMPTY) { + writeIdx = candidateIdx; + slotMetaIdx = 
candidateMetaIdx; + break; + } + } + + if (writeIdx < 0 || slotMetaIdx < 0) { + const MAX_READY_RECLAIM_RETRIES = 4; + for ( + let reclaimAttempt = 0; + reclaimAttempt < MAX_READY_RECLAIM_RETRIES; + reclaimAttempt++ + ) { + let oldestFrameAge = -1; + let oldestIdx = -1; + let oldestMetaIdx = -1; + + for (let probe = 0; probe < config.slotCount; probe++) { + const candidateIdx = (initialWriteIdx + probe) % config.slotCount; + const candidateMetaIdx = + (metadataOffset + candidateIdx * METADATA_ENTRY_SIZE) / 4; + + const currentState = Atomics.load( + metadataView, + candidateMetaIdx + META_SLOT_STATE, + ); + if (currentState !== SLOT_STATE.READY) { + continue; + } + + const frameNumber = + Atomics.load( + metadataView, + candidateMetaIdx + META_FRAME_NUMBER, + ) >>> 0; + const candidateAge = frameAge(frameCounter, frameNumber); + if (candidateAge > oldestFrameAge) { + oldestFrameAge = candidateAge; + oldestIdx = candidateIdx; + oldestMetaIdx = candidateMetaIdx; + } + } + + if (oldestIdx < 0 || oldestMetaIdx < 0) { + break; + } + + const exchanged = Atomics.compareExchange( + metadataView, + oldestMetaIdx + META_SLOT_STATE, + SLOT_STATE.READY, + SLOT_STATE.WRITING, + ); + if (exchanged === SLOT_STATE.READY) { + writeIdx = oldestIdx; + slotMetaIdx = oldestMetaIdx; + break; + } + } + } + + if (writeIdx < 0 || slotMetaIdx < 0) { return false; } @@ -183,7 +251,7 @@ export function createProducer(init: SharedFrameBufferInit): Producer { frameData.byteLength, ); const currentFrame = frameCounter; - frameCounter = (frameCounter + 1) | 0; + frameCounter = (frameCounter + 1) >>> 0; Atomics.store( metadataView, slotMetaIdx + META_FRAME_NUMBER, @@ -191,10 +259,10 @@ export function createProducer(init: SharedFrameBufferInit): Producer { ); const MAX_CAS_RETRIES = 10; - let observed = writeIdx; + let observed = Atomics.load(controlView, CONTROL_WRITE_INDEX); for (let casAttempt = 0; casAttempt < MAX_CAS_RETRIES; casAttempt++) { - const nextIdx = (observed + 1) % 
config.slotCount; + const nextIdx = (writeIdx + 1) % config.slotCount; const oldValue = Atomics.compareExchange( controlView, CONTROL_WRITE_INDEX, @@ -209,6 +277,7 @@ export function createProducer(init: SharedFrameBufferInit): Producer { SLOT_STATE.READY, ); Atomics.notify(metadataView, slotMetaIdx + META_SLOT_STATE, 1); + Atomics.notify(metadataView, CONTROL_WRITE_INDEX, 1); return true; } @@ -293,6 +362,31 @@ export function createConsumer(buffer: SharedArrayBuffer): Consumer { const dataOffset = controlView[CONTROL_DATA_OFFSET]; const metadataView = new Int32Array(buffer); + function claimReadySlot( + baseReadIdx: number, + ): { readIdx: number; slotMetaIdx: number } | null { + for (let probe = 0; probe < slotCount; probe++) { + const readIdx = (baseReadIdx + probe) % slotCount; + const slotMetaIdx = (metadataOffset + readIdx * METADATA_ENTRY_SIZE) / 4; + const state = Atomics.load(metadataView, slotMetaIdx + META_SLOT_STATE); + if (state !== SLOT_STATE.READY) { + continue; + } + + const exchangedState = Atomics.compareExchange( + metadataView, + slotMetaIdx + META_SLOT_STATE, + SLOT_STATE.READY, + SLOT_STATE.READING, + ); + if (exchangedState === SLOT_STATE.READY) { + return { readIdx, slotMetaIdx }; + } + } + + return null; + } + return { read(timeoutMs: number = 100): ArrayBuffer | null { const MAX_CAS_RETRIES = 3; @@ -303,17 +397,17 @@ export function createConsumer(buffer: SharedArrayBuffer): Consumer { return null; } - const readIdx = Atomics.load(controlView, CONTROL_READ_INDEX); - const slotMetaIdx = - (metadataOffset + readIdx * METADATA_ENTRY_SIZE) / 4; - - let state = Atomics.load(metadataView, slotMetaIdx + META_SLOT_STATE); - - if (state !== SLOT_STATE.READY) { + const baseReadIdx = Atomics.load(controlView, CONTROL_READ_INDEX); + const claimed = claimReadySlot(baseReadIdx); + if (!claimed) { + const observedWriteIdx = Atomics.load( + controlView, + CONTROL_WRITE_INDEX, + ); const waitResult = Atomics.wait( metadataView, - slotMetaIdx + 
META_SLOT_STATE, - state, + CONTROL_WRITE_INDEX, + observedWriteIdx, timeoutMs, ); if (waitResult === "timed-out") { @@ -324,22 +418,9 @@ export function createConsumer(buffer: SharedArrayBuffer): Consumer { if (shutdownCheck) { return null; } - - state = Atomics.load(metadataView, slotMetaIdx + META_SLOT_STATE); - if (state !== SLOT_STATE.READY) { - continue; - } - } - - const exchangedState = Atomics.compareExchange( - metadataView, - slotMetaIdx + META_SLOT_STATE, - SLOT_STATE.READY, - SLOT_STATE.READING, - ); - if (exchangedState !== SLOT_STATE.READY) { continue; } + const { readIdx, slotMetaIdx } = claimed; const frameSize = Atomics.load( metadataView, @@ -393,17 +474,17 @@ export function createConsumer(buffer: SharedArrayBuffer): Consumer { return null; } - const readIdx = Atomics.load(controlView, CONTROL_READ_INDEX); - const slotMetaIdx = - (metadataOffset + readIdx * METADATA_ENTRY_SIZE) / 4; - - let state = Atomics.load(metadataView, slotMetaIdx + META_SLOT_STATE); - - if (state !== SLOT_STATE.READY) { + const baseReadIdx = Atomics.load(controlView, CONTROL_READ_INDEX); + const claimed = claimReadySlot(baseReadIdx); + if (!claimed) { + const observedWriteIdx = Atomics.load( + controlView, + CONTROL_WRITE_INDEX, + ); const waitResult = Atomics.wait( metadataView, - slotMetaIdx + META_SLOT_STATE, - state, + CONTROL_WRITE_INDEX, + observedWriteIdx, timeoutMs, ); if (waitResult === "timed-out") { @@ -414,22 +495,9 @@ export function createConsumer(buffer: SharedArrayBuffer): Consumer { if (shutdownCheck) { return null; } - - state = Atomics.load(metadataView, slotMetaIdx + META_SLOT_STATE); - if (state !== SLOT_STATE.READY) { - continue; - } - } - - const exchangedState = Atomics.compareExchange( - metadataView, - slotMetaIdx + META_SLOT_STATE, - SLOT_STATE.READY, - SLOT_STATE.READING, - ); - if (exchangedState !== SLOT_STATE.READY) { continue; } + const { readIdx, slotMetaIdx } = claimed; const frameSize = Atomics.load( metadataView, @@ -481,17 +549,17 @@ 
export function createConsumer(buffer: SharedArrayBuffer): Consumer { return null; } - const readIdx = Atomics.load(controlView, CONTROL_READ_INDEX); - const slotMetaIdx = - (metadataOffset + readIdx * METADATA_ENTRY_SIZE) / 4; - - let state = Atomics.load(metadataView, slotMetaIdx + META_SLOT_STATE); - - if (state !== SLOT_STATE.READY) { + const baseReadIdx = Atomics.load(controlView, CONTROL_READ_INDEX); + const claimed = claimReadySlot(baseReadIdx); + if (!claimed) { + const observedWriteIdx = Atomics.load( + controlView, + CONTROL_WRITE_INDEX, + ); const waitResult = Atomics.wait( metadataView, - slotMetaIdx + META_SLOT_STATE, - state, + CONTROL_WRITE_INDEX, + observedWriteIdx, timeoutMs, ); if (waitResult === "timed-out") { @@ -502,22 +570,9 @@ export function createConsumer(buffer: SharedArrayBuffer): Consumer { if (shutdownCheck) { return null; } - - state = Atomics.load(metadataView, slotMetaIdx + META_SLOT_STATE); - if (state !== SLOT_STATE.READY) { - continue; - } - } - - const exchangedState = Atomics.compareExchange( - metadataView, - slotMetaIdx + META_SLOT_STATE, - SLOT_STATE.READY, - SLOT_STATE.READING, - ); - if (exchangedState !== SLOT_STATE.READY) { continue; } + const { readIdx, slotMetaIdx } = claimed; const frameSize = Atomics.load( metadataView, diff --git a/apps/desktop/src/utils/socket.ts b/apps/desktop/src/utils/socket.ts index 923f049cb0..f026eb2c8b 100644 --- a/apps/desktop/src/utils/socket.ts +++ b/apps/desktop/src/utils/socket.ts @@ -8,25 +8,34 @@ import { type Producer, type SharedFrameBufferConfig, } from "./shared-frame-buffer"; -import type { StrideCorrectionResponse } from "./stride-correction-worker"; +import { + DEFAULT_FRAME_BUFFER_CONFIG, + computeSharedBufferConfig, +} from "./frame-transport-config"; +import { + decideWorkerInflightDispatch, + updateWorkerInflightPeaks, +} from "./frame-transport-inflight"; +import { decideFrameOrder } from "./frame-transport-order"; +import { decideSabWriteFailure } from 
"./frame-transport-retry"; +import { decideStrideCorrectionDispatch } from "./frame-transport-stride"; +import type { + ErrorResponse, + StrideCorrectionResponse, +} from "./stride-correction-worker"; import StrideCorrectionWorker from "./stride-correction-worker?worker"; import { disposeWebGPU, initWebGPU, isWebGPUSupported, renderFrameWebGPU, - renderNv12FrameWebGPU, type WebGPURenderer, } from "./webgpu-renderer"; const SAB_SUPPORTED = isSharedArrayBufferSupported(); -const FRAME_BUFFER_CONFIG: SharedFrameBufferConfig = { - slotCount: 6, - slotSize: 16 * 1024 * 1024, -}; - -let mainThreadNv12Buffer: Uint8ClampedArray | null = null; -let mainThreadNv12BufferSize = 0; +const SAB_WRITE_RETRY_LIMIT = 2; +const WORKER_IN_FLIGHT_LIMIT = 2; +const FRAME_ORDER_STALE_WINDOW = 30; export type FpsStats = { fps: number; @@ -35,6 +44,47 @@ export type FpsStats = { minFrameMs: number; maxFrameMs: number; mbPerSec: number; + sabResizes: number; + sabFallbacks: number; + sabOversizeFallbacks: number; + sabRetryLimitFallbacks: number; + sabRetriesInFlight: number; + sabSlotSizeBytes: number; + sabSlotCount: number; + sabTotalBytes: number; + workerFramesInFlight: number; + workerInFlightBackpressureHits: number; + workerInFlightBackpressureWindowHits: number; + workerFramesInFlightPeakWindow: number; + workerFramesInFlightPeakTotal: number; + workerInFlightSupersededDrops: number; + workerInFlightSupersededDropsWindow: number; + renderedFromSharedTotal: number; + renderedFromSharedWindow: number; + renderedFromWorkerTotal: number; + renderedFromWorkerWindow: number; + queuedOutOfOrderDropsTotal: number; + queuedOutOfOrderDropsWindow: number; + directOutOfOrderDropsTotal: number; + directOutOfOrderDropsWindow: number; + directIngressOutOfOrderDropsTotal: number; + directIngressOutOfOrderDropsWindow: number; + directResponseOutOfOrderDropsTotal: number; + directResponseOutOfOrderDropsWindow: number; + strideCorrectionInFlight: number; + strideCorrectionPending: number; + 
strideCorrectionDispatchesTotal: number; + strideCorrectionDispatchesWindow: number; + strideCorrectionSupersededDropsTotal: number; + strideCorrectionSupersededDropsWindow: number; + strideCorrectionErrorsTotal: number; + strideCorrectionErrorsWindow: number; + sabTotalRetryAttempts: number; + sabTotalFramesReceived: number; + sabTotalFramesWrittenToSharedBuffer: number; + sabTotalFramesSentToWorker: number; + sabTotalWorkerFallbackBytes: number; + sabTotalSupersededDrops: number; }; let globalFpsStatsGetter: (() => FpsStats) | null = null; @@ -46,59 +96,6 @@ export function getFpsStats(): FpsStats | null { return null; } -function convertNv12ToRgbaMainThread( - nv12Data: Uint8ClampedArray, - width: number, - height: number, - yStride: number, -): Uint8ClampedArray { - const rgbaSize = width * height * 4; - if (!mainThreadNv12Buffer || mainThreadNv12BufferSize < rgbaSize) { - mainThreadNv12Buffer = new Uint8ClampedArray(rgbaSize); - mainThreadNv12BufferSize = rgbaSize; - } - const rgba = mainThreadNv12Buffer; - - const ySize = yStride * height; - const yPlane = nv12Data; - const uvPlane = nv12Data.subarray(ySize); - const uvStride = width; - - for (let row = 0; row < height; row++) { - const yRowOffset = row * yStride; - const uvRowOffset = Math.floor(row / 2) * uvStride; - const rgbaRowOffset = row * width * 4; - - for (let col = 0; col < width; col++) { - const y = yPlane[yRowOffset + col] - 16; - - const uvCol = Math.floor(col / 2) * 2; - const u = uvPlane[uvRowOffset + uvCol] - 128; - const v = uvPlane[uvRowOffset + uvCol + 1] - 128; - - const c = 298 * y; - const d = u; - const e = v; - - let r = (c + 409 * e + 128) >> 8; - let g = (c - 100 * d - 208 * e + 128) >> 8; - let b = (c + 516 * d + 128) >> 8; - - r = r < 0 ? 0 : r > 255 ? 255 : r; - g = g < 0 ? 0 : g > 255 ? 255 : g; - b = b < 0 ? 0 : b > 255 ? 
255 : b; - - const rgbaOffset = rgbaRowOffset + col * 4; - rgba[rgbaOffset] = r; - rgba[rgbaOffset + 1] = g; - rgba[rgbaOffset + 2] = b; - rgba[rgbaOffset + 3] = 255; - } - } - - return rgba.subarray(0, rgbaSize); -} - export type FrameData = { width: number; height: number; @@ -122,19 +119,7 @@ interface FrameRenderedMessage { type: "frame-rendered"; width: number; height: number; -} - -interface FrameQueuedMessage { - type: "frame-queued"; - width: number; - height: number; -} - -interface DecodedFrame { - type: "decoded"; - bitmap: ImageBitmap; - width: number; - height: number; + source: "shared" | "worker"; } interface ErrorMessage { @@ -149,8 +134,6 @@ interface RequestFrameMessage { type WorkerMessage = | ReadyMessage | FrameRenderedMessage - | FrameQueuedMessage - | DecodedFrame | ErrorMessage | RequestFrameMessage; @@ -172,22 +155,72 @@ export function createImageDataWS( let pendingFrame: ArrayBuffer | null = null; let isProcessing = false; let nextFrame: ArrayBuffer | null = null; + let workerFramesInFlight = 0; let producer: Producer | null = null; - if (SAB_SUPPORTED) { + let sharedBufferConfig: SharedFrameBufferConfig | null = null; + let sharedBufferResizeFailed = false; + let sharedBufferResizeCount = 0; + let sabFallbackCount = 0; + let sabOversizeFallbackCount = 0; + let sabRetryLimitFallbackCount = 0; + let sabFallbackWindowCount = 0; + let sabOversizeFallbackWindowCount = 0; + let sabRetryLimitFallbackWindowCount = 0; + let sabWriteRetryCount = 0; + let sabRetryScheduled = false; + let processNextScheduled = false; + + function initializeSharedBuffer(config: SharedFrameBufferConfig): boolean { try { - const init = createSharedFrameBuffer(FRAME_BUFFER_CONFIG); - producer = createProducer(init); + const init = createSharedFrameBuffer(config); + const nextProducer = createProducer(init); + producer?.signalShutdown(); + producer = nextProducer; + sharedBufferConfig = config; + sharedBufferResizeCount += 1; worker.postMessage({ type: 
"init-shared-buffer", buffer: init.buffer, }); + return true; } catch (e) { console.error( "[socket] SharedArrayBuffer allocation failed, falling back to non-SAB mode:", e instanceof Error ? e.message : e, ); + return false; + } + } + + function ensureSharedBufferCapacity(requiredBytes: number) { + if ( + !producer || + !sharedBufferConfig || + sharedBufferResizeFailed || + requiredBytes <= sharedBufferConfig.slotSize + ) { + return; + } + + const config = computeSharedBufferConfig( + requiredBytes, + DEFAULT_FRAME_BUFFER_CONFIG, + ); + if (config.slotSize <= sharedBufferConfig.slotSize) { + return; + } + + const initialized = initializeSharedBuffer(config); + if (!initialized) { + sharedBufferResizeFailed = true; + } + } + + if (SAB_SUPPORTED) { + if (!initializeSharedBuffer(DEFAULT_FRAME_BUFFER_CONFIG)) { producer = null; + sharedBufferConfig = null; } } @@ -197,6 +230,14 @@ export function createImageDataWS( let directCanvas: HTMLCanvasElement | null = null; let directCtx: CanvasRenderingContext2D | null = null; let strideWorker: Worker | null = null; + let strideWorkerInFlight = false; + let pendingStrideCorrection: { + buffer: ArrayBuffer; + strideBytes: number; + width: number; + height: number; + frameNumber: number; + } | null = null; let cachedDirectImageData: ImageData | null = null; let cachedDirectWidth = 0; @@ -208,41 +249,29 @@ export function createImageDataWS( let mainThreadWebGPU: WebGPURenderer | null = null; let mainThreadWebGPUInitializing = false; - let pendingNv12Frame: ArrayBuffer | null = null; + let latestQueuedFrameNumber: number | null = null; + let latestDirectAcceptedFrameNumber: number | null = null; + let lastDirectRenderedFrameNumber: number | null = null; let lastRenderedFrameData: { data: Uint8ClampedArray; width: number; height: number; - yStride: number; - isNv12: boolean; + strideBytes: number; } | null = null; function storeRenderedFrame( frameData: Uint8ClampedArray, width: number, height: number, - yStride: number, - isNv12: 
boolean, + strideBytes: number, ) { - if ( - lastRenderedFrameData && - lastRenderedFrameData.data.length === frameData.length - ) { - lastRenderedFrameData.data.set(frameData); - lastRenderedFrameData.width = width; - lastRenderedFrameData.height = height; - lastRenderedFrameData.yStride = yStride; - lastRenderedFrameData.isNv12 = isNv12; - } else { - lastRenderedFrameData = { - data: new Uint8ClampedArray(frameData), - width, - height, - yStride, - isNv12, - }; - } + lastRenderedFrameData = { + data: frameData, + width, + height, + strideBytes, + }; if (!hasRenderedFrame()) { setHasRenderedFrame(true); } @@ -260,22 +289,38 @@ export function createImageDataWS( worker.onmessage = null; worker.terminate(); - if (strideWorker) { - strideWorker.onmessage = null; - strideWorker.terminate(); - strideWorker = null; - } + teardownStrideWorker(); pendingFrame = null; nextFrame = null; isProcessing = false; + workerFramesInFlight = 0; + sabRetryScheduled = false; + processNextScheduled = false; + sabFallbackWindowCount = 0; + sabOversizeFallbackWindowCount = 0; + sabRetryLimitFallbackWindowCount = 0; + workerInFlightBackpressureWindowHits = 0; + workerFramesInFlightPeakWindow = 0; + workerInFlightSupersededDropsWindow = 0; + renderedFromSharedWindow = 0; + renderedFromWorkerWindow = 0; + queuedOutOfOrderDropsWindow = 0; + directOutOfOrderDropsWindow = 0; + directIngressOutOfOrderDropsWindow = 0; + directResponseOutOfOrderDropsWindow = 0; + strideCorrectionDispatchesWindow = 0; + strideCorrectionSupersededDropsWindow = 0; + strideCorrectionErrorsWindow = 0; + latestQueuedFrameNumber = null; + latestDirectAcceptedFrameNumber = null; + lastDirectRenderedFrameNumber = null; if (mainThreadWebGPU) { disposeWebGPU(mainThreadWebGPU); mainThreadWebGPU = null; } - pendingNv12Frame = null; cachedDirectImageData = null; cachedDirectWidth = 0; cachedDirectHeight = 0; @@ -288,95 +333,138 @@ export function createImageDataWS( setIsConnected(false); } - function renderPendingNv12Frame() { 
- if (!pendingNv12Frame || !mainThreadWebGPU || !directCanvas) return; - - const buffer = pendingNv12Frame; - pendingNv12Frame = null; - - const NV12_MAGIC = 0x4e563132; - if (buffer.byteLength < 28) return; - - const formatCheck = new DataView(buffer, buffer.byteLength - 4, 4); - if (formatCheck.getUint32(0, true) !== NV12_MAGIC) return; - - const metadataOffset = buffer.byteLength - 28; - const meta = new DataView(buffer, metadataOffset, 28); - const yStride = meta.getUint32(0, true); - const height = meta.getUint32(4, true); - const width = meta.getUint32(8, true); - - if (width > 0 && height > 0) { - const ySize = yStride * height; - const uvSize = width * (height / 2); - const totalSize = ySize + uvSize; - - const frameData = new Uint8ClampedArray(buffer, 0, totalSize); - - if (directCanvas.width !== width || directCanvas.height !== height) { - directCanvas.width = width; - directCanvas.height = height; - } + function teardownStrideWorker() { + if (!strideWorker) return; + strideWorker.onmessage = null; + strideWorker.terminate(); + strideWorker = null; + strideWorkerInFlight = false; + pendingStrideCorrection = null; + } - renderNv12FrameWebGPU( - mainThreadWebGPU, - frameData, - width, - height, - yStride, - ); + function dispatchStrideCorrection(request: { + buffer: ArrayBuffer; + strideBytes: number; + width: number; + height: number; + frameNumber: number; + }) { + if (!strideWorker) return; + strideWorkerInFlight = true; + strideCorrectionDispatchesTotal++; + strideCorrectionDispatchesWindow++; + strideWorker.postMessage( + { + type: "correct-stride", + buffer: request.buffer, + strideBytes: request.strideBytes, + width: request.width, + height: request.height, + frameNumber: request.frameNumber, + }, + [request.buffer], + ); + } - storeRenderedFrame(frameData, width, height, yStride, true); - onmessage({ width, height }); + function queueStrideCorrection(request: { + buffer: ArrayBuffer; + strideBytes: number; + width: number; + height: number; + 
frameNumber: number; + }) { + if (!strideWorker) return; + const decision = decideStrideCorrectionDispatch( + strideWorkerInFlight, + pendingStrideCorrection !== null, + ); + if (decision.action === "dispatch") { + dispatchStrideCorrection(request); + strideWorkerInFlight = decision.nextInFlight; + return; } + strideWorkerInFlight = decision.nextInFlight; + if (decision.supersededDropsIncrement > 0) { + framesDropped += decision.supersededDropsIncrement; + strideCorrectionSupersededDropsTotal += decision.supersededDropsIncrement; + strideCorrectionSupersededDropsWindow += + decision.supersededDropsIncrement; + } + pendingStrideCorrection = request; } - function renderPendingFrameCanvas2D() { - if (!pendingNv12Frame || !directCanvas || !directCtx) return; - - const buffer = pendingNv12Frame; - pendingNv12Frame = null; - - const NV12_MAGIC = 0x4e563132; - if (buffer.byteLength < 28) return; - - const formatCheck = new DataView(buffer, buffer.byteLength - 4, 4); - if (formatCheck.getUint32(0, true) !== NV12_MAGIC) return; + function setupStrideWorker() { + if (strideWorker) return; + const createdWorker = new StrideCorrectionWorker(); + createdWorker.onmessage = ( + e: MessageEvent, + ) => { + const flushPending = () => { + if (pendingStrideCorrection && strideWorker) { + const nextRequest = pendingStrideCorrection; + pendingStrideCorrection = null; + dispatchStrideCorrection(nextRequest); + } + }; - const metadataOffset = buffer.byteLength - 28; - const meta = new DataView(buffer, metadataOffset, 28); - const yStride = meta.getUint32(0, true); - const height = meta.getUint32(4, true); - const width = meta.getUint32(8, true); + if (e.data.type === "error") { + strideCorrectionErrorsTotal++; + strideCorrectionErrorsWindow++; + strideWorkerInFlight = false; + flushPending(); + return; + } - if (width > 0 && height > 0) { - const ySize = yStride * height; - const uvSize = width * (height / 2); - const totalSize = ySize + uvSize; + if (e.data.type !== "corrected" || 
!directCanvas || !directCtx) { + strideWorkerInFlight = false; + flushPending(); + return; + } + strideWorkerInFlight = false; - const frameData = new Uint8ClampedArray(buffer, 0, totalSize); + const { buffer, width, height, frameNumber } = e.data; + const responseOrderDecision = decideFrameOrder( + frameNumber, + lastDirectRenderedFrameNumber, + FRAME_ORDER_STALE_WINDOW, + ); + if (responseOrderDecision.action === "drop") { + framesDropped += responseOrderDecision.dropsIncrement; + directOutOfOrderDropsTotal += responseOrderDecision.dropsIncrement; + directOutOfOrderDropsWindow += responseOrderDecision.dropsIncrement; + directResponseOutOfOrderDropsTotal += + responseOrderDecision.dropsIncrement; + directResponseOutOfOrderDropsWindow += + responseOrderDecision.dropsIncrement; + return; + } + lastDirectRenderedFrameNumber = + responseOrderDecision.nextLatestFrameNumber; if (directCanvas.width !== width || directCanvas.height !== height) { directCanvas.width = width; directCanvas.height = height; } - const rgba = convertNv12ToRgbaMainThread( - frameData, - width, - height, - yStride, - ); - const imageData = new ImageData( - new Uint8ClampedArray(rgba), - width, - height, - ); - directCtx.putImageData(imageData, 0, 0); + const frameData = new Uint8ClampedArray(buffer); + if ( + !cachedStrideImageData || + cachedStrideWidth !== width || + cachedStrideHeight !== height + ) { + cachedStrideImageData = new ImageData(width, height); + cachedStrideWidth = width; + cachedStrideHeight = height; + } + cachedStrideImageData.data.set(frameData); + directCtx.putImageData(cachedStrideImageData, 0, 0); - storeRenderedFrame(frameData, width, height, yStride, true); + actualRendersCount++; + storeRenderedFrame(cachedStrideImageData.data, width, height, width * 4); + flushPending(); onmessage({ width, height }); - } + }; + strideWorker = createdWorker; } const canvasControls: CanvasControls = { @@ -395,8 +483,12 @@ export function createImageDataWS( disposeWebGPU(mainThreadWebGPU); 
mainThreadWebGPU = null; } + teardownStrideWorker(); directCtx = null; mainThreadWebGPUInitializing = false; + latestQueuedFrameNumber = null; + latestDirectAcceptedFrameNumber = null; + lastDirectRenderedFrameNumber = null; } directCanvas = canvas; @@ -409,9 +501,7 @@ export function createImageDataWS( .then((renderer) => { mainThreadWebGPU = renderer; mainThreadWebGPUInitializing = false; - if (pendingNv12Frame && directCanvas) { - renderPendingNv12Frame(); - } + teardownStrideWorker(); onRequestFrame?.(); }) .catch((e) => { @@ -419,8 +509,8 @@ export function createImageDataWS( console.error("[Socket] Main thread WebGPU init failed:", e); directCtx = directCanvas?.getContext("2d", { alpha: false }) ?? null; - if (pendingNv12Frame && directCanvas && directCtx) { - renderPendingFrameCanvas2D(); + if (directCtx) { + setupStrideWorker(); } onRequestFrame?.(); }); @@ -428,62 +518,30 @@ export function createImageDataWS( mainThreadWebGPUInitializing = false; directCtx = directCanvas?.getContext("2d", { alpha: false }) ?? 
null; - if (pendingNv12Frame && directCanvas && directCtx) { - renderPendingFrameCanvas2D(); + if (directCtx) { + setupStrideWorker(); } onRequestFrame?.(); } }); } - - strideWorker = new StrideCorrectionWorker(); - strideWorker.onmessage = (e: MessageEvent) => { - if (e.data.type !== "corrected" || !directCanvas || !directCtx) return; - - const { buffer, width, height } = e.data; - if (directCanvas.width !== width || directCanvas.height !== height) { - directCanvas.width = width; - directCanvas.height = height; - } - - const frameData = new Uint8ClampedArray(buffer); - if ( - !cachedStrideImageData || - cachedStrideWidth !== width || - cachedStrideHeight !== height - ) { - cachedStrideImageData = new ImageData(width, height); - cachedStrideWidth = width; - cachedStrideHeight = height; - } - cachedStrideImageData.data.set(frameData); - directCtx.putImageData(cachedStrideImageData, 0, 0); - - storeRenderedFrame( - cachedStrideImageData.data, - width, - height, - width * 4, - false, - ); - onmessage({ width, height }); - }; }, resetFrameState: () => { + latestQueuedFrameNumber = null; + latestDirectAcceptedFrameNumber = null; + lastDirectRenderedFrameNumber = null; worker.postMessage({ type: "reset-frame-state" }); }, captureFrame: async () => { if (!lastRenderedFrameData) { return null; } - const { data, width, height, yStride, isNv12 } = lastRenderedFrameData; - let imageData: ImageData; - if (isNv12) { - const rgba = convertNv12ToRgbaMainThread(data, width, height, yStride); - imageData = new ImageData(new Uint8ClampedArray(rgba), width, height); - } else { - imageData = new ImageData(new Uint8ClampedArray(data), width, height); - } + const { data, width, height } = lastRenderedFrameData; + const imageData = new ImageData( + new Uint8ClampedArray(data), + width, + height, + ); const canvas = document.createElement("canvas"); canvas.width = width; canvas.height = height; @@ -506,26 +564,34 @@ export function createImageDataWS( if (e.data.type === "error") { 
console.error("[FrameWorker]", e.data.message); + if (workerFramesInFlight > 0) { + workerFramesInFlight--; + } isProcessing = false; - processNextFrame(); - return; - } - - if (e.data.type === "frame-queued") { - const { width, height } = e.data; - onmessage({ width, height }); - isProcessing = false; - processNextFrame(); + scheduleProcessNextFrame(); return; } if (e.data.type === "frame-rendered") { - const { width, height } = e.data; + const { width, height, source } = e.data; onmessage({ width, height }); + if (source === "worker" && workerFramesInFlight > 0) { + workerFramesInFlight--; + } + if (source === "worker") { + renderedFromWorkerTotal++; + renderedFromWorkerWindow++; + } else { + renderedFromSharedTotal++; + renderedFromSharedWindow++; + } actualRendersCount++; if (!hasRenderedFrame()) { setHasRenderedFrame(true); } + if (nextFrame || pendingFrame) { + scheduleProcessNextFrame(); + } return; } @@ -533,14 +599,90 @@ export function createImageDataWS( onRequestFrame?.(); return; } + }; - if (e.data.type === "decoded") { - const { bitmap, width, height } = e.data; - onmessage({ width, height, bitmap }); - isProcessing = false; + function readFrameNumber(buffer: ArrayBuffer): number | null { + if (buffer.byteLength < 24) return null; + const metadataOffset = buffer.byteLength - 24; + const meta = new DataView(buffer, metadataOffset, 24); + return meta.getUint32(12, true); + } + + function scheduleProcessNextFrame() { + if (processNextScheduled) return; + processNextScheduled = true; + queueMicrotask(() => { + processNextScheduled = false; processNextFrame(); + }); + } + + function enqueueFrameBuffer(buffer: ArrayBuffer) { + const frameNumber = readFrameNumber(buffer); + const orderDecision = decideFrameOrder( + frameNumber, + latestQueuedFrameNumber, + FRAME_ORDER_STALE_WINDOW, + ); + latestQueuedFrameNumber = orderDecision.nextLatestFrameNumber; + if (orderDecision.action === "drop") { + framesDropped += orderDecision.dropsIncrement; + 
totalSupersededDrops += orderDecision.dropsIncrement; + queuedOutOfOrderDropsTotal += orderDecision.dropsIncrement; + queuedOutOfOrderDropsWindow += orderDecision.dropsIncrement; + return; + } + + if (isProcessing) { + if (nextFrame) { + framesDropped++; + totalSupersededDrops++; + } + nextFrame = buffer; + } else { + pendingFrame = buffer; + scheduleProcessNextFrame(); + } + } + + function dispatchToWorker(buffer: ArrayBuffer): boolean { + const decision = decideWorkerInflightDispatch( + workerFramesInFlight, + WORKER_IN_FLIGHT_LIMIT, + nextFrame !== null, + ); + + totalWorkerInFlightBackpressureHits += decision.backpressureHitsIncrement; + workerInFlightBackpressureWindowHits += decision.backpressureHitsIncrement; + + if (decision.supersededDropsIncrement > 0) { + framesDropped += decision.supersededDropsIncrement; + totalSupersededDrops += decision.supersededDropsIncrement; + totalWorkerInFlightSupersededDrops += decision.supersededDropsIncrement; + workerInFlightSupersededDropsWindow += decision.supersededDropsIncrement; } - }; + + if (decision.action === "backpressure") { + nextFrame = buffer; + return false; + } + + framesSentToWorker++; + totalFramesSentToWorker++; + totalWorkerFallbackBytes += buffer.byteLength; + workerFramesInFlight = decision.nextWorkerFramesInFlight; + + const peaks = updateWorkerInflightPeaks( + workerFramesInFlight, + workerFramesInFlightPeakWindow, + workerFramesInFlightPeakTotal, + ); + workerFramesInFlightPeakWindow = peaks.peakWindow; + workerFramesInFlightPeakTotal = peaks.peakTotal; + + worker.postMessage({ type: "frame", buffer }, [buffer]); + return true; + } function processNextFrame() { if (isProcessing) return; @@ -557,12 +699,67 @@ export function createImageDataWS( isProcessing = true; if (producer) { + ensureSharedBufferCapacity(buffer.byteLength); + const slotSize = sharedBufferConfig?.slotSize ?? 
0; + const isOversized = slotSize > 0 && buffer.byteLength > slotSize; const written = producer.write(buffer); if (!written) { - worker.postMessage({ type: "frame", buffer }, [buffer]); + sabFallbackCount += 1; + sabFallbackWindowCount += 1; + const decision = decideSabWriteFailure( + isOversized, + sabWriteRetryCount, + SAB_WRITE_RETRY_LIMIT, + ); + sabWriteRetryCount = decision.nextRetryCount; + + if (decision.action === "retry") { + isProcessing = false; + totalSabRetryAttempts++; + if (nextFrame) { + framesDropped++; + totalSupersededDrops++; + } + nextFrame = buffer; + if (!sabRetryScheduled) { + sabRetryScheduled = true; + requestAnimationFrame(() => { + sabRetryScheduled = false; + processNextFrame(); + }); + } + return; + } + if (decision.action === "fallback_oversize") { + sabOversizeFallbackCount += 1; + sabOversizeFallbackWindowCount += 1; + } else { + sabRetryLimitFallbackCount += 1; + sabRetryLimitFallbackWindowCount += 1; + } + const dispatched = dispatchToWorker(buffer); + isProcessing = false; + if (dispatched && (nextFrame || pendingFrame)) { + scheduleProcessNextFrame(); + } + return; + } else { + sabWriteRetryCount = 0; + totalFramesWrittenToSharedBuffer++; + isProcessing = false; + if (nextFrame || pendingFrame) { + scheduleProcessNextFrame(); + } + return; } } else { - worker.postMessage({ type: "frame", buffer }, [buffer]); + sabWriteRetryCount = 0; + const dispatched = dispatchToWorker(buffer); + isProcessing = false; + if (dispatched && (nextFrame || pendingFrame)) { + scheduleProcessNextFrame(); + } + return; } } @@ -582,12 +779,41 @@ export function createImageDataWS( let frameCount = 0; let frameTimeSum = 0; let totalBytesReceived = 0; + let totalSabRetryAttempts = 0; + let totalFramesReceived = 0; + let totalFramesWrittenToSharedBuffer = 0; + let totalFramesSentToWorker = 0; + let totalWorkerFallbackBytes = 0; + let totalWorkerInFlightBackpressureHits = 0; + let workerInFlightBackpressureWindowHits = 0; + let 
workerFramesInFlightPeakWindow = 0; + let workerFramesInFlightPeakTotal = 0; + let totalWorkerInFlightSupersededDrops = 0; + let workerInFlightSupersededDropsWindow = 0; + let renderedFromSharedTotal = 0; + let renderedFromSharedWindow = 0; + let renderedFromWorkerTotal = 0; + let renderedFromWorkerWindow = 0; + let queuedOutOfOrderDropsTotal = 0; + let queuedOutOfOrderDropsWindow = 0; + let directOutOfOrderDropsTotal = 0; + let directOutOfOrderDropsWindow = 0; + let directIngressOutOfOrderDropsTotal = 0; + let directIngressOutOfOrderDropsWindow = 0; + let directResponseOutOfOrderDropsTotal = 0; + let directResponseOutOfOrderDropsWindow = 0; + let strideCorrectionDispatchesTotal = 0; + let strideCorrectionDispatchesWindow = 0; + let strideCorrectionSupersededDropsTotal = 0; + let strideCorrectionSupersededDropsWindow = 0; + let strideCorrectionErrorsTotal = 0; + let strideCorrectionErrorsWindow = 0; + let totalSupersededDrops = 0; let lastLogTime = 0; let framesReceived = 0; let framesDropped = 0; let framesSentToWorker = 0; let actualRendersCount = 0; - let renderFrameCount = 0; let minFrameTime = Number.MAX_VALUE; let maxFrameTime = 0; @@ -601,25 +827,61 @@ export function createImageDataWS( minFrameMs: minFrameTime === Number.MAX_VALUE ? 0 : minFrameTime, maxFrameMs: maxFrameTime, mbPerSec: totalBytesReceived / 1_000_000, + sabResizes: sharedBufferResizeCount, + sabFallbacks: sabFallbackCount, + sabOversizeFallbacks: sabOversizeFallbackCount, + sabRetryLimitFallbacks: sabRetryLimitFallbackCount, + sabRetriesInFlight: sabWriteRetryCount, + sabSlotSizeBytes: sharedBufferConfig?.slotSize ?? 0, + sabSlotCount: sharedBufferConfig?.slotCount ?? 0, + sabTotalBytes: + (sharedBufferConfig?.slotSize ?? 0) * + (sharedBufferConfig?.slotCount ?? 
0), + workerFramesInFlight, + workerInFlightBackpressureHits: totalWorkerInFlightBackpressureHits, + workerInFlightBackpressureWindowHits, + workerFramesInFlightPeakWindow, + workerFramesInFlightPeakTotal, + workerInFlightSupersededDrops: totalWorkerInFlightSupersededDrops, + workerInFlightSupersededDropsWindow: workerInFlightSupersededDropsWindow, + renderedFromSharedTotal, + renderedFromSharedWindow, + renderedFromWorkerTotal, + renderedFromWorkerWindow, + queuedOutOfOrderDropsTotal, + queuedOutOfOrderDropsWindow, + directOutOfOrderDropsTotal, + directOutOfOrderDropsWindow, + directIngressOutOfOrderDropsTotal, + directIngressOutOfOrderDropsWindow, + directResponseOutOfOrderDropsTotal, + directResponseOutOfOrderDropsWindow, + strideCorrectionInFlight: strideWorkerInFlight ? 1 : 0, + strideCorrectionPending: pendingStrideCorrection ? 1 : 0, + strideCorrectionDispatchesTotal, + strideCorrectionDispatchesWindow, + strideCorrectionSupersededDropsTotal, + strideCorrectionSupersededDropsWindow, + strideCorrectionErrorsTotal, + strideCorrectionErrorsWindow, + sabTotalRetryAttempts: totalSabRetryAttempts, + sabTotalFramesReceived: totalFramesReceived, + sabTotalFramesWrittenToSharedBuffer: totalFramesWrittenToSharedBuffer, + sabTotalFramesSentToWorker: totalFramesSentToWorker, + sabTotalWorkerFallbackBytes: totalWorkerFallbackBytes, + sabTotalSupersededDrops: totalSupersededDrops, }); globalFpsStatsGetter = getLocalFpsStats; (globalThis as Record).__capFpsStats = getLocalFpsStats; - const NV12_MAGIC = 0x4e563132; - ws.binaryType = "arraybuffer"; ws.onmessage = (event) => { const buffer = event.data as ArrayBuffer; const now = performance.now(); totalBytesReceived += buffer.byteLength; framesReceived++; - - let isNv12Format = false; - if (buffer.byteLength >= 28) { - const formatCheck = new DataView(buffer, buffer.byteLength - 4, 4); - isNv12Format = formatCheck.getUint32(0, true) === NV12_MAGIC; - } + totalFramesReceived++; if (lastFrameTime > 0) { const delta = now - 
lastFrameTime; @@ -639,7 +901,7 @@ export function createImageDataWS( framesReceived > 0 ? (framesDropped / framesReceived) * 100 : 0; console.log( - `[Frame] recv: ${recvFps.toFixed(1)}/s, sent: ${sentFps.toFixed(1)}/s, ACTUAL: ${actualFps.toFixed(1)}/s, dropped: ${dropRate.toFixed(0)}%, delta: ${avgDelta.toFixed(1)}ms, ${mbPerSec.toFixed(1)} MB/s, ${isNv12Format ? "NV12" : "RGBA"}`, + `[Frame] recv: ${recvFps.toFixed(1)}/s, sent: ${sentFps.toFixed(1)}/s, ACTUAL: ${actualFps.toFixed(1)}/s, dropped: ${dropRate.toFixed(0)}%, delta: ${avgDelta.toFixed(1)}ms, ${mbPerSec.toFixed(1)} MB/s, RGBA, sab_resizes: ${sharedBufferResizeCount}, sab_fallbacks_window: ${sabFallbackWindowCount}, sab_fallbacks_total: ${sabFallbackCount}, sab_oversize_fallbacks_window: ${sabOversizeFallbackWindowCount}, sab_oversize_fallbacks_total: ${sabOversizeFallbackCount}, sab_retry_limit_fallbacks_window: ${sabRetryLimitFallbackWindowCount}, sab_retry_limit_fallbacks_total: ${sabRetryLimitFallbackCount}, sab_retries: ${sabWriteRetryCount}, worker_inflight: ${workerFramesInFlight}, worker_inflight_peak_window: ${workerFramesInFlightPeakWindow}, worker_inflight_peak_total: ${workerFramesInFlightPeakTotal}, worker_cap_hits_window: ${workerInFlightBackpressureWindowHits}, worker_cap_hits_total: ${totalWorkerInFlightBackpressureHits}, worker_superseded_window: ${workerInFlightSupersededDropsWindow}, worker_superseded_total: ${totalWorkerInFlightSupersededDrops}, rendered_shared_window: ${renderedFromSharedWindow}, rendered_shared_total: ${renderedFromSharedTotal}, rendered_worker_window: ${renderedFromWorkerWindow}, rendered_worker_total: ${renderedFromWorkerTotal}, queued_ooo_window: ${queuedOutOfOrderDropsWindow}, queued_ooo_total: ${queuedOutOfOrderDropsTotal}, direct_ooo_window: ${directOutOfOrderDropsWindow}, direct_ooo_total: ${directOutOfOrderDropsTotal}, direct_ingress_ooo_window: ${directIngressOutOfOrderDropsWindow}, direct_ingress_ooo_total: ${directIngressOutOfOrderDropsTotal}, 
direct_response_ooo_window: ${directResponseOutOfOrderDropsWindow}, direct_response_ooo_total: ${directResponseOutOfOrderDropsTotal}, stride_corr_inflight: ${strideWorkerInFlight ? 1 : 0}, stride_corr_pending: ${pendingStrideCorrection ? 1 : 0}, stride_corr_dispatches_window: ${strideCorrectionDispatchesWindow}, stride_corr_dispatches_total: ${strideCorrectionDispatchesTotal}, stride_corr_superseded_window: ${strideCorrectionSupersededDropsWindow}, stride_corr_superseded_total: ${strideCorrectionSupersededDropsTotal}, stride_corr_errors_window: ${strideCorrectionErrorsWindow}, stride_corr_errors_total: ${strideCorrectionErrorsTotal}`, ); frameCount = 0; @@ -650,6 +912,22 @@ export function createImageDataWS( framesDropped = 0; framesSentToWorker = 0; actualRendersCount = 0; + sabFallbackWindowCount = 0; + sabOversizeFallbackWindowCount = 0; + sabRetryLimitFallbackWindowCount = 0; + workerInFlightBackpressureWindowHits = 0; + workerFramesInFlightPeakWindow = workerFramesInFlight; + workerInFlightSupersededDropsWindow = 0; + renderedFromSharedWindow = 0; + renderedFromWorkerWindow = 0; + queuedOutOfOrderDropsWindow = 0; + directOutOfOrderDropsWindow = 0; + directIngressOutOfOrderDropsWindow = 0; + directResponseOutOfOrderDropsWindow = 0; + strideCorrectionDispatchesWindow = 0; + strideCorrectionSupersededDropsWindow = 0; + strideCorrectionErrorsWindow = 0; + sabWriteRetryCount = 0; minFrameTime = Number.MAX_VALUE; maxFrameTime = 0; } @@ -658,261 +936,126 @@ export function createImageDataWS( } lastFrameTime = now; - if (isNv12Format) { - if (mainThreadWebGPU && directCanvas) { - const metadataOffset = buffer.byteLength - 28; - const meta = new DataView(buffer, metadataOffset, 28); - const yStride = meta.getUint32(0, true); - const height = meta.getUint32(4, true); - const width = meta.getUint32(8, true); - const frameNumber = meta.getUint32(12, true); - - if (width > 0 && height > 0) { - const ySize = yStride * height; - const uvSize = width * (height / 2); - const 
totalSize = ySize + uvSize; - - const frameData = new Uint8ClampedArray(buffer, 0, totalSize); - - if (directCanvas.width !== width || directCanvas.height !== height) { - directCanvas.width = width; - directCanvas.height = height; - } - - renderNv12FrameWebGPU( - mainThreadWebGPU, - frameData, - width, - height, - yStride, - ); - actualRendersCount++; - renderFrameCount++; - - storeRenderedFrame(frameData, width, height, yStride, true); - onmessage({ width, height }); - } - return; - } + const shouldRenderDirect = Boolean( + directCanvas && (mainThreadWebGPU || (directCtx && strideWorker)), + ); + if (!shouldRenderDirect) { + latestDirectAcceptedFrameNumber = null; + lastDirectRenderedFrameNumber = null; + enqueueFrameBuffer(buffer); + return; + } + latestQueuedFrameNumber = null; - if (mainThreadWebGPUInitializing || !directCanvas) { - pendingNv12Frame = buffer; - const metadataOffset = buffer.byteLength - 28; - const meta = new DataView(buffer, metadataOffset, 28); - const height = meta.getUint32(4, true); - const width = meta.getUint32(8, true); - if (width > 0 && height > 0) { - onmessage({ width, height }); - } - return; - } + if (buffer.byteLength < 24) { + return; + } - if (directCanvas && directCtx) { - if (!directCanvas.isConnected) { - const domCanvas = document.getElementById( - "canvas", - ) as HTMLCanvasElement | null; - if (domCanvas && domCanvas !== directCanvas) { - directCanvas = domCanvas; - directCtx = domCanvas.getContext("2d", { alpha: false }); - if (!directCtx) { - console.error( - "[Socket] Failed to get 2D context from DOM canvas", - ); - return; - } - } else { - return; - } - } + const metadataOffset = buffer.byteLength - 24; + const meta = new DataView(buffer, metadataOffset, 24); + const strideBytes = meta.getUint32(0, true); + const height = meta.getUint32(4, true); + const width = meta.getUint32(8, true); + const frameNumber = meta.getUint32(12, true); + const expectedRowBytes = width * 4; + const frameDataSize = strideBytes * height; 
- const metadataOffset = buffer.byteLength - 28; - const meta = new DataView(buffer, metadataOffset, 28); - const yStride = meta.getUint32(0, true); - const height = meta.getUint32(4, true); - const width = meta.getUint32(8, true); - const frameNumber = meta.getUint32(12, true); - - if (width > 0 && height > 0) { - const ySize = yStride * height; - const uvSize = width * (height / 2); - const totalSize = ySize + uvSize; - - const nv12Data = new Uint8ClampedArray(buffer, 0, totalSize); - const rgbaData = convertNv12ToRgbaMainThread( - nv12Data, - width, - height, - yStride, - ); - - if (directCanvas.width !== width || directCanvas.height !== height) { - directCanvas.width = width; - directCanvas.height = height; - } + if ( + width === 0 || + height === 0 || + strideBytes === 0 || + strideBytes < expectedRowBytes || + buffer.byteLength - 24 < frameDataSize + ) { + return; + } - if ( - !cachedDirectImageData || - cachedDirectWidth !== width || - cachedDirectHeight !== height - ) { - cachedDirectImageData = new ImageData(width, height); - cachedDirectWidth = width; - cachedDirectHeight = height; - } - cachedDirectImageData.data.set(rgbaData); - directCtx.putImageData(cachedDirectImageData, 0, 0); + const directOrderDecision = decideFrameOrder( + frameNumber, + latestDirectAcceptedFrameNumber, + FRAME_ORDER_STALE_WINDOW, + ); + if (directOrderDecision.action === "drop") { + framesDropped += directOrderDecision.dropsIncrement; + directOutOfOrderDropsTotal += directOrderDecision.dropsIncrement; + directOutOfOrderDropsWindow += directOrderDecision.dropsIncrement; + directIngressOutOfOrderDropsTotal += directOrderDecision.dropsIncrement; + directIngressOutOfOrderDropsWindow += directOrderDecision.dropsIncrement; + return; + } + latestDirectAcceptedFrameNumber = directOrderDecision.nextLatestFrameNumber; - storeRenderedFrame(nv12Data, width, height, yStride, true); - actualRendersCount++; - renderFrameCount++; + if (mainThreadWebGPU && directCanvas) { + const frameData = new 
Uint8ClampedArray(buffer, 0, frameDataSize); - onmessage({ width, height }); - } - return; + if (directCanvas.width !== width || directCanvas.height !== height) { + directCanvas.width = width; + directCanvas.height = height; } - if (isProcessing) { - framesDropped++; - nextFrame = buffer; - } else { - framesSentToWorker++; - pendingFrame = buffer; - processNextFrame(); - } + renderFrameWebGPU( + mainThreadWebGPU, + frameData, + width, + height, + strideBytes, + ); + actualRendersCount++; + storeRenderedFrame(frameData, width, height, strideBytes); + lastDirectRenderedFrameNumber = frameNumber; + onmessage({ width, height }); return; } - if (mainThreadWebGPU && directCanvas && buffer.byteLength >= 24) { - const metadataOffset = buffer.byteLength - 24; - const meta = new DataView(buffer, metadataOffset, 24); - const strideBytes = meta.getUint32(0, true); - const height = meta.getUint32(4, true); - const width = meta.getUint32(8, true); + if (directCanvas && directCtx && strideWorker) { + const needsStrideCorrection = strideBytes !== expectedRowBytes; - if (width > 0 && height > 0) { - const frameDataSize = strideBytes * height; - const frameData = new Uint8ClampedArray(buffer, 0, frameDataSize); + if (!needsStrideCorrection) { + const frameData = new Uint8ClampedArray( + buffer, + 0, + expectedRowBytes * height, + ); if (directCanvas.width !== width || directCanvas.height !== height) { directCanvas.width = width; directCanvas.height = height; } - renderFrameWebGPU( - mainThreadWebGPU, - frameData, + if ( + !cachedDirectImageData || + cachedDirectWidth !== width || + cachedDirectHeight !== height + ) { + cachedDirectImageData = new ImageData(width, height); + cachedDirectWidth = width; + cachedDirectHeight = height; + } + cachedDirectImageData.data.set(frameData); + directCtx.putImageData(cachedDirectImageData, 0, 0); + + storeRenderedFrame( + cachedDirectImageData.data, width, height, - strideBytes, + width * 4, ); actualRendersCount++; - renderFrameCount++; - - 
storeRenderedFrame(frameData, width, height, strideBytes, false); + lastDirectRenderedFrameNumber = frameNumber; onmessage({ width, height }); + } else { + queueStrideCorrection({ + buffer, + strideBytes, + width, + height, + frameNumber, + }); } return; } - if (directCanvas && directCtx && strideWorker) { - if (buffer.byteLength >= 24) { - const metadataOffset = buffer.byteLength - 24; - const meta = new DataView(buffer, metadataOffset, 24); - const strideBytes = meta.getUint32(0, true); - const height = meta.getUint32(4, true); - const width = meta.getUint32(8, true); - - if (width > 0 && height > 0) { - const expectedRowBytes = width * 4; - const needsStrideCorrection = strideBytes !== expectedRowBytes; - - if (lastFrameTime > 0) { - const delta = now - lastFrameTime; - frameCount++; - frameTimeSum += delta; - minFrameTime = Math.min(minFrameTime, delta); - maxFrameTime = Math.max(maxFrameTime, delta); - if (frameCount % 60 === 0) { - const avgDelta = frameTimeSum / 60; - const elapsedSec = (now - lastLogTime) / 1000; - const mbPerSec = totalBytesReceived / 1_000_000 / elapsedSec; - const actualRenderFps = renderFrameCount / elapsedSec; - console.log( - `[Frame] recv_fps: ${(1000 / avgDelta).toFixed(1)}, render_fps: ${actualRenderFps.toFixed(1)}, mb/s: ${mbPerSec.toFixed(1)}, frame_ms: ${avgDelta.toFixed(1)} (min: ${minFrameTime.toFixed(1)}, max: ${maxFrameTime.toFixed(1)}), size: ${(buffer.byteLength / 1024).toFixed(0)}KB, format: ${isNv12Format ? 
"NV12" : "RGBA"}`, - ); - frameTimeSum = 0; - totalBytesReceived = 0; - lastLogTime = now; - renderFrameCount = 0; - minFrameTime = Number.MAX_VALUE; - maxFrameTime = 0; - } - } else { - lastLogTime = now; - } - lastFrameTime = now; - - if (!needsStrideCorrection) { - const frameData = new Uint8ClampedArray( - buffer, - 0, - expectedRowBytes * height, - ); - - if ( - directCanvas.width !== width || - directCanvas.height !== height - ) { - directCanvas.width = width; - directCanvas.height = height; - } - - if ( - !cachedDirectImageData || - cachedDirectWidth !== width || - cachedDirectHeight !== height - ) { - cachedDirectImageData = new ImageData(width, height); - cachedDirectWidth = width; - cachedDirectHeight = height; - } - cachedDirectImageData.data.set(frameData); - directCtx.putImageData(cachedDirectImageData, 0, 0); - - storeRenderedFrame( - cachedDirectImageData.data, - width, - height, - width * 4, - false, - ); - renderFrameCount++; - - onmessage({ width, height }); - } else { - strideWorker.postMessage( - { - type: "correct-stride", - buffer, - strideBytes, - width, - height, - }, - [buffer], - ); - } - } - } - return; - } - - if (isProcessing) { - nextFrame = buffer; - } else { - pendingFrame = buffer; - processNextFrame(); - } + enqueueFrameBuffer(buffer); }; return [ws, isConnected, isWorkerReady, canvasControls]; diff --git a/apps/desktop/src/utils/stride-correction-worker.ts b/apps/desktop/src/utils/stride-correction-worker.ts index b98a355bd3..d94ba2cb73 100644 --- a/apps/desktop/src/utils/stride-correction-worker.ts +++ b/apps/desktop/src/utils/stride-correction-worker.ts @@ -4,6 +4,7 @@ interface StrideCorrectionRequest { strideBytes: number; width: number; height: number; + frameNumber: number; } interface StrideCorrectionResponse { @@ -11,6 +12,7 @@ interface StrideCorrectionResponse { buffer: ArrayBuffer; width: number; height: number; + frameNumber: number; } interface ErrorResponse { @@ -24,33 +26,61 @@ let correctionBufferSize = 0; 
self.onmessage = (e: MessageEvent) => { if (e.data.type !== "correct-stride") return; - const { buffer, strideBytes, width, height } = e.data; - const expectedRowBytes = width * 4; - const expectedLength = expectedRowBytes * height; + try { + const { buffer, strideBytes, width, height, frameNumber } = e.data; + const expectedRowBytes = width * 4; + const expectedLength = expectedRowBytes * height; - if (!correctionBuffer || correctionBufferSize < expectedLength) { - correctionBuffer = new Uint8ClampedArray(expectedLength); - correctionBufferSize = expectedLength; - } + if (width <= 0 || height <= 0 || strideBytes < expectedRowBytes) { + const errorResponse: ErrorResponse = { + type: "error", + message: "Invalid stride correction dimensions", + }; + self.postMessage(errorResponse); + return; + } - const srcData = new Uint8ClampedArray(buffer); - for (let row = 0; row < height; row++) { - const srcStart = row * strideBytes; - const destStart = row * expectedRowBytes; - correctionBuffer.set( - srcData.subarray(srcStart, srcStart + expectedRowBytes), - destStart, - ); - } + const srcData = new Uint8ClampedArray(buffer); + if (srcData.byteLength < strideBytes * height) { + const errorResponse: ErrorResponse = { + type: "error", + message: "Stride correction buffer too small", + }; + self.postMessage(errorResponse); + return; + } + + if (!correctionBuffer || correctionBufferSize < expectedLength) { + correctionBuffer = new Uint8ClampedArray(expectedLength); + correctionBufferSize = expectedLength; + } - const result = correctionBuffer.slice(0, expectedLength); - const response: StrideCorrectionResponse = { - type: "corrected", - buffer: result.buffer, - width, - height, - }; - self.postMessage(response, { transfer: [result.buffer] }); + for (let row = 0; row < height; row++) { + const srcStart = row * strideBytes; + const destStart = row * expectedRowBytes; + correctionBuffer.set( + srcData.subarray(srcStart, srcStart + expectedRowBytes), + destStart, + ); + } + + const 
result = correctionBuffer.slice(0, expectedLength); + const response: StrideCorrectionResponse = { + type: "corrected", + buffer: result.buffer, + width, + height, + frameNumber, + }; + self.postMessage(response, { transfer: [result.buffer] }); + } catch (error) { + const errorResponse: ErrorResponse = { + type: "error", + message: + error instanceof Error ? error.message : "Stride correction failed", + }; + self.postMessage(errorResponse); + } }; export type { diff --git a/apps/desktop/src/utils/webgpu-renderer.ts b/apps/desktop/src/utils/webgpu-renderer.ts index b33f859e78..ef0f283be1 100644 --- a/apps/desktop/src/utils/webgpu-renderer.ts +++ b/apps/desktop/src/utils/webgpu-renderer.ts @@ -34,45 +34,16 @@ fn fs(@location(0) texCoord: vec2f) -> @location(0) vec4f { } `; -const NV12_FRAGMENT_SHADER = ` -@group(0) @binding(0) var frameSampler: sampler; -@group(0) @binding(1) var yTexture: texture_2d; -@group(0) @binding(2) var uvTexture: texture_2d; - -@fragment -fn fs(@location(0) texCoord: vec2f) -> @location(0) vec4f { - let y = textureSample(yTexture, frameSampler, texCoord).r; - let uv = textureSample(uvTexture, frameSampler, texCoord).rg; - - let yScaled = y - 0.0625; - let u = uv.r - 0.5; - let v = uv.g - 0.5; - - let r = clamp(1.164 * yScaled + 1.596 * v, 0.0, 1.0); - let g = clamp(1.164 * yScaled - 0.391 * u - 0.813 * v, 0.0, 1.0); - let b = clamp(1.164 * yScaled + 2.018 * u, 0.0, 1.0); - - return vec4f(r, g, b, 1.0); -} -`; - export interface WebGPURenderer { device: GPUDevice; context: GPUCanvasContext; pipeline: GPURenderPipeline; - nv12Pipeline: GPURenderPipeline; sampler: GPUSampler; frameTexture: GPUTexture | null; bindGroup: GPUBindGroup | null; bindGroupLayout: GPUBindGroupLayout; - nv12BindGroupLayout: GPUBindGroupLayout; - yTexture: GPUTexture | null; - uvTexture: GPUTexture | null; - nv12BindGroup: GPUBindGroup | null; cachedWidth: number; cachedHeight: number; - cachedNv12Width: number; - cachedNv12Height: number; canvas: OffscreenCanvas; } @@ 
-134,39 +105,12 @@ export async function initWebGPU( ], }); - const nv12BindGroupLayout = device.createBindGroupLayout({ - entries: [ - { - binding: 0, - visibility: GPUShaderStage.FRAGMENT, - sampler: { type: "filtering" }, - }, - { - binding: 1, - visibility: GPUShaderStage.FRAGMENT, - texture: { sampleType: "float" }, - }, - { - binding: 2, - visibility: GPUShaderStage.FRAGMENT, - texture: { sampleType: "float" }, - }, - ], - }); - const pipelineLayout = device.createPipelineLayout({ bindGroupLayouts: [bindGroupLayout], }); - const nv12PipelineLayout = device.createPipelineLayout({ - bindGroupLayouts: [nv12BindGroupLayout], - }); - const vertexModule = device.createShaderModule({ code: VERTEX_SHADER }); const fragmentModule = device.createShaderModule({ code: FRAGMENT_SHADER }); - const nv12FragmentModule = device.createShaderModule({ - code: NV12_FRAGMENT_SHADER, - }); const pipeline = device.createRenderPipeline({ layout: pipelineLayout, @@ -184,22 +128,6 @@ export async function initWebGPU( }, }); - const nv12Pipeline = device.createRenderPipeline({ - layout: nv12PipelineLayout, - vertex: { - module: vertexModule, - entryPoint: "vs", - }, - fragment: { - module: nv12FragmentModule, - entryPoint: "fs", - targets: [{ format }], - }, - primitive: { - topology: "triangle-list", - }, - }); - const sampler = device.createSampler({ magFilter: "linear", minFilter: "linear", @@ -211,19 +139,12 @@ export async function initWebGPU( device, context, pipeline, - nv12Pipeline, sampler, frameTexture: null, bindGroup: null, bindGroupLayout, - nv12BindGroupLayout, - yTexture: null, - uvTexture: null, - nv12BindGroup: null, cachedWidth: 0, cachedHeight: 0, - cachedNv12Width: 0, - cachedNv12Height: 0, canvas, }; } @@ -310,128 +231,9 @@ export function renderFrameWebGPU( device.queue.submit([encoder.finish()]); } -export function renderNv12FrameWebGPU( - renderer: WebGPURenderer, - data: Uint8ClampedArray, - width: number, - height: number, - yStride: number, -): void { - const 
{ - device, - context, - nv12Pipeline, - sampler, - nv12BindGroupLayout, - canvas, - } = renderer; - - if (canvas.width !== width || canvas.height !== height) { - canvas.width = width; - canvas.height = height; - const format = navigator.gpu.getPreferredCanvasFormat(); - context.configure({ - device, - format, - alphaMode: "opaque", - }); - } - - if ( - renderer.cachedNv12Width !== width || - renderer.cachedNv12Height !== height - ) { - renderer.yTexture?.destroy(); - renderer.uvTexture?.destroy(); - - renderer.yTexture = device.createTexture({ - size: { width, height }, - format: "r8unorm", - usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.COPY_DST, - }); - - renderer.uvTexture = device.createTexture({ - size: { width: width / 2, height: height / 2 }, - format: "rg8unorm", - usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.COPY_DST, - }); - - renderer.nv12BindGroup = device.createBindGroup({ - layout: nv12BindGroupLayout, - entries: [ - { binding: 0, resource: sampler }, - { binding: 1, resource: renderer.yTexture.createView() }, - { binding: 2, resource: renderer.uvTexture.createView() }, - ], - }); - - renderer.cachedNv12Width = width; - renderer.cachedNv12Height = height; - } - - if (!renderer.yTexture || !renderer.uvTexture || !renderer.nv12BindGroup) { - return; - } - - const ySize = yStride * height; - const uvWidth = width / 2; - const uvHeight = height / 2; - const uvStride = width; - const uvSize = uvStride * uvHeight; - - if (data.byteLength < ySize + uvSize) { - return; - } - - const yData = data.subarray(0, ySize); - const uvData = data.subarray(ySize, ySize + uvSize); - - device.queue.writeTexture( - { texture: renderer.yTexture }, - yData.buffer as unknown as GPUAllowSharedBufferSource, - { bytesPerRow: yStride, rowsPerImage: height, offset: yData.byteOffset }, - { width, height }, - ); - - device.queue.writeTexture( - { texture: renderer.uvTexture }, - uvData.buffer as unknown as GPUAllowSharedBufferSource, - { - bytesPerRow: 
uvStride, - rowsPerImage: uvHeight, - offset: uvData.byteOffset, - }, - { width: uvWidth, height: uvHeight }, - ); - - const encoder = device.createCommandEncoder(); - const pass = encoder.beginRenderPass({ - colorAttachments: [ - { - view: context.getCurrentTexture().createView(), - clearValue: { r: 0, g: 0, b: 0, a: 1 }, - loadOp: "clear", - storeOp: "store", - }, - ], - }); - - pass.setPipeline(nv12Pipeline); - pass.setBindGroup(0, renderer.nv12BindGroup); - pass.draw(3); - pass.end(); - - device.queue.submit([encoder.finish()]); -} - export function disposeWebGPU(renderer: WebGPURenderer): void { renderer.frameTexture?.destroy(); renderer.frameTexture = null; renderer.bindGroup = null; - renderer.yTexture?.destroy(); - renderer.yTexture = null; - renderer.uvTexture?.destroy(); - renderer.uvTexture = null; - renderer.nv12BindGroup = null; renderer.device.destroy(); } diff --git a/crates/camera-ffmpeg/src/lib.rs b/crates/camera-ffmpeg/src/lib.rs index c822191664..6de2c5938b 100644 --- a/crates/camera-ffmpeg/src/lib.rs +++ b/crates/camera-ffmpeg/src/lib.rs @@ -8,6 +8,11 @@ mod windows; #[cfg(windows)] pub use windows::*; +#[cfg(not(any(target_os = "macos", windows)))] +#[derive(Debug, thiserror::Error)] +#[error("Camera FFmpeg conversion is unsupported on this platform")] +pub struct AsFFmpegError; + pub trait CapturedFrameExt { /// Creates an ffmpeg video frame from the native frame. /// Only size, format, and data are set. 
diff --git a/crates/cursor-capture/src/position.rs b/crates/cursor-capture/src/position.rs index bb4ea75719..6a0759c933 100644 --- a/crates/cursor-capture/src/position.rs +++ b/crates/cursor-capture/src/position.rs @@ -1,5 +1,7 @@ use device_query::{DeviceQuery, DeviceState}; -use scap_targets::{Display, bounds::*}; +use scap_targets::Display; +#[cfg(any(windows, target_os = "macos"))] +use scap_targets::bounds::*; // Physical on Windows, Logical on macOS #[derive(Clone, Copy, Debug, PartialEq, Eq)] @@ -55,6 +57,12 @@ impl RelativeCursorPosition { display, }) } + + #[cfg(not(any(windows, target_os = "macos")))] + { + let _ = (raw, display); + None + } } pub fn display(&self) -> &Display { @@ -97,6 +105,11 @@ impl RelativeCursorPosition { display: self.display, }) } + + #[cfg(not(any(windows, target_os = "macos")))] + { + None + } } } diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index a2253d580f..d419cecca5 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -10,7 +10,7 @@ This document tracks performance benchmarks for Cap's playback and decoding syst |--------|--------|-----------| | Decoder Init | <200ms | - | | Decode Latency (p95) | <50ms | - | -| Effective FPS | ≥30 fps | ±2 fps | +| Effective FPS | ≥60 fps | ±2 fps | | Decode Jitter | <10ms | - | | A/V Sync (mic↔video) | <100ms | - | | A/V Sync (system↔video) | <100ms | - | @@ -62,6 +62,152 @@ cargo run -p cap-editor --example decode-benchmark -- --video /path/to/video.mp4 # With custom FPS and iterations cargo run -p cap-editor --example decode-benchmark -- --video /path/to/video.mp4 --fps 60 --iterations 50 + +# Increase seek sampling per distance for more stable tails +cargo run -p cap-editor --example decode-benchmark -- --video /path/to/video.mp4 --fps 60 --seek-iterations 20 + +# Includes duplicate-request burst stats (burst sizes 4/8/16) by default +cargo run -p cap-editor --example decode-benchmark -- --video 
/path/to/video.mp4 --fps 60 + +# Export decode benchmark rows to CSV for cross-machine analysis +cargo run -p cap-editor --example decode-benchmark -- --video /path/to/video.mp4 --fps 60 --seek-iterations 20 --output-csv /tmp/cap-decode-benchmark.csv + +# Add run labels for baseline/candidate grouping +cargo run -p cap-editor --example decode-benchmark -- --video /path/to/video.mp4 --fps 60 --output-csv /tmp/cap-decode-benchmark.csv --run-label windows-pass-1 + +# Summarize decode CSV rows grouped by run label and video +cargo run -p cap-editor --example decode-csv-report -- --csv /tmp/cap-decode-benchmark.csv + +# Compare baseline/candidate decode labels +cargo run -p cap-editor --example decode-csv-report -- --csv /tmp/cap-decode-benchmark.csv --baseline-label macos-pass-1 --candidate-label windows-pass-1 + +# Export decode summary/delta rows to CSV +cargo run -p cap-editor --example decode-csv-report -- --csv /tmp/cap-decode-benchmark.csv --baseline-label macos-pass-1 --candidate-label windows-pass-1 --output-csv /tmp/cap-decode-summary.csv +``` + +#### Playback Throughput Benchmark (Linux-compatible) + +```bash +# Simulate real-time playback deadlines from a single video +cargo run -p cap-editor --example playback-benchmark -- --video /path/to/video.mp4 --fps 60 --max-frames 600 + +# Optional audio duration comparison +cargo run -p cap-editor --example playback-benchmark -- --video /path/to/video.mp4 --audio /path/to/audio.ogg --fps 60 + +# Increase seek sample count for stable p95/max seek stats +cargo run -p cap-editor --example playback-benchmark -- --video /path/to/video.mp4 --fps 60 --max-frames 600 --seek-iterations 20 + +# Export playback throughput + seek samples to CSV +cargo run -p cap-editor --example playback-benchmark -- --video /path/to/video.mp4 --fps 60 --max-frames 600 --seek-iterations 20 --output-csv /tmp/cap-playback-benchmark.csv + +# Add run label for cross-machine baseline/candidate grouping +cargo run -p cap-editor --example 
playback-benchmark -- --video /path/to/video.mp4 --fps 60 --max-frames 600 --output-csv /tmp/cap-playback-benchmark.csv --run-label windows-pass-1 + +# Summarize playback CSV rows grouped by run label and video +cargo run -p cap-editor --example playback-csv-report -- --csv /tmp/cap-playback-benchmark.csv + +# Compare baseline/candidate playback labels +cargo run -p cap-editor --example playback-csv-report -- --csv /tmp/cap-playback-benchmark.csv --baseline-label macos-pass-1 --candidate-label windows-pass-1 + +# Export playback summary/delta rows to CSV +cargo run -p cap-editor --example playback-csv-report -- --csv /tmp/cap-playback-benchmark.csv --baseline-label macos-pass-1 --candidate-label windows-pass-1 --output-csv /tmp/cap-playback-summary.csv +``` + +#### Scrub Burst Benchmark (queue stress) + +```bash +# Simulate rapid scrub bursts and track latest-request latency +cargo run -p cap-editor --example scrub-benchmark -- --video /path/to/video.mp4 --fps 60 --bursts 20 --burst-size 12 --sweep-seconds 2.0 + +# Increase sweep distance to exercise medium/long seek-distance buckets explicitly +cargo run -p cap-editor --example scrub-benchmark -- --video /path/to/video.mp4 --fps 60 --bursts 20 --burst-size 12 --sweep-seconds 8.0 + +# Aggregate multiple runs (median across runs) for lower-variance comparisons +cargo run -p cap-editor --example scrub-benchmark -- --video /path/to/video.mp4 --fps 60 --bursts 10 --burst-size 12 --sweep-seconds 2.0 --runs 3 + +# For even run counts, aggregate medians now average the two middle runs +cargo run -p cap-editor --example scrub-benchmark -- --video /path/to/video.mp4 --fps 60 --bursts 10 --burst-size 12 --sweep-seconds 2.0 --runs 2 + +# Runtime tuning for FFmpeg scrub supersession heuristic +CAP_FFMPEG_SCRUB_SUPERSEDE_MIN_PIXELS=2000000 \ +CAP_FFMPEG_SCRUB_SUPERSEDE_MIN_REQUESTS=7 \ +CAP_FFMPEG_SCRUB_SUPERSEDE_MIN_SPAN_FRAMES=20 \ +cargo run -p cap-editor --example scrub-benchmark -- --video /path/to/video.mp4 + +# Tune 
latest-request-first activation thresholds independently +CAP_FFMPEG_SCRUB_LATEST_FIRST_MIN_REQUESTS=3 \ +CAP_FFMPEG_SCRUB_LATEST_FIRST_MIN_SPAN_FRAMES=30 \ +cargo run -p cap-editor --example scrub-benchmark -- --video /path/to/video.mp4 + +# Optionally gate latest-request-first ordering to higher-resolution streams +CAP_FFMPEG_SCRUB_LATEST_FIRST_MIN_PIXELS=2500000 \ +cargo run -p cap-editor --example scrub-benchmark -- --video /path/to/video.mp4 + +# Disable latest-request-first ordering for A/B comparisons +CAP_FFMPEG_SCRUB_LATEST_FIRST_DISABLED=1 \ +cargo run -p cap-editor --example scrub-benchmark -- --video /path/to/video.mp4 + +# Export per-run and aggregate scrub metrics to CSV +cargo run -p cap-editor --example scrub-benchmark -- --video /path/to/video.mp4 --runs 3 --output-csv /tmp/cap-scrub-benchmark.csv + +# Add explicit run label for cross-machine comparisons +cargo run -p cap-editor --example scrub-benchmark -- --video /path/to/video.mp4 --runs 3 --output-csv /tmp/cap-scrub-benchmark.csv --run-label windows-pass-1 + +# Summarize scrub CSV runs grouped by run label (includes short/medium/long seek p95 columns when present) +cargo run -p cap-editor --example scrub-csv-report -- --csv /tmp/cap-scrub-benchmark.csv + +# Compare two run labels directly +cargo run -p cap-editor --example scrub-csv-report -- --csv /tmp/cap-scrub-benchmark.csv --baseline-label macos-pass-1 --candidate-label windows-pass-1 + +# Export scrub summary/delta rows to CSV +cargo run -p cap-editor --example scrub-csv-report -- --csv /tmp/cap-scrub-benchmark.csv --baseline-label macos-pass-1 --candidate-label windows-pass-1 --output-csv /tmp/cap-scrub-summary.csv +``` + +#### Playback Startup Latency Report (log analysis) + +```bash +# Capture startup traces from desktop editor playback sessions +CAP_PLAYBACK_STARTUP_TRACE_FILE=/tmp/playback-startup.csv pnpm dev:desktop + +# Optional run label embedded in each CSV line +CAP_PLAYBACK_STARTUP_TRACE_FILE=/tmp/playback-startup.csv 
CAP_PLAYBACK_STARTUP_TRACE_RUN_ID=macos-pass-1 pnpm dev:desktop + +# Force legacy pre-rendered startup path for A/B startup comparisons +CAP_AUDIO_PRERENDER_ONLY=1 CAP_PLAYBACK_STARTUP_TRACE_FILE=/tmp/playback-startup.csv CAP_PLAYBACK_STARTUP_TRACE_RUN_ID=macos-prerender pnpm dev:desktop + +# Force streaming-only startup path (disables pre-render fallback) for startup comparisons +CAP_AUDIO_STREAMING_ONLY=1 CAP_PLAYBACK_STARTUP_TRACE_FILE=/tmp/playback-startup.csv CAP_PLAYBACK_STARTUP_TRACE_RUN_ID=macos-streaming-only pnpm dev:desktop + +# Parse startup timing logs captured from desktop editor sessions +cargo run -p cap-editor --example playback-startup-report -- --log /path/to/editor.log + +# Export startup metric summaries to CSV +cargo run -p cap-editor --example playback-startup-report -- --log /path/to/editor.log --output-csv /tmp/playback-startup-summary.csv + +# Filter startup CSV events to a specific labeled run id +cargo run -p cap-editor --example playback-startup-report -- --log /tmp/playback-startup.csv --run-id macos-pass-1 + +# List run-id sample counts discovered in startup CSV logs +cargo run -p cap-editor --example playback-startup-report -- --log /tmp/playback-startup.csv --list-runs + +# List per-run startup metric summaries (avg/p95 by callback, effective callback, path-selection events + audio startup mode classification) +cargo run -p cap-editor --example playback-startup-report -- --log /tmp/playback-startup.csv --list-run-metrics + +# Export run counts or run metrics to CSV (`run_metric_audio_path` rows include audio startup mode classification) +cargo run -p cap-editor --example playback-startup-report -- --log /tmp/playback-startup.csv --list-runs --output-csv /tmp/playback-startup-run-summary.csv +cargo run -p cap-editor --example playback-startup-report -- --log /tmp/playback-startup.csv --list-run-metrics --output-csv /tmp/playback-startup-run-summary.csv + +# Aggregate multiple session logs +cargo run -p cap-editor --example 
playback-startup-report -- --log /path/to/macos.log --log /path/to/windows.log + +# Compare candidate logs against baseline logs +cargo run -p cap-editor --example playback-startup-report -- --baseline-log /path/to/baseline.log --candidate-log /path/to/candidate.log + +# Compare specific labeled runs inside shared startup CSV traces +cargo run -p cap-editor --example playback-startup-report -- --baseline-log /tmp/playback-startup.csv --candidate-log /tmp/playback-startup.csv --baseline-run-id macos-pass-1 --candidate-run-id macos-pass-2 + +# Export baseline/candidate deltas to CSV +cargo run -p cap-editor --example playback-startup-report -- --baseline-log /tmp/playback-startup.csv --candidate-log /tmp/playback-startup.csv --baseline-run-id macos-pass-1 --candidate-run-id macos-pass-2 --output-csv /tmp/playback-startup-delta.csv ``` #### Combined Workflow (Recording → Playback) @@ -80,6 +226,1170 @@ cargo run -p cap-recording --example playback-test-runner -- full +### Benchmark Run: 2026-02-14 00:00:00 UTC (scrub even-run median aggregation fix) + +**Environment:** Linux runner, synthetic 1080p60 MP4 asset +**Change under test:** scrub benchmark median aggregation now uses true median for even run counts (average of middle two runs) + +#### Validation commands +- `cargo +1.88.0 test -p cap-editor --example scrub-benchmark --example scrub-csv-report` +- `cargo +1.88.0 run -p cap-editor --example scrub-benchmark -- --video /tmp/cap-bench-1080p60.mp4 --fps 60 --bursts 2 --burst-size 12 --sweep-seconds 2.0 --runs 2 --run-label linux-median-even-check --output-csv /tmp/cap-scrub-median-even.csv` + +#### Output validation highlights +- Per-run last-request averages: + - run 1: **172.02ms** + - run 2: **173.90ms** +- Reported aggregate last-request average: + - **172.96ms** +- Check: + - `(172.02 + 173.90) / 2 = 172.96` +- Result confirms even-run aggregation now reflects the midpoint between both runs rather than selecting only one run. 
+ +### Benchmark Run: 2026-02-14 00:00:00 UTC (latest-first min-pixels metadata capture) + +**Environment:** Linux runner, synthetic 1080p60 MP4 asset +**Change under test:** scrub CSV metadata now includes `CAP_FFMPEG_SCRUB_LATEST_FIRST_MIN_PIXELS` for unlabeled config-derived grouping + +#### Validation commands +- `CAP_FFMPEG_SCRUB_LATEST_FIRST_MIN_REQUESTS=3 CAP_FFMPEG_SCRUB_LATEST_FIRST_MIN_SPAN_FRAMES=30 CAP_FFMPEG_SCRUB_LATEST_FIRST_MIN_PIXELS=2500000 cargo +1.88.0 run -p cap-editor --example scrub-benchmark -- --video /tmp/cap-bench-1080p60.mp4 --fps 60 --bursts 2 --burst-size 12 --sweep-seconds 8.0 --runs 1 --output-csv /tmp/cap-scrub-latest-first-threshold-v2.csv` +- `cargo +1.88.0 run -p cap-editor --example scrub-csv-report -- --csv /tmp/cap-scrub-latest-first-threshold-v2.csv` + +#### Output validation highlights +- `scrub-csv-report` config-derived label now includes `latest_first_min_pixels=2500000`: + - `cfg(...,latest_first_min_requests=3,latest_first_min_span=30,latest_first_min_pixels=2500000,latest_first=default)` +- Confirms CSV schema + parser stay aligned for latest-first threshold metadata without requiring explicit run labels. 
+ +### Benchmark Run: 2026-02-14 00:00:00 UTC (latest-first threshold metadata capture) + +**Environment:** Linux runner, synthetic 1080p60 MP4 asset +**Change under test:** scrub benchmark/report CSV schema now persists latest-first threshold env values and includes them in unlabeled config-derived run labels + +#### Validation commands +- `cargo +1.88.0 test -p cap-rendering decoder::ffmpeg::tests:: --lib` +- `cargo +1.88.0 test -p cap-editor --example scrub-benchmark --example scrub-csv-report` +- `CAP_FFMPEG_SCRUB_LATEST_FIRST_MIN_REQUESTS=3 CAP_FFMPEG_SCRUB_LATEST_FIRST_MIN_SPAN_FRAMES=30 cargo +1.88.0 run -p cap-editor --example scrub-benchmark -- --video /tmp/cap-bench-1080p60.mp4 --fps 60 --bursts 3 --burst-size 12 --sweep-seconds 8.0 --runs 2 --output-csv /tmp/cap-scrub-latest-first-threshold.csv` +- `CAP_FFMPEG_SCRUB_LATEST_FIRST_DISABLED=1 CAP_FFMPEG_SCRUB_LATEST_FIRST_MIN_REQUESTS=3 CAP_FFMPEG_SCRUB_LATEST_FIRST_MIN_SPAN_FRAMES=30 cargo +1.88.0 run -p cap-editor --example scrub-benchmark -- --video /tmp/cap-bench-1080p60.mp4 --fps 60 --bursts 3 --burst-size 12 --sweep-seconds 8.0 --runs 2 --output-csv /tmp/cap-scrub-latest-first-threshold.csv` +- `cargo +1.88.0 run -p cap-editor --example scrub-csv-report -- --csv /tmp/cap-scrub-latest-first-threshold.csv` + +#### Output validation highlights +- Unlabeled rows now produce config-derived labels that include: + - `latest_first_min_requests=3` + - `latest_first_min_span=30` + - `latest_first={default|1}` +- This enables direct cross-platform grouping by latest-first threshold policy without custom run labels. 
+- Example parsed labels: + - `cfg(...,latest_first_min_requests=3,latest_first_min_span=30,latest_first=default)` + - `cfg(...,latest_first_min_requests=3,latest_first_min_span=30,latest_first=1)` + +### Benchmark Run: 2026-02-14 00:00:00 UTC (latest-request prioritization runtime toggle) + +**Environment:** Linux runner, synthetic 1080p60 MP4 asset +**Change under test:** runtime override `CAP_FFMPEG_SCRUB_LATEST_FIRST_DISABLED` for A/B validation of latest-request-first ordering in wide scrub bursts + +#### Validation commands +- `cargo +1.88.0 test -p cap-rendering decoder::ffmpeg::tests:: --lib` +- `cargo +1.88.0 test -p cap-editor --example scrub-benchmark --example scrub-csv-report` +- `cargo +1.88.0 run -p cap-editor --example scrub-benchmark -- --video /tmp/cap-bench-1080p60.mp4 --fps 60 --bursts 4 --burst-size 12 --sweep-seconds 8.0 --runs 3 --run-label linux-latest-first-toggle-enabled-r3 --output-csv /tmp/cap-scrub-distance-buckets.csv` +- `CAP_FFMPEG_SCRUB_LATEST_FIRST_DISABLED=1 cargo +1.88.0 run -p cap-editor --example scrub-benchmark -- --video /tmp/cap-bench-1080p60.mp4 --fps 60 --bursts 4 --burst-size 12 --sweep-seconds 8.0 --runs 3 --run-label linux-latest-first-toggle-disabled-r3 --output-csv /tmp/cap-scrub-distance-buckets.csv` +- `cargo +1.88.0 run -p cap-editor --example scrub-csv-report -- --csv /tmp/cap-scrub-distance-buckets.csv --baseline-label linux-latest-first-toggle-disabled-r3 --candidate-label linux-latest-first-toggle-enabled-r3` + +#### Medium-seek comparison (`sweep_seconds=8.0`, runs=3, candidate - baseline) +- Baseline `linux-latest-first-toggle-disabled-r3`: + - all-request avg **224.09ms**, p95 **518.35ms** + - last-request avg **244.52ms**, p95 **518.35ms** + - medium bucket p95 **465.57ms** +- Candidate `linux-latest-first-toggle-enabled-r3`: + - all-request avg **142.84ms**, p95 **429.08ms** + - last-request avg **112.48ms**, p95 **429.08ms** + - medium bucket p95 **429.08ms** +- Deltas: + - all-request avg **-81.26ms** + 
- all-request p95 **-89.27ms** + - last-request avg **-132.03ms** + - last-request p95 **-89.27ms** + - medium bucket p95 **-36.49ms** + +#### Result +- Keep latest-request-first ordering enabled by default. +- Keep `CAP_FFMPEG_SCRUB_LATEST_FIRST_DISABLED` as an explicit runtime fallback for platform-specific A/B sweeps and regression triage. + +### Benchmark Run: 2026-02-14 00:00:00 UTC (latest-request prioritization for non-collapsed scrub bursts) + +**Environment:** Linux runner, synthetic 1080p60 MP4 asset +**Change under test:** FFmpeg scrub queue now prioritizes the most recently requested frame first for wide-span pending bursts that do not collapse into single-request supersession + +#### Validation commands +- `cargo +1.88.0 test -p cap-rendering decoder::ffmpeg::tests:: --lib` +- `cargo +1.88.0 test -p cap-editor --example scrub-benchmark --example scrub-csv-report` +- `cargo +1.88.0 run -p cap-editor --example scrub-benchmark -- --video /tmp/cap-bench-1080p60.mp4 --fps 60 --bursts 4 --burst-size 12 --sweep-seconds 8.0 --runs 1 --run-label linux-latest-first-medium --output-csv /tmp/cap-scrub-distance-buckets.csv` +- `cargo +1.88.0 run -p cap-editor --example scrub-benchmark -- --video /tmp/cap-bench-1080p60.mp4 --fps 60 --bursts 6 --burst-size 12 --sweep-seconds 2.0 --runs 2 --run-label linux-latest-first-short --output-csv /tmp/cap-scrub-distance-buckets.csv` +- `cargo +1.88.0 run -p cap-editor --example scrub-csv-report -- --csv /tmp/cap-scrub-distance-buckets.csv --baseline-label linux-distance-medium --candidate-label linux-latest-first-medium` +- `cargo +1.88.0 run -p cap-editor --example scrub-csv-report -- --csv /tmp/cap-scrub-distance-buckets.csv --baseline-label linux-distance-metric --candidate-label linux-latest-first-short` + +#### Medium-seek profile comparison (`sweep_seconds=8.0`) +- Baseline label `linux-distance-medium`: + - all-request avg **231.84ms**, p95 **542.69ms** + - last-request avg **251.89ms**, p95 **542.69ms** + - medium 
bucket p95 **462.71ms** +- Candidate label `linux-latest-first-medium`: + - all-request avg **153.86ms**, p95 **449.43ms** + - last-request avg **117.32ms**, p95 **449.40ms** + - medium bucket p95 **449.70ms** +- Delta (`candidate - baseline`): + - all-request avg **-77.99ms** + - all-request p95 **-93.26ms** + - last-request avg **-134.57ms** + - last-request p95 **-93.29ms** + - medium bucket p95 **-13.01ms** + +#### Short-seek profile guardrail (`sweep_seconds=2.0`) +- Baseline label `linux-distance-metric` vs candidate label `linux-latest-first-short`: + - all-request avg delta **-1.95ms** + - all-request p95 delta **+0.49ms** + - last-request avg delta **-1.95ms** + - last-request p95 delta **+0.49ms** +- Result: short-seek profile remains effectively neutral while medium-seek burst responsiveness improves materially. + +### Benchmark Run: 2026-02-14 00:00:00 UTC (scrub seek-distance bucket metrics) + +**Environment:** Linux runner, synthetic 1080p60 MP4 asset +**Change under test:** scrub benchmark/report now emit seek-distance bucket latency metrics (`short`, `medium`, `long`) in console and CSV summaries + +#### Validation commands +- `cargo +1.88.0 test -p cap-editor --example scrub-csv-report --example scrub-benchmark` +- `cargo +1.88.0 run -p cap-editor --example scrub-benchmark -- --video /tmp/cap-bench-1080p60.mp4 --fps 60 --bursts 6 --burst-size 12 --sweep-seconds 2.0 --runs 2 --run-label linux-distance-metric --output-csv /tmp/cap-scrub-distance-buckets.csv` +- `cargo +1.88.0 run -p cap-editor --example scrub-benchmark -- --video /tmp/cap-bench-1080p60.mp4 --fps 60 --bursts 4 --burst-size 12 --sweep-seconds 8.0 --runs 1 --run-label linux-distance-medium --output-csv /tmp/cap-scrub-distance-buckets.csv` +- `cargo +1.88.0 run -p cap-editor --example scrub-csv-report -- --csv /tmp/cap-scrub-distance-buckets.csv --baseline-label linux-distance-metric --candidate-label linux-distance-medium` + +#### Scrub benchmark output highlights +- 
`linux-distance-metric` (`sweep_seconds=2.0`, runs=2): + - all-request avg **163.69ms**, p95 **434.06ms** + - seek buckets: + - short p95 **434.06ms** (134 successful) + - medium p95 **0.00ms** (0 successful) + - long p95 **434.06ms** (10 successful) +- `linux-distance-medium` (`sweep_seconds=8.0`, runs=1): + - all-request avg **231.84ms**, p95 **542.69ms** + - seek buckets: + - short p95 **542.69ms** (21 successful) + - medium p95 **462.71ms** (25 successful) + - long p95 **139.83ms** (2 successful) +- `scrub-csv-report` now prints and deltas short/medium/long seek p95 values: + - delta(short p95): **+108.63ms** + - delta(medium p95): **+462.71ms** + - delta(long p95): **-294.23ms** + +### Benchmark Run: 2026-02-14 00:00:00 UTC (scrub CSV export) + +**Environment:** Linux runner with synthetic 1080p60 and 4k60 MP4 assets +**Commands:** `scrub-benchmark --runs 2 --output-csv /tmp/cap-scrub-benchmark.csv` + +#### Scrub Burst Benchmark + CSV — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Successful requests: **192**, failures: **0** +- Median across 2 runs (all-request): avg **191.35ms**, p95 **430.23ms**, p99 **430.23ms**, max **450.58ms** +- Median across 2 runs (last-request): avg **290.53ms**, p95 **450.58ms**, p99 **450.58ms**, max **450.58ms** + +#### Scrub Burst Benchmark + CSV — 4k60 (`/tmp/cap-bench-4k60.mp4`) +- Successful requests: **192**, failures: **0** +- Median across 2 runs (all-request): avg **740.11ms**, p95 **1712.02ms**, p99 **1712.02ms**, max **1712.03ms** +- Median across 2 runs (last-request): avg **740.10ms**, p95 **1712.02ms**, p99 **1712.02ms**, max **1712.02ms** + +#### CSV Output +- Output file: `/tmp/cap-scrub-benchmark.csv` +- Rows emitted per invocation: + - one row per run (`scope=run`) + - one aggregate row (`scope=aggregate`) +- Captures runtime supersession env values alongside scrub latency metrics for easier cross-machine sweeps. 
+ +### Benchmark Run: 2026-02-14 00:00:00 UTC (startup report run-id filters) + +**Environment:** Linux runner, startup report parser validation +**Commands:** `playback-startup-report --run-id`, `cargo test -p cap-editor --example playback-startup-report` + +#### Startup Report Parser Validation +- Unit tests: **6 passed**, including: + - CSV parse with and without run-id column + - run-id filtering of startup metrics from mixed-run CSV lines +- CLI smoke run: + - `cargo run -p cap-editor --example playback-startup-report -- --log crates/editor/PLAYBACK-BENCHMARKS.md --run-id sample-run` + - Completed successfully with filtered metric output path active. + +### Benchmark Run: 2026-02-14 00:00:00 UTC (startup report run-id listing + strict filtering) + +**Environment:** Linux runner, startup report parser validation +**Commands:** `playback-startup-report --list-runs`, `playback-startup-report --run-id ...` + +#### Startup Report CLI Validation +- `--list-runs` mode prints grouped run-id sample counts from CSV traces. +- Requesting a `--run-id` with zero matched startup samples now exits with an explicit failure. +- Validation commands: + - `cargo run -p cap-editor --example playback-startup-report -- --log crates/editor/PLAYBACK-BENCHMARKS.md --list-runs` + - `cargo run -p cap-editor --example playback-startup-report -- --log crates/editor/PLAYBACK-BENCHMARKS.md --run-id missing-run` (expected non-zero exit) +- Unit tests remain green: `cargo test -p cap-editor --example playback-startup-report` (**6 passed**). 
+ +### Benchmark Run: 2026-02-14 00:00:00 UTC (startup report CSV export) + +**Environment:** Linux runner, startup report parser validation +**Commands:** `playback-startup-report --output-csv`, `cargo test -p cap-editor --example playback-startup-report` + +#### Startup Report CSV Validation +- Added CSV export for: + - aggregate startup metrics (`mode=aggregate`) + - baseline/candidate deltas (`mode=delta`) +- Unit tests now cover CSV row emission and delta summarization (**8 passed**). +- CLI smoke run: + - `cargo run -p cap-editor --example playback-startup-report -- --log crates/editor/PLAYBACK-BENCHMARKS.md --output-csv /tmp/playback-startup-summary.csv` + - output CSV schema verified with header row. + +### Benchmark Run: 2026-02-14 00:00:00 UTC (startup run-metrics listing) + +**Environment:** Linux runner, startup report parser validation +**Commands:** `playback-startup-report --list-run-metrics`, `cargo test -p cap-editor --example playback-startup-report` + +#### Validation +- Added `--list-run-metrics` mode to print per-run startup metric summaries (avg/p95/samples per event). +- Added CSV export support for `--list-runs` and `--list-run-metrics` modes. +- Unit tests now include run-metrics aggregation and run-mode CSV writer paths (**10 passed** total in example target). +- CLI smoke run: + - `cargo run -p cap-editor --example playback-startup-report -- --log crates/editor/PLAYBACK-BENCHMARKS.md --list-run-metrics` + - confirms mode execution path and empty-run handling output. + - `cargo run -p cap-editor --example playback-startup-report -- --log crates/editor/PLAYBACK-BENCHMARKS.md --list-runs --output-csv /tmp/playback-startup-run-export.csv` + - `cargo run -p cap-editor --example playback-startup-report -- --log crates/editor/PLAYBACK-BENCHMARKS.md --list-run-metrics --output-csv /tmp/playback-startup-run-export.csv` + - verified CSV header/output path is produced in no-run scenarios. 
+ +### Benchmark Run: 2026-02-14 00:00:00 UTC (supersession span retune to 20) + +**Environment:** Linux runner with synthetic 4k60 and 1080p60 MP4 assets +**Commands:** `scrub-benchmark --runs 3`, `playback-benchmark --seek-iterations 10`, `decode-benchmark --seek-iterations 10` +**Change under test:** default `CAP_FFMPEG_SCRUB_SUPERSEDE_MIN_SPAN_FRAMES` fallback changed from `25` to `20` + +#### 4k scrub span sweep before promoting new default +- Command family: + - `CAP_FFMPEG_SCRUB_SUPERSEDE_MIN_SPAN_FRAMES={15,20,25,30} scrub-benchmark --runs 3` +- Median last-request latency by span: + - **15**: avg **836.94ms**, p95 **1740.74ms** + - **20**: avg **814.93ms**, p95 **1743.49ms** + - **25**: avg **819.11ms**, p95 **1762.74ms** + - **30**: avg **923.18ms**, p95 **1947.86ms** +- Decision: promote span **20** as new default based on best median average and lower tail than span 25/30. + +#### Scrub Benchmark — default after retune (span=20) +- 4k60 (`/tmp/cap-bench-4k60.mp4`, runs=3): + - Median all-request: avg **832.56ms**, p95 **1732.40ms**, p99 **1732.40ms**, max **1732.41ms** + - Median last-request: avg **836.61ms**, p95 **1732.40ms**, p99 **1732.40ms**, max **1732.40ms** +- 1080p60 (`/tmp/cap-bench-1080p60.mp4`, runs=3): + - Median all-request: avg **222.58ms**, p95 **446.05ms**, p99 **472.21ms**, max **472.21ms** + - Median last-request: avg **326.36ms**, p95 **472.21ms**, p99 **472.21ms**, max **472.21ms** + +#### Regression checks after default retune +- Playback throughput: + - 1080p60: **60.24 fps**, missed deadlines **0**, decode p95 **2.24ms** + - 4k60: **60.18 fps**, missed deadlines **2**, decode p95 **9.67ms** +- Decode benchmark: + - 1080p random access avg **111.79ms**, p95 **337.65ms** + - 4k random access avg **509.26ms**, p95 **1451.87ms** +- Duplicate burst handling remained stable (0 failures for burst sizes 4/8/16). 
+ +### Benchmark Run: 2026-02-14 00:00:00 UTC (supersession min-pixels retune to 2,000,000) + +**Environment:** Linux runner with synthetic 1080p60 and 4k60 MP4 assets +**Commands:** `scrub-benchmark --runs 3`, `playback-benchmark --seek-iterations 10`, `decode-benchmark --seek-iterations 10` +**Change under test:** default `CAP_FFMPEG_SCRUB_SUPERSEDE_MIN_PIXELS` fallback changed from `3_686_400` to `2_000_000` + +#### Min-pixels threshold sweep (with span=20, min_requests=8) +- Baseline (`min_pixels=3_686_400`): + - 1080p median last-request avg **332.72ms**, p95 **480.45ms** + - 4k median last-request avg **855.08ms**, p95 **1769.64ms** +- Candidate (`min_pixels=2_000_000`): + - 1080p median last-request avg **213.36ms**, p95 **449.62ms** + - 4k median last-request avg **814.28ms**, p95 **1716.14ms** +- Decision: promote `min_pixels=2_000_000` as new default; it materially improves 1080p scrub responsiveness while also tightening 4k tails. + +#### Scrub Benchmark — default after retune +- 1080p60 (`/tmp/cap-bench-1080p60.mp4`, runs=3): + - Median all-request: avg **199.10ms**, p95 **429.83ms**, p99 **429.83ms**, max **429.83ms** + - Median last-request: avg **200.14ms**, p95 **429.83ms**, p99 **429.83ms**, max **429.83ms** +- 4k60 (`/tmp/cap-bench-4k60.mp4`, runs=3): + - Median all-request: avg **829.97ms**, p95 **1718.54ms**, p99 **1718.55ms**, max **1718.55ms** + - Median last-request: avg **834.23ms**, p95 **1718.54ms**, p99 **1718.54ms**, max **1718.54ms** + +#### Regression checks after default retune +- Playback throughput: + - 1080p60: **60.23 fps**, missed deadlines **0**, decode p95 **2.29ms** + - 4k60: **60.19 fps**, missed deadlines **1**, decode p95 **7.72ms** +- Decode benchmark: + - 1080p random access avg **116.73ms**, p95 **369.84ms** + - 4k random access avg **522.27ms**, p95 **1514.02ms** + - follow-up 4k run: random access avg **537.60ms** and **522.27ms** (variance envelope maintained) + +### Benchmark Run: 2026-02-14 00:00:00 UTC (scrub CSV 
run-label tagging) + +**Environment:** Linux runner, synthetic 1080p60 MP4 +**Command:** `scrub-benchmark --runs 2 --output-csv /tmp/cap-scrub-labeled.csv --run-label linux-pass-a` + +#### Result +- Successful requests: **144**, failures: **0** +- Median all-request latency: avg **199.01ms**, p95 **410.34ms** +- Median last-request latency: avg **213.93ms**, p95 **410.34ms** +- CSV output now includes `run_label` column across run and aggregate rows, enabling direct cross-machine merge and grouping. + +### Benchmark Run: 2026-02-14 00:00:00 UTC (supersession min-requests retune to 7) + +**Environment:** Linux runner with synthetic 1080p60 and 4k60 MP4 assets +**Commands:** `scrub-benchmark --runs 3`, `playback-benchmark --seek-iterations 10`, `decode-benchmark --seek-iterations 10` +**Change under test:** default `CAP_FFMPEG_SCRUB_SUPERSEDE_MIN_REQUESTS` fallback changed from `8` to `7` + +#### Min-requests threshold sweep (with span=20, min_pixels=2_000_000) +- Sequential sweep command family: + - `CAP_FFMPEG_SCRUB_SUPERSEDE_MIN_REQUESTS={6,7,8} scrub-benchmark --runs 3` +- 1080p median last-request latency: + - **6**: avg **209.99ms**, p95 **444.08ms** + - **7**: avg **211.36ms**, p95 **447.60ms** + - **8**: avg **209.11ms**, p95 **441.08ms** +- 4k median last-request latency: + - **6**: avg **827.29ms**, p95 **1707.63ms** + - **7**: avg **823.15ms**, p95 **1699.04ms** + - **8**: avg **884.74ms**, p95 **1837.32ms** +- Decision: promote `min_requests=7` as the best cross-resolution compromise, preserving 1080p performance while significantly improving 4k tails over `8`. 
+ +#### Scrub Benchmark — default after retune (`min_requests=7`) +- 1080p60 (`/tmp/cap-bench-1080p60.mp4`, runs=3): + - Median all-request: avg **204.34ms**, p95 **432.90ms**, p99 **432.91ms**, max **432.91ms** + - Median last-request: avg **205.46ms**, p95 **432.90ms**, p99 **432.90ms**, max **432.90ms** +- 4k60 (`/tmp/cap-bench-4k60.mp4`, runs=3): + - Median all-request: avg **820.91ms**, p95 **1712.30ms**, p99 **1712.30ms**, max **1712.31ms** + - Median last-request: avg **825.01ms**, p95 **1712.30ms**, p99 **1712.30ms**, max **1712.30ms** + +#### Regression checks after default retune +- Playback throughput: + - 1080p60: **60.24 fps**, missed deadlines **0**, decode p95 **2.14ms** + - 4k60: **60.20 fps**, missed deadlines **0**, decode p95 **8.82ms** +- Decode benchmark: + - 1080p random access avg **115.49ms**, p95 **350.30ms** + - 4k random access avg **511.55ms**, p95 **1394.69ms** + +### Benchmark Run: 2026-02-14 00:00:00 UTC (span threshold recheck after default retunes) + +**Environment:** Linux runner with synthetic 1080p60 and 4k60 MP4 assets +**Commands:** `CAP_FFMPEG_SCRUB_SUPERSEDE_MIN_SPAN_FRAMES={15,20,25} scrub-benchmark --runs 3` +**Context:** defaults already retuned to `min_requests=7`, `min_pixels=2_000_000` + +#### Span sweep medians (last-request latency) +- 1080p: + - **15**: avg **216.43ms**, p95 **457.45ms** + - **20**: avg **209.63ms**, p95 **442.04ms** + - **25**: avg **213.84ms**, p95 **447.71ms** +- 4k: + - **15**: avg **862.02ms**, p95 **1789.73ms** + - **20**: avg **860.43ms**, p95 **1761.25ms** + - **25**: avg **866.03ms**, p95 **1781.42ms** + +#### Decision +- Keep `CAP_FFMPEG_SCRUB_SUPERSEDE_MIN_SPAN_FRAMES` default at **20**. +- Candidate spans 15 and 25 were rejected; neither improved both 1080p and 4k tails versus 20 under the new defaults. 
+ +### Benchmark Run: 2026-02-14 00:00:00 UTC (fine span sweep 18/20/22, rejected span 22) + +**Environment:** Linux runner with synthetic 1080p60 and 4k60 MP4 assets +**Commands:** `scrub-benchmark --runs 3`, `scrub-csv-report --baseline-label span20 --candidate-label span22` + +#### Fine sweep medians (single-pass) +- 1080p: + - **18**: avg **303.40ms**, p95 **665.16ms** + - **20**: avg **214.65ms**, p95 **434.74ms** + - **22**: avg **210.83ms**, p95 **442.55ms** +- 4k: + - **18**: avg **897.87ms**, p95 **1891.21ms** + - **20**: avg **967.04ms**, p95 **1897.05ms** + - **22**: avg **829.73ms**, p95 **1714.74ms** + +#### Paired span20 vs span22 labeled sweep +- Using `/tmp/cap-scrub-span-20-22.csv` with run labels: + - 1080p delta (22-20): all_avg **-0.34ms**, all_p95 **+24.13ms**, last_avg **-0.15ms**, last_p95 **+24.13ms** + - 4k delta (22-20): all_avg **-64.97ms**, all_p95 **-227.95ms**, last_avg **-78.37ms**, last_p95 **-296.82ms** + +#### Validation pass on temporary default-22 branch state +- Scrub medians: + - 1080p last-request avg **203.87ms**, p95 **435.18ms** + - 4k last-request avg **847.32ms**, p95 **1797.10ms** +- Playback regression sample: + - 4k effective fps **60.14** with missed deadlines **4** +- Decode regression sample: + - 4k random access avg **511.57ms**, p95 **1456.64ms** + +#### Decision +- Rejected promoting span **22** as default due to inconsistent 4k tail behavior across repeated runs and a noisier playback regression sample. +- Keep default `CAP_FFMPEG_SCRUB_SUPERSEDE_MIN_SPAN_FRAMES` at **20** for stability. + +### Benchmark Run: 2026-02-14 00:00:00 UTC (scrub CSV report tooling) + +**Environment:** Linux runner, CSV analysis utility validation +**Commands:** `scrub-csv-report`, `cargo test -p cap-editor --example scrub-csv-report` + +#### Validation +- New utility parses scrub benchmark CSV aggregate rows and reports median summaries by run label + video. 
+- Empty run labels now automatically fall back to a derived config label (`min_pixels`, `min_requests`, `min_span`, `disabled`, `latest_first_min_requests`, `latest_first_min_span`, `latest_first_min_pixels`, `latest_first`) so unlabeled sweeps remain distinguishable. +- Smoke run against labeled CSV: + - `cargo run -p cap-editor --example scrub-csv-report -- --csv /tmp/cap-scrub-labeled.csv --label linux-pass-a` + - output summary: + - all_avg **199.01ms** + - last_avg **213.93ms** + - successful **144**, failed **0** +- Unit tests: **5 passed** (`parses_aggregate_csv_line`, `falls_back_to_config_label_when_run_label_missing`, `summarizes_medians`, `groups_rows_by_label_and_video`, `writes_summary_and_delta_csv_rows`). + +### Benchmark Run: 2026-02-14 00:00:00 UTC (scrub CSV report export) + +**Environment:** Linux runner, scrub CSV analysis utility validation +**Commands:** `scrub-csv-report --output-csv`, `cargo test -p cap-editor --example scrub-csv-report` + +#### Validation +- Added `--output-csv` to write summary and delta rows for downstream reporting. +- Smoke run: + - `cargo run -p cap-editor --example scrub-csv-report -- --csv /tmp/cap-scrub-span-20-22.csv --baseline-label span20 --candidate-label span22 --output-csv /tmp/cap-scrub-summary.csv` + - output CSV includes: + - summary rows per `(label, video)` + - delta rows per overlapping video +- Utility tests remain green (**5 passed**). 
+ +### Benchmark Run: 2026-02-14 00:00:00 UTC (playback benchmark CSV export) + +**Environment:** Linux runner, synthetic 1080p60 MP4 +**Command:** `playback-benchmark --seek-iterations 10 --output-csv /tmp/cap-playback-benchmark.csv --run-label linux-pass-a` + +#### Result +- Sequential row: + - effective fps **60.23** + - decoded **240**, failed **0**, missed deadlines **0** + - decode avg/p95/p99/max: **1.36 / 2.57 / 4.27 / 5.25ms** +- Seek rows emitted for sampled distances: + - 0.5s avg/p95 **48.31 / 97.18ms** + - 1.0s avg/p95 **69.16 / 148.41ms** + - 2.0s avg/p95 **149.21 / 364.12ms** + - 5.0s avg/p95 **237.82 / 377.19ms** +- CSV output includes `mode=sequential` and `mode=seek` rows with shared run label for downstream aggregation. + +### Benchmark Run: 2026-02-14 00:00:00 UTC (playback CSV report tooling) + +**Environment:** Linux runner, playback CSV analysis utility validation +**Commands:** `playback-csv-report`, `cargo test -p cap-editor --example playback-csv-report` + +#### Validation +- New utility parses playback benchmark CSV sequential + seek rows and groups them by `(run_label, video)`. +- Reports median sequential metrics: + - effective FPS + - decode p95 + - missed deadlines +- Reports median seek metrics per distance: + - seek avg/p95/max + - aggregated seek sample/failure counts +- Supports baseline/candidate run-label deltas across overlapping videos and seek distances. +- Supports `--output-csv` for summary and delta row export. 
+- Smoke runs: + - `cargo run -p cap-editor --example playback-csv-report -- --csv /tmp/cap-playback-benchmark.csv --label linux-pass-a` + - `cargo run -p cap-editor --example playback-csv-report -- --csv /tmp/cap-playback-benchmark.csv --baseline-label linux-pass-a --candidate-label linux-pass-b --output-csv /tmp/cap-playback-summary.csv` +- Unit tests: **5 passed** (`parses_sequential_csv_line`, `parses_seek_csv_line`, `summarizes_sequential_and_seek_medians`, `groups_rows_by_label_and_video`, `writes_summary_and_delta_csv_rows`). + +### Benchmark Run: 2026-02-14 00:00:00 UTC (rejected adaptive FFmpeg seek-window scaling) + +**Environment:** Linux runner with synthetic 1080p60 and 4k60 MP4 assets +**Commands:** `decode-benchmark --seek-iterations 10`, `playback-benchmark --seek-iterations 10` +**Change under test:** adaptive preferred seek window scaling based on forward seek distance in `cap-video-decode` FFmpeg reset path + +#### Decode benchmark results +- 1080p: + - seek avg/p95: + - 0.5s: **47.42 / 93.91ms** + - 1.0s: **72.91 / 158.23ms** + - 2.0s: **163.64 / 370.53ms** + - 5.0s: **232.74 / 371.60ms** + - random access avg/p95: **115.74 / 343.67ms** +- 4k: + - seek avg/p95: + - 0.5s: **191.74 / 382.95ms** + - 1.0s: **322.25 / 621.32ms** + - 2.0s: **606.82 / 1445.27ms** + - 5.0s: **1068.25 / 1734.44ms** + - random access avg/p95: **486.56 / 1407.30ms** + +#### Playback throughput regression checks +- 1080p: **60.23 fps**, missed deadlines **0** +- 4k: **60.15 fps**, missed deadlines **1** +- 4k seek avg/p95: + - 0.5s: **208.79 / 365.07ms** + - 1.0s: **362.89 / 734.88ms** + - 2.0s: **621.30 / 1482.17ms** + - 5.0s: **1007.51 / 1663.69ms** + +#### Decision +- Rejected. While throughput remained ~60fps, long-distance seek tails regressed compared with the current default seek-window profile. 
+ +### Benchmark Run: 2026-02-14 00:00:00 UTC + +**Environment:** Linux runner with synthetic 1080p60 and 4k60 MP4 assets +**Commands:** `decode-benchmark` and `playback-benchmark` + +#### Decode Benchmark — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Decoder init: **6.09ms** +- Sequential decode: **401.9 fps**, avg **2.49ms**, p95 **~2.34ms** +- Seek latency: 0.5s **1.88ms**, 1.0s **1.83ms**, 2.0s **260.87ms**, 5.0s **102.36ms** +- Random access: avg **223.27ms**, p95 **398.42ms**, p99 **443.68ms** + +#### Decode Benchmark — 4k60 (`/tmp/cap-bench-4k60.mp4`) +- Decoder init: **28.65ms** +- Sequential decode: **99.4 fps**, avg **10.06ms**, p95 **~8.35ms** +- Seek latency: 0.5s **6.61ms**, 1.0s **6.73ms**, 2.0s **905.03ms**, 5.0s **442.71ms** +- Random access: avg **918.05ms**, p95 **1620.94ms**, p99 **2084.36ms** + +#### Playback Throughput Benchmark — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Target: **60 fps**, budget **16.67ms** +- Decoded: **480/480**, failures **0** +- Missed deadlines: **0** +- Effective FPS: **60.11** +- Decode: avg **1.23ms**, p95 **2.34ms**, p99 **2.44ms**, max **4.76ms** +- Seek samples: 0.5s **104.51ms**, 1.0s **90.83ms**, 2.0s **144.89ms**, 5.0s **98.70ms** + +#### Playback Throughput Benchmark — 4k60 (`/tmp/cap-bench-4k60.mp4`) +- Target: **60 fps**, budget **16.67ms** +- Decoded: **480/480**, failures **0** +- Missed deadlines: **2** +- Effective FPS: **60.11** +- Decode: avg **5.54ms**, p95 **8.35ms**, p99 **12.69ms**, max **17.10ms** +- Seek samples: 0.5s **266.92ms**, 1.0s **306.19ms**, 2.0s **570.41ms**, 5.0s **442.48ms** + +### Benchmark Run: 2026-02-14 00:00:00 UTC (FFmpeg seek reset tuning) + +**Environment:** Linux runner with synthetic 1080p60 and 4k60 MP4 assets +**Commands:** `decode-benchmark` and `playback-benchmark` +**Change under test:** FFmpeg decoder reset now uses forward seek window before fallback seek + +#### Decode Benchmark — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Decoder init: **6.58ms** +- Sequential decode: 
**367.9 fps**, avg **2.72ms** +- Seek latency: 0.5s **1.88ms**, 1.0s **1.73ms**, 2.0s **5.26ms**, 5.0s **115.42ms** +- Random access: avg **120.87ms**, p95 **366.01ms**, p99 **391.53ms** + +#### Decode Benchmark — 4k60 (`/tmp/cap-bench-4k60.mp4`) +- Decoder init: **32.65ms** +- Sequential decode: **88.0 fps**, avg **11.36ms** +- Seek latency: 0.5s **7.52ms**, 1.0s **7.76ms**, 2.0s **12.65ms**, 5.0s **679.52ms** +- Random access: avg **533.65ms**, p95 **1520.65ms**, p99 **1636.44ms** + +#### Playback Throughput Benchmark — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Target: **60 fps**, budget **16.67ms** +- Decoded: **480/480**, failures **0** +- Missed deadlines: **0** +- Effective FPS: **60.11** +- Decode: avg **1.33ms**, p95 **2.45ms**, p99 **2.51ms**, max **3.99ms** +- Seek samples: 0.5s **11.89ms**, 1.0s **2.71ms**, 2.0s **2.81ms**, 5.0s **138.26ms** + +#### Playback Throughput Benchmark — 4k60 (`/tmp/cap-bench-4k60.mp4`) +- Target: **60 fps**, budget **16.67ms** +- Decoded: **480/480**, failures **0** +- Missed deadlines: **1** +- Effective FPS: **60.11** +- Decode: avg **5.41ms**, p95 **7.93ms**, p99 **11.18ms**, max **18.70ms** +- Seek samples: 0.5s **30.06ms**, 1.0s **9.43ms**, 2.0s **9.15ms**, 5.0s **432.97ms** + +### Benchmark Run: 2026-02-14 00:00:00 UTC (FFmpeg long-seek tuning pass 2) + +**Environment:** Linux runner with synthetic 1080p60 and 4k60 MP4 assets +**Commands:** `decode-benchmark` and `playback-benchmark` +**Change under test:** narrower backtrack window for forward seeks with near-target keyframe preference + +#### Decode Benchmark — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Decoder init: **6.18ms** +- Sequential decode: **403.6 fps**, avg **2.48ms** +- Seek latency: 0.5s **1.78ms**, 1.0s **1.79ms**, 2.0s **7.05ms**, 5.0s **142.01ms** +- Random access: avg **114.64ms**, p95 **351.09ms**, p99 **378.21ms** + +#### Decode Benchmark — 4k60 (`/tmp/cap-bench-4k60.mp4`) +- Decoder init: **29.37ms** +- Sequential decode: **105.9 fps**, avg 
**9.44ms** +- Seek latency: 0.5s **6.50ms**, 1.0s **6.53ms**, 2.0s **11.20ms**, 5.0s **559.44ms** +- Random access: avg **525.90ms**, p95 **1489.77ms**, p99 **1628.36ms** + +#### Playback Throughput Benchmark — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Target: **60 fps**, budget **16.67ms** +- Decoded: **480/480**, failures **0** +- Missed deadlines: **0** +- Effective FPS: **60.11** +- Decode: avg **1.21ms**, p95 **2.26ms**, p99 **2.35ms**, max **4.11ms** +- Seek samples: 0.5s **11.39ms**, 1.0s **2.75ms**, 2.0s **2.55ms**, 5.0s **138.90ms** + +#### Playback Throughput Benchmark — 4k60 (`/tmp/cap-bench-4k60.mp4`) +- Target: **60 fps**, budget **16.67ms** +- Decoded: **480/480**, failures **0** +- Missed deadlines: **1** +- Effective FPS: **60.11** +- Decode: avg **4.76ms**, p95 **7.41ms**, p99 **9.82ms**, max **15.94ms** +- Seek samples: 0.5s **29.80ms**, 1.0s **9.01ms**, 2.0s **8.80ms**, 5.0s **410.35ms** + +### Benchmark Run: 2026-02-14 00:00:00 UTC (FFmpeg long-seek tuning pass 3) + +**Environment:** Linux runner with synthetic 1080p60 and 4k60 MP4 assets +**Commands:** `decode-benchmark` and `playback-benchmark` +**Change under test:** seek fallback order adjusted (preferred -> legacy backward -> wide window) + +#### Decode Benchmark — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Decoder init: **5.91ms** +- Sequential decode: **393.7 fps**, avg **2.54ms** +- Seek latency: 0.5s **2.04ms**, 1.0s **1.71ms**, 2.0s **4.61ms**, 5.0s **110.27ms** +- Random access: avg **119.53ms**, p95 **364.02ms**, p99 **404.91ms** + +#### Decode Benchmark — 4k60 (`/tmp/cap-bench-4k60.mp4`) +- Decoder init: **29.08ms** +- Sequential decode: **104.1 fps**, avg **9.60ms** +- Seek latency: 0.5s **6.72ms**, 1.0s **6.76ms**, 2.0s **11.48ms**, 5.0s **569.83ms** +- Random access: avg **516.48ms**, p95 **1505.44ms**, p99 **1566.39ms** + +#### Playback Throughput Benchmark — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Target: **60 fps**, budget **16.67ms** +- Decoded: **480/480**, failures **0** 
+- Missed deadlines: **0** +- Effective FPS: **60.11** +- Decode: avg **1.27ms**, p95 **2.33ms**, p99 **2.42ms**, max **3.74ms** +- Seek samples: 0.5s **12.01ms**, 1.0s **2.68ms**, 2.0s **2.80ms**, 5.0s **144.54ms** + +#### Playback Throughput Benchmark — 4k60 (`/tmp/cap-bench-4k60.mp4`) +- Target: **60 fps**, budget **16.67ms** +- Decoded: **480/480**, failures **0** +- Missed deadlines: **0** +- Effective FPS: **60.12** +- Decode: avg **4.95ms**, p95 **7.57ms**, p99 **10.04ms**, max **14.18ms** +- Seek samples: 0.5s **30.56ms**, 1.0s **9.45ms**, 2.0s **8.94ms**, 5.0s **430.25ms** + +### Benchmark Run: 2026-02-14 00:00:00 UTC (Startup instrumentation pass) + +**Environment:** Linux runner with synthetic 1080p60 and 4k60 MP4 assets +**Commands:** `playback-benchmark` +**Change under test:** startup timeline instrumentation for first decoded frame, first rendered frame, and audio callback origin aligned to playback start + +#### Playback Throughput Benchmark — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Target: **60 fps**, budget **16.67ms** +- Decoded: **480/480**, failures **0** +- Missed deadlines: **0** +- Effective FPS: **60.11** +- Decode: avg **1.28ms**, p95 **2.51ms**, p99 **2.63ms**, max **4.70ms** +- Seek samples: 0.5s **14.63ms**, 1.0s **2.68ms**, 2.0s **2.87ms**, 5.0s **145.33ms** + +#### Playback Throughput Benchmark — 4k60 (`/tmp/cap-bench-4k60.mp4`) +- Target: **60 fps**, budget **16.67ms** +- Decoded: **480/480**, failures **0** +- Missed deadlines: **1** +- Effective FPS: **60.11** +- Decode: avg **5.54ms**, p95 **8.09ms**, p99 **11.25ms**, max **15.17ms** +- Seek samples: 0.5s **41.73ms**, 1.0s **9.75ms**, 2.0s **8.98ms**, 5.0s **451.74ms** + +### Benchmark Run: 2026-02-14 00:00:00 UTC (Seek benchmark methodology hardening) + +**Environment:** Linux runner with synthetic 1080p60 and 4k60 MP4 assets +**Commands:** `decode-benchmark` and `playback-benchmark` with `--seek-iterations 10` +**Change under test:** benchmark seek sampling now uses varied 
start positions per iteration and reports avg/p95/max tails + +#### Decode Benchmark — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Decoder init: **6.93ms** +- Sequential decode: **393.9 fps**, avg **2.54ms** +- Seek latency (avg / p95 / max): + - 0.5s: **47.25 / 92.23 / 92.23ms** + - 1.0s: **69.24 / 144.81 / 144.81ms** + - 2.0s: **151.47 / 375.69 / 375.69ms** + - 5.0s: **237.30 / 379.66 / 379.66ms** +- Random access: avg **115.46ms**, p95 **351.75ms**, p99 **386.64ms** + +#### Decode Benchmark — 4k60 (`/tmp/cap-bench-4k60.mp4`) +- Decoder init: **30.88ms** +- Sequential decode: **100.4 fps**, avg **9.96ms** +- Seek latency (avg / p95 / max): + - 0.5s: **195.41 / 369.35 / 369.35ms** + - 1.0s: **333.83 / 671.86 / 671.86ms** + - 2.0s: **584.19 / 1421.40 / 1421.40ms** + - 5.0s: **925.07 / 1474.59 / 1474.59ms** +- Random access: avg **539.69ms**, p95 **1467.07ms**, p99 **1667.76ms** + +#### Playback Throughput Benchmark — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Target: **60 fps**, budget **16.67ms** +- Decoded: **240/240**, failures **0** +- Missed deadlines: **0** +- Effective FPS: **60.24** +- Decode: avg **1.17ms**, p95 **2.22ms**, p99 **2.61ms**, max **3.71ms** +- Seek latency (avg / p95 / max): + - 0.5s: **47.74 / 104.77 / 104.77ms** + - 1.0s: **68.99 / 142.64 / 142.64ms** + - 2.0s: **155.51 / 367.99 / 367.99ms** + - 5.0s: **231.63 / 372.21 / 372.21ms** + +#### Playback Throughput Benchmark — 4k60 (`/tmp/cap-bench-4k60.mp4`) +- Target: **60 fps**, budget **16.67ms** +- Decoded: **240/240**, failures **0** +- Missed deadlines: **0** +- Effective FPS: **60.13** +- Decode: avg **5.13ms**, p95 **7.60ms**, p99 **11.15ms**, max **12.78ms** +- Seek latency (avg / p95 / max): + - 0.5s: **202.75 / 361.23 / 361.23ms** + - 1.0s: **320.26 / 617.03 / 617.03ms** + - 2.0s: **589.11 / 1424.54 / 1424.54ms** + - 5.0s: **926.16 / 1460.47 / 1460.47ms** + +### Benchmark Run: 2026-02-14 00:00:00 UTC (Decoder duplicate-request coalescing) + +**Environment:** Linux runner with synthetic 
1080p60 and 4k60 MP4 assets +**Commands:** `decode-benchmark` and `playback-benchmark` with `--seek-iterations 10` +**Change under test:** FFmpeg decoder request batches now coalesce same-frame requests into a single decode result fan-out + +#### Decode Benchmark — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Decoder init: **6.80ms** +- Sequential decode: **385.6 fps**, avg **2.59ms** +- Seek latency (avg / p95 / max): + - 0.5s: **46.92 / 89.95 / 89.95ms** + - 1.0s: **70.08 / 147.40 / 147.40ms** + - 2.0s: **153.93 / 373.48 / 373.48ms** + - 5.0s: **251.75 / 419.44 / 419.44ms** +- Random access: avg **125.70ms**, p95 **376.36ms**, p99 **426.63ms** + +#### Decode Benchmark — 4k60 (`/tmp/cap-bench-4k60.mp4`) +- Decoder init: **30.79ms** +- Sequential decode: **103.4 fps**, avg **9.67ms** +- Seek latency (avg / p95 / max): + - 0.5s: **197.39 / 395.30 / 395.30ms** + - 1.0s: **351.40 / 730.65 / 730.65ms** + - 2.0s: **613.21 / 1398.75 / 1398.75ms** + - 5.0s: **900.60 / 1467.33 / 1467.33ms** +- Random access: avg **517.34ms**, p95 **1493.69ms**, p99 **1622.08ms** + +#### Playback Throughput Benchmark — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Target: **60 fps**, budget **16.67ms** +- Decoded: **240/240**, failures **0** +- Missed deadlines: **0** +- Effective FPS: **60.24** +- Decode: avg **1.21ms**, p95 **2.14ms**, p99 **2.23ms**, max **3.63ms** +- Seek latency (avg / p95 / max): + - 0.5s: **46.02 / 92.97 / 92.97ms** + - 1.0s: **68.15 / 142.22 / 142.22ms** + - 2.0s: **146.18 / 356.46 / 356.46ms** + - 5.0s: **232.73 / 379.79 / 379.79ms** + +#### Playback Throughput Benchmark — 4k60 (`/tmp/cap-bench-4k60.mp4`) +- Target: **60 fps**, budget **16.67ms** +- Decoded: **240/240**, failures **0** +- Missed deadlines: **0** +- Effective FPS: **60.20** +- Decode: avg **4.81ms**, p95 **7.59ms**, p99 **12.31ms**, max **13.54ms** +- Seek latency (avg / p95 / max): + - 0.5s: **201.18 / 362.15 / 362.15ms** + - 1.0s: **332.09 / 662.63 / 662.63ms** + - 2.0s: **584.79 / 1411.56 / 1411.56ms** 
+ - 5.0s: **1012.17 / 1722.61 / 1722.61ms** + +### Benchmark Run: 2026-02-14 00:00:00 UTC (Duplicate burst metric stabilization) + +**Environment:** Linux runner with synthetic 1080p60 and 4k60 MP4 assets +**Commands:** `decode-benchmark --seek-iterations 10` +**Change under test:** duplicate-request burst benchmark now includes warmup seek to remove first-request cold-start distortion + +#### Decode Benchmark — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Decoder init: **7.31ms** +- Sequential decode: **392.4 fps**, avg **2.55ms** +- Seek latency (avg / p95 / max): + - 0.5s: **45.99 / 87.99 / 87.99ms** + - 1.0s: **69.52 / 146.76 / 146.76ms** + - 2.0s: **148.12 / 359.00 / 359.00ms** + - 5.0s: **231.81 / 375.66 / 375.66ms** +- Random access: avg **115.46ms**, p95 **352.45ms**, p99 **378.86ms** +- Duplicate burst batch avg / p95: + - burst 4: **3.68 / 3.84ms** + - burst 8: **3.68 / 3.74ms** + - burst 16: **2.33 / 3.69ms** + +#### Decode Benchmark — 4k60 (`/tmp/cap-bench-4k60.mp4`) +- Decoder init: **30.03ms** +- Sequential decode: **94.3 fps**, avg **10.61ms** +- Seek latency (avg / p95 / max): + - 0.5s: **188.28 / 356.06 / 356.06ms** + - 1.0s: **337.66 / 681.87 / 681.87ms** + - 2.0s: **635.27 / 1455.41 / 1455.41ms** + - 5.0s: **922.75 / 1510.31 / 1510.31ms** +- Random access: avg **527.08ms**, p95 **1481.91ms**, p99 **1649.11ms** +- Duplicate burst batch avg / p95: + - burst 4: **21.25 / 21.98ms** + - burst 8: **21.76 / 21.95ms** + - burst 16: **16.89 / 21.72ms** + +### Benchmark Run: 2026-02-14 00:00:00 UTC (Scrub burst queue stress baseline) + +**Environment:** Linux runner with synthetic 1080p60 and 4k60 MP4 assets +**Command:** `scrub-benchmark --bursts 20 --burst-size 12 --sweep-seconds 2.0` +**Goal:** measure latest-request latency under rapid scrub-like request bursts + +#### Scrub Burst Benchmark — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Requests: **240 success / 0 failures** +- All-request latency: avg **217.97ms**, p95 **434.83ms**, p99 **455.72ms**, max 
**461.85ms** +- Last-request-in-burst latency: avg **312.50ms**, p95 **455.72ms**, p99 **461.85ms**, max **461.85ms** + +#### Scrub Burst Benchmark — 4k60 (`/tmp/cap-bench-4k60.mp4`) +- Requests: **240 success / 0 failures** +- All-request latency: avg **1071.64ms**, p95 **2098.98ms**, p99 **2204.29ms**, max **2204.29ms** +- Last-request-in-burst latency: avg **1524.00ms**, p95 **2116.35ms**, p99 **2204.29ms**, max **2204.29ms** + +### Benchmark Run: 2026-02-14 00:00:00 UTC (Scrub supersession heuristic pass) + +**Environment:** Linux runner with synthetic 1080p60 and 4k60 MP4 assets +**Commands:** `scrub-benchmark`, `decode-benchmark`, `playback-benchmark` +**Change under test:** decoder batch supersession for large-span burst queues (keeps newest request as primary target) + +#### Scrub Burst Benchmark — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Requests: **240 success / 0 failures** +- All-request latency: avg **204.53ms**, p95 **452.60ms**, p99 **622.10ms**, max **622.10ms** +- Last-request-in-burst latency: avg **221.18ms**, p95 **528.20ms**, p99 **622.09ms**, max **622.09ms** + +#### Scrub Burst Benchmark — 4k60 (`/tmp/cap-bench-4k60.mp4`) +- Requests: **240 success / 0 failures** +- All-request latency: avg **833.64ms**, p95 **1888.52ms**, p99 **1941.42ms**, max **1954.14ms** +- Last-request-in-burst latency: avg **869.99ms**, p95 **1941.42ms**, p99 **1954.14ms**, max **1954.14ms** + +#### Decode Benchmark — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Decoder init: **7.45ms** +- Sequential decode: **389.5 fps**, avg **2.57ms** +- Seek latency (avg / p95 / max): + - 0.5s: **47.39 / 87.98 / 87.98ms** + - 1.0s: **70.93 / 147.39 / 147.39ms** + - 2.0s: **149.20 / 359.46 / 359.46ms** + - 5.0s: **238.28 / 400.59 / 400.59ms** +- Random access: avg **115.15ms**, p95 **355.59ms**, p99 **371.61ms** + +#### Decode Benchmark — 4k60 (`/tmp/cap-bench-4k60.mp4`) +- Decoder init: **30.67ms** +- Sequential decode: **98.4 fps**, avg **10.16ms** +- Seek latency (avg / p95 / max): 
+ - 0.5s: **191.23 / 344.32 / 344.32ms** + - 1.0s: **320.28 / 634.08 / 634.08ms** + - 2.0s: **577.92 / 1399.73 / 1399.73ms** + - 5.0s: **992.08 / 1635.12 / 1635.12ms** +- Random access: avg **500.44ms**, p95 **1480.01ms**, p99 **1531.96ms** + +#### Playback Throughput Benchmark — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Target: **60 fps**, budget **16.67ms** +- Decoded: **240/240**, failures **0** +- Missed deadlines: **0** +- Effective FPS: **60.23** +- Decode: avg **1.41ms**, p95 **2.51ms**, p99 **2.57ms**, max **4.27ms** + +#### Playback Throughput Benchmark — 4k60 (`/tmp/cap-bench-4k60.mp4`) +- Target: **60 fps**, budget **16.67ms** +- Decoded: **240/240**, failures **0** +- Missed deadlines: **1** +- Effective FPS: **60.16** +- Decode: avg **6.40ms**, p95 **8.65ms**, p99 **13.10ms**, max **18.91ms** + +### Benchmark Run: 2026-02-14 00:00:00 UTC (Scrub supersession pass 2: resolution-gated) + +**Environment:** Linux runner with synthetic 1080p60 and 4k60 MP4 assets +**Commands:** `scrub-benchmark`, `decode-benchmark`, `playback-benchmark` +**Change under test:** supersession heuristic enabled only for higher-resolution streams (`>= 2560x1440`) + +#### Scrub Burst Benchmark — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Requests: **240 success / 0 failures** +- All-request latency: avg **206.84ms**, p95 **409.20ms**, p99 **424.00ms**, max **436.97ms** +- Last-request-in-burst latency: avg **297.67ms**, p95 **427.05ms**, p99 **436.97ms**, max **436.97ms** + +#### Scrub Burst Benchmark — 4k60 (`/tmp/cap-bench-4k60.mp4`) +- Requests: **240 success / 0 failures** +- All-request latency: avg **820.24ms**, p95 **1689.13ms**, p99 **1828.91ms**, max **1828.91ms** +- Last-request-in-burst latency: avg **863.94ms**, p95 **1689.13ms**, p99 **1828.91ms**, max **1828.91ms** + +#### Decode Benchmark — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Decoder init: **6.69ms** +- Sequential decode: **414.7 fps**, avg **2.41ms** +- Seek latency (avg / p95 / max): + - 0.5s: **45.48 / 89.37 
/ 89.37ms** + - 1.0s: **69.15 / 144.09 / 144.09ms** + - 2.0s: **148.41 / 358.91 / 358.91ms** + - 5.0s: **231.79 / 377.04 / 377.04ms** +- Random access: avg **116.19ms**, p95 **350.22ms**, p99 **379.83ms** + +#### Decode Benchmark — 4k60 (`/tmp/cap-bench-4k60.mp4`) +- Decoder init: **29.79ms** +- Sequential decode: **105.4 fps**, avg **9.49ms** +- Seek latency (avg / p95 / max): + - 0.5s: **189.31 / 354.05 / 354.05ms** + - 1.0s: **336.64 / 710.24 / 710.24ms** + - 2.0s: **589.34 / 1393.35 / 1393.35ms** + - 5.0s: **898.27 / 1479.23 / 1479.23ms** +- Random access: avg **511.68ms**, p95 **1497.14ms**, p99 **1611.62ms** + +#### Playback Throughput Benchmark — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Target: **60 fps**, budget **16.67ms** +- Decoded: **240/240**, failures **0** +- Missed deadlines: **0** +- Effective FPS: **60.23** +- Decode: avg **1.20ms**, p95 **2.13ms**, p99 **3.09ms**, max **4.08ms** + +#### Playback Throughput Benchmark — 4k60 (`/tmp/cap-bench-4k60.mp4`) +- Target: **60 fps**, budget **16.67ms** +- Decoded: **240/240**, failures **0** +- Missed deadlines: **0** +- Effective FPS: **60.19** +- Decode: avg **4.99ms**, p95 **7.17ms**, p99 **9.64ms**, max **13.37ms** + +### Benchmark Run: 2026-02-14 00:00:00 UTC (Scrub supersession runtime controls) + +**Environment:** Linux runner with synthetic 1080p60 and 4k60 MP4 assets +**Commands:** `scrub-benchmark`, `decode-benchmark`, `playback-benchmark` +**Change under test:** FFmpeg scrub supersession thresholds moved to env-configurable runtime controls + +#### Scrub Burst Benchmark — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Requests: **240 success / 0 failures** +- All-request latency: avg **211.38ms**, p95 **417.65ms**, p99 **435.23ms**, max **454.51ms** +- Last-request-in-burst latency: avg **303.76ms**, p95 **435.23ms**, p99 **454.51ms**, max **454.51ms** + +#### Scrub Burst Benchmark — 4k60 (`/tmp/cap-bench-4k60.mp4`) +- Requests: **240 success / 0 failures** +- All-request latency: avg **812.11ms**, 
p95 **1767.50ms**, p99 **1822.52ms**, max **1822.52ms** +- Last-request-in-burst latency: avg **820.99ms**, p95 **1767.50ms**, p99 **1822.52ms**, max **1822.52ms** + +#### Decode Benchmark — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Decoder init: **6.64ms** +- Sequential decode: **335.5 fps**, avg **2.98ms** +- Seek latency (avg / p95 / max): + - 0.5s: **48.41 / 96.68 / 96.68ms** + - 1.0s: **71.81 / 151.73 / 151.73ms** + - 2.0s: **152.21 / 372.41 / 372.41ms** + - 5.0s: **233.93 / 388.51 / 388.51ms** +- Random access: avg **115.07ms**, p95 **354.67ms**, p99 **399.31ms** + +#### Decode Benchmark — 4k60 (`/tmp/cap-bench-4k60.mp4`) +- Decoder init: **32.18ms** +- Sequential decode: **98.7 fps**, avg **10.13ms** +- Seek latency (avg / p95 / max): + - 0.5s: **201.24 / 387.51 / 387.51ms** + - 1.0s: **347.03 / 774.83 / 774.83ms** + - 2.0s: **623.25 / 1499.39 / 1499.39ms** + - 5.0s: **961.84 / 1629.35 / 1629.35ms** +- Random access: avg **524.19ms**, p95 **1485.61ms**, p99 **1619.96ms** + +#### Playback Throughput Benchmark — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Target: **60 fps**, budget **16.67ms** +- Decoded: **240/240**, failures **0** +- Missed deadlines: **0** +- Effective FPS: **60.22** +- Decode: avg **1.33ms**, p95 **2.49ms**, p99 **2.80ms**, max **3.90ms** + +#### Playback Throughput Benchmark — 4k60 (`/tmp/cap-bench-4k60.mp4`) +- Target: **60 fps**, budget **16.67ms** +- Decoded: **240/240**, failures **0** +- Missed deadlines: **2** +- Effective FPS: **60.17** +- Decode: avg **6.43ms**, p95 **8.82ms**, p99 **14.14ms**, max **17.52ms** + +### Benchmark Run: 2026-02-14 00:00:00 UTC (Scrub multi-run aggregation support) + +**Environment:** Linux runner with synthetic 1080p60 and 4k60 MP4 assets +**Command:** `scrub-benchmark --bursts 10 --burst-size 12 --sweep-seconds 2.0 --runs 3` +**Change under test:** scrub benchmark now supports repeated runs with median aggregation + +#### Scrub Burst Benchmark — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Runs: **3**, 
requests: **360 success / 0 failures** +- Per-run last-request averages: **303.69ms**, **284.95ms**, **310.89ms** +- Median all-request latency: avg **210.56ms**, p95 **429.62ms**, p99 **442.55ms**, max **457.71ms** +- Median last-request latency: avg **303.69ms**, p95 **457.71ms**, p99 **457.71ms**, max **457.71ms** + +#### Scrub Burst Benchmark — 4k60 (`/tmp/cap-bench-4k60.mp4`) +- Runs: **3**, requests: **360 success / 0 failures** +- Per-run last-request averages: **963.69ms**, **887.58ms**, **1001.96ms** +- Median all-request latency: avg **957.47ms**, p95 **2087.13ms**, p99 **2087.15ms**, max **2087.15ms** +- Median last-request latency: avg **963.69ms**, p95 **2087.13ms**, p99 **2087.13ms**, max **2087.13ms** + +### Benchmark Run: 2026-02-14 00:00:00 UTC (Supersession default span set to 25) + +**Environment:** Linux runner with synthetic 1080p60 and 4k60 MP4 assets +**Commands:** `scrub-benchmark --runs 3`, `decode-benchmark --seek-iterations 10`, `playback-benchmark --seek-iterations 10` +**Change under test:** default supersession span threshold reduced from 45 to 25 frames + +#### Scrub Burst Benchmark — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Runs: **3**, requests: **360 success / 0 failures** +- Per-run last-request averages: **304.93ms**, **294.07ms**, **293.85ms** +- Median all-request latency: avg **202.60ms**, p95 **425.68ms**, p99 **450.24ms**, max **455.69ms** +- Median last-request latency: avg **294.07ms**, p95 **455.69ms**, p99 **455.69ms**, max **455.69ms** + +#### Scrub Burst Benchmark — 4k60 (`/tmp/cap-bench-4k60.mp4`) +- Runs: **3**, requests: **360 success / 0 failures** +- Per-run last-request averages: **1008.68ms**, **808.71ms**, **805.92ms** +- Median all-request latency: avg **804.50ms**, p95 **1694.01ms**, p99 **1694.02ms**, max **1694.02ms** +- Median last-request latency: avg **808.71ms**, p95 **1694.01ms**, p99 **1694.01ms**, max **1694.01ms** + +#### Decode Benchmark — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Decoder init: 
**7.32ms** +- Sequential decode: **375.7 fps**, avg **2.66ms** +- Seek latency (avg / p95 / max): + - 0.5s: **47.99 / 96.34 / 96.34ms** + - 1.0s: **69.90 / 147.03 / 147.03ms** + - 2.0s: **152.95 / 364.03 / 364.03ms** + - 5.0s: **236.14 / 385.37 / 385.37ms** +- Random access: avg **117.85ms**, p95 **367.79ms**, p99 **376.78ms** + +#### Decode Benchmark — 4k60 (`/tmp/cap-bench-4k60.mp4`) +- Decoder init: **35.38ms** +- Sequential decode: **95.5 fps**, avg **10.47ms** +- Seek latency (avg / p95 / max): + - 0.5s: **201.57 / 395.76 / 395.76ms** + - 1.0s: **323.73 / 627.27 / 627.27ms** + - 2.0s: **607.72 / 1500.76 / 1500.76ms** + - 5.0s: **932.14 / 1463.20 / 1463.20ms** +- Random access: avg **539.60ms**, p95 **1516.95ms**, p99 **1707.36ms** + +#### Playback Throughput Benchmark — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Target: **60 fps**, budget **16.67ms** +- Decoded: **240/240**, failures **0** +- Missed deadlines: **0** +- Effective FPS: **60.22** +- Decode: avg **1.40ms**, p95 **2.51ms**, p99 **2.89ms**, max **4.27ms** + +#### Playback Throughput Benchmark — 4k60 (`/tmp/cap-bench-4k60.mp4`) +- Target: **60 fps**, budget **16.67ms** +- Decoded: **240/240**, failures **0** +- Effective FPS: **60.18** +- Decode: avg **5.02ms**, p95 **7.18ms**, p99 **11.55ms**, max **15.85ms** + +### Benchmark Run: 2026-02-14 00:00:00 UTC (Decode benchmark CSV export validation) + +**Environment:** Linux runner with synthetic 1080p60 MP4 asset +**Command:** `decode-benchmark --fps 60 --iterations 3 --seek-iterations 2 --output-csv /tmp/cap-decode-benchmark-v2.csv --run-label linux-frame-order-pass-v2` +**Change under test:** decode benchmark CSV export + run-label plumbing + +#### Decode Benchmark — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Decoder init: **10.28ms** +- Sequential decode: **377.11 fps**, avg **2.65ms**, p95 **3.53ms**, p99 **4.85ms** +- Seek latency (avg / p95 / max): + - 0.5s: **41.52 / 81.16 / 81.16ms** + - 1.0s: **16.61 / 29.90 / 29.90ms** + - 2.0s: **191.45 / 
213.25 / 213.25ms** + - 5.0s: **147.94 / 295.55 / 295.55ms** +- Random access: avg **118.94ms**, p95 **355.69ms**, p99 **376.19ms** +- Duplicate burst (batch avg / request avg): + - 4: **3.68 / 3.66ms** + - 8: **4.99 / 4.97ms** + - 16: **4.12 / 4.09ms** +- CSV rows written for modes: + - `decoder_creation` + - `sequential` + - `seek` + - `random_access` + - `duplicate_batch` + - `duplicate_request` + +### Benchmark Run: 2026-02-14 00:00:00 UTC (Decode CSV report baseline/candidate validation) + +**Environment:** Linux runner with synthetic 1080p60 MP4 asset +**Commands:** `decode-benchmark --output-csv /tmp/cap-decode-benchmark-v2.csv --run-label linux-frame-order-pass-v2`, `decode-benchmark --output-csv /tmp/cap-decode-benchmark-v2.csv --run-label linux-frame-order-pass-v2b`, `decode-csv-report --baseline-label linux-frame-order-pass-v2 --candidate-label linux-frame-order-pass-v2b --output-csv /tmp/cap-decode-summary-v2.csv` +**Change under test:** decode CSV reporting utility for grouped summaries and deltas + +#### Decode CSV Report — Summary (1080p60) +- `linux-frame-order-pass-v2`: + - Decoder creation: **10.28ms** + - Sequential FPS: **377.11** + - Sequential decode p95: **3.53ms** + - Random access avg/p95: **118.94 / 355.69ms** +- `linux-frame-order-pass-v2b`: + - Decoder creation: **9.18ms** + - Sequential FPS: **378.80** + - Sequential decode p95: **3.42ms** + - Random access avg/p95: **116.85 / 354.24ms** + +#### Decode CSV Report — Delta (`v2b - v2`) +- Core: + - Decoder creation: **-1.10ms** + - Sequential FPS: **+1.69** + - Sequential decode p95: **-0.11ms** + - Random access avg: **-2.09ms** + - Random access p95: **-1.45ms** +- Seek deltas (avg / p95 / p99 / max): + - 0.5s: **+6.47 / +13.00 / +13.00 / +13.00ms** + - 1.0s: **+2.94 / +5.31 / +5.31 / +5.31ms** + - 2.0s: **+1.84 / +1.49 / +1.49 / +1.49ms** + - 5.0s: **-6.11 / -11.93 / -11.93 / -11.93ms** + +#### CSV Artifacts +- Source decode rows: `/tmp/cap-decode-benchmark-v2.csv` +- Report 
summary/delta rows: `/tmp/cap-decode-summary-v2.csv` +- Report modes emitted: + - `summary_core` + - `summary_seek` + - `summary_duplicate` + - `delta_core` + - `delta_seek` + - `delta_duplicate` + +### Benchmark Run: 2026-02-14 00:00:00 UTC (Audio streaming-first windows path prep) + +**Environment:** Linux runner with synthetic 1080p60 and 4k60 MP4 assets +**Command:** `playback-benchmark --fps 60 --max-frames 240 --seek-iterations 8` +**Change under test:** audio playback startup path now attempts streaming on all platforms with pre-rendered fallback (`CAP_AUDIO_PRERENDER_ONLY` override) + +#### Playback Throughput Benchmark — 1080p60 (`/tmp/cap-bench-1080p60.mp4`) +- Target: **60 fps**, budget **16.67ms** +- Decoded: **240/240**, failures **0** +- Missed deadlines: **0** +- Effective FPS: **60.23** +- Decode: avg **1.37ms**, p95 **2.52ms**, p99 **2.61ms**, max **4.34ms** +- Seek latency (avg / p95 / max): + - 0.5s: **42.38 / 94.80 / 94.80ms** + - 1.0s: **64.87 / 147.85 / 147.85ms** + - 2.0s: **124.91 / 308.52 / 308.52ms** + - 5.0s: **223.96 / 354.15 / 354.15ms** + +#### Playback Throughput Benchmark — 4k60 (`/tmp/cap-bench-4k60.mp4`) +- Target: **60 fps**, budget **16.67ms** +- Decoded: **240/240**, failures **0** +- Missed deadlines: **11** +- Effective FPS: **60.17** +- Decode: avg **6.95ms**, p95 **12.62ms**, p99 **23.18ms**, max **31.66ms** +- Seek latency (avg / p95 / max): + - 0.5s: **177.49 / 356.81 / 356.81ms** + - 1.0s: **303.60 / 632.02 / 632.02ms** + - 2.0s: **557.40 / 1412.22 / 1412.22ms** + - 5.0s: **1070.92 / 1530.27 / 1530.27ms** + +### Benchmark Run: 2026-02-14 00:00:00 UTC (Startup CSV structured audio path columns) + +**Environment:** Linux runner, startup report parser validation +**Commands:** `playback-startup-report --list-run-metrics --output-csv`, `playback-startup-report --baseline-run-id ... --candidate-run-id ... 
--output-csv` +**Change under test:** startup report CSV now emits structured audio path columns for aggregate/run-metrics/delta outputs + +#### Validation Dataset +- Source log: `/workspace/tmp-startup-sample.csv` (baseline stream callback, candidate prerender callback) +- Export target: `/tmp/playback-startup-run-export-v2.csv` + +#### Results +- Run metrics output now includes: + - `run_metric_audio_path` rows + - `audio_path`, `audio_stream_samples`, `audio_prerender_samples` columns +- Delta output now includes: + - `delta_audio_path` rows + - baseline + candidate audio path columns: + - `audio_path` / `candidate_audio_path` + - `audio_stream_samples` / `candidate_audio_stream_samples` + - `audio_prerender_samples` / `candidate_audio_prerender_samples` +- Example delta audio path row: + - baseline: `streaming (1 stream / 0 prerender)` + - candidate: `prerendered (0 stream / 1 prerender)` + +### Benchmark Run: 2026-02-14 00:00:00 UTC (Startup path selection events) + +**Environment:** Linux runner, startup report parser validation +**Commands:** `playback-startup-report --list-run-metrics --output-csv` +**Change under test:** startup report now consumes `audio_startup_path_streaming` / `audio_startup_path_prerendered` events even when callback events are missing + +#### Validation Dataset +- Source log: `/workspace/tmp-startup-path-only.csv` (decoded/rendered + `audio_startup_path_prerendered`, no callback rows) +- Export target: `/tmp/playback-startup-path-only.csv` + +#### Results +- `list-run-metrics` output classified run as: + - `audio_path=prerendered stream_samples=0 prerender_samples=1` +- CSV includes `run_metric_audio_path` row with: + - `audio_path=prerendered` + - `audio_stream_samples=0` + - `audio_prerender_samples=1` + --- diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index d30b940f21..78e36d3f49 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -72,13 +72,17 @@ ### 
Active Work Items *(Update this section as you work)* -- [ ] **Test fragmented mode** - Run playback tests on fragmented recordings -- [ ] **Investigate display decoder init time** - 337ms may be optimizable +- [ ] **Capture audio startup latency before/after** - Use new playback log metrics (`Audio streaming callback started`) to validate startup on macOS/Windows +- [ ] **Tune medium/long seek latency** - Reduce 2s+ seek spikes visible in decode and playback benchmarks +- [ ] **Capture scrub benchmark CSV sweeps on macOS/Windows** - Use `--output-csv` plus supersession env values for side-by-side threshold comparisons +- [ ] **Run full desktop editor validation on macOS + Windows** - Confirm in-app FPS and A/V behavior on target platforms ### Completed - [x] **Run initial baseline** - Established current playback performance metrics (2026-01-28) - [x] **Profile decoder init time** - Hardware acceleration confirmed (AVAssetReader) (2026-01-28) - [x] **Identify latency hotspots** - No issues found, p95=3.1ms (2026-01-28) +- [x] **Add Linux-compatible benchmark fallback path** - Added `cap-editor` playback benchmark example and supporting Linux compile fallbacks (2026-02-14) +- [x] **Harden seek benchmark methodology** - Added repeated seek sampling with avg/p95/max and de-cached iteration strategy (2026-02-14) --- @@ -106,6 +110,9 @@ cargo run -p cap-recording --example playback-test-runner -- full --benchmark-ou # Combined workflow: record then playback cargo run -p cap-recording --example real-device-test-runner -- baseline --keep-outputs && \ cargo run -p cap-recording --example playback-test-runner -- full + +# Linux-compatible playback throughput benchmark +cargo run -p cap-editor --example playback-benchmark -- --video /path/to/video.mp4 --fps 60 --max-frames 600 ``` **Note**: Playback tests require recordings to exist. Run the recording test runner with `--keep-outputs` first. 
@@ -122,6 +129,15 @@ cargo run -p cap-recording --example playback-test-runner -- full | `crates/video-decode/src/ffmpeg.rs` | FFmpeg software fallback | | `crates/audio/src/lib.rs` | AudioData loading and sync analysis | | `crates/recording/examples/playback-test-runner.rs` | Playback benchmark runner | +| `crates/editor/examples/decode-benchmark.rs` | Decode benchmark + CSV export | +| `crates/editor/examples/decode-csv-report.rs` | Decode CSV summary + label-delta analysis | +| `crates/editor/examples/playback-benchmark.rs` | Linux-compatible playback throughput benchmark | +| `crates/editor/examples/playback-csv-report.rs` | Playback CSV summary and label-delta analysis | +| `crates/editor/examples/scrub-benchmark.rs` | Scrub burst latency benchmark | +| `crates/editor/examples/scrub-csv-report.rs` | Scrub CSV summary and label-delta analysis | +| `apps/desktop/src/utils/frame-order.ts` | Wrap-safe frame-order comparisons | +| `apps/desktop/src/utils/frame-transport-order.ts` | Shared transport stale-order decision helper | +| `apps/desktop/src/utils/frame-transport-stride.ts` | Shared stride dispatch/coalescing decision helper | --- @@ -324,6 +340,3570 @@ The CPU RGBA→NV12 conversion was taking 15-25ms per frame for 3024x1964 resolu --- +### Session 2026-02-14 (Linux benchmark fallback + audio startup path) + +**Goal**: Continue playback optimization with measurable benchmarks in Linux environment and reduce audio startup delay risk + +**What was done**: +1. Unblocked several Linux compile blockers in platform-dependent crates (`scap-targets`, `cap-cursor-capture`, `cap-camera-ffmpeg`, `cap-timestamp`, `scap-ffmpeg`) +2. Verified `cap-recording` benchmark path remains heavily platform-specific on Linux and cannot be fully used without broad recording-stack Linux enablement +3. Added new Linux-compatible benchmark example `crates/editor/examples/playback-benchmark.rs` +4. Ran playback throughput benchmarks on synthetic 1080p60 and 4k60 files +5. 
Switched editor audio playback startup logic to prefer streaming audio path with fallback to pre-rendered path + +**Changes Made**: +- `crates/scap-targets/src/platform/linux.rs` and related platform exports +- `crates/scap-targets/src/lib.rs` +- `crates/cursor-capture/src/position.rs` +- `crates/camera-ffmpeg/src/lib.rs` +- `crates/timestamp/src/lib.rs` +- `crates/scap-ffmpeg/src/lib.rs` +- `crates/editor/examples/playback-benchmark.rs` +- `crates/editor/src/playback.rs` +- `crates/editor/PLAYBACK-BENCHMARKS.md` + +**Results**: +- Playback benchmark (1080p60 synthetic): 480 decoded / 480, effective 60.11 fps, 0 missed deadlines, decode p95 2.34ms +- Playback benchmark (4k60 synthetic): 480 decoded / 480, effective 60.11 fps, 2 missed deadlines, decode p95 8.35ms +- Decode benchmark confirms persistent seek/random-access hotspots, especially 4k medium/long seeks +- Audio startup path now prefers streaming playback on non-Windows, with automatic fallback to pre-rendered path on stream creation failure + +**Stopping point**: Need targeted measurement of audio startup latency deltas in real editor playback, then continue seek-latency tuning. + +--- + +### Session 2026-02-14 (FFmpeg seek reset tuning) + +**Goal**: Reduce medium-distance seek latency spikes in FFmpeg decode path + +**What was done**: +1. Updated `cap-video-decode` FFmpeg reset logic to use a forward bounded seek window before fallback +2. 
Re-ran decode and playback throughput benchmarks on synthetic 1080p60 and 4k60 videos + +**Changes Made**: +- `crates/video-decode/src/ffmpeg.rs` + - Added `last_seek_position` tracking + - For forward seeks, attempts `seek(position, min..max)` using a 2-second window + - Falls back to previous `..position` strategy if bounded seek fails + +**Results**: +- 1080p60 decode benchmark: + - 2.0s seek improved from ~260ms to **5.26ms** + - random access avg improved from ~223ms to **120.87ms** +- 4k60 decode benchmark: + - 2.0s seek improved from ~905ms to **12.65ms** + - random access avg improved from ~918ms to **533.65ms** +- Playback throughput remains at ~60fps for both 1080p60 and 4k60 synthetic runs +- Long 5.0s seek latency is still elevated on 4k and remains an active tuning target + +**Stopping point**: Keep current seek tuning; next focus is long-seek (5s+) latency and real desktop A/V startup measurements. + +--- + +### Session 2026-02-14 (Audio startup instrumentation) + +**Goal**: Add measurable startup telemetry for audio output callback timing + +**What was done**: +1. Instrumented audio output callback startup in both streaming and pre-rendered playback paths +2. Added one-time startup latency logs from playback start thread spawn to first output callback invocation + +**Changes Made**: +- `crates/editor/src/playback.rs` + - Added startup timing capture in `AudioPlayback::spawn` + - Logs: + - `Audio streaming callback started` + - `Audio pre-rendered callback started` + - Includes startup latency in milliseconds + +**Results**: +- No compile regressions in `cap-editor` +- Playback now has explicit, low-overhead startup latency telemetry for validating user-reported delayed audio start + +**Stopping point**: Run this instrumentation on macOS and Windows editor sessions to collect before/after startup latency evidence. 
+ +--- + +### Session 2026-02-14 (FFmpeg long-seek tuning pass 2) + +**Goal**: Improve long forward seek latency while preserving medium seek gains + +**What was done**: +1. Adjusted FFmpeg forward-seek behavior to prefer keyframes closer to target time +2. Re-ran decode and playback throughput benchmarks + +**Changes Made**: +- `crates/video-decode/src/ffmpeg.rs` + - forward seek now first tries: + - small backtrack window (0.5s) + - larger forward allowance (2.0s) + - then falls back to wider symmetric window and legacy seek behavior + +**Results**: +- 1080p60: + - random access avg: **120.87ms -> 114.64ms** + - playback 5s seek sample: **138.26ms -> 138.90ms** (flat) +- 4k60: + - random access avg: **533.65ms -> 525.90ms** + - playback 5s seek sample: **432.97ms -> 410.35ms** +- Playback throughput still meets 60fps target in synthetic real-time simulation + +**Stopping point**: Long-seek behavior improved but still high on 4k; next progress requires richer keyframe-aware seek strategy or decoder-pool approach for FFmpeg path. + +--- + +### Session 2026-02-14 (FFmpeg long-seek tuning pass 3) + +**Goal**: Improve long-seek behavior by changing seek fallback ordering + +**What was done**: +1. Changed forward seek fallback order in FFmpeg reset path: + - preferred bounded seek + - legacy backward seek + - wide bounded seek +2. 
Re-ran decode and playback throughput benchmarks + +**Changes Made**: +- `crates/video-decode/src/ffmpeg.rs` + - reordered fallback sequence in forward seek reset path + +**Results**: +- 1080p: + - 5s decode seek: **142.01ms -> 110.27ms** (improved) + - random access avg: **114.64ms -> 119.53ms** (slight regression/noise) +- 4k: + - random access avg: **525.90ms -> 516.48ms** (small improvement) + - 5s decode seek: **559.44ms -> 569.83ms** (flat/slightly worse) + - 5s playback seek sample: **410.35ms -> 430.25ms** (slight regression) +- Throughput remains ~60fps in playback benchmark for both synthetic clips + +**Stopping point**: pass 3 did not materially improve long 4k seeks; code was reverted to pass 2 strategy and further gains will need a deeper keyframe-aware approach. + +--- + +### Session 2026-02-14 (Playback startup instrumentation alignment) + +**Goal**: Make startup latency logs directly comparable across decode, render, and audio callback milestones + +**What was done**: +1. Added playback startup origin timestamp at playback start. +2. Logged first decoded frame availability in prefetch pipeline against that origin. +3. Logged first rendered frame against the same origin. +4. Switched audio callback startup logging to use the same playback origin timestamp. 
+ +**Changes Made**: +- `crates/editor/src/playback.rs` + - added startup timeline logs: + - `Playback first decoded frame ready` + - `Playback first frame rendered` + - added `startup_instant` to `AudioPlayback` and wired callback logs to playback start origin +- `crates/editor/examples/playback-startup-report.rs` + - added log analysis utility for startup timing markers + - reports avg/p50/p95/min/max for decoded, rendered, and audio callback startup milestones + +**Results**: +- Playback throughput remains at ~60fps in synthetic benchmark after instrumentation: + - 1080p: **60.11 fps**, missed deadlines **0** + - 4k: **60.11 fps**, missed deadlines **1** +- No functional playback regression observed in benchmark pass. + +**Stopping point**: startup timing evidence can now be captured in real editor sessions and compared directly; next required step is collecting macOS and Windows session logs with the new unified timing markers. + +--- + +### Session 2026-02-14 (Startup trace export for cross-platform sessions) + +**Goal**: Make macOS/Windows startup latency collection deterministic and parseable + +**What was done**: +1. Added optional startup trace CSV export from desktop playback path via environment variable. +2. Emitted trace rows for first decoded frame, first rendered frame, and first audio callback milestones. +3. Updated startup report example to parse both tracing logs and CSV trace lines. 
+ +**Changes Made**: +- `crates/editor/src/playback.rs` + - added `CAP_PLAYBACK_STARTUP_TRACE_FILE` writer + - startup milestones now append CSV rows: + - `first_decoded_frame` + - `first_rendered_frame` + - `audio_streaming_callback` + - `audio_prerender_callback` +- `crates/editor/examples/playback-startup-report.rs` + - added CSV event parser support + +**Verification**: +- `cargo +1.88.0 check -p cap-editor` +- `cargo +1.88.0 check -p cap-editor --example playback-startup-report` +- `cargo +1.88.0 test -p cap-editor --example playback-startup-report` +- `cargo +1.88.0 run -p cap-editor --example playback-startup-report -- --log crates/editor/PLAYBACK-BENCHMARKS.md` + +**Stopping point**: next actionable step is running desktop playback sessions on macOS and Windows with `CAP_PLAYBACK_STARTUP_TRACE_FILE` enabled and feeding the resulting logs into `playback-startup-report`. + +--- + +### Session 2026-02-14 (Seek benchmark methodology hardening) + +**Goal**: Improve benchmark evidence quality for seek optimizations by reducing cache-driven false positives + +**What was done**: +1. Updated `decode-benchmark` to support `--seek-iterations` and report per-distance avg/p95/max. +2. Updated seek sampling logic to vary the start position per iteration, keeping constant seek distance while avoiding repeated cache hits. +3. Updated `playback-benchmark` with the same `--seek-iterations` support, distance-tail reporting, and varied start-point strategy. +4. Re-ran 1080p and 4k decode/playback benchmarks with repeated seek sampling. 
+ +**Changes Made**: +- `crates/editor/examples/decode-benchmark.rs` + - added `--seek-iterations` + - added repeated seek stats tables (avg/p95/max/samples/failures) + - varied per-iteration seek start times to avoid de-cached artifacts +- `crates/editor/examples/playback-benchmark.rs` + - added `--seek-iterations` + - added repeated seek stats table output + - varied per-iteration seek start times with from->to measurement +- `crates/editor/PLAYBACK-BENCHMARKS.md` + - updated benchmark command docs and added methodology-hardening benchmark run data + +**Results**: +- ✅ Throughput remains at ~60fps in playback benchmark: + - 1080p: **60.24 fps**, missed deadlines **0** + - 4k: **60.13 fps**, missed deadlines **0** +- ✅ Repeated seek sampling now reveals tail behavior directly: + - 4k decode seeks show high p95 tails up to ~1.47s at 5s distance + - 1080p seeks are substantially lower but still non-trivial at medium/long jumps +- ✅ Benchmark tooling now better discriminates real improvements vs cache effects. + +**Stopping point**: next optimization passes should be evaluated with `--seek-iterations` to prevent regression masking and to target 4k long-seek tail reduction. + +--- + +### Session 2026-02-14 (Rejected FFmpeg seek/thread tuning under hardened benchmarks) + +**Goal**: Test low-risk FFmpeg decode tuning ideas against hardened seek benchmark tails + +**What was done**: +1. Tested backward-only forward-seek window ordering in `cap-video-decode`. +2. Benchmarked 1080p/4k decode with `--seek-iterations 10`. +3. Reverted due regressions, then tested software thread-count cap for 4k decode. +4. Benchmarked again and reverted second experiment due seek-tail regressions. 
+ +**Results**: +- ❌ Backward-only seek preference regressed seek tails and random access: + - 4k seek avg/p95 reached roughly: + - 0.5s: **320 / 407ms** + - 1.0s: **577 / 714ms** + - 2.0s: **1076 / 1670ms** + - 5.0s: **1051 / 1725ms** + - 4k random access avg rose to **~925ms** +- ❌ 4k thread-count cap experiment also worsened seek tails: + - 4k seek avg/p95 reached roughly: + - 0.5s: **224 / 395ms** + - 1.0s: **367 / 734ms** + - 2.0s: **638 / 1479ms** + - 5.0s: **975 / 1523ms** + +**Stopping point**: both candidates reverted. Next viable direction should focus on architecture-level seek improvements (decoder pool/keyframe-aware jump scheduling) rather than small FFmpeg seek-window tweaks. + +--- + +### Session 2026-02-14 (FFmpeg duplicate-request coalescing) + +**Goal**: Reduce wasted decode work during scrub/request bursts that target the same frame + +**What was done**: +1. Added same-frame coalescing in FFmpeg decoder request batches (software + hardware paths). +2. When multiple pending requests resolve to one frame index, decoder now executes one response production and fans the frame out to all waiting reply channels. +3. Re-ran hardened decode/playback benchmarks (`--seek-iterations 10`) to verify throughput and tail stability. + +**Changes Made**: +- `crates/rendering/src/decoder/ffmpeg.rs` + - pending request now stores additional replies for same-frame coalescing + - request intake merges duplicate frame requests in-batch + - frame send path fans out decoded/cached frame to all coalesced replies + +**Results**: +- ✅ Playback throughput remains stable at 60fps-class: + - 1080p playback benchmark: **60.24 fps**, missed deadlines **0** + - 4k playback benchmark: **60.20 fps**, missed deadlines **0** +- ✅ Decode benchmarks stayed within expected variance envelope for current seek-tail profile. +- ✅ No regressions observed in compile/test benchmark runs after coalescing change. 
+ +**Stopping point**: same-frame coalescing landed as a low-risk scrub efficiency improvement; next major improvement still requires reducing long-distance 4k seek tails via deeper decoder strategy. + +--- + +### Session 2026-02-14 (Duplicate burst benchmark signal hardening) + +**Goal**: Stabilize duplicate-request benchmark signal for evaluating coalescing behavior + +**What was done**: +1. Extended `decode-benchmark` with an explicit duplicate-request burst section (burst sizes 4/8/16). +2. Added warmup frame fetch before burst sampling to remove cold-start outlier distortion. +3. Re-ran 1080p and 4k decode benchmarks with hardened seek sampling and burst metrics. + +**Changes Made**: +- `crates/editor/examples/decode-benchmark.rs` + - added duplicate burst metric table output + - added burst warmup call prior to timing iterations +- `crates/editor/PLAYBACK-BENCHMARKS.md` + - recorded stabilized duplicate burst metrics and updated decode-benchmark command notes + +**Results**: +- ✅ Duplicate burst metrics now stable and interpretable: + - 1080p burst batch p95: **~3.7–3.8ms** + - 4k burst batch p95: **~21.7–22.0ms** +- ✅ No failures in duplicate burst requests across tested burst sizes. +- ✅ Existing throughput and seek-tail profile remained consistent with recent runs. + +**Stopping point**: duplicate burst metric is now productionized for ongoing coalescing validation; remaining performance gap is still long-distance 4k seek tails. + +--- + +### Session 2026-02-14 (Scrub burst benchmark baseline) + +**Goal**: Add direct scrub-queue stress evidence for latest-request latency + +**What was done**: +1. Added `scrub-benchmark` example that issues bursty decoder requests over a configurable sweep window. +2. Captured two key metrics: + - all-request latency distribution + - last-request-in-burst latency distribution +3. Ran 1080p and 4k baseline passes with 20 bursts × 12 requests. 
+ +**Changes Made**: +- `crates/editor/examples/scrub-benchmark.rs` + - new benchmark for scrub queue stress behavior +- `crates/editor/PLAYBACK-BENCHMARKS.md` + - added command usage and baseline results for scrub burst runs + +**Results**: +- 1080p scrub burst: + - all-request avg **217.97ms**, p95 **434.83ms** + - last-request avg **312.50ms**, p95 **455.72ms** +- 4k scrub burst: + - all-request avg **1071.64ms**, p95 **2098.98ms** + - last-request avg **1524.00ms**, p95 **2116.35ms** +- ✅ Benchmark now exposes scrub-specific latency that decode/playback sequential tests do not capture. + +**Stopping point**: next optimization pass should target reducing last-request-in-burst latency (especially 4k) and use scrub-benchmark plus seek-iteration benchmarks as acceptance gates. + +--- + +### Session 2026-02-14 (Decoder scrub supersession heuristic) + +**Goal**: Reduce latest-request latency during wide-span scrub bursts without breaking throughput + +**What was done**: +1. Added a burst supersession heuristic in FFmpeg decoder request batching: + - when request queue is large and frame span is wide, collapse batch to the newest request target while fanning responses to waiting receivers. +2. Applied heuristic to both software and hardware FFmpeg decoder paths. +3. Re-ran scrub, decode, and playback benchmarks for validation. + +**Changes Made**: +- `crates/rendering/src/decoder/ffmpeg.rs` + - request metadata now tracks enqueue order + - added `maybe_supersede_scrub_burst` to collapse large-span batches to newest target + - retained same-frame coalescing and response fan-out + +**Results**: +- ✅ Scrub burst latency improved materially for 4k: + - last-request avg: **1524ms -> 870ms** + - all-request avg: **1072ms -> 834ms** + - last-request p95: **2116ms -> 1941ms** +- ✅ 1080p scrub average improved: + - last-request avg: **313ms -> 221ms** +- ⚠️ 1080p scrub tail widened in this pass (p95/p99), so heuristic still needs refinement for consistency. 
+- ✅ Throughput remains ~60fps in playback benchmark: + - 1080p: **60.23 fps** + - 4k: **60.16 fps** + +**Stopping point**: first pass improved 4k scrub responsiveness but had mixed 1080p tail behavior; moved to resolution-gated supersession in follow-up pass. + +--- + +### Session 2026-02-14 (Decoder scrub supersession heuristic pass 2) + +**Goal**: Retain 4k scrub gains while reducing 1080p side effects + +**What was done**: +1. Gated supersession heuristic to high-resolution streams only (`>= 2560x1440`). +2. Re-ran scrub burst benchmarks for 1080p and 4k. +3. Re-ran decode and playback regression benchmarks for both clips. + +**Changes Made**: +- `crates/rendering/src/decoder/ffmpeg.rs` + - `maybe_supersede_scrub_burst` now accepts an enable flag + - supersession enablement computed from stream resolution in both FFmpeg loops + +**Results**: +- ✅ 4k scrub responsiveness remained improved vs baseline: + - last-request avg: **1524ms -> 864ms** + - last-request p95: **2116ms -> 1689ms** + - all-request avg: **1072ms -> 820ms** +- ✅ 1080p tails improved vs pass 1 while keeping better average: + - last-request avg: **313ms -> 298ms** + - last-request p95: **456ms -> 427ms** +- ✅ Playback throughput remained stable: + - 1080p: **60.23 fps** + - 4k: **60.19 fps** +- ✅ Decode seek/random-access metrics stayed within expected variance envelope. + +**Stopping point**: resolution-gated supersession is currently the best scrub-latency configuration; next work should focus on reducing 4k long-seek tails further without regressing these burst-latency gains. + +--- + +### Session 2026-02-14 (Supersession runtime configurability) + +**Goal**: Enable faster cross-platform tuning of scrub supersession without code edits + +**What was done**: +1. 
Added environment-driven controls for FFmpeg scrub supersession behavior: + - `CAP_FFMPEG_SCRUB_SUPERSEDE_DISABLED` + - `CAP_FFMPEG_SCRUB_SUPERSEDE_MIN_PIXELS` + - `CAP_FFMPEG_SCRUB_SUPERSEDE_MIN_REQUESTS` + - `CAP_FFMPEG_SCRUB_SUPERSEDE_MIN_SPAN_FRAMES` +2. Kept default behavior equivalent to current tuned path. +3. Re-ran scrub, decode, and playback benchmarks with defaults to verify no functional regressions. + +**Changes Made**: +- `crates/rendering/src/decoder/ffmpeg.rs` + - added `ScrubSupersessionConfig` with `OnceLock` initialization + - replaced hard-coded supersession thresholds with config values +- `crates/editor/PLAYBACK-BENCHMARKS.md` + - added command examples for runtime supersession tuning + - added validation benchmark run for the configurable defaults + +**Results**: +- ✅ Scrub supersession behavior preserved with defaults: + - 4k last-request avg **~821ms**, p95 **~1768ms** + - 1080p last-request avg **~304ms**, p95 **~435ms** +- ✅ Playback throughput remains at 60fps-class: + - 1080p: **60.22 fps** + - 4k: **60.17 fps** +- ✅ Decode benchmark metrics remain in expected variance envelope after config refactor. + +**Stopping point**: supersession tuning is now runtime-configurable, enabling platform-specific calibration runs (especially macOS/Windows) without recompiling. + +--- + +### Session 2026-02-14 (Supersession default span tuning) + +**Goal**: Promote a better default supersession span without requiring env overrides + +**What was done**: +1. Benchmarked supersession configs with multi-run scrub reports (`--runs 3`) to reduce noise. +2. Compared default behavior against candidate span thresholds. +3. Set default `CAP_FFMPEG_SCRUB_SUPERSEDE_MIN_SPAN_FRAMES` fallback to `25`. +4. Re-ran scrub/decode/playback benchmarks with the new default. 
+ +**Changes Made**: +- `crates/rendering/src/decoder/ffmpeg.rs` + - changed default supersession span fallback from `FRAME_CACHE_SIZE / 2` to `25` + - kept runtime override support intact +- `crates/editor/PLAYBACK-BENCHMARKS.md` + - added benchmark run section for the new default tuning pass + +**Results**: +- ✅ Scrub median improvements vs previous default: + - 1080p last-request avg: **~319.76ms -> ~294.07ms** + - 4k last-request avg: **~967.21ms -> ~808.71ms** + - 4k last-request p95: **~1881ms -> ~1694ms** +- ✅ Playback remained 60fps-class in regression runs: + - 1080p: **60.22 fps** + - 4k: **60.18 fps** (best run in pass) +- ✅ Decode metrics remained in expected variance envelope after default change. + +**Stopping point**: supersession now ships with a stronger default profile while remaining fully runtime-tunable for platform-specific calibration. + +--- + +### Session 2026-02-14 (Supersession min-request threshold sweep) + +**Goal**: Validate whether lowering supersession queue threshold improves scrub latency further + +**What was done**: +1. Ran 3-run scrub benchmarks for candidate `min_requests=6`, `min_span_frames=25`. +2. Compared medians against current default (`min_requests=8`, `min_span_frames=25`). + +**Results**: +- 1080p improved with threshold 6: + - median last-request avg: **~294ms -> ~286ms** + - median last-request p95: **~456ms -> ~428ms** +- 4k regressed vs threshold 8: + - median last-request avg: **~809ms -> ~842ms** + - median last-request p95: **~1694ms -> ~1744ms** + +**Decision**: keep default `min_requests=8` because it gives better 4k scrub responsiveness while still materially improving 1080p over the original baseline. + +**Stopping point**: defaults remain `min_requests=8`, `min_span_frames=25`, with runtime overrides available for platform-specific tuning. 
+ +--- + +### Session 2026-02-14 (Supersession span threshold retune to 20) + +**Goal**: Re-evaluate supersession span threshold with CSV-backed multi-run sweeps and improve 4k scrub medians + +**What was done**: +1. Ran a 4-way sweep over `CAP_FFMPEG_SCRUB_SUPERSEDE_MIN_SPAN_FRAMES={15,20,25,30}` with `scrub-benchmark --runs 3`. +2. Compared median last-request latency and p95 tails from CSV outputs. +3. Updated FFmpeg supersession default span fallback from `25` to `20`. +4. Re-ran scrub, playback, and decode regression benchmarks after the default change. + +**Changes Made**: +- `crates/rendering/src/decoder/ffmpeg.rs` + - changed default `min_span_frames` fallback from `25` to `20` +- `crates/editor/PLAYBACK-BENCHMARKS.md` + - updated command examples to show span `20` + - added benchmark entry for the threshold sweep and post-change regression runs + +**Results**: +- 4k sweep medians (last-request avg / p95): + - span 15: **836.94ms / 1740.74ms** + - span 20: **814.93ms / 1743.49ms** + - span 25: **819.11ms / 1762.74ms** + - span 30: **923.18ms / 1947.86ms** +- Post-change default (span 20) validation: + - 4k scrub median last-request avg **836.61ms**, p95 **1732.40ms** + - playback throughput remains 60fps-class: + - 1080p: **60.24 fps** + - 4k: **60.18 fps** + - decode metrics remain in expected variance envelope: + - 1080p random avg **111.79ms** + - 4k random avg **509.26ms** + +**Decision**: keep defaults at `min_requests=8`, `min_span_frames=20`. + +**Stopping point**: supersession defaults now favor a slightly more aggressive span threshold while preserving 60fps throughput and stable decode behavior. + +--- + +### Session 2026-02-14 (Supersession min-pixels threshold retune to 2,000,000) + +**Goal**: Validate whether enabling supersession for 1080p-class streams improves scrub latency without harming 4k behavior + +**What was done**: +1. Ran baseline scrub benchmarks with current defaults (`min_pixels=3_686_400`, `min_span_frames=20`). +2. 
Ran candidate scrub benchmarks with `CAP_FFMPEG_SCRUB_SUPERSEDE_MIN_PIXELS=2_000_000`. +3. Compared 1080p and 4k median run aggregates from `--runs 3`. +4. Updated FFmpeg supersession default min-pixels fallback to `2_000_000`. +5. Re-ran scrub + playback + decode regression benchmarks after default promotion. + +**Changes Made**: +- `crates/rendering/src/decoder/ffmpeg.rs` + - changed default supersession `min_pixels` fallback from `3_686_400` to `2_000_000` +- `crates/editor/PLAYBACK-BENCHMARKS.md` + - updated runtime tuning command examples + - added benchmark history section for min-pixels sweep and post-retune regression checks + +**Results**: +- Sweep medians (last-request avg / p95): + - baseline min_pixels=3_686_400: + - 1080p: **332.72ms / 480.45ms** + - 4k: **855.08ms / 1769.64ms** + - candidate min_pixels=2_000_000: + - 1080p: **213.36ms / 449.62ms** + - 4k: **814.28ms / 1716.14ms** +- Post-change default validation: + - 1080p scrub median last-request avg **200.14ms**, p95 **429.83ms** + - 4k scrub median last-request avg **834.23ms**, p95 **1718.54ms** + - playback remains 60fps-class: + - 1080p: **60.23 fps** + - 4k: **60.19 fps** + +**Decision**: keep defaults at `min_requests=8`, `min_span_frames=20`, `min_pixels=2_000_000`. + +**Stopping point**: supersession now benefits both 1080p and 4k scrub paths under the same default policy while preserving playback throughput targets. + +--- + +### Session 2026-02-14 (Supersession min-requests threshold retune to 7) + +**Goal**: Re-check request-burst threshold using updated defaults (`min_span_frames=20`, `min_pixels=2_000_000`) + +**What was done**: +1. Ran a sequential threshold sweep for `min_requests={6,7,8}` on 1080p and 4k scrub benchmarks (`--runs 3`). +2. Compared median last-request latency and p95 tails across both resolutions. +3. Updated FFmpeg supersession default `min_requests` fallback from `8` to `7`. +4. 
Re-ran scrub + playback + decode regression benchmarks after promoting the new default. + +**Changes Made**: +- `crates/rendering/src/decoder/ffmpeg.rs` + - changed default supersession `min_requests` fallback from `8` to `7` +- `crates/editor/PLAYBACK-BENCHMARKS.md` + - updated runtime tuning command examples to use `min_requests=7` + - added benchmark history section for threshold sweep and regression checks + +**Results**: +- Sequential sweep medians (last-request avg / p95): + - 1080p: + - req 6: **209.99ms / 444.08ms** + - req 7: **211.36ms / 447.60ms** + - req 8: **209.11ms / 441.08ms** + - 4k: + - req 6: **827.29ms / 1707.63ms** + - req 7: **823.15ms / 1699.04ms** + - req 8: **884.74ms / 1837.32ms** +- Post-change default (`min_requests=7`) validation: + - 1080p scrub median last-request avg **205.46ms**, p95 **432.90ms** + - 4k scrub median last-request avg **825.01ms**, p95 **1712.30ms** + - playback remains 60fps-class: + - 1080p: **60.24 fps** + - 4k: **60.20 fps** + +**Decision**: keep defaults at `min_requests=7`, `min_span_frames=20`, `min_pixels=2_000_000`. + +**Stopping point**: supersession defaults now balance 1080p and 4k scrub responsiveness better than the previous `min_requests=8` profile while preserving throughput targets. + +--- + +### Session 2026-02-14 (Rejected span threshold changes after default retunes) + +**Goal**: Verify whether span threshold should move again after adopting `min_requests=7` and `min_pixels=2_000_000` + +**What was done**: +1. Re-ran span sweep with `CAP_FFMPEG_SCRUB_SUPERSEDE_MIN_SPAN_FRAMES={15,20,25}`. +2. Executed 1080p and 4k scrub benchmarks (`--runs 3`) for each span candidate. +3. Compared median last-request averages and p95 tails. 
+ +**Results**: +- 1080p (avg / p95): + - span 15: **216.43ms / 457.45ms** + - span 20: **209.63ms / 442.04ms** + - span 25: **213.84ms / 447.71ms** +- 4k (avg / p95): + - span 15: **862.02ms / 1789.73ms** + - span 20: **860.43ms / 1761.25ms** + - span 25: **866.03ms / 1781.42ms** + +**Decision**: keep `min_span_frames=20`; candidates 15 and 25 were rejected. + +**Stopping point**: supersession defaults remain `min_requests=7`, `min_span_frames=20`, `min_pixels=2_000_000`. + +--- + +### Session 2026-02-14 (Rejected fine span retune to 22) + +**Goal**: Validate whether a finer span adjustment (`22`) outperforms the current default (`20`) + +**What was done**: +1. Ran fine span sweep (`18`, `20`, `22`) on 1080p and 4k with `--runs 3`. +2. Ran paired span20/span22 sweeps with explicit run labels and compared via `scrub-csv-report`. +3. Temporarily switched default span to `22` and executed scrub/playback/decode regression checks. + +**Results**: +- Fine sweep signal: + - 1080p favored `20` on tails (span 22 raised p95 vs span 20 in sampled runs). + - 4k often favored `22` in paired delta comparisons. +- Paired labeled deltas (`span22 - span20`): + - 1080p: p95 worsened by about **+24ms** + - 4k: avg and p95 improved materially in that paired sample +- Temporary default-22 regressions: + - 4k scrub sample still showed heavy tails (**~1797ms p95**) + - playback regression sample had higher missed deadlines (**4**) + - decode remained in variance envelope but with no clear stability gain + +**Decision**: rejected promoting `min_span_frames=22` due inconsistent tail behavior across reruns. + +**Stopping point**: keep defaults at `min_requests=7`, `min_span_frames=20`, `min_pixels=2_000_000`. + +--- + +### Session 2026-02-14 (Scrub CSV report utility) + +**Goal**: Provide a lightweight analysis tool for cross-machine scrub CSV comparisons + +**What was done**: +1. Added a new CSV report example for scrub benchmarks. +2. 
Implemented aggregate-row parsing with run-label and video grouping. +3. Added baseline/candidate label delta reporting per overlapping video. +4. Added derived config-label fallback for rows without explicit run labels. +5. Added `--output-csv` to persist summary and delta rows. +6. Added unit tests for CSV parsing, config-label fallback, median summarization, grouping, and CSV writing. + +**Changes Made**: +- `crates/editor/examples/scrub-csv-report.rs` + - new CLI args: + - `--csv ` (repeatable) + - `--label ` + - `--baseline-label ` + - `--candidate-label ` + - `--output-csv ` + - reports median summaries per run label from aggregate rows + - auto-labels unlabeled rows with config-derived keys + - computes candidate-minus-baseline deltas for all/last request avg and p95 per video + - writes summary/delta rows for downstream reporting when output path is provided +- `crates/editor/PLAYBACK-BENCHMARKS.md` + - added command usage and validation run output for the new utility + +**Verification**: +- `cargo +1.88.0 check -p cap-editor --example scrub-csv-report` +- `cargo +1.88.0 test -p cap-editor --example scrub-csv-report` (5 tests) +- `cargo +1.88.0 run -p cap-editor --example scrub-csv-report -- --csv /tmp/cap-scrub-labeled.csv --label linux-pass-a` +- `cargo +1.88.0 run -p cap-editor --example scrub-csv-report -- --csv /tmp/cap-scrub-span-20-22.csv --baseline-label span20 --candidate-label span22 --output-csv /tmp/cap-scrub-summary.csv` + +**Results**: +- ✅ Cross-machine scrub CSVs can now be summarized and compared without manual spreadsheet work. +- ✅ Unlabeled sweeps now group correctly by supersession config defaults/overrides. +- ✅ Summary/delta exports can now be archived as machine-readable artifacts. +- ✅ Utility test suite passing (5/5). + +**Stopping point**: startup and scrub evidence collection on macOS/Windows now has matching run-label analysis tools on Linux for post-capture evaluation. 
+ +--- + +### Session 2026-02-14 (Playback benchmark CSV export) + +**Goal**: Persist playback throughput benchmark outputs in machine-readable format for cross-platform comparisons + +**What was done**: +1. Added optional CSV export to `playback-benchmark`. +2. Added optional run labeling for exported playback benchmark rows. +3. Emitted sequential and per-seek rows in a single CSV schema. + +**Changes Made**: +- `crates/editor/examples/playback-benchmark.rs` + - new CLI args: + - `--output-csv ` + - `--run-label