Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
229 changes: 193 additions & 36 deletions Runtime/Scripts/MicrophoneSource.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,27 +23,45 @@ sealed public class MicrophoneSource : RtcAudioSource
// contiguous stream is reconstructed from it.
//
// The clip's data rate is clip.frequency (verified: fragments play at correct pitch), so
// captured samples are resampled from clip.frequency to the fixed native-source rate.
private const uint TargetSampleRate = 48000;
// captured samples are resampled from clip.frequency to the native source's configured rate
// (RtcAudioSource.ExpectedSampleRate, set from the constructor below).
private const float PreRollSeconds = 0.3f;
private const float SettleSeconds = 0.1f; // discard the counter's startup burst before measuring
// Engaging fragmented mode discards (stride - valid) samples per stride, so a false
// positive guarantees audio loss while a false negative only risks mild artifacts. The
// observed pathological device measures k=3.2; healthy devices measure ~1.0 with up to a
// few percent of startup noise. Keep a wide margin between the two.
private const double FragmentedKThreshold = 1.5;
private const float MaxBacklogSeconds = 0.2f; // drop backlog beyond this after a stall

private readonly string _deviceName;
private const float MaxBacklogSeconds = 0.2f; // drop backlog beyond this after a stall
private const float DeviceLostTimeoutSeconds = 1f; // no counter advance for this long = device gone
private const float RecoverRetrySeconds = 1f;
private const float DeviceRemovalTimeoutSeconds = 2f; // wait up to this for a lost device to leave the list
private const float RecoverSettleSeconds = 0.3f; // let the replacement device's driver come up before starting it
private const float PollIntervalSeconds = 0.05f; // cadence for PollUntil readiness checks

// Cached yield instructions; WaitForSeconds is immutable, so reusing one avoids a per-yield
// allocation (and silences the analyzer hint).
private static readonly WaitForSeconds WaitPoll = new(PollIntervalSeconds);
private static readonly WaitForSeconds WaitDeviceRemoval = new(0.1f);
private static readonly WaitForSeconds WaitRecoverSettle = new(RecoverSettleSeconds);
private static readonly WaitForSeconds WaitRecoverRetry = new(RecoverRetrySeconds);

private string _deviceName;

public override event Action<float[], int, int> AudioRead;

private bool _disposed = false;
private bool _started = false;
private volatile bool _capturing = false;
private bool _switching = false;
private bool _paused = false;
private int _captureGeneration = 0;

private StreamingResampler _resampler;

/// <summary>The microphone device currently being captured.</summary>
public string DeviceName => _deviceName;

/// <summary>
/// Creates a new microphone source for the given device.
/// </summary>
Expand All @@ -52,19 +70,19 @@ sealed public class MicrophoneSource : RtcAudioSource
/// <param name="sourceObject">Unused; retained for compatibility. The microphone clip is read
/// directly, so no scene GameObject/AudioSource is required.</param>
public MicrophoneSource(string deviceName, GameObject sourceObject)
: base(RtcAudioSourceType.AudioSourceMicrophone, TargetSampleRate, 1)
: base(RtcAudioSourceType.AudioSourceMicrophone, DefaultSampleRate, 1)
{
_deviceName = deviceName;
}

// The rate requested from Microphone.Start (a hint the platform may not honor), clamped to
// the device's reported range. The authoritative data rate is clip.frequency afterwards.
private static int ResolveRequestedSampleRate(string deviceName)
private int ResolveRequestedSampleRate(string deviceName)
{
Microphone.GetDeviceCaps(deviceName, out int minFreq, out int maxFreq);
if (minFreq == 0 && maxFreq == 0)
return (int)TargetSampleRate;
return Mathf.Clamp((int)TargetSampleRate, minFreq, maxFreq);
return (int)ExpectedSampleRate;
return Mathf.Clamp((int)ExpectedSampleRate, minFreq, maxFreq);
}

/// <summary>
Expand All @@ -91,7 +109,14 @@ public override void Start()
_started = true;
}

private IEnumerator StartMicrophone()
// quietFailure: during automatic recovery a failed start is expected (the replacement
// device's driver may not be startable yet) and is retried, so transient failures are
// logged at debug level instead of error to avoid spamming the console mid-handoff. Note:
// when Microphone.Start itself fails, Unity's native FMOD layer still logs its own error
// (e.g. "Starting microphone failed ... (80)"); that line originates inside the engine and
// cannot be suppressed from here — the settle delay before recovery's first attempt is what
// reduces how often it fires.
private IEnumerator StartMicrophone(bool quietFailure = false)
{
// Verify microphone is still authorized (could change during background)
if (!Application.HasUserAuthorization(UserAuthorization.Microphone))
Expand All @@ -100,73 +125,208 @@ private IEnumerator StartMicrophone()
yield break;
}

// Capture the device locally so a concurrent SwitchDevice can't mix two devices
// within one start sequence.
var device = _deviceName;

AudioClip clip = null;
int requestedRate = ResolveRequestedSampleRate(_deviceName);
int requestedRate = ResolveRequestedSampleRate(device);
try
{
clip = Microphone.Start(
_deviceName,
device,
loop: true,
lengthSec: 2,
frequency: requestedRate
);
}
catch (Exception e)
{
Utils.Error($"MicrophoneSource: Exception starting microphone: {e.Message}");
LogStartIssue(quietFailure, $"MicrophoneSource: Exception starting microphone: {e.Message}");
yield break;
}

if (clip == null)
{
Utils.Error("MicrophoneSource: Microphone.Start returned null, audio session may not be ready");
LogStartIssue(quietFailure, "MicrophoneSource: Microphone.Start returned null, audio session may not be ready");
yield break;
}

// Wait for microphone to actually start producing data with a timeout
const float timeout = 2f;
yield return PollUntil(() => Microphone.GetPosition(device) > 0, timeout);

if (Microphone.GetPosition(device) <= 0)
{
LogStartIssue(quietFailure, $"MicrophoneSource: Microphone did not start producing data after {timeout}s");
yield break;
}

Utils.Info($"MicrophoneSource device='{device}' clip={clip.frequency}Hz/{clip.channels}ch samples={clip.samples} requested={requestedRate}Hz target={ExpectedSampleRate}Hz");

_capturing = true;
MonoBehaviourContext.RunCoroutine(CaptureLoop(clip, device, ++_captureGeneration));
}

// Reports a problem encountered while starting the microphone. Loud by default (error
// level); when the caller asked for quiet reporting the same message is emitted at debug
// level instead.
private static void LogStartIssue(bool quiet, string msg)
{
    if (!quiet)
    {
        Utils.Error(msg);
        return;
    }

    Utils.Debug(msg);
}

// Polls a condition every PollIntervalSeconds until it holds or the timeout elapses.
// Callers re-check the condition afterwards to distinguish success from timeout.
private IEnumerator PollUntil(Func<bool> done, float timeout)
{
float elapsed = 0f;
while (Microphone.GetPosition(_deviceName) <= 0 && elapsed < timeout)
while (!done() && elapsed < timeout)
{
yield return new WaitForSeconds(0.05f);
elapsed += 0.05f;
yield return WaitPoll;
elapsed += PollIntervalSeconds;
}
}

if (Microphone.GetPosition(_deviceName) <= 0)
/// <summary>
/// Switches capture to a different microphone device while the published track keeps
/// working. The native source's format is fixed at construction (ExpectedSampleRate, mono)
/// and captured audio is resampled to it, so the track and its subscribers are unaffected;
/// there is only a brief capture gap (~0.5s) while the new device starts and its rate is measured.
/// </summary>
/// <remarks>
/// Internal for now: device loss is handled automatically (see RecoverRoutine); this is
/// the manual primitive should a public device-picker API be needed later.
/// </remarks>
/// <param name="deviceName">The device to switch to. Use <see cref="Microphone.devices"/> to
/// get the list of available devices.</param>
internal void SwitchDevice(string deviceName)
{
if (_disposed) return;
if (deviceName == _deviceName) return;
if (_switching)
{
Utils.Error($"MicrophoneSource: Microphone did not start producing data after {timeout}s");
yield break;
Utils.Warning("MicrophoneSource: device switch already in progress, ignoring");
return;
}

Utils.Info($"MicrophoneSource device='{_deviceName}' clip={clip.frequency}Hz/{clip.channels}ch samples={clip.samples} requested={requestedRate}Hz target={TargetSampleRate}Hz");
var previous = _deviceName;
_deviceName = deviceName;

_capturing = true;
MonoBehaviourContext.RunCoroutine(CaptureLoop(clip));
// Not capturing yet: the next Start() simply uses the new device.
if (!_started) return;

_switching = true;
MonoBehaviourContext.RunCoroutine(SwitchRoutine(previous));
}

// Performs a device switch requested by SwitchDevice: stops capture on the previous
// device, then starts capture on the device currently stored in _deviceName.
//
// previousDevice: the device that was being captured before the switch; its recording is
// ended if it is still active.
//
// StartMicrophone only advances _captureGeneration when it actually launches a new
// CaptureLoop, so comparing the generation before and after tells success from failure.
// The switch is reported as done only when capture really started; otherwise a warning
// is logged so the log does not claim a switch that never happened.
private IEnumerator SwitchRoutine(string previousDevice)
{
    // Retire the running capture loop and release the old device.
    _capturing = false;
    if (Microphone.IsRecording(previousDevice))
    {
        Microphone.End(previousDevice);
    }

    int generationBefore = _captureGeneration;
    yield return StartMicrophone();

    // Clear the in-progress flag regardless of outcome so later switches are not blocked.
    _switching = false;

    if (_captureGeneration != generationBefore)
    {
        Utils.Info($"MicrophoneSource: switched capture to device '{_deviceName}'");
    }
    else
    {
        // StartMicrophone bailed out (it logs the specific reason itself); no capture loop
        // is running at this point.
        Utils.Warning($"MicrophoneSource: failed to switch capture to device '{_deviceName}'; capture is stopped");
    }
}

// Brings capture back after the active device vanished mid-call (for example a Bluetooth
// headset disconnecting). Keeps retrying until some device can be started: the device that
// was lost is preferred when it shows up again, otherwise the system default microphone
// (null device name) is used. The published track is not touched; the native source keeps
// its fixed format and there is only a capture gap until a device is acquired.
//
// lostDevice: name of the device that stopped delivering audio.
// generation: capture generation of the loop that detected the loss; recovery gives up as
// soon as a newer capture supersedes it.
private IEnumerator RecoverRoutine(string lostDevice, int generation)
{
    // Recovery is only worth continuing while the source is started, not disposed, not
    // paused, and no newer capture has taken over.
    bool StillWanted() => _started && !_disposed && !_paused && generation == _captureGeneration;

    if (Microphone.IsRecording(lostDevice))
    {
        Microphone.End(lostDevice);
    }

    // Give the OS time to finish removing the lost device before touching the audio
    // subsystem again. Calling into Microphone (GetDeviceCaps / Start) mid-teardown makes
    // FMOD log "Failed to get recording driver capabilities"; once the device has left the
    // list the subsystem has settled. Reading the device list does not initialize a device,
    // so polling it is quiet. Slower replacements are still covered by the retry loop below.
    if (!string.IsNullOrEmpty(lostDevice))
    {
        for (float waited = 0f;
             waited < DeviceRemovalTimeoutSeconds
                 && Array.IndexOf(Microphone.devices, lostDevice) >= 0
                 && StillWanted();
             waited += 0.1f)
        {
            yield return WaitDeviceRemoval;
        }

        // Even after the lost device is gone, the OS is still promoting the replacement
        // default and FMOD has not yet made its recording driver startable. Starting right
        // away is what trips FMOD error 80, so pause briefly to let the new driver come up;
        // the first attempt then usually succeeds instead of failing and retrying.
        yield return WaitRecoverSettle;
    }

    while (StillWanted())
    {
        string[] available = Microphone.devices;
        if (available.Length > 0)
        {
            // Original device back in the list? Use it. Otherwise fall back to the default.
            bool originalIsBack = Array.IndexOf(available, lostDevice) >= 0;
            _deviceName = originalIsBack ? lostDevice : null;

            int generationAtAttempt = _captureGeneration;
            yield return StartMicrophone(quietFailure: true);
            if (_captureGeneration != generationAtAttempt)
            {
                // The generation moved on, meaning a new CaptureLoop is running: recovered.
                Utils.Info($"MicrophoneSource: recovered capture on device '{_deviceName ?? "(default)"}'");
                yield break;
            }
        }

        yield return WaitRecoverRetry;
    }
}

// Reads new samples from the clip's ring buffer each frame and pushes them to the native
// source via AudioRead. MicClipReader decides what to read (including reconstructing
// fragmented buffers); this loop is the thin Unity shell around it. Runs on the main
// thread; the native source's queue absorbs the per-frame pacing jitter.
private IEnumerator CaptureLoop(AudioClip clip)
// thread; the native source's queue absorbs the per-frame pacing jitter. The generation
// token retires this loop when a newer capture (restart or device switch) supersedes it.
private IEnumerator CaptureLoop(AudioClip clip, string device, int generation)
{
int clipFrames = clip.samples;
int channels = clip.channels;
int dataRate = clip.frequency > 0 ? clip.frequency : (int)DefaultMicrophoneSampleRate;

var reader = new MicClipReader(clipFrames, dataRate, PreRollSeconds, FragmentedKThreshold, MaxBacklogSeconds, SettleSeconds);
_resampler = new StreamingResampler(dataRate, (int)TargetSampleRate);
_resampler = new StreamingResampler(dataRate, (int)ExpectedSampleRate);
var ranges = new List<MicClipReader.ReadRange>();
var clock = System.Diagnostics.Stopwatch.StartNew();
bool announced = false;
long reportedDrops = 0;

while (_capturing && !_disposed)
// Device-loss detection: the position counter advances continuously while a device is
// alive (even in silence), so a stalled counter or IsRecording dropping to false means
// the device disappeared (e.g. a Bluetooth headset disconnected mid-call).
int lastCounter = Microphone.GetPosition(device);
double lastAdvance = clock.Elapsed.TotalSeconds;

while (_capturing && !_disposed && generation == _captureGeneration)
{
yield return null;

int counter = Microphone.GetPosition(device);
double now = clock.Elapsed.TotalSeconds;
if (counter != lastCounter)
{
lastCounter = counter;
lastAdvance = now;
}
else if (now - lastAdvance > DeviceLostTimeoutSeconds || !Microphone.IsRecording(device))
{
Utils.Warning($"MicrophoneSource: device '{device}' stopped delivering audio; attempting recovery");
MonoBehaviourContext.RunCoroutine(RecoverRoutine(device, generation));
yield break;
}

ranges.Clear();
reader.Update(Microphone.GetPosition(_deviceName), clock.Elapsed.TotalSeconds, ranges);
reader.Update(counter, now, ranges);

if (!announced && reader.Ready)
{
Expand All @@ -189,7 +349,7 @@ private IEnumerator CaptureLoop(AudioClip clip)
}

// Reads a contiguous range, downmixes to mono, resamples clip.frequency ->
// TargetSampleRate (the resampler carries state across calls, so fragment junctions stay
// ExpectedSampleRate (the resampler carries state across calls, so fragment junctions stay
// continuous), and fires AudioRead.
private void ReadAndPush(AudioClip clip, int channels, int start, int count)
{
Expand Down Expand Up @@ -217,7 +377,7 @@ private void ReadAndPush(AudioClip clip, int channels, int start, int count)

var output = _resampler.Process(mono, count);
if (output.Length > 0)
AudioRead?.Invoke(output, 1, (int)TargetSampleRate);
AudioRead?.Invoke(output, 1, (int)ExpectedSampleRate);
}

/// <summary>
Expand All @@ -244,6 +404,8 @@ private IEnumerator StopMicrophone()

private void OnApplicationPause(bool pause)
{
_paused = pause;

if (!_started)
return;

Expand Down Expand Up @@ -277,15 +439,10 @@ private IEnumerator WaitForMicrophoneReady()
// Wait for microphone devices to become available again after iOS audio session interruption.
// This is more reliable than a fixed delay because we wait for actual system readiness.
const float timeout = 2f;
float elapsed = 0f;

// On iOS, Microphone.devices may be empty immediately after resume while
// AVAudioSession is recovering from interruption. Wait until devices are available.
while (Microphone.devices.Length == 0 && elapsed < timeout)
{
yield return new WaitForSeconds(0.05f);
elapsed += 0.05f;
}
yield return PollUntil(() => Microphone.devices.Length > 0, timeout);

if (Microphone.devices.Length == 0)
{
Expand Down
7 changes: 7 additions & 0 deletions Runtime/Scripts/RtcAudioSource.cs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,13 @@ private sealed class PendingAudioFrame
private readonly uint _expectedSampleRate;
private readonly uint _expectedChannels;

/// <summary>The sample rate the native source was configured with; the format captured
/// audio must match (subclasses resample to this).</summary>
protected uint ExpectedSampleRate => _expectedSampleRate;

/// <summary>The channel count the native source was configured with.</summary>
protected uint ExpectedChannels => _expectedChannels;

internal readonly FfiHandle Handle;
protected AudioSourceInfo _info;

Expand Down
Loading