diff --git a/Runtime/Scripts/MicrophoneSource.cs b/Runtime/Scripts/MicrophoneSource.cs index b424e2a5..53a92b6b 100644 --- a/Runtime/Scripts/MicrophoneSource.cs +++ b/Runtime/Scripts/MicrophoneSource.cs @@ -23,8 +23,8 @@ sealed public class MicrophoneSource : RtcAudioSource // contiguous stream is reconstructed from it. // // The clip's data rate is clip.frequency (verified: fragments play at correct pitch), so - // captured samples are resampled from clip.frequency to the fixed native-source rate. - private const uint TargetSampleRate = 48000; + // captured samples are resampled from clip.frequency to the native source's configured rate + // (RtcAudioSource.ExpectedSampleRate, set from the constructor below). private const float PreRollSeconds = 0.3f; private const float SettleSeconds = 0.1f; // discard the counter's startup burst before measuring // Engaging fragmented mode discards (stride - valid) samples per stride, so a false @@ -32,18 +32,36 @@ sealed public class MicrophoneSource : RtcAudioSource // observed pathological device measures k=3.2; healthy devices measure ~1.0 with up to a // few percent of startup noise. Keep a wide margin between the two. 
private const double FragmentedKThreshold = 1.5; - private const float MaxBacklogSeconds = 0.2f; // drop backlog beyond this after a stall - - private readonly string _deviceName; + private const float MaxBacklogSeconds = 0.2f; // drop backlog beyond this after a stall + private const float DeviceLostTimeoutSeconds = 1f; // no counter advance for this long = device gone + private const float RecoverRetrySeconds = 1f; + private const float DeviceRemovalTimeoutSeconds = 2f; // wait up to this for a lost device to leave the list + private const float RecoverSettleSeconds = 0.3f; // let the replacement device's driver come up before starting it + private const float PollIntervalSeconds = 0.05f; // cadence for PollUntil readiness checks + + // Cached yield instructions; WaitForSeconds is immutable, so reusing one avoids a per-yield + // allocation (and silences the analyzer hint). + private static readonly WaitForSeconds WaitPoll = new(PollIntervalSeconds); + private static readonly WaitForSeconds WaitDeviceRemoval = new(0.1f); + private static readonly WaitForSeconds WaitRecoverSettle = new(RecoverSettleSeconds); + private static readonly WaitForSeconds WaitRecoverRetry = new(RecoverRetrySeconds); + + private string _deviceName; public override event Action AudioRead; private bool _disposed = false; private bool _started = false; private volatile bool _capturing = false; + private bool _switching = false; + private bool _paused = false; + private int _captureGeneration = 0; private StreamingResampler _resampler; + /// The microphone device currently being captured. + public string DeviceName => _deviceName; + /// /// Creates a new microphone source for the given device. /// @@ -52,19 +70,19 @@ sealed public class MicrophoneSource : RtcAudioSource /// Unused; retained for compatibility. The microphone clip is read /// directly, so no scene GameObject/AudioSource is required. 
public MicrophoneSource(string deviceName, GameObject sourceObject) - : base(RtcAudioSourceType.AudioSourceMicrophone, TargetSampleRate, 1) + : base(RtcAudioSourceType.AudioSourceMicrophone, DefaultSampleRate, 1) { _deviceName = deviceName; } // The rate requested from Microphone.Start (a hint the platform may not honor), clamped to // the device's reported range. The authoritative data rate is clip.frequency afterwards. - private static int ResolveRequestedSampleRate(string deviceName) + private int ResolveRequestedSampleRate(string deviceName) { Microphone.GetDeviceCaps(deviceName, out int minFreq, out int maxFreq); if (minFreq == 0 && maxFreq == 0) - return (int)TargetSampleRate; - return Mathf.Clamp((int)TargetSampleRate, minFreq, maxFreq); + return (int)ExpectedSampleRate; + return Mathf.Clamp((int)ExpectedSampleRate, minFreq, maxFreq); } /// @@ -91,7 +109,14 @@ public override void Start() _started = true; } - private IEnumerator StartMicrophone() + // quietFailure: during automatic recovery a failed start is expected (the replacement + // device's driver may not be startable yet) and is retried, so transient failures are + // logged at debug level instead of error to avoid spamming the console mid-handoff. Note: + // when Microphone.Start itself fails, Unity's native FMOD layer still logs its own error + // (e.g. "Starting microphone failed ... (80)"); that line originates inside the engine and + // cannot be suppressed from here — the settle delay before recovery's first attempt is what + // reduces how often it fires. + private IEnumerator StartMicrophone(bool quietFailure = false) { // Verify microphone is still authorized (could change during background) if (!Application.HasUserAuthorization(UserAuthorization.Microphone)) @@ -100,12 +125,16 @@ private IEnumerator StartMicrophone() yield break; } + // Capture the device locally so a concurrent SwitchDevice can't mix two devices + // within one start sequence. 
+ var device = _deviceName; + AudioClip clip = null; - int requestedRate = ResolveRequestedSampleRate(_deviceName); + int requestedRate = ResolveRequestedSampleRate(device); try { clip = Microphone.Start( - _deviceName, + device, loop: true, lengthSec: 2, frequency: requestedRate @@ -113,60 +142,191 @@ private IEnumerator StartMicrophone() } catch (Exception e) { - Utils.Error($"MicrophoneSource: Exception starting microphone: {e.Message}"); + LogStartIssue(quietFailure, $"MicrophoneSource: Exception starting microphone: {e.Message}"); yield break; } if (clip == null) { - Utils.Error("MicrophoneSource: Microphone.Start returned null, audio session may not be ready"); + LogStartIssue(quietFailure, "MicrophoneSource: Microphone.Start returned null, audio session may not be ready"); yield break; } // Wait for microphone to actually start producing data with a timeout const float timeout = 2f; + yield return PollUntil(() => Microphone.GetPosition(device) > 0, timeout); + + if (Microphone.GetPosition(device) <= 0) + { + LogStartIssue(quietFailure, $"MicrophoneSource: Microphone did not start producing data after {timeout}s"); + yield break; + } + + Utils.Info($"MicrophoneSource device='{device}' clip={clip.frequency}Hz/{clip.channels}ch samples={clip.samples} requested={requestedRate}Hz target={ExpectedSampleRate}Hz"); + + _capturing = true; + MonoBehaviourContext.RunCoroutine(CaptureLoop(clip, device, ++_captureGeneration)); + } + + private static void LogStartIssue(bool quiet, string msg) + { + if (quiet) Utils.Debug(msg); + else Utils.Error(msg); + } + + // Polls a condition every PollIntervalSeconds until it holds or the timeout elapses. + // Callers re-check the condition afterwards to distinguish success from timeout. 
+ private IEnumerator PollUntil(Func done, float timeout) + { float elapsed = 0f; - while (Microphone.GetPosition(_deviceName) <= 0 && elapsed < timeout) + while (!done() && elapsed < timeout) { - yield return new WaitForSeconds(0.05f); - elapsed += 0.05f; + yield return WaitPoll; + elapsed += PollIntervalSeconds; } + } - if (Microphone.GetPosition(_deviceName) <= 0) + /// + /// Switches capture to a different microphone device while the published track keeps + /// working. The native source's format is fixed at construction (ExpectedSampleRate, mono) and captured audio is + /// resampled to it, so the track and its subscribers are unaffected; there is only a brief + /// capture gap (~0.5s) while the new device starts and its rate is measured. + /// + /// + /// Internal for now: device loss is handled automatically (see RecoverRoutine); this is + /// the manual primitive should a public device-picker API be needed later. + /// + /// The device to switch to. Use Microphone.devices to + /// get the list of available devices. + internal void SwitchDevice(string deviceName) + { + if (_disposed) return; + if (deviceName == _deviceName) return; + if (_switching) { - Utils.Error($"MicrophoneSource: Microphone did not start producing data after {timeout}s"); - yield break; + Utils.Warning("MicrophoneSource: device switch already in progress, ignoring"); + return; } - Utils.Info($"MicrophoneSource device='{_deviceName}' clip={clip.frequency}Hz/{clip.channels}ch samples={clip.samples} requested={requestedRate}Hz target={TargetSampleRate}Hz"); + var previous = _deviceName; + _deviceName = deviceName; - _capturing = true; - MonoBehaviourContext.RunCoroutine(CaptureLoop(clip)); + // Not capturing yet: the next Start() simply uses the new device.
+ if (!_started) return; + + _switching = true; + MonoBehaviourContext.RunCoroutine(SwitchRoutine(previous)); + } + + private IEnumerator SwitchRoutine(string previousDevice) + { + _capturing = false; + if (Microphone.IsRecording(previousDevice)) + Microphone.End(previousDevice); + + yield return StartMicrophone(); + _switching = false; + Utils.Info($"MicrophoneSource: switched capture to device '{_deviceName}'"); + } + + // Recovers capture after the active device disappeared mid-call (e.g. a Bluetooth headset + // disconnected). Retries until a device is available: the original device is preferred if + // it comes back, otherwise capture falls back to the system default microphone. The + // published track is unaffected throughout — the native source's fixed format never + // changes, there is simply a capture gap until a device is acquired. + private IEnumerator RecoverRoutine(string lostDevice, int generation) + { + if (Microphone.IsRecording(lostDevice)) + Microphone.End(lostDevice); + + // Wait for the OS to finish removing the lost device before re-entering the audio + // subsystem. Touching Microphone (GetDeviceCaps / Start) mid-teardown makes FMOD log + // "Failed to get recording driver capabilities"; once the device drops out of the list + // the subsystem has settled. Reading the device list itself does not initialize a + // device, so it stays quiet. The retry loop below still covers slower replacements. + if (!string.IsNullOrEmpty(lostDevice)) + { + float waited = 0f; + while (waited < DeviceRemovalTimeoutSeconds + && Array.IndexOf(Microphone.devices, lostDevice) >= 0 + && _started && !_disposed && !_paused && generation == _captureGeneration) + { + yield return WaitDeviceRemoval; + waited += 0.1f; + } + + // The lost device has left the list, but the OS is still promoting the replacement + // default and FMOD has yet to make its recording driver startable. 
Starting now is + // what trips FMOD error 80; a brief settle lets the new driver come up so the first + // attempt usually succeeds rather than failing and retrying. + yield return WaitRecoverSettle; + } + + while (_started && !_disposed && !_paused && generation == _captureGeneration) + { + var devices = Microphone.devices; + if (devices.Length > 0) + { + // Prefer the original device if it reappeared; otherwise use the system default. + _deviceName = Array.IndexOf(devices, lostDevice) >= 0 ? lostDevice : null; + + int generationBefore = _captureGeneration; + yield return StartMicrophone(quietFailure: true); + if (_captureGeneration != generationBefore) + { + // A new CaptureLoop is running; recovery succeeded. + Utils.Info($"MicrophoneSource: recovered capture on device '{_deviceName ?? "(default)"}'"); + yield break; + } + } + yield return WaitRecoverRetry; + } } // Reads new samples from the clip's ring buffer each frame and pushes them to the native // source via AudioRead. MicClipReader decides what to read (including reconstructing // fragmented buffers); this loop is the thin Unity shell around it. Runs on the main - // thread; the native source's queue absorbs the per-frame pacing jitter. - private IEnumerator CaptureLoop(AudioClip clip) + // thread; the native source's queue absorbs the per-frame pacing jitter. The generation + // token retires this loop when a newer capture (restart or device switch) supersedes it. + private IEnumerator CaptureLoop(AudioClip clip, string device, int generation) { int clipFrames = clip.samples; int channels = clip.channels; int dataRate = clip.frequency > 0 ? 
clip.frequency : (int)DefaultMicrophoneSampleRate; var reader = new MicClipReader(clipFrames, dataRate, PreRollSeconds, FragmentedKThreshold, MaxBacklogSeconds, SettleSeconds); - _resampler = new StreamingResampler(dataRate, (int)TargetSampleRate); + _resampler = new StreamingResampler(dataRate, (int)ExpectedSampleRate); var ranges = new List(); var clock = System.Diagnostics.Stopwatch.StartNew(); bool announced = false; long reportedDrops = 0; - while (_capturing && !_disposed) + // Device-loss detection: the position counter advances continuously while a device is + // alive (even in silence), so a stalled counter or IsRecording dropping to false means + // the device disappeared (e.g. a Bluetooth headset disconnected mid-call). + int lastCounter = Microphone.GetPosition(device); + double lastAdvance = clock.Elapsed.TotalSeconds; + + while (_capturing && !_disposed && generation == _captureGeneration) { yield return null; + int counter = Microphone.GetPosition(device); + double now = clock.Elapsed.TotalSeconds; + if (counter != lastCounter) + { + lastCounter = counter; + lastAdvance = now; + } + else if (now - lastAdvance > DeviceLostTimeoutSeconds || !Microphone.IsRecording(device)) + { + Utils.Warning($"MicrophoneSource: device '{device}' stopped delivering audio; attempting recovery"); + MonoBehaviourContext.RunCoroutine(RecoverRoutine(device, generation)); + yield break; + } + ranges.Clear(); - reader.Update(Microphone.GetPosition(_deviceName), clock.Elapsed.TotalSeconds, ranges); + reader.Update(counter, now, ranges); if (!announced && reader.Ready) { @@ -189,7 +349,7 @@ private IEnumerator CaptureLoop(AudioClip clip) } // Reads a contiguous range, downmixes to mono, resamples clip.frequency -> - // TargetSampleRate (the resampler carries state across calls, so fragment junctions stay + // ExpectedSampleRate (the resampler carries state across calls, so fragment junctions stay // continuous), and fires AudioRead. 
private void ReadAndPush(AudioClip clip, int channels, int start, int count) { @@ -217,7 +377,7 @@ private void ReadAndPush(AudioClip clip, int channels, int start, int count) var output = _resampler.Process(mono, count); if (output.Length > 0) - AudioRead?.Invoke(output, 1, (int)TargetSampleRate); + AudioRead?.Invoke(output, 1, (int)ExpectedSampleRate); } /// @@ -244,6 +404,8 @@ private IEnumerator StopMicrophone() private void OnApplicationPause(bool pause) { + _paused = pause; + if (!_started) return; @@ -277,15 +439,10 @@ private IEnumerator WaitForMicrophoneReady() // Wait for microphone devices to become available again after iOS audio session interruption. // This is more reliable than a fixed delay because we wait for actual system readiness. const float timeout = 2f; - float elapsed = 0f; // On iOS, Microphone.devices may be empty immediately after resume while // AVAudioSession is recovering from interruption. Wait until devices are available. - while (Microphone.devices.Length == 0 && elapsed < timeout) - { - yield return new WaitForSeconds(0.05f); - elapsed += 0.05f; - } + yield return PollUntil(() => Microphone.devices.Length > 0, timeout); if (Microphone.devices.Length == 0) { diff --git a/Runtime/Scripts/RtcAudioSource.cs b/Runtime/Scripts/RtcAudioSource.cs index 43f5c102..9876744b 100644 --- a/Runtime/Scripts/RtcAudioSource.cs +++ b/Runtime/Scripts/RtcAudioSource.cs @@ -63,6 +63,13 @@ private sealed class PendingAudioFrame private readonly uint _expectedSampleRate; private readonly uint _expectedChannels; + /// The sample rate the native source was configured with; the format captured + /// audio must match (subclasses resample to this). + protected uint ExpectedSampleRate => _expectedSampleRate; + + /// The channel count the native source was configured with. + protected uint ExpectedChannels => _expectedChannels; + internal readonly FfiHandle Handle; protected AudioSourceInfo _info;