diff --git a/Runtime/Scripts/BasicAudioSource.cs b/Runtime/Scripts/BasicAudioSource.cs index 3b63680b..8193090d 100644 --- a/Runtime/Scripts/BasicAudioSource.cs +++ b/Runtime/Scripts/BasicAudioSource.cs @@ -19,9 +19,11 @@ sealed public class BasicAudioSource : RtcAudioSource /// Creates a new basic audio source for the given in the scene. /// /// The to capture from. - /// The number of channels to capture. /// The type of audio source. - public BasicAudioSource(AudioSource source, int channels = 2, RtcAudioSourceType sourceType = RtcAudioSourceType.AudioSourceCustom) : base(channels, sourceType) + /// + /// The sample rate and channel count are taken from Unity's audio configuration. + /// + public BasicAudioSource(AudioSource source, RtcAudioSourceType sourceType = RtcAudioSourceType.AudioSourceCustom) : base(sourceType) { _source = source; } diff --git a/Runtime/Scripts/MicrophoneSource.cs b/Runtime/Scripts/MicrophoneSource.cs index 904b8da7..25626da4 100644 --- a/Runtime/Scripts/MicrophoneSource.cs +++ b/Runtime/Scripts/MicrophoneSource.cs @@ -14,12 +14,20 @@ namespace LiveKit sealed public class MicrophoneSource : RtcAudioSource { private readonly GameObject _sourceObject; + + // The device requested by the caller. Empty/null means "follow the OS default". private readonly string _deviceName; + // The device the microphone is actually recording from right now. This can differ from + // _deviceName when the preferred device is unavailable and we fall back to the OS default, + // so all Microphone.* calls (IsRecording/GetPosition/End) must use this name. + private string _activeDeviceName; + public override event Action AudioRead; private bool _disposed = false; private bool _started = false; + private bool _restarting = false; /// /// Creates a new microphone source for the given device. @@ -28,7 +36,7 @@ sealed public class MicrophoneSource : RtcAudioSource /// get the list of available devices. /// The GameObject to attach the AudioSource to. The object must be kept in the scene /// for the duration of the source's lifetime. - public MicrophoneSource(string deviceName, GameObject sourceObject) : base(2, RtcAudioSourceType.AudioSourceMicrophone) + public MicrophoneSource(string deviceName, GameObject sourceObject) : base(RtcAudioSourceType.AudioSourceMicrophone) { _deviceName = deviceName; _sourceObject = sourceObject; @@ -54,6 +62,10 @@ public override void Start() throw new InvalidOperationException("Microphone access not authorized"); MonoBehaviourContext.OnApplicationPauseEvent += OnApplicationPause; + // Restart capture when the system audio device changes (e.g. a Bluetooth headset is + // unplugged). Unity rebuilds its audio graph on a device change, which both detaches + // the AudioProbe tap and leaves Microphone.Start bound to a now-gone device. + AudioSettings.OnAudioConfigurationChanged += OnAudioConfigurationChanged; MonoBehaviourContext.RunCoroutine(StartMicrophone()); _started = true; @@ -75,14 +87,19 @@ private IEnumerator StartMicrophone() yield break; } + // Resolve which device to record from. Falls back to the OS default when the + // preferred device is gone, so an unplugged headset transparently hands off to the + // built-in microphone. + _activeDeviceName = ResolveCaptureDevice(); + AudioClip clip = null; try { clip = Microphone.Start( - _deviceName, + _activeDeviceName, loop: true, lengthSec: 1, - frequency: (int)DefaultMicrophoneSampleRate + frequency: (int)_expectedSampleRate ); } catch (Exception e) @@ -123,20 +140,20 @@ private IEnumerator StartMicrophone() // Wait for microphone to actually start producing data with a timeout const float timeout = 2f; float elapsed = 0f; - while (Microphone.GetPosition(_deviceName) <= 0 && elapsed < timeout) + while (Microphone.GetPosition(_activeDeviceName) <= 0 && elapsed < timeout) { yield return new WaitForSeconds(0.05f); elapsed += 0.05f; } - if (Microphone.GetPosition(_deviceName) <= 0) + if (Microphone.GetPosition(_activeDeviceName) <= 0) { Utils.Error($"MicrophoneSource: Microphone did not start producing data after {timeout}s"); yield break; } source.Play(); - Utils.Debug($"MicrophoneSource device='{_deviceName}' started successfully"); + Utils.Debug($"MicrophoneSource device='{_activeDeviceName ?? ""}' started successfully"); } /// @@ -147,13 +164,14 @@ public override void Stop() base.Stop(); MonoBehaviourContext.RunCoroutine(StopMicrophone()); MonoBehaviourContext.OnApplicationPauseEvent -= OnApplicationPause; + AudioSettings.OnAudioConfigurationChanged -= OnAudioConfigurationChanged; _started = false; } private IEnumerator StopMicrophone() { - if (Microphone.IsRecording(_deviceName)) - Microphone.End(_deviceName); + if (Microphone.IsRecording(_activeDeviceName)) + Microphone.End(_activeDeviceName); // Check if GameObject is still valid before trying to access components if (_sourceObject != null) @@ -170,7 +188,7 @@ private IEnumerator StopMicrophone() UnityEngine.Object.Destroy(source); } - Utils.Debug($"MicrophoneSource device='{_deviceName}' stopped"); + Utils.Debug($"MicrophoneSource device='{_activeDeviceName ?? ""}' stopped"); yield return null; } @@ -197,16 +215,73 @@ private void OnApplicationPause(bool pause) } } - private IEnumerator RestartMicrophone() + // Picks the device name to pass to Microphone.Start. An empty preferred name, or a + // preferred device that is no longer connected, resolves to null so Unity records from + // the current OS default device. + private string ResolveCaptureDevice() + { + if (string.IsNullOrEmpty(_deviceName)) + return null; + + if (Array.IndexOf(Microphone.devices, _deviceName) >= 0) + return _deviceName; + + Utils.Debug($"MicrophoneSource: preferred device '{_deviceName}' is no longer available, falling back to the OS default"); + return null; + } + + // Fires on the main thread when Unity's audio configuration changes, including when the + // system capture/playback device changes (e.g. unplugging a Bluetooth headset). Mirrors + // AudioStream.OnAudioConfigurationChanged on the playback side. + private void OnAudioConfigurationChanged(bool deviceWasChanged) + { + if (!_started) + return; + + // The native source rejects frames whose rate/channels don't match how it was + // created. If the device change moved Unity's output format, the source must be + // recreated at the new format (and its track re-bound) — otherwise restarting capture + // alone won't recover audio. RtcAudioSource.Reconfigure handles the recreation; we + // run it inside the restart while capture is paused. + var (newRate, newChannels) = ResolveDeviceFormat(); + bool formatChanged = newRate != _expectedSampleRate || newChannels != _expectedChannels; + + if (formatChanged) + { + Utils.Debug($"MicrophoneSource: DSP format changed to {newRate}/{newChannels}, recreating native source and restarting capture"); + MonoBehaviourContext.RunCoroutine(RestartMicrophone(newRate, newChannels)); + } + else if (deviceWasChanged) + { + Utils.Debug("MicrophoneSource: audio device changed, restarting capture on the current default device"); + MonoBehaviourContext.RunCoroutine(RestartMicrophone()); + } + } + + private IEnumerator RestartMicrophone(uint reconfigureRate = 0, uint reconfigureChannels = 0) { + // The device-change event can fire several times around a single hardware swap; + // ignore re-entrant restarts so overlapping Stop/Start coroutines don't race. + if (_restarting) + yield break; + _restarting = true; + yield return StopMicrophone(); + // With capture stopped (no AudioRead callbacks in flight), it's safe to recreate the + // native source at the new format. This raises FormatChanged so the owning track is + // re-bound to the new handle. + if (reconfigureRate > 0 && reconfigureChannels > 0) + Reconfigure(reconfigureRate, reconfigureChannels); + // Wait for iOS audio session to be ready before attempting to restart. // On iOS, after app resumes from background, the audio session needs time to // recover from interruption. Poll for readiness instead of using arbitrary delay. yield return WaitForMicrophoneReady(); yield return StartMicrophone(); + + _restarting = false; } private IEnumerator WaitForMicrophoneReady() diff --git a/Runtime/Scripts/RtcAudioSource.cs b/Runtime/Scripts/RtcAudioSource.cs index a9af8a0a..f9e22b98 100644 --- a/Runtime/Scripts/RtcAudioSource.cs +++ b/Runtime/Scripts/RtcAudioSource.cs @@ -46,26 +46,25 @@ private sealed class PendingAudioFrame /// public abstract event Action AudioRead; -#if UNITY_IOS && !UNITY_EDITOR - // iOS microphone sample rate is 24k - public static uint DefaultMicrophoneSampleRate = 24000; - - public static uint DefaultSampleRate = 48000; -#else - public static uint DefaultSampleRate = 48000; - public static uint DefaultMicrophoneSampleRate = DefaultSampleRate; -#endif - public static uint DefaultChannels = 2; - private readonly RtcAudioSourceType _sourceType; public RtcAudioSourceType SourceType => _sourceType; private readonly int _debugId = Interlocked.Increment(ref nextDebugId); - private readonly uint _expectedSampleRate; - private readonly uint _expectedChannels; - internal readonly FfiHandle Handle; + // The format the native source is configured for. Mutable because Reconfigure() can + // recreate the source at a new format when the audio device's rate/channels change. + internal uint _expectedSampleRate; + internal uint _expectedChannels; + + internal FfiHandle Handle; protected AudioSourceInfo _info; + /// + /// Raised after the native audio source has been recreated at a new format (see + /// ). The source's changes, so any track + /// bound to the previous handle must be recreated against the new one. + /// + public event Action FormatChanged; + // CaptureAudioFrame is asynchronous: the native side can continue reading from the PCM // pointer after request.Send() returns and encode it later on another queue. Because of // that, a single reusable NativeArray is unsafe here; the next AudioRead callback can @@ -83,20 +82,41 @@ private sealed class PendingAudioFrame private volatile bool _disposed = false; private int _audioReadCount = 0; - protected RtcAudioSource(int channels = 2, RtcAudioSourceType audioSourceType = RtcAudioSourceType.AudioSourceCustom) + // Device-capture sources (microphone, AudioSource taps) don't know their format ahead of + // time — it is whatever Unity's audio graph delivers. They use this constructor, which + // configures the native source from Unity's current output configuration. + protected RtcAudioSource(RtcAudioSourceType audioSourceType) + : this(audioSourceType, 0, 0) { } + + // Sources that generate a fixed, known format (e.g. test signal generators) declare it + // directly. Passing 0 for either value falls back to the device configuration. + protected RtcAudioSource(RtcAudioSourceType audioSourceType, uint sampleRate, uint channels) { _sourceType = audioSourceType; - _expectedChannels = (uint)channels; + if (sampleRate > 0 && channels > 0) + { + _expectedSampleRate = sampleRate; + _expectedChannels = channels; + } + else + { + (_expectedSampleRate, _expectedChannels) = ResolveDeviceFormat(); + } + + CreateNativeSource(); + } + + // Creates the native FFI audio source for the current _expectedSampleRate/_expectedChannels + // and stores its handle. Called once from the constructor and again from Reconfigure() when + // the format changes. + private void CreateNativeSource() + { using var request = FFIBridge.Instance.NewRequest(); var newAudioSource = request.request; newAudioSource.Type = AudioSourceType.AudioSourceNative; - newAudioSource.NumChannels = (uint)channels; - newAudioSource.SampleRate = _sourceType == RtcAudioSourceType.AudioSourceMicrophone ? - DefaultMicrophoneSampleRate : DefaultSampleRate; - _expectedSampleRate = newAudioSource.SampleRate; - - Utils.Debug($"NewAudioSource: {newAudioSource.NumChannels} {newAudioSource.SampleRate}"); + newAudioSource.NumChannels = _expectedChannels; + newAudioSource.SampleRate = _expectedSampleRate; newAudioSource.Options = request.TempResource(); newAudioSource.Options.EchoCancellation = true; @@ -109,6 +129,67 @@ protected RtcAudioSource(int channels = 2, RtcAudioSourceType audioSourceType = Utils.Debug($"{DebugTag} created handle={Handle.DangerousGetHandle()} expectedRate={_expectedSampleRate} expectedChannels={_expectedChannels} sourceType={_sourceType}"); } + /// + /// Recreates the native audio source at a new format. The Rust FFI source does not + /// resample and rejects frames whose rate/channels differ from how it was created, so when + /// the capture device moves Unity's output format we must build a fresh source. + /// + /// + /// Must be called while capture is paused (no callbacks in flight), + /// because it disposes and replaces . Raises + /// on success so the owner can re-bind any track to the new handle. + /// + /// True if the source was recreated; false if the format was unchanged or invalid. + public bool Reconfigure(uint sampleRate, uint channels) + { + if (_disposed) return false; + if (sampleRate == 0 || channels == 0) return false; + if (sampleRate == _expectedSampleRate && channels == _expectedChannels) return false; + + Utils.Debug($"{DebugTag} reconfigure {_expectedSampleRate}/{_expectedChannels} -> {sampleRate}/{channels}"); + + // The native source stays alive as long as a track references it, so disposing our + // handle here is safe even before the old track is unpublished. + Handle?.Dispose(); + _expectedSampleRate = sampleRate; + _expectedChannels = channels; + CreateNativeSource(); + + FormatChanged?.Invoke(); + return true; + } + + // Reads Unity's actual output audio configuration. The capture path delivers buffers at the + // DSP output rate/channel count (see AudioProbe), so this is the format the native source + // must match. Falls back to the platform defaults when Unity cannot report a configuration + // (e.g. batch mode without an audio device). + protected (uint sampleRate, uint channels) ResolveDeviceFormat() + { + var config = UnityEngine.AudioSettings.GetConfiguration(); + var sampleRate = (uint)config.sampleRate; + var configuredChannels = SpeakerModeChannels(config.speakerMode); + var channels = configuredChannels; + + Utils.Info($"Configured native audio source with sampleRate {sampleRate} and channels {channels}"); + + return (sampleRate, channels); + } + + private static uint SpeakerModeChannels(UnityEngine.AudioSpeakerMode mode) + { + switch (mode) + { + case UnityEngine.AudioSpeakerMode.Mono: return 1; + case UnityEngine.AudioSpeakerMode.Stereo: return 2; + case UnityEngine.AudioSpeakerMode.Quad: return 4; + case UnityEngine.AudioSpeakerMode.Surround: return 5; + case UnityEngine.AudioSpeakerMode.Mode5point1: return 6; + case UnityEngine.AudioSpeakerMode.Mode7point1: return 8; + case UnityEngine.AudioSpeakerMode.Prologic: return 2; + default: return 0; + } + } + /// /// Begin capturing audio samples from the underlying source. /// diff --git a/Samples~/Meet/Assets/Runtime/MeetManager.cs b/Samples~/Meet/Assets/Runtime/MeetManager.cs index 225c7a0c..241e47aa 100644 --- a/Samples~/Meet/Assets/Runtime/MeetManager.cs +++ b/Samples~/Meet/Assets/Runtime/MeetManager.cs @@ -453,8 +453,9 @@ private IEnumerator PublishLocalMicrophone() { if (_audioObjects.ContainsKey(LocalAudioTrackName)) yield break; + // Start the microphone here for early iOS permission request and android getting access to Microphone.devices Microphone.Start(null, true, 10, 44100); - + var audioObject = new GameObject($"My Microphone: {Microphone.devices[0]}"); audioObject.transform.SetParent(_audioTrackParent); @@ -476,14 +477,53 @@ private IEnumerator PublishLocalMicrophone() _microphoneActive = true; _audioObjects[LocalAudioTrackName] = audioObject; _localRtcAudioSource = rtcSource; + // When the capture device changes to one with a different sample rate, the source + // recreates its native handle; re-bind the published track to the new handle. + rtcSource.FormatChanged += OnLocalMicrophoneFormatChanged; rtcSource.Start(); if (_participantTiles.TryGetValue(_localId, out var tile)) tile.SetMicMuted(false); } + // Raised (on the main thread) after the local microphone source recreated its native handle + // at a new format. The old track is bound to the now-disposed handle, so republish. + private void OnLocalMicrophoneFormatChanged() + { + StartCoroutine(RepublishLocalMicrophone()); + } + + private IEnumerator RepublishLocalMicrophone() + { + if (_localRtcAudioSource == null || _room == null) yield break; + + if (_localAudioTrack != null) + { + _room.LocalParticipant.UnpublishTrack(_localAudioTrack, false); + _localAudioTrack = null; + } + + _localAudioTrack = LocalAudioTrack.CreateAudioTrack(LocalAudioTrackName, _localRtcAudioSource, _room); + + var options = new TrackPublishOptions + { + AudioEncoding = new AudioEncoding { MaxBitrate = 64000 }, + Source = TrackSource.SourceMicrophone + }; + + var publish = _room.LocalParticipant.PublishTrack(_localAudioTrack, options); + yield return publish; + + if (publish.IsError) + Debug.LogError("Failed to republish local microphone after format change"); + else + Debug.Log("Republished local microphone track after audio format change"); + } + private void UnpublishLocalMicrophone() { + if (_localRtcAudioSource != null) + _localRtcAudioSource.FormatChanged -= OnLocalMicrophoneFormatChanged; DisposeSource(ref _localRtcAudioSource); if (_audioObjects.TryGetValue(LocalAudioTrackName, out var obj)) @@ -561,6 +601,8 @@ private static void DisposeSource(ref T source) where T : class, System.IDisp private void CleanUpAllTracks() { + if (_localRtcAudioSource != null) + _localRtcAudioSource.FormatChanged -= OnLocalMicrophoneFormatChanged; DisposeSource(ref _localRtcAudioSource); DisposeSource(ref _localRtcVideoSource); diff --git a/Tests/PlayMode/Utils/SineWaveAudioSource.cs b/Tests/PlayMode/Utils/SineWaveAudioSource.cs index 907e9ccc..2337615b 100644 --- a/Tests/PlayMode/Utils/SineWaveAudioSource.cs +++ b/Tests/PlayMode/Utils/SineWaveAudioSource.cs @@ -31,7 +31,7 @@ public SineWaveAudioSource( int sampleRate = 48000, double frequencyHz = 440.0, float amplitude = 0.1f) - : base(channels, RtcAudioSourceType.AudioSourceCustom) + : base(RtcAudioSourceType.AudioSourceCustom, (uint)sampleRate, (uint)channels) { _channels = channels; _sampleRate = sampleRate;