Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions Runtime/Scripts/BasicAudioSource.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@ sealed public class BasicAudioSource : RtcAudioSource
/// Creates a new basic audio source for the given <see cref="AudioSource"/> in the scene.
/// </summary>
/// <param name="source">The <see cref="AudioSource"/> to capture from.</param>
/// <param name="channels">The number of channels to capture.</param>
/// <param name="sourceType">The type of audio source.</param>
public BasicAudioSource(AudioSource source, int channels = 2, RtcAudioSourceType sourceType = RtcAudioSourceType.AudioSourceCustom) : base(channels, sourceType)
/// <remarks>
/// The sample rate and channel count are taken from Unity's audio configuration.
/// </remarks>
public BasicAudioSource(AudioSource source, RtcAudioSourceType sourceType = RtcAudioSourceType.AudioSourceCustom) : base(sourceType)
{
_source = source;
}
Expand Down
95 changes: 85 additions & 10 deletions Runtime/Scripts/MicrophoneSource.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,20 @@ namespace LiveKit
sealed public class MicrophoneSource : RtcAudioSource
{
private readonly GameObject _sourceObject;

// The device requested by the caller. Empty/null means "follow the OS default".
private readonly string _deviceName;

// The device the microphone is actually recording from right now. This can differ from
// _deviceName when the preferred device is unavailable and we fall back to the OS default,
// so all Microphone.* calls (IsRecording/GetPosition/End) must use this name.
private string _activeDeviceName;

public override event Action<float[], int, int> AudioRead;

private bool _disposed = false;
private bool _started = false;
private bool _restarting = false;

/// <summary>
/// Creates a new microphone source for the given device.
Expand All @@ -28,7 +36,7 @@ sealed public class MicrophoneSource : RtcAudioSource
/// get the list of available devices.</param>
/// <param name="sourceObject">The GameObject to attach the AudioSource to. The object must be kept in the scene
/// for the duration of the source's lifetime.</param>
public MicrophoneSource(string deviceName, GameObject sourceObject) : base(2, RtcAudioSourceType.AudioSourceMicrophone)
public MicrophoneSource(string deviceName, GameObject sourceObject) : base(RtcAudioSourceType.AudioSourceMicrophone)
{
_deviceName = deviceName;
_sourceObject = sourceObject;
Expand All @@ -54,6 +62,10 @@ public override void Start()
throw new InvalidOperationException("Microphone access not authorized");

MonoBehaviourContext.OnApplicationPauseEvent += OnApplicationPause;
// Restart capture when the system audio device changes (e.g. a Bluetooth headset is
// unplugged). Unity rebuilds its audio graph on a device change, which both detaches
// the AudioProbe tap and leaves Microphone.Start bound to a now-gone device.
AudioSettings.OnAudioConfigurationChanged += OnAudioConfigurationChanged;
MonoBehaviourContext.RunCoroutine(StartMicrophone());

_started = true;
Expand All @@ -75,14 +87,19 @@ private IEnumerator StartMicrophone()
yield break;
}

// Resolve which device to record from. Falls back to the OS default when the
// preferred device is gone, so an unplugged headset transparently hands off to the
// built-in microphone.
_activeDeviceName = ResolveCaptureDevice();

AudioClip clip = null;
try
{
clip = Microphone.Start(
_deviceName,
_activeDeviceName,
loop: true,
lengthSec: 1,
frequency: (int)DefaultMicrophoneSampleRate
frequency: (int)_expectedSampleRate
);
}
catch (Exception e)
Expand Down Expand Up @@ -123,20 +140,20 @@ private IEnumerator StartMicrophone()
// Wait for microphone to actually start producing data with a timeout
const float timeout = 2f;
float elapsed = 0f;
while (Microphone.GetPosition(_deviceName) <= 0 && elapsed < timeout)
while (Microphone.GetPosition(_activeDeviceName) <= 0 && elapsed < timeout)
{
yield return new WaitForSeconds(0.05f);
elapsed += 0.05f;
}

if (Microphone.GetPosition(_deviceName) <= 0)
if (Microphone.GetPosition(_activeDeviceName) <= 0)
{
Utils.Error($"MicrophoneSource: Microphone did not start producing data after {timeout}s");
yield break;
}

source.Play();
Utils.Debug($"MicrophoneSource device='{_deviceName}' started successfully");
Utils.Debug($"MicrophoneSource device='{_activeDeviceName ?? "<default>"}' started successfully");
}

/// <summary>
Expand All @@ -147,13 +164,14 @@ public override void Stop()
base.Stop();
MonoBehaviourContext.RunCoroutine(StopMicrophone());
MonoBehaviourContext.OnApplicationPauseEvent -= OnApplicationPause;
AudioSettings.OnAudioConfigurationChanged -= OnAudioConfigurationChanged;
_started = false;
}

private IEnumerator StopMicrophone()
{
if (Microphone.IsRecording(_deviceName))
Microphone.End(_deviceName);
if (Microphone.IsRecording(_activeDeviceName))
Microphone.End(_activeDeviceName);

// Check if GameObject is still valid before trying to access components
if (_sourceObject != null)
Expand All @@ -170,7 +188,7 @@ private IEnumerator StopMicrophone()
UnityEngine.Object.Destroy(source);
}

Utils.Debug($"MicrophoneSource device='{_deviceName}' stopped");
Utils.Debug($"MicrophoneSource device='{_activeDeviceName ?? "<default>"}' stopped");
yield return null;
}

Expand All @@ -197,16 +215,73 @@ private void OnApplicationPause(bool pause)
}
}

private IEnumerator RestartMicrophone()
// Chooses the device name handed to Microphone.Start.
//
// Returns _deviceName only when the caller asked for a specific device AND that device is
// currently listed in Microphone.devices. In every other case (no preference, or the
// preferred device is missing) it returns null, which is the value used here to request
// the OS default device.
private string ResolveCaptureDevice()
{
    bool hasPreference = !string.IsNullOrEmpty(_deviceName);

    if (hasPreference)
    {
        int deviceIndex = Array.IndexOf(Microphone.devices, _deviceName);
        if (deviceIndex >= 0)
        {
            return _deviceName;
        }

        // Only the "preferred device vanished" case is logged; an empty preference is
        // the normal way to ask for the default and stays silent.
        Utils.Debug($"MicrophoneSource: preferred device '{_deviceName}' is no longer available, falling back to the OS default");
    }

    return null;
}

// Handler subscribed to AudioSettings.OnAudioConfigurationChanged while the source is started.
//
// Two situations trigger a capture restart:
//   1. Unity's resolved output format (rate/channels) no longer matches the format the native
//      source was created with. The new format is passed to RestartMicrophone so the native
//      source is recreated while capture is stopped.
//   2. The format is unchanged but Unity reports that the device itself changed. Capture is
//      restarted without reconfiguring.
// Anything else is ignored.
private void OnAudioConfigurationChanged(bool deviceWasChanged)
{
    if (!_started)
    {
        return;
    }

    var format = ResolveDeviceFormat();
    bool formatUnchanged = format.sampleRate == _expectedSampleRate
        && format.channels == _expectedChannels;

    // Nothing relevant changed: same format and same device.
    if (formatUnchanged && !deviceWasChanged)
    {
        return;
    }

    if (formatUnchanged)
    {
        Utils.Debug("MicrophoneSource: audio device changed, restarting capture on the current default device");
        MonoBehaviourContext.RunCoroutine(RestartMicrophone());
        return;
    }

    // A format change takes priority over a plain device change, because restarting capture
    // alone leaves the native source at the old rate/channel count.
    Utils.Debug($"MicrophoneSource: DSP format changed to {format.sampleRate}/{format.channels}, recreating native source and restarting capture");
    MonoBehaviourContext.RunCoroutine(RestartMicrophone(format.sampleRate, format.channels));
}

// Stops capture, optionally recreates the native source at a new format, waits for the
// microphone to become ready, then starts capture again.
//
// reconfigureRate / reconfigureChannels: when both are non-zero, Reconfigure is called with
// them after capture has stopped. The defaults (0, 0) mean "restart only".
//
// Only one restart runs at a time; a call made while another restart is in progress returns
// immediately.
// NOTE(review): a format carried by such a dropped call is not retried here. Confirm that a
// later configuration event re-reports it, or queue it.
private IEnumerator RestartMicrophone(uint reconfigureRate = 0, uint reconfigureChannels = 0)
{
    // The device-change event can fire several times around a single hardware swap;
    // ignore re-entrant restarts so overlapping Stop/Start coroutines don't race.
    if (_restarting)
        yield break;
    _restarting = true;

    // try/finally guarantees the re-entrancy flag is released on every exit path. Without it,
    // an exception thrown while this coroutine is running (e.g. from Reconfigure) would leave
    // _restarting stuck at true and silently disable all future restarts.
    try
    {
        yield return StopMicrophone();

        // With capture stopped it is safe to recreate the native source at the new format.
        // Reconfigure raises FormatChanged so the owning track can be re-bound.
        if (reconfigureRate > 0 && reconfigureChannels > 0)
            Reconfigure(reconfigureRate, reconfigureChannels);

        // Wait for iOS audio session to be ready before attempting to restart.
        // On iOS, after app resumes from background, the audio session needs time to
        // recover from interruption. Poll for readiness instead of using arbitrary delay.
        yield return WaitForMicrophoneReady();

        // Stop() or disposal may have happened while this coroutine was suspended above.
        // Restarting in that case would re-open the microphone after the caller asked for
        // it to be released, so bail out instead.
        if (_disposed || !_started)
            yield break;

        yield return StartMicrophone();
    }
    finally
    {
        _restarting = false;
    }
}

private IEnumerator WaitForMicrophoneReady()
Expand Down
125 changes: 103 additions & 22 deletions Runtime/Scripts/RtcAudioSource.cs
Original file line number Diff line number Diff line change
Expand Up @@ -46,26 +46,25 @@ private sealed class PendingAudioFrame
/// </remarks>
public abstract event Action<float[], int, int> AudioRead;

#if UNITY_IOS && !UNITY_EDITOR
// iOS microphone sample rate is 24k
public static uint DefaultMicrophoneSampleRate = 24000;

public static uint DefaultSampleRate = 48000;
#else
public static uint DefaultSampleRate = 48000;
public static uint DefaultMicrophoneSampleRate = DefaultSampleRate;
#endif
public static uint DefaultChannels = 2;

private readonly RtcAudioSourceType _sourceType;
public RtcAudioSourceType SourceType => _sourceType;
private readonly int _debugId = Interlocked.Increment(ref nextDebugId);
private readonly uint _expectedSampleRate;
private readonly uint _expectedChannels;

internal readonly FfiHandle Handle;
// The format the native source is configured for. Mutable because Reconfigure() can
// recreate the source at a new format when the audio device's rate/channels change.
internal uint _expectedSampleRate;
internal uint _expectedChannels;

internal FfiHandle Handle;
protected AudioSourceInfo _info;

/// <summary>
/// Raised after the native audio source has been recreated at a new format (see
/// <see cref="Reconfigure"/>). The source's <see cref="Handle"/> changes, so any track
/// bound to the previous handle must be recreated against the new one.
/// </summary>
public event Action FormatChanged;

// CaptureAudioFrame is asynchronous: the native side can continue reading from the PCM
// pointer after request.Send() returns and encode it later on another queue. Because of
// that, a single reusable NativeArray is unsafe here; the next AudioRead callback can
Expand All @@ -83,20 +82,41 @@ private sealed class PendingAudioFrame
private volatile bool _disposed = false;
private int _audioReadCount = 0;

protected RtcAudioSource(int channels = 2, RtcAudioSourceType audioSourceType = RtcAudioSourceType.AudioSourceCustom)
// Device-capture sources (microphone, AudioSource taps) don't know their format ahead of
// time — it is whatever Unity's audio graph delivers. They use this constructor, which
// configures the native source from Unity's current output configuration.
protected RtcAudioSource(RtcAudioSourceType audioSourceType)
: this(audioSourceType, 0, 0) { }

// Sources that generate a fixed, known format (e.g. test signal generators) declare it
// directly. Passing 0 for either value falls back to the device configuration.
protected RtcAudioSource(RtcAudioSourceType audioSourceType, uint sampleRate, uint channels)
{
_sourceType = audioSourceType;
_expectedChannels = (uint)channels;

if (sampleRate > 0 && channels > 0)
{
_expectedSampleRate = sampleRate;
_expectedChannels = channels;
}
else
{
(_expectedSampleRate, _expectedChannels) = ResolveDeviceFormat();
}

CreateNativeSource();
}

// Creates the native FFI audio source for the current _expectedSampleRate/_expectedChannels
// and stores its handle. Called once from the constructor and again from Reconfigure() when
// the format changes.
private void CreateNativeSource()
{
using var request = FFIBridge.Instance.NewRequest<NewAudioSourceRequest>();
var newAudioSource = request.request;
newAudioSource.Type = AudioSourceType.AudioSourceNative;
newAudioSource.NumChannels = (uint)channels;
newAudioSource.SampleRate = _sourceType == RtcAudioSourceType.AudioSourceMicrophone ?
DefaultMicrophoneSampleRate : DefaultSampleRate;
_expectedSampleRate = newAudioSource.SampleRate;

Utils.Debug($"NewAudioSource: {newAudioSource.NumChannels} {newAudioSource.SampleRate}");
newAudioSource.NumChannels = _expectedChannels;
newAudioSource.SampleRate = _expectedSampleRate;

newAudioSource.Options = request.TempResource<AudioSourceOptions>();
newAudioSource.Options.EchoCancellation = true;
Expand All @@ -109,6 +129,67 @@ protected RtcAudioSource(int channels = 2, RtcAudioSourceType audioSourceType =
Utils.Debug($"{DebugTag} created handle={Handle.DangerousGetHandle()} expectedRate={_expectedSampleRate} expectedChannels={_expectedChannels} sourceType={_sourceType}");
}

/// <summary>
/// Recreates the native audio source at a new format. The Rust FFI source does not
/// resample and rejects frames whose rate/channels differ from how it was created, so when
/// the capture device moves Unity's output format we must build a fresh source.
/// </summary>
/// <remarks>
/// Must be called while capture is paused (no <see cref="AudioRead"/> callbacks in flight),
/// because it disposes and replaces <see cref="Handle"/>. Raises <see cref="FormatChanged"/>
/// on success so the owner can re-bind any track to the new handle.
/// </remarks>
/// <returns>True if the source was recreated; false if the format was unchanged or invalid.</returns>
public bool Reconfigure(uint sampleRate, uint channels)
{
if (_disposed) return false;
if (sampleRate == 0 || channels == 0) return false;
if (sampleRate == _expectedSampleRate && channels == _expectedChannels) return false;

Utils.Debug($"{DebugTag} reconfigure {_expectedSampleRate}/{_expectedChannels} -> {sampleRate}/{channels}");

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without this change, we can have:

LiveKit: MicrophoneSource: audio device change moved the DSP output rate to 44100Hz, but the native source is fixed at 48000Hz. Captured frames will be dropped until the track is recreated at the new rate.
UnityEngine.Logger:LogWarning (string,object)
LiveKit.Internal.Utils:Warning (object) (at /Users/maxheimbrock/dev/unity/client-sdk-unity/Runtime/Scripts/Internal/Utils.cs:41)
LiveKit.MicrophoneSource:OnAudioConfigurationChanged (bool) (at /Users/maxheimbrock/dev/unity/client-sdk-unity/Runtime/Scripts/MicrophoneSource.cs:248)
UnityEngine.AudioSettings:InvokeOnAudioConfigurationChanged (bool) (at /Users/bokken/build/output/unity/unity/Modules/Audio/Public/ScriptBindings/Audio.bindings.cs:413)


// The native source stays alive as long as a track references it, so disposing our
// handle here is safe even before the old track is unpublished.
Handle?.Dispose();
_expectedSampleRate = sampleRate;
_expectedChannels = channels;
CreateNativeSource();

FormatChanged?.Invoke();
return true;
}

// Reads Unity's current output audio configuration and returns the (sampleRate, channels)
// pair the native source should be created with.
//
// Falls back to DefaultSampleRate / DefaultChannels when Unity does not report a usable value:
//   - a non-positive sample rate, or
//   - a speaker mode that SpeakerModeChannels cannot map (it returns 0 for those).
// Without the fallback a zero rate or channel count would be passed on to the native source.
protected (uint sampleRate, uint channels) ResolveDeviceFormat()
{
    var config = UnityEngine.AudioSettings.GetConfiguration();

    // Check the sign before casting: converting a non-positive int to uint would produce 0
    // or a wrapped-around value rather than a usable rate.
    uint sampleRate = config.sampleRate > 0 ? (uint)config.sampleRate : DefaultSampleRate;

    uint channels = SpeakerModeChannels(config.speakerMode);
    if (channels == 0)
        channels = DefaultChannels;

    Utils.Info($"Configured native audio source with sampleRate {sampleRate} and channels {channels}");

    return (sampleRate, channels);
}

// Maps a Unity speaker mode to the channel count used for the native audio source.
// Any mode not listed yields 0, which callers can treat as "unknown".
private static uint SpeakerModeChannels(UnityEngine.AudioSpeakerMode mode) => mode switch
{
    UnityEngine.AudioSpeakerMode.Mono => 1u,
    UnityEngine.AudioSpeakerMode.Stereo => 2u,
    UnityEngine.AudioSpeakerMode.Prologic => 2u,
    UnityEngine.AudioSpeakerMode.Quad => 4u,
    UnityEngine.AudioSpeakerMode.Surround => 5u,
    UnityEngine.AudioSpeakerMode.Mode5point1 => 6u,
    UnityEngine.AudioSpeakerMode.Mode7point1 => 8u,
    _ => 0u,
};

/// <summary>
/// Begin capturing audio samples from the underlying source.
/// </summary>
Expand Down
Loading
Loading