From edec6a1be94c5f0ff9d9cf5097883cb8304f1d01 Mon Sep 17 00:00:00 2001 From: Hajime Hoshi Date: Sun, 31 May 2026 14:09:13 +0900 Subject: [PATCH] oto: add a non-cgo ALSA fallback for the PulseAudio driver The PulseAudio driver requires a running PulseAudio server, which is absent on many minimal, container, kiosk, and embedded systems. When no server is reachable, fall back to ALSA by loading libasound dynamically through purego, keeping the fallback Cgo-free and preserving FreeBSD and NetBSD support. The context now picks a backend at startup much like the Windows driver chooses between WASAPI and WinMM. Closes #282 Co-Authored-By: Claude Opus 4.8 (1M context) --- README.md | 8 + driver_alsa_unix.go | 348 ++++++++++++++++++++++++++++++++++++++ driver_pulseaudio_unix.go | 152 +++++++++++++++++ driver_unix.go | 155 ++++++----------- 4 files changed, 563 insertions(+), 100 deletions(-) create mode 100644 driver_alsa_unix.go create mode 100644 driver_pulseaudio_unix.go diff --git a/README.md b/README.md index 5958cc7..20831c6 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,14 @@ though BSD systems are not tested well. If the PulseAudio server is not discoverable automatically, set `PULSE_SERVER`. +When no PulseAudio server is reachable, Oto falls back to ALSA. This fallback also requires no Cgo: +`libasound.so.2` is loaded dynamically at runtime, so no ALSA development headers are needed to +build, though `libasound.so.2` itself must be present at runtime. + +On FreeBSD, building with `CGO_ENABLED=0` (for example when cross-compiling) additionally requires +`-gcflags="github.com/ebitengine/purego/internal/fakecgo=-std"`; native FreeBSD builds, where Cgo +is enabled by default, need nothing extra. + ## Usage The two main components of Oto are a `Context` and `Players`. 
The context handles interactions with diff --git a/driver_alsa_unix.go b/driver_alsa_unix.go new file mode 100644 index 0000000..7589809 --- /dev/null +++ b/driver_alsa_unix.go @@ -0,0 +1,348 @@ +// Copyright 2026 The Oto Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build ((linux && !android) || freebsd || netbsd) && !nintendosdk && !playstation5 + +package oto + +import ( + "fmt" + "strings" + "sync" + "unsafe" + + "github.com/ebitengine/purego" + + "github.com/ebitengine/oto/v3/internal/mux" +) + +const ( + _SND_PCM_STREAM_PLAYBACK = 0 + _SND_PCM_FORMAT_FLOAT_LE = 14 + _SND_PCM_ACCESS_RW_INTERLEAVED = 3 +) + +var ( + _snd_strerror func(errnum int32) string + _snd_pcm_open func(pcm *uintptr, name string, stream int32, mode int32) int32 + _snd_pcm_close func(pcm uintptr) int32 + _snd_pcm_hw_params_malloc func(ptr *uintptr) int32 + _snd_pcm_hw_params_free func(obj uintptr) + _snd_pcm_hw_params_any func(pcm, params uintptr) int32 + _snd_pcm_hw_params_set_access func(pcm, params uintptr, access uint32) int32 + _snd_pcm_hw_params_set_format func(pcm, params uintptr, format int32) int32 + _snd_pcm_hw_params_set_channels func(pcm, params uintptr, val uint32) int32 + _snd_pcm_hw_params_set_rate_resample func(pcm, params uintptr, val uint32) int32 + _snd_pcm_hw_params_set_rate_near func(pcm, params uintptr, val *uint32, dir *int32) int32 + _snd_pcm_hw_params_set_buffer_size_near func(pcm, params uintptr, val *uint) int32 + 
_snd_pcm_hw_params_set_period_size_near func(pcm, params uintptr, val *uint, dir *int32) int32 + _snd_pcm_hw_params func(pcm, params uintptr) int32 + _snd_pcm_writei func(pcm uintptr, buf []float32, size uint) int + _snd_pcm_recover func(pcm uintptr, err int32, silent int32) int32 + + _snd_device_name_hint func(card int32, iface string, hints *unsafe.Pointer) int32 + _snd_device_name_free_hint func(hints unsafe.Pointer) int32 + _snd_device_name_get_hint func(hint unsafe.Pointer, id string) unsafe.Pointer + + _free func(ptr unsafe.Pointer) +) + +func init() { + newALSAContext = func(sampleRate, channelCount int, mux *mux.Mux, bufferSizeInBytes int) (unixBackend, error) { + c, err := newALSAContextImpl(sampleRate, channelCount, mux, bufferSizeInBytes) + if err != nil { + return nil, err + } + return c, nil + } +} + +// loadALSA loads libasound and binds the functions above. A context is created at most once +// per process (see NewContext), so this runs at most once and needs no synchronization. 
+func loadALSA() error { + var handle uintptr + var err error + for _, name := range []string{"libasound.so.2", "libasound.so"} { + handle, err = purego.Dlopen(name, purego.RTLD_LAZY|purego.RTLD_GLOBAL) + if err == nil { + break + } + } + if handle == 0 { + return fmt.Errorf("oto: failed to load libasound: %w", err) + } + + purego.RegisterLibFunc(&_snd_strerror, handle, "snd_strerror") + purego.RegisterLibFunc(&_snd_pcm_open, handle, "snd_pcm_open") + purego.RegisterLibFunc(&_snd_pcm_close, handle, "snd_pcm_close") + purego.RegisterLibFunc(&_snd_pcm_hw_params_malloc, handle, "snd_pcm_hw_params_malloc") + purego.RegisterLibFunc(&_snd_pcm_hw_params_free, handle, "snd_pcm_hw_params_free") + purego.RegisterLibFunc(&_snd_pcm_hw_params_any, handle, "snd_pcm_hw_params_any") + purego.RegisterLibFunc(&_snd_pcm_hw_params_set_access, handle, "snd_pcm_hw_params_set_access") + purego.RegisterLibFunc(&_snd_pcm_hw_params_set_format, handle, "snd_pcm_hw_params_set_format") + purego.RegisterLibFunc(&_snd_pcm_hw_params_set_channels, handle, "snd_pcm_hw_params_set_channels") + purego.RegisterLibFunc(&_snd_pcm_hw_params_set_rate_resample, handle, "snd_pcm_hw_params_set_rate_resample") + purego.RegisterLibFunc(&_snd_pcm_hw_params_set_rate_near, handle, "snd_pcm_hw_params_set_rate_near") + purego.RegisterLibFunc(&_snd_pcm_hw_params_set_buffer_size_near, handle, "snd_pcm_hw_params_set_buffer_size_near") + purego.RegisterLibFunc(&_snd_pcm_hw_params_set_period_size_near, handle, "snd_pcm_hw_params_set_period_size_near") + purego.RegisterLibFunc(&_snd_pcm_hw_params, handle, "snd_pcm_hw_params") + purego.RegisterLibFunc(&_snd_pcm_writei, handle, "snd_pcm_writei") + purego.RegisterLibFunc(&_snd_pcm_recover, handle, "snd_pcm_recover") + purego.RegisterLibFunc(&_snd_device_name_hint, handle, "snd_device_name_hint") + purego.RegisterLibFunc(&_snd_device_name_free_hint, handle, "snd_device_name_free_hint") + purego.RegisterLibFunc(&_snd_device_name_get_hint, handle, "snd_device_name_get_hint") + 
+ // libc free, for the strings snd_device_name_get_hint allocates. It resolves through the + // libasound handle, whose dependency tree includes libc, so no OS-specific libc name is needed. + purego.RegisterLibFunc(&_free, handle, "free") + return nil +} + +type alsaContext struct { + channelCount int + + suspended bool + + handle uintptr + + cond *sync.Cond + + mux *mux.Mux + err atomicError +} + +func newALSAContextImpl(sampleRate int, channelCount int, mux *mux.Mux, bufferSizeInBytes int) (*alsaContext, error) { + if channelCount != 1 && channelCount != 2 { + return nil, fmt.Errorf("oto: ALSA backend supports only mono or stereo output: %d", channelCount) + } + if err := loadALSA(); err != nil { + return nil, err + } + + c := &alsaContext{ + channelCount: channelCount, + cond: sync.NewCond(&sync.Mutex{}), + mux: mux, + } + + // Open a default ALSA audio device for blocking stream playback. + var openErrs []string + var handle uintptr + var found bool + for _, name := range deviceCandidates() { + if err := _snd_pcm_open(&handle, name, _SND_PCM_STREAM_PLAYBACK, 0); err < 0 { + openErrs = append(openErrs, fmt.Sprintf("%q: %s", name, _snd_strerror(err))) + continue + } + found = true + break + } + if !found { + return nil, fmt.Errorf("oto: ALSA error at snd_pcm_open: %s", strings.Join(openErrs, ", ")) + } + c.handle = handle + + // TODO: Should snd_pcm_hw_params_set_periods be called explicitly? + const periods = 2 + var periodSize uint + if bufferSizeInBytes != 0 { + periodSize = uint(bufferSizeInBytes / (channelCount * 4 * periods)) + if periodSize == 0 { + periodSize = 1 + } + } else { + periodSize = 1024 + } + bufferSize := periodSize * periods + if err := c.alsaPCMHwParams(sampleRate, channelCount, &bufferSize, &periodSize); err != nil { + _snd_pcm_close(c.handle) + return nil, err + } + + go func() { + // The loop only returns when readAndWrite hits a permanent error, so close the + // handle here to avoid leaking it after a terminal audio failure. 
+ defer _snd_pcm_close(c.handle) + buf32 := make([]float32, int(periodSize)*channelCount) + for { + if !c.readAndWrite(buf32) { + return + } + } + }() + + return c, nil +} + +func (c *alsaContext) alsaPCMHwParams(sampleRate, channelCount int, bufferSize, periodSize *uint) error { + var params uintptr + if err := _snd_pcm_hw_params_malloc(&params); err < 0 { + return alsaError("snd_pcm_hw_params_malloc", err) + } + defer _snd_pcm_hw_params_free(params) + + if err := _snd_pcm_hw_params_any(c.handle, params); err < 0 { + return alsaError("snd_pcm_hw_params_any", err) + } + if err := _snd_pcm_hw_params_set_access(c.handle, params, _SND_PCM_ACCESS_RW_INTERLEAVED); err < 0 { + return alsaError("snd_pcm_hw_params_set_access", err) + } + if err := _snd_pcm_hw_params_set_format(c.handle, params, _SND_PCM_FORMAT_FLOAT_LE); err < 0 { + return alsaError("snd_pcm_hw_params_set_format", err) + } + if err := _snd_pcm_hw_params_set_channels(c.handle, params, uint32(channelCount)); err < 0 { + return alsaError("snd_pcm_hw_params_set_channels", err) + } + if err := _snd_pcm_hw_params_set_rate_resample(c.handle, params, 1); err < 0 { + return alsaError("snd_pcm_hw_params_set_rate_resample", err) + } + sr := uint32(sampleRate) + if err := _snd_pcm_hw_params_set_rate_near(c.handle, params, &sr, nil); err < 0 { + return alsaError("snd_pcm_hw_params_set_rate_near", err) + } + if err := _snd_pcm_hw_params_set_buffer_size_near(c.handle, params, bufferSize); err < 0 { + return alsaError("snd_pcm_hw_params_set_buffer_size_near", err) + } + if err := _snd_pcm_hw_params_set_period_size_near(c.handle, params, periodSize, nil); err < 0 { + return alsaError("snd_pcm_hw_params_set_period_size_near", err) + } + if err := _snd_pcm_hw_params(c.handle, params); err < 0 { + return alsaError("snd_pcm_hw_params", err) + } + return nil +} + +func (c *alsaContext) readAndWrite(buf32 []float32) bool { + c.cond.L.Lock() + defer c.cond.L.Unlock() + + for c.suspended && c.err.Load() == nil { + c.cond.Wait() + } + 
if c.err.Load() != nil { + return false + } + + c.mux.ReadFloat32s(buf32) + + buf := buf32 + for len(buf) > 0 { + n := _snd_pcm_writei(c.handle, buf, uint(len(buf)/c.channelCount)) + if n < 0 { + n = int(_snd_pcm_recover(c.handle, int32(n), 1)) + } + if n < 0 { + c.err.TryStore(alsaError("snd_pcm_writei or snd_pcm_recover", int32(n))) + return false + } + buf = buf[n*c.channelCount:] + } + return true +} + +func (c *alsaContext) Suspend() error { + c.cond.L.Lock() + defer c.cond.L.Unlock() + + if err := c.err.Load(); err != nil { + return err + } + + c.suspended = true + + // Do not use snd_pcm_pause as not all devices support this. + // Do not use snd_pcm_drop as this might hang (https://github.com/libsdl-org/SDL/blob/a5c610b0a3857d3138f3f3da1f6dc3172c5ea4a8/src/audio/alsa/SDL_alsa_audio.c#L478). + return nil +} + +func (c *alsaContext) Resume() error { + c.cond.L.Lock() + defer c.cond.L.Unlock() + + if err := c.err.Load(); err != nil { + return err + } + + c.suspended = false + c.cond.Signal() + return nil +} + +func (c *alsaContext) Err() error { + return c.err.Load() +} + +func alsaError(name string, errno int32) error { + return fmt.Errorf("oto: ALSA error at %s: %s", name, _snd_strerror(errno)) +} + +func deviceCandidates() []string { + const getAllDevices = -1 + + var hints unsafe.Pointer + if _snd_device_name_hint(getAllDevices, "pcm", &hints) != 0 { + return []string{"default", "plug:default"} + } + defer _snd_device_name_free_hint(hints) + + var devices []string + ptrSize := unsafe.Sizeof(uintptr(0)) + for i := uintptr(0); ; i++ { + hint := *(*unsafe.Pointer)(unsafe.Pointer(uintptr(hints) + i*ptrSize)) + if hint == nil { + break + } + + if hintString(hint, "IOID") == "Input" { + continue + } + + name := hintString(hint, "NAME") + switch name { + case "", "null", "default": + continue + } + devices = append(devices, name) + } + + return append([]string{"default", "plug:default"}, devices...) 
+} + +// hintString returns the value of the given hint key as a Go string, freeing the C string that +// libasound allocated for it. +func hintString(hint unsafe.Pointer, id string) string { + p := _snd_device_name_get_hint(hint, id) + if p == nil { + return "" + } + s := goString(p) + _free(p) + return s +} + +// goString copies a NUL-terminated C string into a Go string. +func goString(p unsafe.Pointer) string { + if p == nil { + return "" + } + var bs []byte + for i := uintptr(0); ; i++ { + b := *(*byte)(unsafe.Pointer(uintptr(p) + i)) + if b == 0 { + break + } + bs = append(bs, b) + } + return string(bs) +} diff --git a/driver_pulseaudio_unix.go b/driver_pulseaudio_unix.go new file mode 100644 index 0000000..20b1a71 --- /dev/null +++ b/driver_pulseaudio_unix.go @@ -0,0 +1,152 @@ +// Copyright 2026 The Oto Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +//go:build !android && !darwin && !js && !windows && !nintendosdk && !playstation5 + +package oto + +import ( + "fmt" + "os" + "path/filepath" + "sync" + + "github.com/jfreymuth/pulse" + + "github.com/ebitengine/oto/v3/internal/mux" +) + +type pulseContext struct { + client *pulse.Client + stream *pulse.PlaybackStream + + suspended bool + cond *sync.Cond + + mux *mux.Mux + err atomicError +} + +func newPulseContext(sampleRate int, channelCount int, mux *mux.Mux, bufferSizeInBytes int, applicationName string) (*pulseContext, error) { + c := &pulseContext{ + cond: sync.NewCond(&sync.Mutex{}), + mux: mux, + } + + if applicationName == "" { + if name, _ := os.Executable(); name != "" { + applicationName = filepath.Base(name) + } else { + applicationName = "Oto" + } + } + + client, err := pulse.NewClient(pulse.ClientApplicationName(applicationName)) + if err != nil { + return nil, fmt.Errorf("oto: PulseAudio client initialization failed: %w", err) + } + c.client = client + + options := []pulse.PlaybackOption{ + pulse.PlaybackMediaName(applicationName), + } + switch channelCount { + case 1: + options = append(options, pulse.PlaybackMono) + case 2: + options = append(options, pulse.PlaybackStereo) + default: + c.client.Close() + return nil, fmt.Errorf("oto: PulseAudio backend supports only mono or stereo output: %d", channelCount) + } + options = append(options, pulse.PlaybackSampleRate(sampleRate)) + { + latency := float64(bufferSizeInBytes) / float64(sampleRate*channelCount*4) + if latency <= 0 { + // If no buffer size is specified, default to a 100ms latency. + // Without this, PulseAudio uses its own large default buffer (~2s), + // which causes a noticeable delay before audio starts playing. + latency = 0.1 + } + options = append(options, pulse.PlaybackLatency(latency)) + } + + stream, err := c.client.NewPlayback(pulse.Float32Reader(c.read), options...) 
+ if err != nil { + c.client.Close() + return nil, fmt.Errorf("oto: PulseAudio playback initialization failed: %w", err) + } + c.stream = stream + c.stream.Start() + + return c, nil +} + +func (c *pulseContext) read(buf []float32) (int, error) { + c.cond.L.Lock() + defer c.cond.L.Unlock() + + for c.suspended && c.err.Load() == nil { + c.cond.Wait() + } + if err := c.err.Load(); err != nil { + return 0, err + } + + c.mux.ReadFloat32s(buf) + return len(buf), nil +} + +func (c *pulseContext) Suspend() error { + c.cond.L.Lock() + defer c.cond.L.Unlock() + + if err := c.err.Load(); err != nil { + return err + } + if err := c.stream.Error(); err != nil { + return fmt.Errorf("oto: PulseAudio error: %w", err) + } + + c.suspended = true + c.stream.Pause() + return nil +} + +func (c *pulseContext) Resume() error { + c.cond.L.Lock() + defer c.cond.L.Unlock() + + if err := c.err.Load(); err != nil { + return err + } + if err := c.stream.Error(); err != nil { + return fmt.Errorf("oto: PulseAudio error: %w", err) + } + + c.suspended = false + c.stream.Resume() + c.cond.Signal() + return nil +} + +func (c *pulseContext) Err() error { + if err := c.err.Load(); err != nil { + return err + } + if err := c.stream.Error(); err != nil { + return fmt.Errorf("oto: PulseAudio error: %w", err) + } + return nil +} diff --git a/driver_unix.go b/driver_unix.go index 4dfa253..6e27a25 100644 --- a/driver_unix.go +++ b/driver_unix.go @@ -18,129 +18,77 @@ package oto import ( "fmt" - "os" - "path/filepath" - "sync" - - "github.com/jfreymuth/pulse" "github.com/ebitengine/oto/v3/internal/mux" ) -type context struct { - client *pulse.Client - stream *pulse.PlaybackStream +// unixBackend is the part of a context that talks to the actual audio device. +type unixBackend interface { + Suspend() error + Resume() error + Err() error +} - suspended bool - cond *sync.Cond +// newALSAContext creates the ALSA fallback backend. 
driver_alsa_unix.go's init sets it on the +// platforms where that file is built; it is nil elsewhere (e.g. OpenBSD), in which case only the +// PulseAudio backend is attempted. +var newALSAContext func(sampleRate, channelCount int, mux *mux.Mux, bufferSizeInBytes int) (unixBackend, error) + +type context struct { + mux *mux.Mux + backend unixBackend - mux *mux.Mux - err atomicError + ready chan struct{} + err atomicError } -func newContext(sampleRate int, channelCount int, format mux.Format, bufferSizeInBytes int, applicationName string) (client *context, ready chan struct{}, err error) { - client = &context{ - cond: sync.NewCond(&sync.Mutex{}), - mux: mux.New(sampleRate, channelCount, format), +func newContext(sampleRate int, channelCount int, format mux.Format, bufferSizeInBytes int, applicationName string) (*context, chan struct{}, error) { + ctx := &context{ + mux: mux.New(sampleRate, channelCount, format), + ready: make(chan struct{}), } - ready = make(chan struct{}) - close(ready) - defer func() { - if client != nil && client.client != nil && err != nil { - client.client.Close() - } - }() - if applicationName == "" { - if name, _ := os.Executable(); name != "" { - applicationName = filepath.Base(name) - } else { - applicationName = "Oto" - } - } + // Initializing a driver might take some time, so do it asynchronously. + // PulseAudio is the default; if no server is reachable, fall back to ALSA. 
+ go func() { + defer close(ctx.ready) - client.client, err = pulse.NewClient(pulse.ClientApplicationName(applicationName)) - if err != nil { - return nil, ready, fmt.Errorf("oto: PulseAudio client initialization failed: %w", err) - } - - options := []pulse.PlaybackOption{ - pulse.PlaybackMediaName(applicationName), - } - switch channelCount { - case 1: - options = append(options, pulse.PlaybackMono) - case 2: - options = append(options, pulse.PlaybackStereo) - default: - return nil, ready, fmt.Errorf("oto: PulseAudio backend supports only mono or stereo output: %d", channelCount) - } - options = append(options, pulse.PlaybackSampleRate(sampleRate)) - { - latency := float64(bufferSizeInBytes) / float64(sampleRate*channelCount*4) - if latency <= 0 { - // If no buffer size is specified, default to a 100ms latency. - // Without this, PulseAudio uses its own large default buffer (~2s), - // which causes a noticeable delay before audio starts playing. - latency = 0.1 + pc, err0 := newPulseContext(sampleRate, channelCount, ctx.mux, bufferSizeInBytes, applicationName) + if err0 == nil { + ctx.backend = pc + return } - options = append(options, pulse.PlaybackLatency(latency)) - } - client.stream, err = client.client.NewPlayback(pulse.Float32Reader(client.read), options...) 
- if err != nil { - return nil, ready, fmt.Errorf("oto: PulseAudio playback initialization failed: %w", err) - } - client.stream.Start() - - return client, ready, nil -} + if newALSAContext == nil { + ctx.err.TryStore(err0) + return + } -func (c *context) read(buf []float32) (int, error) { - c.cond.L.Lock() - defer c.cond.L.Unlock() + ac, err1 := newALSAContext(sampleRate, channelCount, ctx.mux, bufferSizeInBytes) + if err1 == nil { + ctx.backend = ac + return + } - for c.suspended && c.err.Load() == nil { - c.cond.Wait() - } - if err := c.err.Load(); err != nil { - return 0, err - } + ctx.err.TryStore(fmt.Errorf("oto: initialization failed: PulseAudio: %w; ALSA: %w", err0, err1)) + }() - c.mux.ReadFloat32s(buf) - return len(buf), nil + return ctx, ctx.ready, nil } func (c *context) Suspend() error { - c.cond.L.Lock() - defer c.cond.L.Unlock() - - if err := c.err.Load(); err != nil { - return err + <-c.ready + if c.backend != nil { + return c.backend.Suspend() } - if err := c.stream.Error(); err != nil { - return fmt.Errorf("oto: PulseAudio error: %w", err) - } - - c.suspended = true - c.stream.Pause() return nil } func (c *context) Resume() error { - c.cond.L.Lock() - defer c.cond.L.Unlock() - - if err := c.err.Load(); err != nil { - return err + <-c.ready + if c.backend != nil { + return c.backend.Resume() } - if err := c.stream.Error(); err != nil { - return fmt.Errorf("oto: PulseAudio error: %w", err) - } - - c.suspended = false - c.stream.Resume() - c.cond.Signal() return nil } @@ -148,8 +96,15 @@ func (c *context) Err() error { if err := c.err.Load(); err != nil { return err } - if err := c.stream.Error(); err != nil { - return fmt.Errorf("oto: PulseAudio error: %w", err) + + select { + case <-c.ready: + default: + return nil + } + + if c.backend != nil { + return c.backend.Err() } return nil }