diff --git a/.github/workflows/plugins.yml b/.github/workflows/plugins.yml index ff07b04a..3c59a851 100644 --- a/.github/workflows/plugins.yml +++ b/.github/workflows/plugins.yml @@ -55,6 +55,10 @@ jobs: working-directory: plugins/native/nllb run: cargo fmt -- --check + - name: Check formatting - Moonshine + working-directory: plugins/native/moonshine + run: cargo fmt -- --check + # Lint plugins that can build without pre-installed native libraries lint-simple: name: Lint (Simple Plugins) @@ -163,6 +167,34 @@ jobs: working-directory: plugins/native/nllb run: cargo clippy -- -D warnings + # Lint Moonshine plugin (builds moonshine C++ core from source, needs ORT) + lint-moonshine: + name: Lint (Moonshine) + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v5 + + - name: Install system dependencies + run: | + sudo apt-get update + sudo apt-get install -y cmake pkg-config libclang-dev + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@master + with: + toolchain: "1.92.0" + components: clippy + + - uses: Swatinem/rust-cache@v2 + with: + workspaces: | + plugins/native/moonshine + cache-on-failure: true + + - name: Clippy - Moonshine + working-directory: plugins/native/moonshine + run: cargo clippy -- -D warnings + # Lint sherpa-onnx based plugins (Kokoro, Piper, Matcha, SenseVoice) lint-sherpa: name: Lint (Sherpa-ONNX Plugins) diff --git a/plugins/native/moonshine/Cargo.lock b/plugins/native/moonshine/Cargo.lock index 1664cfbd..df8e0b15 100644 --- a/plugins/native/moonshine/Cargo.lock +++ b/plugins/native/moonshine/Cargo.lock @@ -25,12 +25,28 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +[[package]] +name = "cc" +version = "1.2.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" +dependencies = [ + "find-msvc-tools", + "shlex", +] + 
[[package]] name = "dyn-clone" version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + [[package]] name = "futures-core" version = "0.3.32" @@ -59,6 +75,7 @@ checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" name = "moonshine-plugin-native" version = "0.1.0" dependencies = [ + "cc", "serde", "serde_json", "streamkit-plugin-sdk-native", @@ -194,6 +211,12 @@ dependencies = [ "zmij", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "smallvec" version = "1.15.1" diff --git a/plugins/native/moonshine/Cargo.toml b/plugins/native/moonshine/Cargo.toml index f3978a7d..69f163fb 100644 --- a/plugins/native/moonshine/Cargo.toml +++ b/plugins/native/moonshine/Cargo.toml @@ -18,6 +18,9 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" tracing = "0.1" +[build-dependencies] +cc = "1.2" + [lints.clippy] # Categories pedantic = { level = "warn", priority = -1 } diff --git a/plugins/native/moonshine/build.rs b/plugins/native/moonshine/build.rs index 28cc1b41..3306f1c9 100644 --- a/plugins/native/moonshine/build.rs +++ b/plugins/native/moonshine/build.rs @@ -2,19 +2,302 @@ // // SPDX-License-Identifier: MPL-2.0 -// Allow: println! in build.rs is the standard way to communicate with Cargo, not logging -#![allow(clippy::disallowed_macros)] +//! Build script for the Moonshine native plugin. +//! +//! Compiles the Moonshine C++ core from source as a static library, eliminating +//! the need for users to pre-install libmoonshine. At runtime, the plugin still +//! 
needs libonnxruntime.so (typically bundled alongside the plugin .so). +//! +//! Environment variables: +//! MOONSHINE_SRC_DIR - Path to a local moonshine source checkout (skips download) +//! ORT_LIB_DIR - Path to directory containing libonnxruntime.so (skips search) + +// Allow: println! in build.rs is the standard way to communicate with Cargo, not logging. +// Allow: expect/unwrap are standard in build scripts — panicking IS the error handling. +#![allow(clippy::disallowed_macros, clippy::expect_used, clippy::unwrap_used)] + +use std::env; +use std::fs; +use std::path::{Path, PathBuf}; +use std::process::Command; + +/// Moonshine C API version to build from source. +const MOONSHINE_VERSION: &str = "0.0.49"; + +/// ONNX Runtime version compatible with Moonshine v0.0.49. +const ORT_VERSION: &str = "1.23.2"; fn main() { - // Link against libmoonshine (the Moonshine C API library) - println!("cargo:rustc-link-lib=moonshine"); + println!("cargo:rerun-if-env-changed=MOONSHINE_SRC_DIR"); + println!("cargo:rerun-if-env-changed=ORT_LIB_DIR"); + println!("cargo:rerun-if-changed=build.rs"); + + let out_dir = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR not set")); + + // Step 1: Get moonshine source (download or use local checkout) + let moonshine_src = get_moonshine_source(&out_dir); + let core_dir = moonshine_src.join("core"); + + // Step 2: Find or download ONNX Runtime shared library + let ort_lib_dir = find_or_download_onnxruntime(&out_dir); + + // Step 3: Compile moonshine C++ core into a static archive + println!("cargo:warning=Compiling Moonshine C++ core (may take a few minutes)..."); + build_moonshine_static(&core_dir); + + // Step 4: Link against onnxruntime dynamically (for ORT symbols used by moonshine) + println!("cargo:rustc-link-search=native={}", ort_lib_dir.display()); + println!("cargo:rustc-link-lib=dylib=onnxruntime"); + + // $ORIGIN rpath so the plugin finds libonnxruntime.so next to itself at runtime +
println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN"); +} + +// --------------------------------------------------------------------------- +// Moonshine source acquisition +// --------------------------------------------------------------------------- + +/// Returns the path to the moonshine source root directory. +/// +/// If `MOONSHINE_SRC_DIR` is set, uses that path directly. Otherwise reuses a +/// previous extraction in `out_dir`, or downloads the source tarball from GitHub. +fn get_moonshine_source(out_dir: &Path) -> PathBuf { + if let Ok(src_dir) = env::var("MOONSHINE_SRC_DIR") { + let path = PathBuf::from(src_dir); + assert!( + path.join("core/moonshine-c-api.h").exists(), + "MOONSHINE_SRC_DIR does not contain core/moonshine-c-api.h" + ); + return path; + } + + let extract_dir = out_dir.join(format!("moonshine-{MOONSHINE_VERSION}")); + if extract_dir.join("core/moonshine-c-api.h").exists() { + return extract_dir; + } + + let tarball_url = format!( + "https://github.com/moonshine-ai/moonshine/archive/refs/tags/v{MOONSHINE_VERSION}.tar.gz" + ); + let tarball_path = out_dir.join(format!("moonshine-v{MOONSHINE_VERSION}.tar.gz")); + + println!( + "cargo:warning=Downloading Moonshine v{MOONSHINE_VERSION} source (first build only)..." + ); + run_command( + Command::new("curl").args(["--fail", "-L", "-o"]).arg(&tarball_path).arg(&tarball_url), + ); + + println!("cargo:warning=Extracting Moonshine source..."); + run_command(Command::new("tar").arg("xf").arg(&tarball_path).arg("-C").arg(out_dir)); + + assert!( + extract_dir.join("core/moonshine-c-api.h").exists(), + "Extracted moonshine source missing core/moonshine-c-api.h at {}", + extract_dir.display() + ); + + extract_dir +} + +// --------------------------------------------------------------------------- +// ONNX Runtime discovery / download +// --------------------------------------------------------------------------- + +/// Required ORT major version (hard-coded; keep in sync with `ORT_VERSION`).
+const ORT_MAJOR: u32 = 1; +const ORT_MINOR: u32 = 23; // Required ORT minor version (hard-coded; keep in sync with `ORT_VERSION`). + +/// Finds a compatible onnxruntime installation or downloads one. +/// +/// Search order: +/// 1. `ORT_LIB_DIR` environment variable (panics on a mismatched version; accepted as-is if the version is undetectable) +/// 2. `/usr/local/lib` (only if version matches) +/// 3. `/usr/lib/x86_64-linux-gnu` (only if version matches) +/// 4. `/usr/lib` (only if version matches) +/// 5. Download from GitHub releases into `OUT_DIR` +#[allow(clippy::similar_names)] // path vs patch are semantically distinct +fn find_or_download_onnxruntime(out_dir: &Path) -> PathBuf { + if let Ok(dir) = env::var("ORT_LIB_DIR") { + let path = PathBuf::from(&dir); + assert!(has_onnxruntime(&path), "ORT_LIB_DIR={dir} does not contain libonnxruntime.so*"); + match ort_version_from_dir(&path) { + Some((major, minor, _)) if major == ORT_MAJOR && minor == ORT_MINOR => { + println!("cargo:warning=Using ORT_LIB_DIR={dir} (v{major}.{minor})"); + return path; + }, + Some((major, minor, patch)) => { + panic!( + "ORT_LIB_DIR={dir} contains ORT {major}.{minor}.{patch} \ + but moonshine requires {ORT_MAJOR}.{ORT_MINOR}.x" + ); + }, + None => { + // Can't determine version — trust the user + println!("cargo:warning=Using ORT_LIB_DIR={dir} (version unknown)"); + return path; + }, + } + } + + let search_paths = ["/usr/local/lib", "/usr/lib/x86_64-linux-gnu", "/usr/lib"]; + for dir in &search_paths { + let path = PathBuf::from(dir); + if !has_onnxruntime(&path) { + continue; + } + match ort_version_from_dir(&path) { + Some((major, minor, _)) if major == ORT_MAJOR && minor == ORT_MINOR => { + println!("cargo:warning=Found compatible onnxruntime {major}.{minor} at {dir}"); + return path; + }, + Some((major, minor, _)) => { + println!( + "cargo:warning=Skipping onnxruntime {major}.{minor} at {dir} \ + (need {ORT_MAJOR}.{ORT_MINOR})" + ); + }, + None => { + println!( + "cargo:warning=Skipping onnxruntime at {dir} (could not determine version)" + ); + }, + } + } + + // No compatible version on system —
download it + download_onnxruntime(out_dir) +} + +/// Downloads the ONNX Runtime shared library from GitHub releases. +#[allow(clippy::similar_names)] // ort_extract_dir vs out_dir are semantically distinct +fn download_onnxruntime(out_dir: &Path) -> PathBuf { + let ort_dir_name = format!("onnxruntime-linux-x64-{ORT_VERSION}"); + let ort_extract_dir = out_dir.join(&ort_dir_name); + let lib_dir = ort_extract_dir.join("lib"); + + if has_onnxruntime(&lib_dir) { + println!("cargo:warning=Using cached onnxruntime at {}", lib_dir.display()); + return lib_dir; + } + + let tarball_url = format!( + "https://github.com/microsoft/onnxruntime/releases/download/v{ORT_VERSION}/{ort_dir_name}.tgz" + ); + let tarball_path = out_dir.join(format!("onnxruntime-{ORT_VERSION}.tgz")); + + println!("cargo:warning=Downloading ONNX Runtime v{ORT_VERSION} (first build only)..."); + run_command( + Command::new("curl").args(["--fail", "-L", "-o"]).arg(&tarball_path).arg(&tarball_url), + ); + + println!("cargo:warning=Extracting ONNX Runtime..."); + run_command(Command::new("tar").arg("xf").arg(&tarball_path).arg("-C").arg(out_dir)); + + assert!( + has_onnxruntime(&lib_dir), + "Downloaded onnxruntime missing libonnxruntime.so in {}", + lib_dir.display() + ); + + lib_dir +} + +/// Checks if a directory contains an onnxruntime shared library. +fn has_onnxruntime(dir: &Path) -> bool { + if !dir.is_dir() { + return false; + } + if let Ok(entries) = fs::read_dir(dir) { + for entry in entries.flatten() { + let name = entry.file_name(); + let name = name.to_string_lossy(); + if name == "libonnxruntime.so" || name.starts_with("libonnxruntime.so.") { + return true; + } + } + } + false +} + +/// Attempts to extract the ORT version from versioned symlinks in a directory. +/// +/// Looks for files named `libonnxruntime.so.X.Y.Z` and parses out the version. +/// Returns `None` if no versioned file is found. 
+fn ort_version_from_dir(dir: &Path) -> Option<(u32, u32, u32)> { + let entries = fs::read_dir(dir).ok()?; + for entry in entries.flatten() { + let name = entry.file_name(); + let name = name.to_string_lossy(); + // Match libonnxruntime.so.X.Y.Z (the most specific versioned name) + if let Some(ver_str) = name.strip_prefix("libonnxruntime.so.") { + let parts: Vec<&str> = ver_str.split('.').collect(); + if parts.len() == 3 { + if let (Ok(major), Ok(minor), Ok(patch)) = + (parts[0].parse::<u32>(), parts[1].parse::<u32>(), parts[2].parse::<u32>()) + { + return Some((major, minor, patch)); + } + } + } + } + None +} + +// --------------------------------------------------------------------------- +// C++ compilation +// --------------------------------------------------------------------------- + +/// Compiles the Moonshine C++ core into a static library using the `cc` crate. +fn build_moonshine_static(core_dir: &Path) { + cc::Build::new() + .cpp(true) + .flag("-std=c++20") + .pic(true) + .warnings(false) // suppress warnings from third-party code + .opt_level_str("2") + // ---- Main moonshine source files ---- + .file(core_dir.join("moonshine-c-api.cpp")) + .file(core_dir.join("cosine-distance.cpp")) + .file(core_dir.join("moonshine-model.cpp")) + .file(core_dir.join("moonshine-streaming-model.cpp")) + .file(core_dir.join("voice-activity-detector.cpp")) + .file(core_dir.join("silero-vad.cpp")) + .file(core_dir.join("resampler.cpp")) + .file(core_dir.join("transcriber.cpp")) + .file(core_dir.join("gemma-embedding-model.cpp")) + .file(core_dir.join("intent-recognizer.cpp")) + .file(core_dir.join("speaker-embedding-model.cpp")) + .file(core_dir.join("speaker-embedding-model-data.cpp")) + .file(core_dir.join("online-clusterer.cpp")) + // ---- ort-utils sub-library ---- + .file(core_dir.join("ort-utils/ort-utils.cpp")) + .file(core_dir.join("ort-utils/moonshine-ort-allocator.cpp")) + .file(core_dir.join("ort-utils/moonshine-tensor-view.cpp")) +
.file(core_dir.join("ort-utils/moonshine-tensor.cpp")) + // ---- bin-tokenizer sub-library ---- + .file(core_dir.join("bin-tokenizer/bin-tokenizer.cpp")) + // ---- moonshine-utils sub-library ---- + .file(core_dir.join("moonshine-utils/string-utils.cpp")) + .file(core_dir.join("moonshine-utils/debug-utils.cpp")) + // ---- Include directories ---- + .include(core_dir) + .include(core_dir.join("moonshine-utils")) + .include(core_dir.join("ort-utils")) + .include(core_dir.join("bin-tokenizer")) + .include(core_dir.join("third-party/onnxruntime/include")) + .include(core_dir.join("third-party/utf-8")) + .compile("moonshine_core"); +} - // Common library search paths - println!("cargo:rustc-link-search=native=/usr/local/lib"); - println!("cargo:rustc-link-search=native=/usr/lib"); - println!("cargo:rustc-link-search=native=/usr/lib/x86_64-linux-gnu"); - println!("cargo:rustc-link-search=native=/opt/homebrew/lib"); +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- - // Add rpath so the plugin can find libmoonshine at runtime - println!("cargo:rustc-link-arg=-Wl,-rpath,/usr/local/lib"); +/// Runs a command, panicking with a helpful message on failure. 
+fn run_command(cmd: &mut Command) { + let status = cmd + .status() + .unwrap_or_else(|e| panic!("Failed to run {}: {e}", cmd.get_program().display())); + assert!(status.success(), "Command {} failed with {status}", cmd.get_program().display()); } diff --git a/samples/pipelines/oneshot/moonshine-stt.yml b/samples/pipelines/oneshot/moonshine-stt.yml index e537fbc7..d0bef33b 100644 --- a/samples/pipelines/oneshot/moonshine-stt.yml +++ b/samples/pipelines/oneshot/moonshine-stt.yml @@ -19,8 +19,8 @@ steps: - kind: plugin::native::moonshine params: - model_dir: models/moonshine-tiny-en - model_arch: tiny + model_dir: models/moonshine-base-en + model_arch: base - kind: core::json_serialize params: diff --git a/scripts/marketplace/verify_bundles.py b/scripts/marketplace/verify_bundles.py index bb263d58..32b3e234 100644 --- a/scripts/marketplace/verify_bundles.py +++ b/scripts/marketplace/verify_bundles.py @@ -112,6 +112,14 @@ def main() -> int: f"{plugin_id}: missing libonnxruntime.so in bundle" ) + # Moonshine plugin bundles onnxruntime (moonshine C++ is linked statically) + if plugin_id == "moonshine" and "libonnxruntime.so" in needed: + onnx_lib = tmp_path / "libonnxruntime.so" + if not onnx_lib.exists(): + errors.append( + f"{plugin_id}: missing libonnxruntime.so in bundle" + ) + if errors: print("Portability verification failed:") for err in errors: diff --git a/ui/src/views/ConvertView.tsx b/ui/src/views/ConvertView.tsx index 88009cd5..c359274b 100644 --- a/ui/src/views/ConvertView.tsx +++ b/ui/src/views/ConvertView.tsx @@ -464,6 +464,7 @@ const checkIfTranscriptionPipeline = (yaml: string): boolean => { return ( lowerYaml.includes('plugin::native::whisper') || lowerYaml.includes('plugin::native::sensevoice') || + lowerYaml.includes('plugin::native::moonshine') || lowerYaml.includes('transcription') ); };