From b9eb09938558869d09a01e5b0e30937e8746ec97 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 23 Jun 2026 20:21:45 +0000 Subject: [PATCH] fix(huggingface-transformers): resolve server device to undefined instead of "auto" Passing device "auto" to onnxruntime-node makes it probe the CUDA execution provider, which throws "OrtSessionOptionsAppendExecutionProvider_Cuda: Failed to load shared library" on hosts without CUDA (CPU-only CI runners), failing every HFT integration test as a PermanentJobError. Resolve "auto" (and the browser-only "wasm"/"webgpu") to undefined on the server so onnxruntime-node defaults to the CPU execution provider. Concrete server devices (cpu/gpu/metal) still pass through. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01N2vdcSPQJz63DPoiD9JSY2 --- .../test/src/test/ai-provider-hft/HFT_Device.test.ts | 4 ++-- .../src/ai/common/HFT_Device.ts | 11 +++++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/packages/test/src/test/ai-provider-hft/HFT_Device.test.ts b/packages/test/src/test/ai-provider-hft/HFT_Device.test.ts index ccbbf16f8..0a9c209a5 100644 --- a/packages/test/src/test/ai-provider-hft/HFT_Device.test.ts +++ b/packages/test/src/test/ai-provider-hft/HFT_Device.test.ts @@ -18,8 +18,8 @@ describe("resolveHftPipelineDevice", () => { } }); - it("passes auto through on the server", () => { - expect(resolveHftPipelineDevice("auto")).toBe("auto"); + it("resolves auto to undefined on the server", () => { + expect(resolveHftPipelineDevice("auto")).toBeUndefined(); expect(resolveHftPipelineDevice("cpu")).toBe("cpu"); expect(resolveHftPipelineDevice("gpu")).toBe("gpu"); expect(resolveHftPipelineDevice(undefined)).toBeUndefined(); diff --git a/providers/huggingface-transformers/src/ai/common/HFT_Device.ts b/providers/huggingface-transformers/src/ai/common/HFT_Device.ts index 31f1f87f3..06994a181 100644 --- a/providers/huggingface-transformers/src/ai/common/HFT_Device.ts +++ 
b/providers/huggingface-transformers/src/ai/common/HFT_Device.ts @@ -18,7 +18,7 @@ export function isHftBrowserEnv(): boolean { * Browser builds only accept `wasm` or `webgpu`; `auto` is our cross-platform * stored default, and should prefer WebGPU in the browser. */ -export function resolveHftPipelineDevice(raw: string | undefined): string { +export function resolveHftPipelineDevice(raw: string | undefined): string | undefined { if (isHftBrowserEnv()) { if (raw === "gpu") return "webgpu"; if (raw === "cpu") return "wasm"; @@ -27,7 +27,10 @@ export function resolveHftPipelineDevice(raw: string | undefined): string { return raw; } - // On the server, let transformers.js/onnxruntime-node choose the best EP. - if (raw === "wasm" || raw === "webgpu") return "auto"; - return raw || "auto"; + // On the server, resolve "auto" and unset devices to undefined so + // onnxruntime-node defaults to the CPU execution provider instead of probing + // CUDA (which throws when the CUDA shared libraries are absent, e.g. on + // CPU-only CI runners). The browser-only "wasm"/"webgpu" are stripped as well. + if (!raw || raw === "auto" || raw === "wasm" || raw === "webgpu") return undefined; + return raw; }