From b9eb09938558869d09a01e5b0e30937e8746ec97 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Tue, 23 Jun 2026 20:21:45 +0000
Subject: [PATCH] fix(huggingface-transformers): resolve server device to
 undefined instead of "auto"

Passing device "auto" to onnxruntime-node makes it probe the CUDA execution
provider, which throws "OrtSessionOptionsAppendExecutionProvider_Cuda: Failed to
load shared library" on hosts without CUDA (CPU-only CI runners), failing every
HFT integration test as a PermanentJobError.

Resolve "auto" and unset devices (and the browser-only "wasm"/"webgpu") to
undefined on the server so onnxruntime-node defaults to the CPU execution
provider. Concrete server devices (cpu/gpu/metal) still pass through.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01N2vdcSPQJz63DPoiD9JSY2
---
 .../test/src/test/ai-provider-hft/HFT_Device.test.ts  |  4 ++--
 .../src/ai/common/HFT_Device.ts                       | 11 +++++++----
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/packages/test/src/test/ai-provider-hft/HFT_Device.test.ts b/packages/test/src/test/ai-provider-hft/HFT_Device.test.ts
index ccbbf16f8..0a9c209a5 100644
--- a/packages/test/src/test/ai-provider-hft/HFT_Device.test.ts
+++ b/packages/test/src/test/ai-provider-hft/HFT_Device.test.ts
@@ -18,8 +18,8 @@ describe("resolveHftPipelineDevice", () => {
     }
   });
 
-  it("passes auto through on the server", () => {
-    expect(resolveHftPipelineDevice("auto")).toBe("auto");
+  it("resolves auto to undefined on the server", () => {
+    expect(resolveHftPipelineDevice("auto")).toBeUndefined();
     expect(resolveHftPipelineDevice("cpu")).toBe("cpu");
     expect(resolveHftPipelineDevice("gpu")).toBe("gpu");
     expect(resolveHftPipelineDevice(undefined)).toBeUndefined();
diff --git a/providers/huggingface-transformers/src/ai/common/HFT_Device.ts b/providers/huggingface-transformers/src/ai/common/HFT_Device.ts
index 31f1f87f3..06994a181 100644
--- a/providers/huggingface-transformers/src/ai/common/HFT_Device.ts
+++ b/providers/huggingface-transformers/src/ai/common/HFT_Device.ts
@@ -18,7 +18,7 @@ export function isHftBrowserEnv(): boolean {
  * Browser builds only accept `wasm` or `webgpu`; `auto` is our cross-platform
  * stored default, and should prefer WebGPU in the browser.
  */
-export function resolveHftPipelineDevice(raw: string | undefined): string {
+export function resolveHftPipelineDevice(raw: string | undefined): string | undefined {
   if (isHftBrowserEnv()) {
     if (raw === "gpu") return "webgpu";
     if (raw === "cpu") return "wasm";
@@ -27,7 +27,10 @@ export function resolveHftPipelineDevice(raw: string | undefined): string {
     return raw;
   }
 
-  // On the server, let transformers.js/onnxruntime-node choose the best EP.
-  if (raw === "wasm" || raw === "webgpu") return "auto";
-  return raw || "auto";
+  // On the server, map an unset or "auto" device to undefined so that
+  // onnxruntime-node defaults to the CPU execution provider instead of probing
+  // CUDA (which throws when the CUDA shared libraries are absent, e.g. on
+  // CPU-only CI runners). Browser-only "wasm"/"webgpu" map to undefined too.
+  if (!raw || raw === "auto" || raw === "wasm" || raw === "webgpu") return undefined;
+  return raw;
 }